ArmNN 21.08
NeonTensorHandleTests.cpp File Reference
#include <Graph.hpp>
#include <Network.hpp>
#include <neon/NeonTensorHandle.hpp>
#include <neon/NeonTensorHandleFactory.hpp>
#include <armnn/utility/NumericCast.hpp>
#include <armnn/utility/PolymorphicDowncast.hpp>
#include <test/GraphUtils.hpp>
#include <arm_compute/runtime/Allocator.h>
#include <backendsCommon/test/CommonTestUtils.hpp>
#include <doctest/doctest.h>
#include <armnn/utility/Assert.hpp>

Go to the source code of this file.

Functions

 TEST_SUITE ("NeonTensorHandleTests")
 

Function Documentation

◆ TEST_SUITE()

TEST_SUITE ( "NeonTensorHandleTests"  )

Definition at line 21 of file NeonTensorHandleTests.cpp.

References armnn::Abs, ARMNN_ASSERT, armnn::Average, armnn::Concat, Connect(), IOutputSlot::Connect(), armnn::CpuAcc, INetwork::Create(), IRuntime::Create(), armnn::CreateDescriptorForConcatenation(), NeonTensorHandleFactory::CreateTensorHandle(), armnn::ElementwiseUnary, armnn::Float32, OutputHandler::GetData(), armnn::GetGraphForTesting(), IConnectableLayer::GetInputSlot(), TensorShape::GetNumDimensions(), OutputSlot::GetOutputHandler(), IConnectableLayer::GetOutputSlot(), TensorInfo::GetShape(), armnn::IgnoreValue, armnn::info, SoftmaxDescriptor::m_Beta, Pooling2dDescriptor::m_PadBottom, Pooling2dDescriptor::m_PaddingMethod, Pooling2dDescriptor::m_PadLeft, Pooling2dDescriptor::m_PadRight, Pooling2dDescriptor::m_PadTop, Pooling2dDescriptor::m_PoolHeight, Pooling2dDescriptor::m_PoolType, Pooling2dDescriptor::m_PoolWidth, Pooling2dDescriptor::m_StrideX, Pooling2dDescriptor::m_StrideY, armnn::Malloc, BaseMemoryManager::Offset, armnn::Optimize(), armnn::PaddingRequired, armnn::Pooling2d, IOutputSlot::SetTensorInfo(), and ViewsDescriptor::SetViewSize().
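As a quick orientation before the full listing, the following condensed sketch shows the padding-capability query that the first two test cases exercise: build a small network, connect the layers, and ask the NeonTensorHandleFactory whether the tensor handle between two connected layers requires padding. The helper name PoolingNeedsPadding is purely illustrative, and the sketch assumes the includes listed at the top of this page; it is not part of NeonTensorHandleTests.cpp.

// Illustrative sketch only; assumes this file's includes.
using namespace armnn;

bool PoolingNeedsPadding()
{
    // A handle factory backed by the Neon memory manager, as in the test cases below.
    std::shared_ptr<NeonMemoryManager> memoryManager = std::make_shared<NeonMemoryManager>();
    NeonTensorHandleFactory handleFactory(memoryManager);

    // Minimal input -> pooling -> output network.
    INetworkPtr network(INetwork::Create());
    IConnectableLayer* input   = network->AddInputLayer(0);
    IConnectableLayer* pooling = network->AddPooling2dLayer(Pooling2dDescriptor());
    IConnectableLayer* output  = network->AddOutputLayer(1);
    input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
    pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    // Inputs, outputs and Softmax report no capabilities here; a Pooling2d producer
    // reports a single PaddingRequired capability with m_Value set to true.
    std::vector<Capability> capabilities =
        handleFactory.GetCapabilities(pooling, output, CapabilityClass::PaddingRequired);
    return capabilities.size() == 1 && capabilities[0].m_Value;
}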

22 {
23 using namespace armnn;
24 
25 TEST_CASE("NeonTensorHandleGetCapabilitiesNoPadding")
26 {
27  std::shared_ptr<NeonMemoryManager> memoryManager = std::make_shared<NeonMemoryManager>();
28  NeonTensorHandleFactory handleFactory(memoryManager);
29 
30  INetworkPtr network(INetwork::Create());
31 
32  // Add the layers
33  IConnectableLayer* input = network->AddInputLayer(0);
34  SoftmaxDescriptor descriptor;
35  descriptor.m_Beta = 1.0f;
36  IConnectableLayer* softmax = network->AddSoftmaxLayer(descriptor);
37  IConnectableLayer* output = network->AddOutputLayer(2);
38 
39  // Establish connections
40  input->GetOutputSlot(0).Connect(softmax->GetInputSlot(0));
41  softmax->GetOutputSlot(0).Connect(output->GetInputSlot(0));
42 
43  // No padding required for input
44  std::vector<Capability> capabilities = handleFactory.GetCapabilities(input,
45  softmax,
46  CapabilityClass::PaddingRequired);
47  CHECK(capabilities.empty());
48 
49  // No padding required for Softmax
50  capabilities = handleFactory.GetCapabilities(softmax, output, CapabilityClass::PaddingRequired);
51  CHECK(capabilities.empty());
52 
53  // No padding required for output
54  capabilities = handleFactory.GetCapabilities(output, nullptr, CapabilityClass::PaddingRequired);
55  CHECK(capabilities.empty());
56 }
57 
58 TEST_CASE("NeonTensorHandleGetCapabilitiesPadding")
59 {
60  std::shared_ptr<NeonMemoryManager> memoryManager = std::make_shared<NeonMemoryManager>();
61  NeonTensorHandleFactory handleFactory(memoryManager);
62 
63  INetworkPtr network(INetwork::Create());
64 
65  // Add the layers
66  IConnectableLayer* input = network->AddInputLayer(0);
67  Pooling2dDescriptor descriptor;
68  IConnectableLayer* pooling = network->AddPooling2dLayer(descriptor);
69  IConnectableLayer* output = network->AddOutputLayer(2);
70 
71  // Establish connections
72  input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
73  pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
74 
75  // No padding required for input
76  std::vector<Capability> capabilities = handleFactory.GetCapabilities(input,
77  pooling,
78  CapabilityClass::PaddingRequired);
79  CHECK(capabilities.empty());
80 
81  // No padding required for output
82  capabilities = handleFactory.GetCapabilities(output, nullptr, CapabilityClass::PaddingRequired);
83  CHECK(capabilities.empty());
84 
85  // Padding required for Pooling2d
86  capabilities = handleFactory.GetCapabilities(pooling, output, CapabilityClass::PaddingRequired);
87  CHECK(capabilities.size() == 1);
88  CHECK((capabilities[0].m_CapabilityClass == CapabilityClass::PaddingRequired));
89  CHECK(capabilities[0].m_Value);
90 }
91 
92 TEST_CASE("ConcatOnXorYSubTensorsNoPaddingRequiredTest")
93 {
94  armnn::INetworkPtr net(armnn::INetwork::Create());
95
96  // Set up tensor infos
97  const armnn::TensorInfo inputInfo = armnn::TensorInfo({2, 3, 2, 2}, armnn::DataType::Float32);
98  const armnn::TensorInfo intermediateInfo = armnn::TensorInfo({2, 3, 2, 2}, armnn::DataType::Float32);
99  const armnn::TensorInfo outputInfo = armnn::TensorInfo({2, 3, 4, 2}, armnn::DataType::Float32);
100
101  armnn::ElementwiseUnaryDescriptor descriptor(armnn::UnaryOperation::Abs);
102
103  // Create the network
104  armnn::IConnectableLayer* const input0Layer = net->AddInputLayer(0, "input_0");
105  input0Layer->GetOutputSlot(0).SetTensorInfo(inputInfo);
106  armnn::IConnectableLayer* elementwiseUnaryLayer0 = net->AddElementwiseUnaryLayer(descriptor, "elementwiseUnary_0");
107  elementwiseUnaryLayer0->GetOutputSlot(0).SetTensorInfo(intermediateInfo);
108  input0Layer->GetOutputSlot(0).Connect(elementwiseUnaryLayer0->GetInputSlot(0));
109 
110  armnn::IConnectableLayer* const input1Layer = net->AddInputLayer(1, "input_1");
111  input1Layer->GetOutputSlot(0).SetTensorInfo(inputInfo);
112  armnn::IConnectableLayer* elementwiseUnaryLayer1 = net->AddElementwiseUnaryLayer(descriptor, "elementwiseUnary_1");
113  elementwiseUnaryLayer1->GetOutputSlot(0).SetTensorInfo(intermediateInfo);
114  input1Layer->GetOutputSlot(0).Connect(elementwiseUnaryLayer1->GetInputSlot(0));
115 
116  std::array<armnn::TensorShape, 2> concatInputShapes = { intermediateInfo.GetShape(), intermediateInfo.GetShape() };
117  armnn::IConnectableLayer* const concatLayer = net->AddConcatLayer(armnn::CreateDescriptorForConcatenation(
118  concatInputShapes.begin(), concatInputShapes.end(), 2), "concatenation");
119  concatLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
120  elementwiseUnaryLayer0->GetOutputSlot(0).Connect(concatLayer->GetInputSlot(0));
121  elementwiseUnaryLayer1->GetOutputSlot(0).Connect(concatLayer->GetInputSlot(1));
122 
123  armnn::IConnectableLayer* const outputLayer = net->AddOutputLayer(0, "output");
124  concatLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
125
126  armnn::IRuntime::CreationOptions options;
127  armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
128
129  std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc };
130  armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());
131 
132  const armnn::Graph& theGraph = GetGraphForTesting(optimizedNet.get());
133 
134  // Load graph into runtime
135  armnn::NetworkId networkIdentifier;
136  runtime->LoadNetwork(networkIdentifier, std::move(optimizedNet));
137 
138  // Now check how many sub-tensors the concat layer is using.
139  auto TraceSubTensorHandleAncestry = [](armnn::ITensorHandle* const subTensorHandle)
140  {
141  if (subTensorHandle && subTensorHandle->GetParent())
142  {
143  return true;
144  }
145  return false;
146  };
147 
148  for (auto&& layer : theGraph)
149  {
150  if(layer->GetType() == armnn::LayerType::Concat)
151  {
152  unsigned int numberOfSubTensors = 0;
153  for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i)
154  {
155  const armnn::OutputSlot* slot = layer->GetInputSlot(i).GetConnectedOutputSlot();
156  if (TraceSubTensorHandleAncestry(slot->GetOutputHandler().GetData()))
157  {
158  ++numberOfSubTensors;
159  }
160  }
161  // sub-tensors should be supported in this configuration
162  ARMNN_ASSERT(numberOfSubTensors > 0);
163  }
164  }
165 }
166 
167 TEST_CASE("ConcatonXorYPaddingRequiredTest")
168 {
169  armnn::INetworkPtr net(armnn::INetwork::Create());
170
171  // Set up tensor infos
172  const armnn::TensorInfo inputInfo = armnn::TensorInfo({2, 3, 2, 2}, armnn::DataType::Float32);
173  const armnn::TensorInfo intermediateInfo = armnn::TensorInfo({2, 3, 2, 2}, armnn::DataType::Float32);
174  const armnn::TensorInfo outputInfo = armnn::TensorInfo({2, 3, 4, 2}, armnn::DataType::Float32);
175 
176  armnn::Pooling2dDescriptor descriptor;
177  descriptor.m_PoolType = armnn::PoolingAlgorithm::Average;
178  descriptor.m_PoolWidth = descriptor.m_PoolHeight = 3;
179  descriptor.m_StrideX = descriptor.m_StrideY = 1;
180  descriptor.m_PadLeft = 1;
181  descriptor.m_PadRight = 1;
182  descriptor.m_PadTop = 1;
183  descriptor.m_PadBottom = 1;
184  descriptor.m_PaddingMethod = armnn::PaddingMethod::IgnoreValue;
185 
186  // Create the network
187  armnn::IConnectableLayer* const input0Layer = net->AddInputLayer(0, "input_0");
188  input0Layer->GetOutputSlot(0).SetTensorInfo(inputInfo);
189  armnn::IConnectableLayer* pooling2dLayer0 = net->AddPooling2dLayer(descriptor, "pooling2d_0");
190  pooling2dLayer0->GetOutputSlot(0).SetTensorInfo(intermediateInfo);
191  input0Layer->GetOutputSlot(0).Connect(pooling2dLayer0->GetInputSlot(0));
192 
193  armnn::IConnectableLayer* const input1Layer = net->AddInputLayer(1, "input_1");
194  input1Layer->GetOutputSlot(0).SetTensorInfo(inputInfo);
195  armnn::IConnectableLayer* pooling2dLayer1 = net->AddPooling2dLayer(descriptor, "pooling2d_1");
196  pooling2dLayer1->GetOutputSlot(0).SetTensorInfo(intermediateInfo);
197  input1Layer->GetOutputSlot(0).Connect(pooling2dLayer1->GetInputSlot(0));
198 
199  std::array<armnn::TensorShape, 2> concatInputShapes = { intermediateInfo.GetShape(), intermediateInfo.GetShape() };
200  armnn::IConnectableLayer* const concatLayer = net->AddConcatLayer(armnn::CreateDescriptorForConcatenation(
201  concatInputShapes.begin(), concatInputShapes.end(), 2), "concatenation");
202  concatLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
203  pooling2dLayer0->GetOutputSlot(0).Connect(concatLayer->GetInputSlot(0));
204  pooling2dLayer1->GetOutputSlot(0).Connect(concatLayer->GetInputSlot(1));
205 
206  armnn::IConnectableLayer* const outputLayer = net->AddOutputLayer(0, "output");
207  concatLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
208
209  armnn::IRuntime::CreationOptions options;
210  armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
211
212  std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc };
213  armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());
214 
215  const armnn::Graph& theGraph = GetGraphForTesting(optimizedNet.get());
216 
217  // Load graph into runtime
218  armnn::NetworkId networkIdentifier;
219  runtime->LoadNetwork(networkIdentifier, std::move(optimizedNet));
220 
221  // Now check how many sub-tensors the concat layer is using.
222  auto TraceSubTensorHandleAncestry = [](armnn::ITensorHandle* const subTensorHandle)
223  {
224  if (subTensorHandle && subTensorHandle->GetParent())
225  {
226  return true;
227  }
228  return false;
229  };
230 
231  unsigned int numberOfSubTensors = 0;
232  for (auto&& layer : theGraph)
233  {
234  if(layer->GetType() == armnn::LayerType::Concat)
235  {
236  for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i)
237  {
238  const armnn::OutputSlot* slot = layer->GetInputSlot(i).GetConnectedOutputSlot();
239  if (TraceSubTensorHandleAncestry(slot->GetOutputHandler().GetData()))
240  {
241  ++numberOfSubTensors;
242  }
243  }
244  }
245  }
246  // sub-tensors should not be supported in this configuration
247  ARMNN_ASSERT(numberOfSubTensors == 0);
248 }
249 
250 TEST_CASE("SplitteronXorYNoPaddingRequiredTest")
251 {
252  using namespace armnn;
253 
254  unsigned int splitAxis = 2;
255  unsigned int numSplit = 2;
256 
257  const TensorShape& inputShape = { 2, 3, 4, 2 };
258  const armnn::TensorInfo intermediateInfo = armnn::TensorInfo({ 2, 3, 2, 2 }, armnn::DataType::Float32);
259  const std::vector<TensorShape> outputShapes{{ 2, 3, 2, 2 },
260  { 2, 3, 2, 2 }};
261  const float qScale = 1.0f;
262  const int32_t qOffset = 0;
263 
264  // Creates structures for input & output.
265  std::vector<float> inputData{
266  1, 2,
267  3, 4,
268  5, 6,
269  7, 8,
270  9, 10,
271  11, 12,
272  13, 14,
273  15, 16,
274  17, 18,
275  19, 20,
276  21, 22,
277  23, 24,
278  25, 26,
279  27, 28,
280  29, 30,
281  31, 32,
282  33, 34,
283  35, 36,
284  37, 38,
285  39, 40,
286  41, 42,
287  43, 44,
288  45, 46,
289  47, 48
290  };
291 
292  std::vector<float> expectedOutput0{
293  1, 2,
294  3, 4,
295  9, 10,
296  11, 12,
297  17, 18,
298  19, 20,
299  25, 26,
300  27, 28,
301  33, 34,
302  35, 36,
303  41, 42,
304  43, 44
305  };
306 
307  std::vector<float> expectedOutput1{
308  5, 6,
309  7, 8,
310  13, 14,
311  15, 16,
312  21, 22,
313  23, 24,
314  29, 30,
315  31, 32,
316  37, 38,
317  39, 40,
318  45, 46,
319  47, 48
320  };
321 
322  // Builds up the structure of the network.
323  INetworkPtr net(INetwork::Create());
324
325  TensorInfo inputTensorInfo(inputShape, armnn::DataType::Float32, qScale, qOffset);
326
327  ElementwiseUnaryDescriptor descriptor(UnaryOperation::Abs);
328
329  // Splitter
330  std::vector<unsigned int> splitterDimSizes(inputShape.GetNumDimensions());
331 
332  // Add current input shape to splitterDimSizes
333  for (unsigned int i = 0; i < inputShape.GetNumDimensions(); ++i)
334  {
335  splitterDimSizes[i] = inputTensorInfo.GetShape()[i];
336  }
337 
338  if (splitterDimSizes[splitAxis] % numSplit != 0)
339  {
340  throw ParseException("Number of splits must evenly divide the dimension");
341  }
342 
343  splitterDimSizes[splitAxis] /= numSplit;
344 
345  SplitterDescriptor splitDesc(numSplit, inputShape.GetNumDimensions());
346 
347  for (unsigned int g = 0; g < numSplit; ++g)
348  {
349  // Set the size of the views.
350  for (unsigned int dimIdx = 0; dimIdx < splitterDimSizes.size(); ++dimIdx)
351  {
352  splitDesc.SetViewSize(g, dimIdx, splitterDimSizes[dimIdx]);
353  }
354  splitDesc.SetViewOriginCoord(g, splitAxis, splitterDimSizes[splitAxis] * g);
355  }
356  IConnectableLayer* input = net->AddInputLayer(0, "input");
357  IConnectableLayer* elementWiseUnary0 = net->AddElementwiseUnaryLayer(descriptor, "elementwiseunary_0");
358  IConnectableLayer* elementWiseUnary1 = net->AddElementwiseUnaryLayer(descriptor, "elementwiseunary_0");
359  IConnectableLayer* splitter = net->AddSplitterLayer(splitDesc, "splitter");
360 
361  // Connections
362  Connect(input, splitter, inputTensorInfo, 0, 0);
363  Connect(splitter, elementWiseUnary0, intermediateInfo, 0, 0);
364  Connect(splitter, elementWiseUnary1, intermediateInfo, 1, 0);
365 
366  std::vector<IConnectableLayer*> pooling2dLayers{elementWiseUnary0, elementWiseUnary1};
367 
368  for (unsigned int i = 0; i < outputShapes.size(); ++i)
369  {
370  TensorInfo outputTensorInfo(outputShapes[i], armnn::DataType::Float32, qScale, qOffset);
371  IConnectableLayer* output = net->AddOutputLayer(armnn::numeric_cast<LayerBindingId>(i));
372  Connect(pooling2dLayers[i], output, outputTensorInfo, 0, 0);
373  }
374 
375  std::map<int, std::vector<float>> inputTensorData = {{ 0,inputData }};
376  std::map<int, std::vector<float>> expectedOutputData = {{ 0, expectedOutput0 }, { 1, expectedOutput1 }};
377
378  armnn::IRuntime::CreationOptions options;
379  armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
380
381  std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc };
382  armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());
383 
384  const armnn::Graph& theGraph = GetGraphForTesting(optimizedNet.get());
385 
386  // Load graph into runtime
387  armnn::NetworkId networkIdentifier;
388  runtime->LoadNetwork(networkIdentifier, std::move(optimizedNet));
389 
390  // Now check how many sub-tensors the layers following the splitter are using.
391  auto TraceSubTensorHandleAncestry = [](armnn::ITensorHandle* const subTensorHandle)
392  {
393  if (subTensorHandle && subTensorHandle->GetParent())
394  {
395  return true;
396  }
397  return false;
398  };
399 
400  for (auto&& layer : theGraph)
401  {
402  if(layer->GetType() == armnn::LayerType::ElementwiseUnary)
403  {
404  unsigned int numberOfSubTensors = 0;
405  for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i)
406  {
407  const armnn::OutputSlot* slot = layer->GetInputSlot(i).GetConnectedOutputSlot();
408  if (TraceSubTensorHandleAncestry(slot->GetOutputHandler().GetData()))
409  {
410  ++numberOfSubTensors;
411  }
412  }
413  // sub-tensors should be supported in this configuration
414  ARMNN_ASSERT(numberOfSubTensors > 0);
415  }
416  }
417 
418  InputTensors inputTensors;
419  inputTensors.reserve(inputTensorData.size());
420  for (auto&& it : inputTensorData)
421  {
422  inputTensors.push_back({it.first,
423  ConstTensor(runtime->GetInputTensorInfo(networkIdentifier, it.first), it.second.data())});
424  }
425  OutputTensors outputTensors;
426  outputTensors.reserve(expectedOutputData.size());
427  std::map<int, std::vector<float>> outputStorage;
428  for (auto&& it : expectedOutputData)
429  {
430  std::vector<float> out(it.second.size());
431  outputStorage.emplace(it.first, out);
432  outputTensors.push_back({it.first,
433  Tensor(runtime->GetOutputTensorInfo(networkIdentifier, it.first),
434  outputStorage.at(it.first).data())});
435  }
436 
437  // Does the inference.
438  runtime->EnqueueWorkload(networkIdentifier, inputTensors, outputTensors);
439 
440  // Checks the results.
441  float tolerance = 0.000001f;
442  for (auto&& it : expectedOutputData)
443  {
444  std::vector<float> out = outputStorage.at(it.first);
445  for (unsigned int i = 0; i < out.size(); ++i)
446  {
447  CHECK_MESSAGE(Compare<armnn::DataType::Float32>(it.second[i], out[i], tolerance) == true,
448  "Actual output: " << out[i] << ". Expected output:" << it.second[i]);
449 
450  }
451  }
452 }
453 
454 TEST_CASE("SplitteronXorYPaddingRequiredTest")
455 {
456  using namespace armnn;
457 
458  unsigned int splitAxis = 2;
459  unsigned int numSplit = 2;
460 
461  const TensorShape& inputShape = { 1, 1, 4, 4 };
462  const armnn::TensorInfo intermediateInfo = armnn::TensorInfo({ 1, 1, 2, 4 }, armnn::DataType::Float32);
463  const std::vector<TensorShape> outputShapes{{ 1, 1, 2, 4 },
464  { 1, 1, 2, 4 }};
465 
466  const float qScale = 1.0f;
467  const int32_t qOffset = 0;
468 
469  // Creates structures for input & output.
470  std::vector<float> inputData{
471  9.0f, 27.0f, 18.0f, 36.0f,
472  18.0f, 9.0f, 18.0f, 9.0f,
473  27.0f, 18.0f, 9.0f, 27.0f,
474  9.0f, 27.0f, 9.0f, 18.0f,
475  };
476 
477  std::vector<float> expectedOutput0{
478  7.0f, 11.0f, 13.0f, 9.0f,
479  7.0f, 11.0f, 13.0f, 9.0f
480  };
481 
482  std::vector<float> expectedOutput1{
483  9.0f, 11.0f, 12.0f, 7.0f,
484  9.0f, 11.0f, 12.0f, 7.0f
485  };
486 
487  // Builds up the structure of the network.
488  INetworkPtr net(INetwork::Create());
489
490  TensorInfo inputTensorInfo(inputShape, armnn::DataType::Float32, qScale, qOffset);
491 
492  // Pooling
493  armnn::Pooling2dDescriptor descriptor;
494  descriptor.m_PoolType = armnn::PoolingAlgorithm::Average;
495  descriptor.m_PoolWidth = descriptor.m_PoolHeight = 3;
496  descriptor.m_StrideX = descriptor.m_StrideY = 1;
497  descriptor.m_PadLeft = 1;
498  descriptor.m_PadRight = 1;
499  descriptor.m_PadTop = 1;
500  descriptor.m_PadBottom = 1;
501  descriptor.m_PaddingMethod = armnn::PaddingMethod::IgnoreValue;
502
503  // Splitter
504  std::vector<unsigned int> splitterDimSizes(inputShape.GetNumDimensions());
505 
506  // Add current input shape to splitterDimSizes
507  for (unsigned int i = 0; i < inputShape.GetNumDimensions(); ++i)
508  {
509  splitterDimSizes[i] = inputTensorInfo.GetShape()[i];
510  }
511 
512  if (splitterDimSizes[splitAxis] % numSplit != 0)
513  {
514  throw ParseException("Number of splits must evenly divide the dimension");
515  }
516 
517  splitterDimSizes[splitAxis] /= numSplit;
518 
519  SplitterDescriptor splitDesc(numSplit, inputShape.GetNumDimensions());
520 
521  for (unsigned int g = 0; g < numSplit; ++g)
522  {
523  // Set the size of the views.
524  for (unsigned int dimIdx = 0; dimIdx < splitterDimSizes.size(); ++dimIdx)
525  {
526  splitDesc.SetViewSize(g, dimIdx, splitterDimSizes[dimIdx]);
527  }
528  splitDesc.SetViewOriginCoord(g, splitAxis, splitterDimSizes[splitAxis] * g);
529  }
530 
531  IConnectableLayer* input = net->AddInputLayer(0, "input");
532  IConnectableLayer* pooling2d0 = net->AddPooling2dLayer(descriptor, "pooling2d_0");
533  IConnectableLayer* pooling2d1 = net->AddPooling2dLayer(descriptor, "pooling2d_1");
534  IConnectableLayer* splitter = net->AddSplitterLayer(splitDesc, "splitter");
535 
536  // Connections
537  Connect(input, splitter, inputTensorInfo, 0, 0);
538  Connect(splitter, pooling2d0, intermediateInfo, 0, 0);
539  Connect(splitter, pooling2d1, intermediateInfo, 1, 0);
540 
541  std::vector<IConnectableLayer*> pooling2dLayers{pooling2d0, pooling2d1};
542 
543  for (unsigned int i = 0; i < outputShapes.size(); ++i)
544  {
545  TensorInfo outputTensorInfo(outputShapes[i], armnn::DataType::Float32, qScale, qOffset);
546  IConnectableLayer* output = net->AddOutputLayer(armnn::numeric_cast<LayerBindingId>(i));
547  Connect(pooling2dLayers[i], output, outputTensorInfo, 0, 0);
548  }
549 
550  std::map<int, std::vector<float>> inputTensorData = {{ 0,inputData }};
551  std::map<int, std::vector<float>> expectedOutputData = {{ 0, expectedOutput0 }, { 1, expectedOutput1 }};
552
553  armnn::IRuntime::CreationOptions options;
554  armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
555
556  std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc };
557  armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());
558 
559  const armnn::Graph& theGraph = GetGraphForTesting(optimizedNet.get());
560 
561  // Load graph into runtime
562  armnn::NetworkId networkIdentifier;
563  runtime->LoadNetwork(networkIdentifier, std::move(optimizedNet));
564 
565  // Now check how many sub-tensors the layers following the splitter are using.
566  auto TraceSubTensorHandleAncestry = [](armnn::ITensorHandle* const subTensorHandle)
567  {
568  if (subTensorHandle && subTensorHandle->GetParent())
569  {
570  return true;
571  }
572  return false;
573  };
574 
575  for (auto&& layer : theGraph)
576  {
577  if(layer->GetType() == armnn::LayerType::Pooling2d)
578  {
579  unsigned int numberOfSubTensors = 0;
580  for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i)
581  {
582  const armnn::OutputSlot* slot = layer->GetInputSlot(i).GetConnectedOutputSlot();
583  if (TraceSubTensorHandleAncestry(slot->GetOutputHandler().GetData()))
584  {
585  ++numberOfSubTensors;
586  }
587  }
588  // sub-tensors should not be supported in this configuration
589  ARMNN_ASSERT(numberOfSubTensors == 0);
590  }
591  }
592 
593  InputTensors inputTensors;
594  inputTensors.reserve(inputTensorData.size());
595  for (auto&& it : inputTensorData)
596  {
597  inputTensors.push_back({it.first,
598  ConstTensor(runtime->GetInputTensorInfo(networkIdentifier, it.first), it.second.data())});
599  }
600  OutputTensors outputTensors;
601  outputTensors.reserve(expectedOutputData.size());
602  std::map<int, std::vector<float>> outputStorage;
603  for (auto&& it : expectedOutputData)
604  {
605  std::vector<float> out(it.second.size());
606  outputStorage.emplace(it.first, out);
607  outputTensors.push_back({it.first,
608  Tensor(runtime->GetOutputTensorInfo(networkIdentifier, it.first),
609  outputStorage.at(it.first).data())});
610  }
611 
612  // Does the inference.
613  runtime->EnqueueWorkload(networkIdentifier, inputTensors, outputTensors);
614 
615  // Checks the results.
616  float tolerance = 0.000001f;
617  for (auto&& it : expectedOutputData)
618  {
619  std::vector<float> out = outputStorage.at(it.first);
620  for (unsigned int i = 0; i < out.size(); ++i)
621  {
622  CHECK_MESSAGE(Compare<armnn::DataType::Float32>(it.second[i], out[i], tolerance) == true,
623  "Actual output: " << out[i] << ". Expected output:" << it.second[i]);
624 
625  }
626  }
627 }
628 
629 TEST_CASE("NeonTensorHandleFactoryMemoryManaged")
630 {
631  std::shared_ptr<NeonMemoryManager> memoryManager = std::make_shared<NeonMemoryManager>(
632  std::make_unique<arm_compute::Allocator>(),
633  BaseMemoryManager::MemoryAffinity::Offset);
634  NeonTensorHandleFactory handleFactory(memoryManager);
635  TensorInfo info({ 1, 1, 2, 1 }, DataType::Float32);
636 
637  // create TensorHandle with memory managed
638  auto handle = handleFactory.CreateTensorHandle(info, true);
639  handle->Manage();
640  handle->Allocate();
641 
642  memoryManager->Acquire();
643  {
644  float* buffer = reinterpret_cast<float*>(handle->Map());
645  CHECK(buffer != nullptr); // Yields a valid pointer
646  buffer[0] = 1.5f;
647  buffer[1] = 2.5f;
648  CHECK(buffer[0] == 1.5f); // Memory is writable and readable
649  CHECK(buffer[1] == 2.5f); // Memory is writable and readable
650  }
651  memoryManager->Release();
652 
653  memoryManager->Acquire();
654  {
655  float* buffer = reinterpret_cast<float*>(handle->Map());
656  CHECK(buffer != nullptr); // Yields a valid pointer
657  buffer[0] = 3.5f;
658  buffer[1] = 4.5f;
659  CHECK(buffer[0] == 3.5f); // Memory is writable and readable
660  CHECK(buffer[1] == 4.5f); // Memory is writable and readable
661  }
662  memoryManager->Release();
663 
664  float testPtr[2] = { 2.5f, 5.5f };
665  // Cannot import as import is disabled
666  CHECK_THROWS_AS(handle->Import(static_cast<void*>(testPtr), MemorySource::Malloc), MemoryImportException);
667 }
668 
669 TEST_CASE("NeonTensorHandleFactoryImport")
670 {
671  std::shared_ptr<NeonMemoryManager> memoryManager = std::make_shared<NeonMemoryManager>(
672  std::make_unique<arm_compute::Allocator>(),
673  BaseMemoryManager::MemoryAffinity::Offset);
674  NeonTensorHandleFactory handleFactory(memoryManager);
675  TensorInfo info({ 1, 1, 2, 1 }, DataType::Float32);
676 
677  // create TensorHandle without memory managed
678  auto handle = handleFactory.CreateTensorHandle(info, false);
679  handle->Manage();
680  handle->Allocate();
681  memoryManager->Acquire();
682 
683  // No buffer allocated when import is enabled
684  CHECK((PolymorphicDowncast<NeonTensorHandle*>(handle.get()))->GetTensor().buffer() == nullptr);
685 
686  float testPtr[2] = { 2.5f, 5.5f };
687  // Correctly import
688  CHECK(handle->Import(static_cast<void*>(testPtr), MemorySource::Malloc));
689  float* buffer = reinterpret_cast<float*>(handle->Map());
690  CHECK(buffer != nullptr); // Yields a valid pointer after import
691  CHECK(buffer == testPtr); // buffer is pointing to testPtr
692  // Memory is writable and readable with correct value
693  CHECK(buffer[0] == 2.5f);
694  CHECK(buffer[1] == 5.5f);
695  buffer[0] = 3.5f;
696  buffer[1] = 10.0f;
697  CHECK(buffer[0] == 3.5f);
698  CHECK(buffer[1] == 10.0f);
699  memoryManager->Release();
700 }
701 
702 TEST_CASE("NeonTensorHandleSupportsInPlaceComputation")
703 {
704  std::shared_ptr<NeonMemoryManager> memoryManager = std::make_shared<NeonMemoryManager>();
705  NeonTensorHandleFactory handleFactory(memoryManager);
706 
707  // NeonTensorHandleFactory supports InPlaceComputation
708  ARMNN_ASSERT(handleFactory.SupportsInPlaceComputation());
709 }
710 
711 }
Referenced Declarations

IRuntime::Create(): static IRuntimePtr Create(const CreationOptions &options). Definition at line 39 of file Runtime.cpp.
ViewsDescriptor: A ViewsDescriptor for the SplitterLayer.
IConnectableLayer: Interface for a layer that is connectable to other layers via InputSlots and OutputSlots. Definition at line 61 of file INetwork.hpp.
Pooling2dDescriptor::m_PadBottom (uint32_t): Padding bottom value in the height dimension.
Pooling2dDescriptor::m_PadLeft (uint32_t): Padding left value in the width dimension.
Pooling2dDescriptor::m_PoolWidth (uint32_t): Pooling width value.
SoftmaxDescriptor::m_Beta (float): Exponentiation value.
IRuntimePtr: std::unique_ptr< IRuntime, void(*)(IRuntime *runtime)>. Definition at line 30 of file IRuntime.hpp.
Pooling2dDescriptor::m_PaddingMethod (PaddingMethod): The padding method to be used (Exclude, IgnoreValue).
Pooling2dDescriptor::m_PadTop (uint32_t): Padding top value in the height dimension.
InputTensors: std::vector< std::pair< LayerBindingId, class ConstTensor > >. Definition at line 360 of file Tensor.hpp.
armnn (namespace): Copyright (c) 2021 ARM Limited and Contributors.
Pooling2dDescriptor::m_StrideX (uint32_t): Stride value when proceeding through input for the width dimension.
IOutputSlot::SetTensorInfo(): virtual void SetTensorInfo(const TensorInfo &tensorInfo)=0.
Tensor: A tensor defined by a TensorInfo (shape and data type) and a mutable backing store. Definition at line 319 of file Tensor.hpp.
Pooling2dDescriptor::m_PoolHeight (uint32_t): Pooling height value.
Pooling2dDescriptor::m_PadRight (uint32_t): Padding right value in the width dimension.
armnn::Optimize(): IOptimizedNetworkPtr Optimize(const INetwork &network, const std::vector< BackendId > &backendPreferences, const IDeviceSpec &deviceSpec, const OptimizerOptions &options=OptimizerOptions(), Optional< std::vector< std::string > &> messages=EmptyOptional()). Create an optimized version of the network. Definition at line 1613 of file Network.cpp.
ViewsDescriptor::SetViewSize(): Status SetViewSize(uint32_t view, uint32_t coord, uint32_t value). Set the size of the views.
NetworkId: int. Definition at line 24 of file IRuntime.hpp.
ConstTensor: A tensor defined by a TensorInfo (shape and data type) and an immutable backing store. Definition at line 327 of file Tensor.hpp.
OutputTensors: std::vector< std::pair< LayerBindingId, class Tensor > >. Definition at line 361 of file Tensor.hpp.
IOptimizedNetworkPtr: std::unique_ptr< IOptimizedNetwork, void(*)(IOptimizedNetwork *network)>. Definition at line 173 of file INetwork.hpp.
ARMNN_ASSERT: #define ARMNN_ASSERT(COND). Definition at line 14 of file Assert.hpp.
OutputHandler::GetData(): ITensorHandle *GetData() const. Gets the allocated tensor memory.
GetGraphForTesting(): Graph &GetGraphForTesting(IOptimizedNetwork *optNet). Definition at line 25 of file TestUtils.cpp.
ElementwiseUnaryDescriptor: An ElementwiseUnaryDescriptor for the ElementwiseUnaryLayer. Definition at line 98 of file Descriptors.hpp.
Pooling2dDescriptor::m_PoolType (PoolingAlgorithm): The pooling algorithm to use (Max, Average, L2).
PaddingMethod::IgnoreValue: The padding fields count, but are ignored.
Compute::CpuAcc: CPU Execution: NEON: ArmCompute.
TensorShape::GetNumDimensions(): unsigned int GetNumDimensions() const. Function that returns the tensor rank. Definition at line 174 of file Tensor.cpp.
IConnectableLayer::GetInputSlot(): virtual const IInputSlot &GetInputSlot(unsigned int index) const=0. Get a const input slot handle by slot index.
OutputSlot::GetOutputHandler(): const OutputHandler &GetOutputHandler() const. Definition at line 119 of file Layer.hpp.
IConnectableLayer::GetOutputSlot(): virtual const IOutputSlot &GetOutputSlot(unsigned int index) const=0. Get the const output slot handle by slot index.
Connect(): void Connect(armnn::IConnectableLayer *from, armnn::IConnectableLayer *to, const armnn::TensorInfo &tensorInfo, unsigned int fromIndex, unsigned int toIndex). Definition at line 12 of file TestUtils.cpp.
CreateDescriptorForConcatenation(): OriginsDescriptor CreateDescriptorForConcatenation(TensorShapeIt first, TensorShapeIt last, unsigned int concatenationDimension). Convenience template to create an OriginsDescriptor to use when creating a ConcatLayer for performing concatenation of a number of input tensors.
INetworkPtr: std::unique_ptr< INetwork, void(*)(INetwork *network)>. Definition at line 172 of file INetwork.hpp.
IOutputSlot::Connect(): virtual int Connect(IInputSlot &destination)=0.
Pooling2dDescriptor: A Pooling2dDescriptor for the Pooling2dLayer.
INetwork::Create(): static INetworkPtr Create(NetworkOptions networkOptions={}). Definition at line 530 of file Network.cpp.
SoftmaxDescriptor: A SoftmaxDescriptor for the SoftmaxLayer.
Pooling2dDescriptor::m_StrideY (uint32_t): Stride value when proceeding through input for the height dimension.