ArmNN
 22.02
NeonTensorHandleTests.cpp File Reference
#include <Graph.hpp>
#include <Network.hpp>
#include <neon/NeonTensorHandle.hpp>
#include <neon/NeonTensorHandleFactory.hpp>
#include <armnn/utility/NumericCast.hpp>
#include <armnn/utility/PolymorphicDowncast.hpp>
#include <GraphUtils.hpp>
#include <arm_compute/runtime/Allocator.h>
#include <CommonTestUtils.hpp>
#include <doctest/doctest.h>
#include <armnn/utility/Assert.hpp>

Go to the source code of this file.

Functions

 TEST_SUITE ("NeonTensorHandleTests")
 

Function Documentation

◆ TEST_SUITE()

TEST_SUITE ( "NeonTensorHandleTests"  )

Definition at line 21 of file NeonTensorHandleTests.cpp.

References armnn::Abs, ARMNN_ASSERT, armnn::Average, armnn::Concat, Connect(), IOutputSlot::Connect(), armnn::CpuAcc, IRuntime::Create(), INetwork::Create(), armnn::CreateDescriptorForConcatenation(), NeonTensorHandleFactory::CreateTensorHandle(), armnn::ElementwiseUnary, armnn::Float32, OutputHandler::GetData(), armnn::GetGraphForTesting(), IConnectableLayer::GetInputSlot(), TensorShape::GetNumDimensions(), OutputSlot::GetOutputHandler(), IConnectableLayer::GetOutputSlot(), TensorInfo::GetShape(), armnn::IgnoreValue, armnn::info, SoftmaxDescriptor::m_Beta, Pooling2dDescriptor::m_PadBottom, Pooling2dDescriptor::m_PaddingMethod, Pooling2dDescriptor::m_PadLeft, Pooling2dDescriptor::m_PadRight, Pooling2dDescriptor::m_PadTop, Pooling2dDescriptor::m_PoolHeight, Pooling2dDescriptor::m_PoolType, Pooling2dDescriptor::m_PoolWidth, Pooling2dDescriptor::m_StrideX, Pooling2dDescriptor::m_StrideY, armnn::Malloc, BaseMemoryManager::Offset, armnn::Optimize(), armnn::PaddingRequired, armnn::Pooling2d, TensorInfo::SetConstant(), IOutputSlot::SetTensorInfo(), and ViewsDescriptor::SetViewSize().
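The tests in this suite follow a common pattern: construct a NeonTensorHandleFactory backed by a NeonMemoryManager, assemble a small INetwork, and then either query the factory for PaddingRequired capabilities or walk the optimized Graph looking for sub-tensor handles (an ITensorHandle whose GetParent() is non-null). As a quick orientation, the following is a condensed sketch of the capability-query step, assembled from the first test case in the listing below and assuming the same includes as this file; it is not additional API.

    using namespace armnn;

    // Factory under test, backed by the Neon memory manager.
    std::shared_ptr<NeonMemoryManager> memoryManager = std::make_shared<NeonMemoryManager>();
    NeonTensorHandleFactory handleFactory(memoryManager);

    // Minimal network: Input -> Softmax -> Output.
    INetworkPtr network(INetwork::Create());
    IConnectableLayer* input   = network->AddInputLayer(0);
    IConnectableLayer* softmax = network->AddSoftmaxLayer(SoftmaxDescriptor());
    IConnectableLayer* output  = network->AddOutputLayer(0);
    input->GetOutputSlot(0).Connect(softmax->GetInputSlot(0));
    softmax->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    // Softmax introduces no padding, so no capabilities are reported for this pair.
    std::vector<Capability> capabilities =
        handleFactory.GetCapabilities(input, softmax, CapabilityClass::PaddingRequired);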

22 {
23 using namespace armnn;
24 
25 TEST_CASE("NeonTensorHandleGetCapabilitiesNoPadding")
26 {
27  std::shared_ptr<NeonMemoryManager> memoryManager = std::make_shared<NeonMemoryManager>();
28  NeonTensorHandleFactory handleFactory(memoryManager);
29 
30  INetworkPtr network(INetwork::Create());
31 
32  // Add the layers
33  IConnectableLayer* input = network->AddInputLayer(0);
34  SoftmaxDescriptor descriptor;
35  descriptor.m_Beta = 1.0f;
36  IConnectableLayer* softmax = network->AddSoftmaxLayer(descriptor);
37  IConnectableLayer* output = network->AddOutputLayer(2);
38 
39  // Establish connections
40  input->GetOutputSlot(0).Connect(softmax->GetInputSlot(0));
41  softmax->GetOutputSlot(0).Connect(output->GetInputSlot(0));
42 
43  // No padding required for input
44  std::vector<Capability> capabilities = handleFactory.GetCapabilities(input,
45  softmax,
46  CapabilityClass::PaddingRequired);
47  CHECK(capabilities.empty());
48 
49  // No padding required for Softmax
50  capabilities = handleFactory.GetCapabilities(softmax, output, CapabilityClass::PaddingRequired);
51  CHECK(capabilities.empty());
52 
53  // No padding required for output
54  capabilities = handleFactory.GetCapabilities(output, nullptr, CapabilityClass::PaddingRequired);
55  CHECK(capabilities.empty());
56 }
57 
58 TEST_CASE("NeonTensorHandleGetCapabilitiesPadding")
59 {
60  std::shared_ptr<NeonMemoryManager> memoryManager = std::make_shared<NeonMemoryManager>();
61  NeonTensorHandleFactory handleFactory(memoryManager);
62 
63  INetworkPtr network(INetwork::Create());
64 
65  // Add the layers
66  IConnectableLayer* input = network->AddInputLayer(0);
67  Pooling2dDescriptor descriptor;
68  IConnectableLayer* pooling = network->AddPooling2dLayer(descriptor);
69  IConnectableLayer* output = network->AddOutputLayer(2);
70 
71  // Establish connections
72  input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
73  pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
74 
75  // No padding required for input
76  std::vector<Capability> capabilities = handleFactory.GetCapabilities(input,
77  pooling,
78  CapabilityClass::PaddingRequired);
79  CHECK(capabilities.empty());
80 
81  // No padding required for output
82  capabilities = handleFactory.GetCapabilities(output, nullptr, CapabilityClass::PaddingRequired);
83  CHECK(capabilities.empty());
84 
85  // Padding required for Pooling2d
86  capabilities = handleFactory.GetCapabilities(pooling, output, CapabilityClass::PaddingRequired);
87  CHECK(capabilities.size() == 1);
88  CHECK((capabilities[0].m_CapabilityClass == CapabilityClass::PaddingRequired));
89  CHECK(capabilities[0].m_Value);
90 }
91 
92 TEST_CASE("ConcatOnXorYSubTensorsNoPaddingRequiredTest")
93 {
94  armnn::INetworkPtr net(armnn::INetwork::Create());
95 
96  // Set up tensor infos
97  const armnn::TensorInfo inputInfo = armnn::TensorInfo({2, 3, 2, 2}, armnn::DataType::Float32);
98  const armnn::TensorInfo intermediateInfo = armnn::TensorInfo({2, 3, 2, 2}, armnn::DataType::Float32);
99  const armnn::TensorInfo outputInfo = armnn::TensorInfo({2, 3, 4, 2}, armnn::DataType::Float32);
100 
101  armnn::ElementwiseUnaryDescriptor descriptor(armnn::UnaryOperation::Abs);
102 
103  // Create the network
104  armnn::IConnectableLayer* const input0Layer = net->AddInputLayer(0, "input_0");
105  input0Layer->GetOutputSlot(0).SetTensorInfo(inputInfo);
106  armnn::IConnectableLayer* elementwiseUnaryLayer0 = net->AddElementwiseUnaryLayer(descriptor, "elementwiseUnary_0");
107  elementwiseUnaryLayer0->GetOutputSlot(0).SetTensorInfo(intermediateInfo);
108  input0Layer->GetOutputSlot(0).Connect(elementwiseUnaryLayer0->GetInputSlot(0));
109 
110  armnn::IConnectableLayer* const input1Layer = net->AddInputLayer(1, "input_1");
111  input1Layer->GetOutputSlot(0).SetTensorInfo(inputInfo);
112  armnn::IConnectableLayer* elementwiseUnaryLayer1 = net->AddElementwiseUnaryLayer(descriptor, "elementwiseUnary_1");
113  elementwiseUnaryLayer1->GetOutputSlot(0).SetTensorInfo(intermediateInfo);
114  input1Layer->GetOutputSlot(0).Connect(elementwiseUnaryLayer1->GetInputSlot(0));
115 
116  std::array<armnn::TensorShape, 2> concatInputShapes = { intermediateInfo.GetShape(), intermediateInfo.GetShape() };
117  armnn::IConnectableLayer* const concatLayer = net->AddConcatLayer(armnn::CreateDescriptorForConcatenation(
118  concatInputShapes.begin(), concatInputShapes.end(), 2), "concatenation");
119  concatLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
120  elementwiseUnaryLayer0->GetOutputSlot(0).Connect(concatLayer->GetInputSlot(0));
121  elementwiseUnaryLayer1->GetOutputSlot(0).Connect(concatLayer->GetInputSlot(1));
122 
123  armnn::IConnectableLayer* const outputLayer = net->AddOutputLayer(0, "output");
124  concatLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
125 
126  armnn::IRuntime::CreationOptions options;
127  armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
128 
129  std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc };
130  armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());
131 
132  const armnn::Graph& theGraph = GetGraphForTesting(optimizedNet.get());
133 
134  // Load graph into runtime
135  armnn::NetworkId networkIdentifier;
136  runtime->LoadNetwork(networkIdentifier, std::move(optimizedNet));
137 
138  // Now check how many sub-tensors the concat layer is using.
139  auto TraceSubTensorHandleAncestry = [](armnn::ITensorHandle* const subTensorHandle)
140  {
141  if (subTensorHandle && subTensorHandle->GetParent())
142  {
143  return true;
144  }
145  return false;
146  };
147 
148  for (auto&& layer : theGraph)
149  {
150  if(layer->GetType() == armnn::LayerType::Concat)
151  {
152  unsigned int numberOfSubTensors = 0;
153  for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i)
154  {
155  const armnn::OutputSlot* slot = layer->GetInputSlot(i).GetConnectedOutputSlot();
156  if (TraceSubTensorHandleAncestry(slot->GetOutputHandler().GetData()))
157  {
158  ++numberOfSubTensors;
159  }
160  }
161  // sub-tensors should be supported in this configuration
162  ARMNN_ASSERT(numberOfSubTensors > 0);
163  }
164  }
165 }
166 
167 TEST_CASE("ConcatonXorYPaddingRequiredTest")
168 {
169  armnn::INetworkPtr net(armnn::INetwork::Create());
170 
171  // Set up tensor infos
172  const armnn::TensorInfo inputInfo = armnn::TensorInfo({2, 3, 2, 2}, armnn::DataType::Float32);
173  const armnn::TensorInfo intermediateInfo = armnn::TensorInfo({2, 3, 2, 2}, armnn::DataType::Float32);
174  const armnn::TensorInfo outputInfo = armnn::TensorInfo({2, 3, 4, 2}, armnn::DataType::Float32);
175 
176  armnn::Pooling2dDescriptor descriptor;
177  descriptor.m_PoolType = armnn::PoolingAlgorithm::Average;
178  descriptor.m_PoolWidth = descriptor.m_PoolHeight = 3;
179  descriptor.m_StrideX = descriptor.m_StrideY = 1;
180  descriptor.m_PadLeft = 1;
181  descriptor.m_PadRight = 1;
182  descriptor.m_PadTop = 1;
183  descriptor.m_PadBottom = 1;
184  descriptor.m_PaddingMethod = armnn::PaddingMethod::IgnoreValue;
185 
186  // Create the network
187  armnn::IConnectableLayer* const input0Layer = net->AddInputLayer(0, "input_0");
188  input0Layer->GetOutputSlot(0).SetTensorInfo(inputInfo);
189  armnn::IConnectableLayer* pooling2dLayer0 = net->AddPooling2dLayer(descriptor, "pooling2d_0");
190  pooling2dLayer0->GetOutputSlot(0).SetTensorInfo(intermediateInfo);
191  input0Layer->GetOutputSlot(0).Connect(pooling2dLayer0->GetInputSlot(0));
192 
193  armnn::IConnectableLayer* const input1Layer = net->AddInputLayer(1, "input_1");
194  input1Layer->GetOutputSlot(0).SetTensorInfo(inputInfo);
195  armnn::IConnectableLayer* pooling2dLayer1 = net->AddPooling2dLayer(descriptor, "pooling2d_1");
196  pooling2dLayer1->GetOutputSlot(0).SetTensorInfo(intermediateInfo);
197  input1Layer->GetOutputSlot(0).Connect(pooling2dLayer1->GetInputSlot(0));
198 
199  std::array<armnn::TensorShape, 2> concatInputShapes = { intermediateInfo.GetShape(), intermediateInfo.GetShape() };
200  armnn::IConnectableLayer* const concatLayer = net->AddConcatLayer(armnn::CreateDescriptorForConcatenation(
201  concatInputShapes.begin(), concatInputShapes.end(), 2), "concatenation");
202  concatLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
203  pooling2dLayer0->GetOutputSlot(0).Connect(concatLayer->GetInputSlot(0));
204  pooling2dLayer1->GetOutputSlot(0).Connect(concatLayer->GetInputSlot(1));
205 
206  armnn::IConnectableLayer* const outputLayer = net->AddOutputLayer(0, "output");
207  concatLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
208 
209  armnn::IRuntime::CreationOptions options;
210  armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
211 
212  std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc };
213  armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());
214 
215  const armnn::Graph& theGraph = GetGraphForTesting(optimizedNet.get());
216 
217  // Load graph into runtime
218  armnn::NetworkId networkIdentifier;
219  runtime->LoadNetwork(networkIdentifier, std::move(optimizedNet));
220 
221  // Now check how many sub-tensors the concat layer is using.
222  auto TraceSubTensorHandleAncestry = [](armnn::ITensorHandle* const subTensorHandle)
223  {
224  if (subTensorHandle && subTensorHandle->GetParent())
225  {
226  return true;
227  }
228  return false;
229  };
230 
231  unsigned int numberOfSubTensors = 0;
232  for (auto&& layer : theGraph)
233  {
234  if(layer->GetType() == armnn::LayerType::Concat)
235  {
236  for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i)
237  {
238  const armnn::OutputSlot* slot = layer->GetInputSlot(i).GetConnectedOutputSlot();
239  if (TraceSubTensorHandleAncestry(slot->GetOutputHandler().GetData()))
240  {
241  ++numberOfSubTensors;
242  }
243  }
244  }
245  }
246  // sub-tensors should not be supported in this configuration
247  ARMNN_ASSERT(numberOfSubTensors == 0);
248 }
249 
250 TEST_CASE("SplitteronXorYNoPaddingRequiredTest")
251 {
252  using namespace armnn;
253 
254  unsigned int splitAxis = 2;
255  unsigned int numSplit = 2;
256 
257  const TensorShape& inputShape = { 2, 3, 4, 2 };
258  const armnn::TensorInfo intermediateInfo = armnn::TensorInfo({ 2, 3, 2, 2 }, armnn::DataType::Float32);
259  const std::vector<TensorShape> outputShapes{{ 2, 3, 2, 2 },
260  { 2, 3, 2, 2 }};
261  const float qScale = 1.0f;
262  const int32_t qOffset = 0;
263 
264  // Creates structures for input & output.
265  std::vector<float> inputData{
266  1, 2,
267  3, 4,
268  5, 6,
269  7, 8,
270  9, 10,
271  11, 12,
272  13, 14,
273  15, 16,
274  17, 18,
275  19, 20,
276  21, 22,
277  23, 24,
278  25, 26,
279  27, 28,
280  29, 30,
281  31, 32,
282  33, 34,
283  35, 36,
284  37, 38,
285  39, 40,
286  41, 42,
287  43, 44,
288  45, 46,
289  47, 48
290  };
291 
292  std::vector<float> expectedOutput0{
293  1, 2,
294  3, 4,
295  9, 10,
296  11, 12,
297  17, 18,
298  19, 20,
299  25, 26,
300  27, 28,
301  33, 34,
302  35, 36,
303  41, 42,
304  43, 44
305  };
306 
307  std::vector<float> expectedOutput1{
308  5, 6,
309  7, 8,
310  13, 14,
311  15, 16,
312  21, 22,
313  23, 24,
314  29, 30,
315  31, 32,
316  37, 38,
317  39, 40,
318  45, 46,
319  47, 48
320  };
321 
322  // Builds up the structure of the network.
323  armnn::INetworkPtr net(armnn::INetwork::Create());
324 
325  TensorInfo inputTensorInfo(inputShape, armnn::DataType::Float32, qScale, qOffset);
326 
327  armnn::ElementwiseUnaryDescriptor descriptor(armnn::UnaryOperation::Abs);
328 
329  // Splitter
330  std::vector<unsigned int> splitterDimSizes(inputShape.GetNumDimensions());
331 
332  // Add current input shape to splitterDimSizes
333  for (unsigned int i = 0; i < inputShape.GetNumDimensions(); ++i)
334  {
335  splitterDimSizes[i] = inputTensorInfo.GetShape()[i];
336  }
337 
338  if (splitterDimSizes[splitAxis] % numSplit != 0)
339  {
340  throw ParseException("Number of splits must evenly divide the dimension");
341  }
342 
343  splitterDimSizes[splitAxis] /= numSplit;
344 
345  SplitterDescriptor splitDesc(numSplit, inputShape.GetNumDimensions());
346 
347  for (unsigned int g = 0; g < numSplit; ++g)
348  {
349  // Set the size of the views.
350  for (unsigned int dimIdx = 0; dimIdx < splitterDimSizes.size(); ++dimIdx)
351  {
352  splitDesc.SetViewSize(g, dimIdx, splitterDimSizes[dimIdx]);
353  }
354  splitDesc.SetViewOriginCoord(g, splitAxis, splitterDimSizes[splitAxis] * g);
355  }
356  IConnectableLayer* input = net->AddInputLayer(0, "input");
357  IConnectableLayer* elementWiseUnary0 = net->AddElementwiseUnaryLayer(descriptor, "elementwiseunary_0");
358  IConnectableLayer* elementWiseUnary1 = net->AddElementwiseUnaryLayer(descriptor, "elementwiseunary_1");
359  IConnectableLayer* splitter = net->AddSplitterLayer(splitDesc, "splitter");
360 
361  // Connections
362  Connect(input, splitter, inputTensorInfo, 0, 0);
363  Connect(splitter, elementWiseUnary0, intermediateInfo, 0, 0);
364  Connect(splitter, elementWiseUnary1, intermediateInfo, 1, 0);
365 
366  std::vector<IConnectableLayer*> pooling2dLayers{elementWiseUnary0, elementWiseUnary1};
367 
368  for (unsigned int i = 0; i < outputShapes.size(); ++i)
369  {
370  TensorInfo outputTensorInfo(outputShapes[i], armnn::DataType::Float32, qScale, qOffset);
371  IConnectableLayer* output = net->AddOutputLayer(armnn::numeric_cast<LayerBindingId>(i));
372  Connect(pooling2dLayers[i], output, outputTensorInfo, 0, 0);
373  }
374 
375  std::map<int, std::vector<float>> inputTensorData = {{ 0,inputData }};
376  std::map<int, std::vector<float>> expectedOutputData = {{ 0, expectedOutput0 }, { 1, expectedOutput1 }};
377 
378  armnn::IRuntime::CreationOptions options;
379  armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
380 
381  std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc };
382  armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());
383 
384  const armnn::Graph& theGraph = GetGraphForTesting(optimizedNet.get());
385 
386  // Load graph into runtime
387  armnn::NetworkId networkIdentifier;
388  runtime->LoadNetwork(networkIdentifier, std::move(optimizedNet));
389 
390  // Now check how many sub-tensors each ElementwiseUnary layer is using.
391  auto TraceSubTensorHandleAncestry = [](armnn::ITensorHandle* const subTensorHandle)
392  {
393  if (subTensorHandle && subTensorHandle->GetParent())
394  {
395  return true;
396  }
397  return false;
398  };
399 
400  for (auto&& layer : theGraph)
401  {
402  if(layer->GetType() == armnn::LayerType::ElementwiseUnary)
403  {
404  unsigned int numberOfSubTensors = 0;
405  for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i)
406  {
407  const armnn::OutputSlot* slot = layer->GetInputSlot(i).GetConnectedOutputSlot();
408  if (TraceSubTensorHandleAncestry(slot->GetOutputHandler().GetData()))
409  {
410  ++numberOfSubTensors;
411  }
412  }
413  // sub-tensors should be supported in this configuration
414  ARMNN_ASSERT(numberOfSubTensors > 0);
415  }
416  }
417 
418  InputTensors inputTensors;
419  inputTensors.reserve(inputTensorData.size());
420  for (auto&& it : inputTensorData)
421  {
422  TensorInfo inputTensorInfo = runtime->GetInputTensorInfo(networkIdentifier, it.first);
423  inputTensorInfo.SetConstant(true);
424  inputTensors.push_back({it.first,
425  ConstTensor(inputTensorInfo, it.second.data())});
426  }
427  OutputTensors outputTensors;
428  outputTensors.reserve(expectedOutputData.size());
429  std::map<int, std::vector<float>> outputStorage;
430  for (auto&& it : expectedOutputData)
431  {
432  std::vector<float> out(it.second.size());
433  outputStorage.emplace(it.first, out);
434  outputTensors.push_back({it.first,
435  Tensor(runtime->GetOutputTensorInfo(networkIdentifier, it.first),
436  outputStorage.at(it.first).data())});
437  }
438 
439  // Does the inference.
440  runtime->EnqueueWorkload(networkIdentifier, inputTensors, outputTensors);
441 
442  // Checks the results.
443  float tolerance = 0.000001f;
444  for (auto&& it : expectedOutputData)
445  {
446  std::vector<float> out = outputStorage.at(it.first);
447  for (unsigned int i = 0; i < out.size(); ++i)
448  {
449  CHECK_MESSAGE(Compare<armnn::DataType::Float32>(it.second[i], out[i], tolerance) == true,
450  "Actual output: " << out[i] << ". Expected output:" << it.second[i]);
451 
452  }
453  }
454 }
455 
456 TEST_CASE("SplitteronXorYPaddingRequiredTest")
457 {
458  using namespace armnn;
459 
460  unsigned int splitAxis = 2;
461  unsigned int numSplit = 2;
462 
463  const TensorShape& inputShape = { 1, 1, 4, 4 };
464  const armnn::TensorInfo intermediateInfo = armnn::TensorInfo({ 1, 1, 2, 4 }, armnn::DataType::Float32);
465  const std::vector<TensorShape> outputShapes{{ 1, 1, 2, 4 },
466  { 1, 1, 2, 4 }};
467 
468  const float qScale = 1.0f;
469  const int32_t qOffset = 0;
470 
471  // Creates structures for input & output.
472  std::vector<float> inputData{
473  9.0f, 27.0f, 18.0f, 36.0f,
474  18.0f, 9.0f, 18.0f, 9.0f,
475  27.0f, 18.0f, 9.0f, 27.0f,
476  9.0f, 27.0f, 9.0f, 18.0f,
477  };
478 
479  std::vector<float> expectedOutput0{
480  7.0f, 11.0f, 13.0f, 9.0f,
481  7.0f, 11.0f, 13.0f, 9.0f
482  };
483 
484  std::vector<float> expectedOutput1{
485  9.0f, 11.0f, 12.0f, 7.0f,
486  9.0f, 11.0f, 12.0f, 7.0f
487  };
488 
489  // Builds up the structure of the network.
490  armnn::INetworkPtr net(armnn::INetwork::Create());
491 
492  TensorInfo inputTensorInfo(inputShape, armnn::DataType::Float32, qScale, qOffset);
493 
494  // Pooling
495  armnn::Pooling2dDescriptor descriptor;
496  descriptor.m_PoolType = armnn::PoolingAlgorithm::Average;
497  descriptor.m_PoolWidth = descriptor.m_PoolHeight = 3;
498  descriptor.m_StrideX = descriptor.m_StrideY = 1;
499  descriptor.m_PadLeft = 1;
500  descriptor.m_PadRight = 1;
501  descriptor.m_PadTop = 1;
502  descriptor.m_PadBottom = 1;
503  descriptor.m_PaddingMethod = armnn::PaddingMethod::IgnoreValue;
504 
505  // Splitter
506  std::vector<unsigned int> splitterDimSizes(inputShape.GetNumDimensions());
507 
508  // Add current input shape to splitterDimSizes
509  for (unsigned int i = 0; i < inputShape.GetNumDimensions(); ++i)
510  {
511  splitterDimSizes[i] = inputTensorInfo.GetShape()[i];
512  }
513 
514  if (splitterDimSizes[splitAxis] % numSplit != 0)
515  {
516  throw ParseException("Number of splits must evenly divide the dimension");
517  }
518 
519  splitterDimSizes[splitAxis] /= numSplit;
520 
521  SplitterDescriptor splitDesc(numSplit, inputShape.GetNumDimensions());
522 
523  for (unsigned int g = 0; g < numSplit; ++g)
524  {
525  // Set the size of the views.
526  for (unsigned int dimIdx = 0; dimIdx < splitterDimSizes.size(); ++dimIdx)
527  {
528  splitDesc.SetViewSize(g, dimIdx, splitterDimSizes[dimIdx]);
529  }
530  splitDesc.SetViewOriginCoord(g, splitAxis, splitterDimSizes[splitAxis] * g);
531  }
532 
533  IConnectableLayer* input = net->AddInputLayer(0, "input");
534  IConnectableLayer* pooling2d0 = net->AddPooling2dLayer(descriptor, "pooling2d_0");
535  IConnectableLayer* pooling2d1 = net->AddPooling2dLayer(descriptor, "pooling2d_1");
536  IConnectableLayer* splitter = net->AddSplitterLayer(splitDesc, "splitter");
537 
538  // Connections
539  Connect(input, splitter, inputTensorInfo, 0, 0);
540  Connect(splitter, pooling2d0, intermediateInfo, 0, 0);
541  Connect(splitter, pooling2d1, intermediateInfo, 1, 0);
542 
543  std::vector<IConnectableLayer*> pooling2dLayers{pooling2d0, pooling2d1};
544 
545  for (unsigned int i = 0; i < outputShapes.size(); ++i)
546  {
547  TensorInfo outputTensorInfo(outputShapes[i], armnn::DataType::Float32, qScale, qOffset);
548  IConnectableLayer* output = net->AddOutputLayer(armnn::numeric_cast<LayerBindingId>(i));
549  Connect(pooling2dLayers[i], output, outputTensorInfo, 0, 0);
550  }
551 
552  std::map<int, std::vector<float>> inputTensorData = {{ 0,inputData }};
553  std::map<int, std::vector<float>> expectedOutputData = {{ 0, expectedOutput0 }, { 1, expectedOutput1 }};
554 
555  armnn::IRuntime::CreationOptions options;
556  armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
557 
558  std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc };
559  armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());
560 
561  const armnn::Graph& theGraph = GetGraphForTesting(optimizedNet.get());
562 
563  // Load graph into runtime
564  armnn::NetworkId networkIdentifier;
565  runtime->LoadNetwork(networkIdentifier, std::move(optimizedNet));
566 
567  // Now check how many sub-tensors each Pooling2d layer is using.
568  auto TraceSubTensorHandleAncestry = [](armnn::ITensorHandle* const subTensorHandle)
569  {
570  if (subTensorHandle && subTensorHandle->GetParent())
571  {
572  return true;
573  }
574  return false;
575  };
576 
577  for (auto&& layer : theGraph)
578  {
579  if(layer->GetType() == armnn::LayerType::Pooling2d)
580  {
581  unsigned int numberOfSubTensors = 0;
582  for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i)
583  {
584  const armnn::OutputSlot* slot = layer->GetInputSlot(i).GetConnectedOutputSlot();
585  if (TraceSubTensorHandleAncestry(slot->GetOutputHandler().GetData()))
586  {
587  ++numberOfSubTensors;
588  }
589  }
590  // sub-tensors should not be supported in this configuration
591  ARMNN_ASSERT(numberOfSubTensors == 0);
592  }
593  }
594 
595  InputTensors inputTensors;
596  inputTensors.reserve(inputTensorData.size());
597  for (auto&& it : inputTensorData)
598  {
599  TensorInfo inputTensorInfo = runtime->GetInputTensorInfo(networkIdentifier, it.first);
600  inputTensorInfo.SetConstant(true);
601  inputTensors.push_back({it.first,
602  ConstTensor(inputTensorInfo, it.second.data())});
603  }
604  OutputTensors outputTensors;
605  outputTensors.reserve(expectedOutputData.size());
606  std::map<int, std::vector<float>> outputStorage;
607  for (auto&& it : expectedOutputData)
608  {
609  std::vector<float> out(it.second.size());
610  outputStorage.emplace(it.first, out);
611  outputTensors.push_back({it.first,
612  Tensor(runtime->GetOutputTensorInfo(networkIdentifier, it.first),
613  outputStorage.at(it.first).data())});
614  }
615 
616  // Does the inference.
617  runtime->EnqueueWorkload(networkIdentifier, inputTensors, outputTensors);
618 
619  // Checks the results.
620  float tolerance = 0.000001f;
621  for (auto&& it : expectedOutputData)
622  {
623  std::vector<float> out = outputStorage.at(it.first);
624  for (unsigned int i = 0; i < out.size(); ++i)
625  {
626  CHECK_MESSAGE(Compare<armnn::DataType::Float32>(it.second[i], out[i], tolerance) == true,
627  "Actual output: " << out[i] << ". Expected output:" << it.second[i]);
628 
629  }
630  }
631 }
632 
633 TEST_CASE("NeonTensorHandleFactoryMemoryManaged")
634 {
635  std::shared_ptr<NeonMemoryManager> memoryManager = std::make_shared<NeonMemoryManager>(
636  std::make_unique<arm_compute::Allocator>(),
637  BaseMemoryManager::MemoryAffinity::Offset);
638  NeonTensorHandleFactory handleFactory(memoryManager);
639  TensorInfo info({ 1, 1, 2, 1 }, DataType::Float32);
640 
641  // create TensorHandle with memory managed
642  auto handle = handleFactory.CreateTensorHandle(info, true);
643  handle->Manage();
644  handle->Allocate();
645 
646  memoryManager->Acquire();
647  {
648  float* buffer = reinterpret_cast<float*>(handle->Map());
649  CHECK(buffer != nullptr); // Yields a valid pointer
650  buffer[0] = 1.5f;
651  buffer[1] = 2.5f;
652  CHECK(buffer[0] == 1.5f); // Memory is writable and readable
653  CHECK(buffer[1] == 2.5f); // Memory is writable and readable
654  }
655  memoryManager->Release();
656 
657  memoryManager->Acquire();
658  {
659  float* buffer = reinterpret_cast<float*>(handle->Map());
660  CHECK(buffer != nullptr); // Yields a valid pointer
661  buffer[0] = 3.5f;
662  buffer[1] = 4.5f;
663  CHECK(buffer[0] == 3.5f); // Memory is writable and readable
664  CHECK(buffer[1] == 4.5f); // Memory is writable and readable
665  }
666  memoryManager->Release();
667 
668  float testPtr[2] = { 2.5f, 5.5f };
669  // Cannot import as import is disabled
670  CHECK_THROWS_AS(handle->Import(static_cast<void*>(testPtr), MemorySource::Malloc), MemoryImportException);
671 }
672 
673 TEST_CASE("NeonTensorHandleFactoryImport")
674 {
675  std::shared_ptr<NeonMemoryManager> memoryManager = std::make_shared<NeonMemoryManager>(
676  std::make_unique<arm_compute::Allocator>(),
677  BaseMemoryManager::MemoryAffinity::Offset);
678  NeonTensorHandleFactory handleFactory(memoryManager);
679  TensorInfo info({ 1, 1, 2, 1 }, DataType::Float32);
680 
681  // create TensorHandle without memory managed
682  auto handle = handleFactory.CreateTensorHandle(info, false);
683  handle->Manage();
684  handle->Allocate();
685  memoryManager->Acquire();
686 
687  // No buffer allocated when import is enabled
688  CHECK((PolymorphicDowncast<NeonTensorHandle*>(handle.get()))->GetTensor().buffer() == nullptr);
689 
690  float testPtr[2] = { 2.5f, 5.5f };
691  // Correctly import
692  CHECK(handle->Import(static_cast<void*>(testPtr), MemorySource::Malloc));
693  float* buffer = reinterpret_cast<float*>(handle->Map());
694  CHECK(buffer != nullptr); // Yields a valid pointer after import
695  CHECK(buffer == testPtr); // buffer is pointing to testPtr
696  // Memory is writable and readable with correct value
697  CHECK(buffer[0] == 2.5f);
698  CHECK(buffer[1] == 5.5f);
699  buffer[0] = 3.5f;
700  buffer[1] = 10.0f;
701  CHECK(buffer[0] == 3.5f);
702  CHECK(buffer[1] == 10.0f);
703  memoryManager->Release();
704 }
705 
706 TEST_CASE("NeonTensorHandleCanBeImported")
707 {
708  std::shared_ptr<NeonMemoryManager> memoryManager = std::make_shared<NeonMemoryManager>(
709  std::make_unique<arm_compute::Allocator>(),
710  BaseMemoryManager::MemoryAffinity::Offset);
711  NeonTensorHandleFactory handleFactory(memoryManager);
712  TensorInfo info({ 1, 1, 2, 1 }, DataType::Float32);
713 
714  // create TensorHandle (Memory Managed status is irrelevant)
715  auto handle = handleFactory.CreateTensorHandle(info, false);
716 
717  // Create an aligned buffer
718  float alignedBuffer[2] = { 2.5f, 5.5f };
719  // Check aligned buffers return true
720  CHECK(handle->CanBeImported(&alignedBuffer, MemorySource::Malloc) == true);
721 
722  // Create a misaligned buffer from the aligned one
723  float* misalignedBuffer = reinterpret_cast<float*>(reinterpret_cast<char*>(alignedBuffer) + 1);
724  // Check misaligned buffers return false
725  CHECK(handle->CanBeImported(static_cast<void*>(misalignedBuffer), MemorySource::Malloc) == false);
726 }
727 
728 TEST_CASE("NeonTensorHandleSupportsInPlaceComputation")
729 {
730  std::shared_ptr<NeonMemoryManager> memoryManager = std::make_shared<NeonMemoryManager>();
731  NeonTensorHandleFactory handleFactory(memoryManager);
732 
733  // NeonTensorHandleFactory supports InPlaceComputation
734  ARMNN_ASSERT(handleFactory.SupportsInPlaceComputation());
735 }
736 
737 }
static IRuntimePtr Create(const CreationOptions &options)
Definition: Runtime.cpp:40
A ViewsDescriptor for the SplitterLayer.
Interface for a layer that is connectable to other layers via InputSlots and OutputSlots.
Definition: INetwork.hpp:66
uint32_t m_PadBottom
Padding bottom value in the height dimension.
const TensorShape & GetShape() const
Definition: Tensor.hpp:191
uint32_t m_PadLeft
Padding left value in the width dimension.
uint32_t m_PoolWidth
Pooling width value.
float m_Beta
Exponentiation value.
std::unique_ptr< IRuntime, void(*)(IRuntime *runtime)> IRuntimePtr
Definition: IRuntime.hpp:31
PaddingMethod m_PaddingMethod
The padding method to be used. (Exclude, IgnoreValue).
uint32_t m_PadTop
Padding top value in the height dimension.
std::vector< std::pair< LayerBindingId, class ConstTensor > > InputTensors
Definition: Tensor.hpp:392
Copyright (c) 2021 ARM Limited and Contributors.
uint32_t m_StrideX
Stride value when proceeding through input for the width dimension.
virtual void SetTensorInfo(const TensorInfo &tensorInfo)=0
A tensor defined by a TensorInfo (shape and data type) and a mutable backing store.
Definition: Tensor.hpp:319
uint32_t m_PoolHeight
Pooling height value.
uint32_t m_PadRight
Padding right value in the width dimension.
IOptimizedNetworkPtr Optimize(const INetwork &network, const std::vector< BackendId > &backendPreferences, const IDeviceSpec &deviceSpec, const OptimizerOptions &options=OptimizerOptions(), Optional< std::vector< std::string > &> messages=EmptyOptional())
Create an optimized version of the network.
Definition: Network.cpp:1680
Status SetViewSize(uint32_t view, uint32_t coord, uint32_t value)
Set the size of the views.
int NetworkId
Definition: IRuntime.hpp:25
A tensor defined by a TensorInfo (shape and data type) and an immutable backing store.
Definition: Tensor.hpp:327
std::vector< std::pair< LayerBindingId, class Tensor > > OutputTensors
Definition: Tensor.hpp:393
std::unique_ptr< IOptimizedNetwork, void(*)(IOptimizedNetwork *network)> IOptimizedNetworkPtr
Definition: INetwork.hpp:242
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
ITensorHandle * GetData() const
Gets the allocated tensor memory.
Graph & GetGraphForTesting(IOptimizedNetwork *optNet)
Definition: TestUtils.cpp:47
A ElementwiseUnaryDescriptor for the ElementwiseUnaryLayer.
PoolingAlgorithm m_PoolType
The pooling algorithm to use (Max, Average, L2).
The padding fields count, but are ignored.
CPU Execution: NEON: ArmCompute.
unsigned int GetNumDimensions() const
Function that returns the tensor rank.
Definition: Tensor.cpp:174
virtual const IInputSlot & GetInputSlot(unsigned int index) const =0
Get a const input slot handle by slot index.
void SetConstant(const bool IsConstant=true)
Marks the data corresponding to this tensor info as constant.
Definition: Tensor.cpp:516
const OutputHandler & GetOutputHandler() const
Definition: Layer.hpp:124
virtual const IOutputSlot & GetOutputSlot(unsigned int index) const =0
Get the const output slot handle by slot index.
void Connect(armnn::IConnectableLayer *from, armnn::IConnectableLayer *to, const armnn::TensorInfo &tensorInfo, unsigned int fromIndex, unsigned int toIndex)
Definition: TestUtils.cpp:12
OriginsDescriptor CreateDescriptorForConcatenation(TensorShapeIt first, TensorShapeIt last, unsigned int concatenationDimension)
Convenience template to create an OriginsDescriptor to use when creating a ConcatLayer for performing...
std::unique_ptr< INetwork, void(*)(INetwork *network)> INetworkPtr
Definition: INetwork.hpp:241
virtual int Connect(IInputSlot &destination)=0
A Pooling2dDescriptor for the Pooling2dLayer.
static INetworkPtr Create(NetworkOptions networkOptions={})
Definition: Network.cpp:492
A SoftmaxDescriptor for the SoftmaxLayer.
uint32_t m_StrideY
Stride value when proceeding through input for the height dimension.