ArmNN 21.02
NeonCreateWorkloadTests.cpp
//
// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

// [#include directives from the original file are omitted in this listing]

BOOST_AUTO_TEST_SUITE(CreateWorkloadNeon)

namespace
{

boost::test_tools::predicate_result CompareIAclTensorHandleShape(IAclTensorHandle* tensorHandle,
                                                                 std::initializer_list<unsigned int> expectedDimensions)
{
    return CompareTensorHandleShape<IAclTensorHandle>(tensorHandle, expectedDimensions);
}

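// Compares an ACL tensor handle's info (data type, rank, quantization info and each dimension)
// against the given armnn::TensorInfo converted to its Arm Compute equivalent.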
bool TestNeonTensorHandleInfo(armnn::IAclTensorHandle* handle, const armnn::TensorInfo& expectedInfo)
{
    using namespace armnn::armcomputetensorutils;

    const arm_compute::ITensorInfo* handleInfo = handle->GetTensor().info();
    const arm_compute::TensorInfo expectedAclInfo = BuildArmComputeTensorInfo(expectedInfo);

    if (handleInfo->data_type() != expectedAclInfo.data_type())
    {
        return false;
    }

    if (handleInfo->num_dimensions() != expectedAclInfo.num_dimensions())
    {
        return false;
    }

    if (handleInfo->quantization_info() != expectedAclInfo.quantization_info())
    {
        return false;
    }

    for (std::size_t d = 0; d < expectedAclInfo.num_dimensions(); ++d)
    {
        if (handleInfo->dimension(d) != expectedAclInfo.dimension(d))
        {
            return false;
        }
    }

    return true;
}

} // namespace

template <typename armnn::DataType DataType>
static void NeonCreateActivationWorkloadTest()
{
    Graph graph;
    NeonWorkloadFactory factory =
        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateActivationWorkloadTest<NeonActivationWorkload, DataType>(factory, graph);

    // Checks that inputs/outputs are as we expect them (see definition of CreateActivationWorkloadTest).
    ActivationQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({1, 1}, DataType)));
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({1, 1}, DataType)));
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
BOOST_AUTO_TEST_CASE(CreateActivationFloat16Workload)
{
    NeonCreateActivationWorkloadTest<DataType::Float16>();
}
#endif

BOOST_AUTO_TEST_CASE(CreateActivationFloatWorkload)
{
    NeonCreateActivationWorkloadTest<DataType::Float32>();
}

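// Shared helper for the Addition, Subtraction, Multiplication and Division workload tests below.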
template <typename WorkloadType,
          typename DescriptorType,
          typename LayerType,
          typename armnn::DataType DataType>
static void NeonCreateElementwiseWorkloadTest()
{
    Graph graph;
    NeonWorkloadFactory factory =
        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateElementwiseWorkloadTest<WorkloadType, DescriptorType, LayerType, DataType>(factory, graph);

    DescriptorType queueDescriptor = workload->GetData();
    auto inputHandle1 = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto inputHandle2 = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle1, TensorInfo({2, 3}, DataType)));
    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle2, TensorInfo({2, 3}, DataType)));
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({2, 3}, DataType)));
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
BOOST_AUTO_TEST_CASE(CreateAdditionFloat16Workload)
{
    NeonCreateElementwiseWorkloadTest<NeonAdditionWorkload,
                                      AdditionQueueDescriptor,
                                      AdditionLayer,
                                      DataType::Float16>();
}
#endif

BOOST_AUTO_TEST_CASE(CreateAdditionFloatWorkload)
{
    NeonCreateElementwiseWorkloadTest<NeonAdditionWorkload,
                                      AdditionQueueDescriptor,
                                      AdditionLayer,
                                      DataType::Float32>();
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
BOOST_AUTO_TEST_CASE(CreateSubtractionFloat16Workload)
{
    NeonCreateElementwiseWorkloadTest<NeonSubtractionWorkload,
                                      SubtractionQueueDescriptor,
                                      SubtractionLayer,
                                      DataType::Float16>();
}
#endif

BOOST_AUTO_TEST_CASE(CreateSubtractionFloatWorkload)
{
    NeonCreateElementwiseWorkloadTest<NeonSubtractionWorkload,
                                      SubtractionQueueDescriptor,
                                      SubtractionLayer,
                                      DataType::Float32>();
}

BOOST_AUTO_TEST_CASE(CreateSubtractionUint8Workload)
{
    NeonCreateElementwiseWorkloadTest<NeonSubtractionWorkload,
                                      SubtractionQueueDescriptor,
                                      SubtractionLayer,
                                      DataType::QAsymmU8>();
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
BOOST_AUTO_TEST_CASE(CreateMultiplicationFloat16Workload)
{
    NeonCreateElementwiseWorkloadTest<NeonMultiplicationWorkload,
                                      MultiplicationQueueDescriptor,
                                      MultiplicationLayer,
                                      DataType::Float16>();
}
#endif

BOOST_AUTO_TEST_CASE(CreateMultiplicationFloatWorkload)
{
    NeonCreateElementwiseWorkloadTest<NeonMultiplicationWorkload,
                                      MultiplicationQueueDescriptor,
                                      MultiplicationLayer,
                                      DataType::Float32>();
}

BOOST_AUTO_TEST_CASE(CreateMultiplicationUint8Workload)
{
    NeonCreateElementwiseWorkloadTest<NeonMultiplicationWorkload,
                                      MultiplicationQueueDescriptor,
                                      MultiplicationLayer,
                                      DataType::QAsymmU8>();
}

BOOST_AUTO_TEST_CASE(CreateDivisionFloatWorkloadTest)
{
    NeonCreateElementwiseWorkloadTest<NeonDivisionWorkload,
                                      DivisionQueueDescriptor,
                                      DivisionLayer,
                                      DataType::Float32>();
}

template <typename BatchNormalizationWorkloadType, typename armnn::DataType DataType>
static void NeonCreateBatchNormalizationWorkloadTest(DataLayout dataLayout)
{
    Graph graph;
    NeonWorkloadFactory factory =
        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateBatchNormalizationWorkloadTest<BatchNormalizationWorkloadType, DataType>
                    (factory, graph, dataLayout);

    // Checks that outputs and inputs are as we expect them (see definition of CreateBatchNormalizationWorkloadTest).
    BatchNormalizationQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);

    TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 3, 4, 4} : TensorShape{2, 4, 4, 3};
    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 3, 4, 4} : TensorShape{2, 4, 4, 3};

    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloat16NchwWorkload)
{
    NeonCreateBatchNormalizationWorkloadTest<NeonBatchNormalizationWorkload, DataType::Float16>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloat16NhwcWorkload)
{
    NeonCreateBatchNormalizationWorkloadTest<NeonBatchNormalizationWorkload, DataType::Float16>(DataLayout::NHWC);
}
#endif

BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloatNchwWorkload)
{
    NeonCreateBatchNormalizationWorkloadTest<NeonBatchNormalizationWorkload, DataType::Float32>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloatNhwcWorkload)
{
    NeonCreateBatchNormalizationWorkloadTest<NeonBatchNormalizationWorkload, DataType::Float32>(DataLayout::NHWC);
}

template <typename armnn::DataType DataType>
static void NeonCreateConvolution2dWorkloadTest(DataLayout dataLayout = DataLayout::NCHW)
{
    Graph graph;
    NeonWorkloadFactory factory =
        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateConvolution2dWorkloadTest<NeonConvolution2dWorkload, DataType>(factory, graph, dataLayout);

    TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 3, 8, 16} : TensorShape{2, 8, 16, 3};
    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 2, 2, 10} : TensorShape{2, 2, 10, 2};

    // Checks that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest).
    Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
BOOST_AUTO_TEST_CASE(CreateConvolution2dFloat16NchwWorkload)
{
    NeonCreateConvolution2dWorkloadTest<DataType::Float16>();
}

BOOST_AUTO_TEST_CASE(CreateConvolution2dFloat16NhwcWorkload)
{
    NeonCreateConvolution2dWorkloadTest<DataType::Float16>(DataLayout::NHWC);
}

#endif
BOOST_AUTO_TEST_CASE(CreateConvolution2dFloatNchwWorkload)
{
    NeonCreateConvolution2dWorkloadTest<DataType::Float32>();
}

BOOST_AUTO_TEST_CASE(CreateConvolution2dFloatNhwcWorkload)
{
    NeonCreateConvolution2dWorkloadTest<DataType::Float32>(DataLayout::NHWC);
}

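// With "FastMathEnabled" set on the CpuAcc backend, the convolution is expected to select the
// Winograd method (verified via GetConvolutionMethod() below).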
BOOST_AUTO_TEST_CASE(CreateConvolution2dFastMathEnabledWorkload)
{
    Graph graph;
    using ModelOptions = std::vector<BackendOptions>;
    ModelOptions modelOptions = {};
    BackendOptions cpuAcc("CpuAcc",
    {
        { "FastMathEnabled", true }
    });
    modelOptions.push_back(cpuAcc);
    NeonWorkloadFactory factory =
        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager(), modelOptions);

    auto workload =
        CreateConvolution2dWorkloadFastMathTest<NeonConvolution2dWorkload, armnn::DataType::Float32>(factory,
                                                                                                     graph,
                                                                                                     DataLayout::NCHW,
                                                                                                     modelOptions);

    ARMNN_ASSERT(workload != nullptr);
    auto conv2dWorkload = PolymorphicDowncast<NeonConvolution2dWorkload*>(workload.get());
    IgnoreUnused(conv2dWorkload);
    ARMNN_ASSERT(conv2dWorkload != nullptr);
    ARMNN_ASSERT(conv2dWorkload->GetConvolutionMethod() == arm_compute::ConvolutionMethod::WINOGRAD);
}

template <typename armnn::DataType DataType>
static void NeonCreateDepthWiseConvolutionWorkloadTest(DataLayout dataLayout)
{
    Graph graph;
    NeonWorkloadFactory factory =
        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateDepthwiseConvolution2dWorkloadTest<NeonDepthwiseConvolutionWorkload,
                                                             DataType>(factory, graph, dataLayout);

    // Checks that inputs/outputs are as we expect them (see definition of CreateDepthwiseConvolution2dWorkloadTest).
    DepthwiseConvolution2dQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);

    TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 2, 2, 5, 5 })
                                                              : std::initializer_list<unsigned int>({ 2, 5, 5, 2 });
    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 2, 2, 5, 5 })
                                                               : std::initializer_list<unsigned int>({ 2, 5, 5, 2 });

    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
}

BOOST_AUTO_TEST_CASE(CreateDepthWiseConvolution2dFloat32NhwcWorkload)
{
    NeonCreateDepthWiseConvolutionWorkloadTest<DataType::Float32>(DataLayout::NHWC);
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
BOOST_AUTO_TEST_CASE(CreateDepthWiseConvolution2dFloat16NhwcWorkload)
{
    NeonCreateDepthWiseConvolutionWorkloadTest<DataType::Float16>(DataLayout::NHWC);
}
#endif

template <typename FullyConnectedWorkloadType, typename armnn::DataType DataType>
static void NeonCreateFullyConnectedWorkloadTest()
{
    Graph graph;
    NeonWorkloadFactory factory =
        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateFullyConnectedWorkloadTest<FullyConnectedWorkloadType, DataType>(factory, graph);

    // Checks that outputs and inputs are as we expect them (see definition of CreateFullyConnectedWorkloadTest).
    FullyConnectedQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);

    float inputsQScale = DataType == armnn::DataType::QAsymmU8 ? 1.0f : 0.0f;
    float outputQScale = DataType == armnn::DataType::QAsymmU8 ? 2.0f : 0.0f;
    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({3, 1, 4, 5}, DataType, inputsQScale)));
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({3, 7}, DataType, outputQScale)));
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
BOOST_AUTO_TEST_CASE(CreateFullyConnectedFloat16Workload)
{
    NeonCreateFullyConnectedWorkloadTest<NeonFullyConnectedWorkload, DataType::Float16>();
}
#endif

BOOST_AUTO_TEST_CASE(CreateFullyConnectedFloatWorkload)
{
    NeonCreateFullyConnectedWorkloadTest<NeonFullyConnectedWorkload, DataType::Float32>();
}

BOOST_AUTO_TEST_CASE(CreateFullyConnectedQAsymmU8Workload)
{
    NeonCreateFullyConnectedWorkloadTest<NeonFullyConnectedWorkload, DataType::QAsymmU8>();
}

BOOST_AUTO_TEST_CASE(CreateFullyConnectedQAsymmS8Workload)
{
    NeonCreateFullyConnectedWorkloadTest<NeonFullyConnectedWorkload, DataType::QAsymmS8>();
}

template <typename NormalizationWorkloadType, typename armnn::DataType DataType>
static void NeonCreateNormalizationWorkloadTest(DataLayout dataLayout)
{
    Graph graph;
    NeonWorkloadFactory factory =
        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateNormalizationWorkloadTest<NormalizationWorkloadType, DataType>(factory, graph, dataLayout);

    // Checks that outputs and inputs are as we expect them (see definition of CreateNormalizationWorkloadTest).
    NormalizationQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);

    TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{3, 5, 5, 1} : TensorShape{3, 1, 5, 5};
    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{3, 5, 5, 1} : TensorShape{3, 1, 5, 5};

    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
BOOST_AUTO_TEST_CASE(CreateNormalizationFloat16NchwWorkload)
{
    NeonCreateNormalizationWorkloadTest<NeonNormalizationFloatWorkload, DataType::Float16>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateNormalizationFloat16NhwcWorkload)
{
    NeonCreateNormalizationWorkloadTest<NeonNormalizationFloatWorkload, DataType::Float16>(DataLayout::NHWC);
}
#endif

BOOST_AUTO_TEST_CASE(CreateNormalizationFloatNchwWorkload)
{
    NeonCreateNormalizationWorkloadTest<NeonNormalizationFloatWorkload, DataType::Float32>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateNormalizationFloatNhwcWorkload)
{
    NeonCreateNormalizationWorkloadTest<NeonNormalizationFloatWorkload, DataType::Float32>(DataLayout::NHWC);
}

template <typename armnn::DataType DataType>
static void NeonCreatePooling2dWorkloadTest(DataLayout dataLayout = DataLayout::NCHW)
{
    Graph graph;
    NeonWorkloadFactory factory =
        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreatePooling2dWorkloadTest<NeonPooling2dWorkload, DataType>(factory, graph, dataLayout);

    TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{3, 2, 5, 5} : TensorShape{3, 5, 5, 2};
    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{3, 2, 2, 4} : TensorShape{3, 2, 4, 2};

    // Checks that outputs and inputs are as we expect them (see definition of CreatePooling2dWorkloadTest).
    Pooling2dQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
BOOST_AUTO_TEST_CASE(CreatePooling2dFloat16Workload)
{
    NeonCreatePooling2dWorkloadTest<DataType::Float16>();
}
#endif

BOOST_AUTO_TEST_CASE(CreatePooling2dFloatNchwWorkload)
{
    NeonCreatePooling2dWorkloadTest<DataType::Float32>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreatePooling2dFloatNhwcWorkload)
{
    NeonCreatePooling2dWorkloadTest<DataType::Float32>(DataLayout::NHWC);
}

BOOST_AUTO_TEST_CASE(CreatePooling2dUint8NchwWorkload)
{
    NeonCreatePooling2dWorkloadTest<DataType::QAsymmU8>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreatePooling2dUint8NhwcWorkload)
{
    NeonCreatePooling2dWorkloadTest<DataType::QAsymmU8>(DataLayout::NHWC);
}

static void NeonCreatePreluWorkloadTest(const armnn::TensorShape& inputShape,
                                        const armnn::TensorShape& alphaShape,
                                        const armnn::TensorShape& outputShape,
                                        armnn::DataType dataType)
{
    Graph graph;
    NeonWorkloadFactory factory =
        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreatePreluWorkloadTest<NeonPreluWorkload>(factory,
                                                               graph,
                                                               inputShape,
                                                               alphaShape,
                                                               outputShape,
                                                               dataType);

    // Checks that outputs and inputs are as we expect them (see definition of CreatePreluWorkloadTest).
    PreluQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto alphaHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, dataType)));
    BOOST_TEST(TestNeonTensorHandleInfo(alphaHandle, TensorInfo(alphaShape, dataType)));
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, dataType)));
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
BOOST_AUTO_TEST_CASE(CreatePreluFloat16Workload)
{
    NeonCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::Float16);
}
#endif

BOOST_AUTO_TEST_CASE(CreatePreluFloatWorkload)
{
    NeonCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::Float32);
}

BOOST_AUTO_TEST_CASE(CreatePreluUint8Workload)
{
    NeonCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::QAsymmU8);
}

template <typename armnn::DataType DataType>
static void NeonCreateReshapeWorkloadTest()
{
    Graph graph;
    NeonWorkloadFactory factory =
        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateReshapeWorkloadTest<NeonReshapeWorkload, DataType>(factory, graph);

    // Checks that outputs and inputs are as we expect them (see definition of CreateReshapeWorkloadTest).
    ReshapeQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({4, 1}, DataType)));
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({1, 4}, DataType)));
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
BOOST_AUTO_TEST_CASE(CreateReshapeFloat16Workload)
{
    NeonCreateReshapeWorkloadTest<DataType::Float16>();
}
#endif

BOOST_AUTO_TEST_CASE(CreateReshapeFloatWorkload)
{
    NeonCreateReshapeWorkloadTest<DataType::Float32>();
}

BOOST_AUTO_TEST_CASE(CreateReshapeUint8Workload)
{
    NeonCreateReshapeWorkloadTest<DataType::QAsymmU8>();
}

template <typename ResizeWorkloadType, armnn::DataType DataType>
static void NeonCreateResizeWorkloadTest(DataLayout dataLayout)
{
    Graph graph;
    NeonWorkloadFactory factory =
        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
    auto workload = CreateResizeBilinearWorkloadTest<ResizeWorkloadType, DataType>(factory, graph, dataLayout);

    auto queueDescriptor = workload->GetData();

    auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);

    switch (dataLayout)
    {
        case DataLayout::NHWC:
            BOOST_TEST(CompareIAclTensorHandleShape(inputHandle, { 2, 4, 4, 3 }));
            BOOST_TEST(CompareIAclTensorHandleShape(outputHandle, { 2, 2, 2, 3 }));
            break;
        case DataLayout::NCHW:
        default:
            BOOST_TEST(CompareIAclTensorHandleShape(inputHandle, { 2, 3, 4, 4 }));
            BOOST_TEST(CompareIAclTensorHandleShape(outputHandle, { 2, 3, 2, 2 }));
    }
}

BOOST_AUTO_TEST_CASE(CreateResizeFloat32NchwWorkload)
{
    NeonCreateResizeWorkloadTest<NeonResizeWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateResizeUint8NchwWorkload)
{
    NeonCreateResizeWorkloadTest<NeonResizeWorkload, armnn::DataType::QAsymmU8>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateResizeFloat32NhwcWorkload)
{
    NeonCreateResizeWorkloadTest<NeonResizeWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
}

BOOST_AUTO_TEST_CASE(CreateResizeUint8NhwcWorkload)
{
    NeonCreateResizeWorkloadTest<NeonResizeWorkload, armnn::DataType::QAsymmU8>(DataLayout::NHWC);
}

template <typename SoftmaxWorkloadType, typename armnn::DataType DataType>
static void NeonCreateSoftmaxWorkloadTest()
{
    Graph graph;
    NeonWorkloadFactory factory =
        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateSoftmaxWorkloadTest<SoftmaxWorkloadType, DataType>(factory, graph);

    // Checks that outputs and inputs are as we expect them (see definition of CreateSoftmaxWorkloadTest).
    SoftmaxQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
    armnn::TensorInfo tensorInfo({4, 1}, DataType);
    if (DataType == armnn::DataType::QAsymmU8)
    {
        tensorInfo.SetQuantizationOffset(0);
        tensorInfo.SetQuantizationScale(1.f / 256);
    }
    else if (DataType == armnn::DataType::QAsymmS8)
    {
        tensorInfo.SetQuantizationOffset(-128);
        tensorInfo.SetQuantizationScale(1.f / 256);
    }
    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, tensorInfo));
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, tensorInfo));
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
BOOST_AUTO_TEST_CASE(CreateSoftmaxFloat16Workload)
{
    NeonCreateSoftmaxWorkloadTest<NeonSoftmaxWorkload, DataType::Float16>();
}
#endif

BOOST_AUTO_TEST_CASE(CreateSoftmaxFloatWorkload)
{
    NeonCreateSoftmaxWorkloadTest<NeonSoftmaxWorkload, DataType::Float32>();
}

BOOST_AUTO_TEST_CASE(CreateSoftmaxQAsymmU8Workload)
{
    NeonCreateSoftmaxWorkloadTest<NeonSoftmaxWorkload, DataType::QAsymmU8>();
}

BOOST_AUTO_TEST_CASE(CreateSoftmaxQAsymmS8Workload)
{
    NeonCreateSoftmaxWorkloadTest<NeonSoftmaxWorkload, DataType::QAsymmS8>();
}

template <typename SpaceToDepthWorkloadType, typename armnn::DataType DataType>
static void NeonSpaceToDepthWorkloadTest()
{
    Graph graph;
    NeonWorkloadFactory factory =
        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateSpaceToDepthWorkloadTest<SpaceToDepthWorkloadType, DataType>(factory, graph);

    SpaceToDepthQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);

    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({ 1, 2, 2, 1 }, DataType)));
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({ 1, 1, 1, 4 }, DataType)));
}

BOOST_AUTO_TEST_CASE(CreateSpaceToDepthFloat32Workload)
{
    NeonSpaceToDepthWorkloadTest<NeonSpaceToDepthWorkload, armnn::DataType::Float32>();
}

BOOST_AUTO_TEST_CASE(CreateSpaceToDepthFloat16Workload)
{
    NeonSpaceToDepthWorkloadTest<NeonSpaceToDepthWorkload, armnn::DataType::Float16>();
}

BOOST_AUTO_TEST_CASE(CreateSpaceToDepthQAsymm8Workload)
{
    NeonSpaceToDepthWorkloadTest<NeonSpaceToDepthWorkload, armnn::DataType::QAsymmU8>();
}

BOOST_AUTO_TEST_CASE(CreateSpaceToDepthQSymm16Workload)
{
    NeonSpaceToDepthWorkloadTest<NeonSpaceToDepthWorkload, armnn::DataType::QSymmS16>();
}

BOOST_AUTO_TEST_CASE(CreateSplitterWorkload)
{
    Graph graph;
    NeonWorkloadFactory factory =
        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateSplitterWorkloadTest<NeonSplitterWorkload, DataType::Float32>(factory, graph);

    // Checks that outputs are as we expect them (see definition of CreateSplitterWorkloadTest).
    SplitterQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({5, 7, 7}, DataType::Float32)));

    auto outputHandle0 = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle0, TensorInfo({1, 7, 7}, DataType::Float32)));

    auto outputHandle1 = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle1, TensorInfo({2, 7, 7}, DataType::Float32)));

    auto outputHandle2 = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[2]);
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle2, TensorInfo({2, 7, 7}, DataType::Float32)));
}

BOOST_AUTO_TEST_CASE(CreateSplitterConcat)
{
    // Tests that it is possible to decide which output of the splitter layer
    // should be linked to which input of the concat layer.
    // We test that it is possible to specify the 0th output of the splitter to be the 1st input to the
    // concat, and the 1st output of the splitter to be the 0th input of the concat.

    Graph graph;
    NeonWorkloadFactory factory =
        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    auto workloads =
        CreateSplitterConcatWorkloadTest<NeonSplitterWorkload, NeonConcatWorkload,
            DataType::Float32>(factory, graph);

    auto wlSplitter = std::move(workloads.first);
    auto wlConcat = std::move(workloads.second);

    // Checks that the index of inputs/outputs matches what we declared on InputDescriptor construction.
    armnn::IAclTensorHandle* sOut0 = dynamic_cast<armnn::IAclTensorHandle*>(wlSplitter->GetData().m_Outputs[0]);
    armnn::IAclTensorHandle* sOut1 = dynamic_cast<armnn::IAclTensorHandle*>(wlSplitter->GetData().m_Outputs[1]);
    armnn::IAclTensorHandle* mIn0 = dynamic_cast<armnn::IAclTensorHandle*>(wlConcat->GetData().m_Inputs[0]);
    armnn::IAclTensorHandle* mIn1 = dynamic_cast<armnn::IAclTensorHandle*>(wlConcat->GetData().m_Inputs[1]);

    BOOST_TEST(sOut0);
    BOOST_TEST(sOut1);
    BOOST_TEST(mIn0);
    BOOST_TEST(mIn1);

    bool validDataPointers = (sOut0 == mIn1) && (sOut1 == mIn0);

    BOOST_TEST(validDataPointers);
}

BOOST_AUTO_TEST_CASE(CreateSingleOutputMultipleInputs)
{
    // Tests that it is possible to assign multiple (two) different layers to each of the outputs of a splitter layer.
    // We create a splitter with two outputs and check that each of those outputs is used by two different
    // activation layers.

    Graph graph;
    NeonWorkloadFactory factory =
        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    std::unique_ptr<NeonSplitterWorkload> wlSplitter;
    std::unique_ptr<NeonActivationWorkload> wlActiv0_0;
    std::unique_ptr<NeonActivationWorkload> wlActiv0_1;
    std::unique_ptr<NeonActivationWorkload> wlActiv1_0;
    std::unique_ptr<NeonActivationWorkload> wlActiv1_1;

    CreateSplitterMultipleInputsOneOutputWorkloadTest<NeonSplitterWorkload,
        NeonActivationWorkload, DataType::Float32>(factory, graph, wlSplitter, wlActiv0_0, wlActiv0_1,
        wlActiv1_0, wlActiv1_1);

    armnn::IAclTensorHandle* sOut0 = dynamic_cast<armnn::IAclTensorHandle*>(wlSplitter->GetData().m_Outputs[0]);
    armnn::IAclTensorHandle* sOut1 = dynamic_cast<armnn::IAclTensorHandle*>(wlSplitter->GetData().m_Outputs[1]);
    armnn::IAclTensorHandle* activ0_0Im = dynamic_cast<armnn::IAclTensorHandle*>(wlActiv0_0->GetData().m_Inputs[0]);
    armnn::IAclTensorHandle* activ0_1Im = dynamic_cast<armnn::IAclTensorHandle*>(wlActiv0_1->GetData().m_Inputs[0]);
    armnn::IAclTensorHandle* activ1_0Im = dynamic_cast<armnn::IAclTensorHandle*>(wlActiv1_0->GetData().m_Inputs[0]);
    armnn::IAclTensorHandle* activ1_1Im = dynamic_cast<armnn::IAclTensorHandle*>(wlActiv1_1->GetData().m_Inputs[0]);

    BOOST_TEST(sOut0);
    BOOST_TEST(sOut1);
    BOOST_TEST(activ0_0Im);
    BOOST_TEST(activ0_1Im);
    BOOST_TEST(activ1_0Im);
    BOOST_TEST(activ1_1Im);

    bool validDataPointers = (sOut0 == activ0_0Im) && (sOut0 == activ0_1Im) &&
                             (sOut1 == activ1_0Im) && (sOut1 == activ1_1Im);

    BOOST_TEST(validDataPointers);
}

#if defined(ARMNNREF_ENABLED)

// This test unit needs the reference backend; it is not available if the reference backend is not built.

BOOST_AUTO_TEST_CASE(CreateMemCopyWorkloadsNeon)
{
    NeonWorkloadFactory factory =
        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
    CreateMemCopyWorkloads<IAclTensorHandle>(factory);
}

#endif

template <typename L2NormalizationWorkloadType, typename armnn::DataType DataType>
static void NeonCreateL2NormalizationWorkloadTest(DataLayout dataLayout)
{
    Graph graph;
    NeonWorkloadFactory factory =
        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    auto workload =
        CreateL2NormalizationWorkloadTest<L2NormalizationWorkloadType, DataType>(factory, graph, dataLayout);

    // Checks that inputs/outputs are as we expect them (see definition of CreateL2NormalizationWorkloadTest).
    L2NormalizationQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);

    TensorShape inputShape = (dataLayout == DataLayout::NCHW) ?
                             TensorShape{ 5, 20, 50, 67 } : TensorShape{ 5, 50, 67, 20 };
    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ?
                              TensorShape{ 5, 20, 50, 67 } : TensorShape{ 5, 50, 67, 20 };

    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloat16NchwWorkload)
{
    NeonCreateL2NormalizationWorkloadTest<NeonL2NormalizationFloatWorkload, DataType::Float16>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloat16NhwcWorkload)
{
    NeonCreateL2NormalizationWorkloadTest<NeonL2NormalizationFloatWorkload, DataType::Float16>(DataLayout::NHWC);
}
#endif

BOOST_AUTO_TEST_CASE(CreateL2NormalizationNchwWorkload)
{
    NeonCreateL2NormalizationWorkloadTest<NeonL2NormalizationFloatWorkload, DataType::Float32>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateL2NormalizationNhwcWorkload)
{
    NeonCreateL2NormalizationWorkloadTest<NeonL2NormalizationFloatWorkload, DataType::Float32>(DataLayout::NHWC);
}

template <typename LogSoftmaxWorkloadType, typename armnn::DataType DataType>
static void NeonCreateLogSoftmaxWorkloadTest()
{
    Graph graph;
    NeonWorkloadFactory factory =
        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateLogSoftmaxWorkloadTest<LogSoftmaxWorkloadType, DataType>(factory, graph);

    // Checks that outputs and inputs are as we expect them (see definition of CreateLogSoftmaxWorkloadTest).
    LogSoftmaxQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
    armnn::TensorInfo tensorInfo({4, 1}, DataType);

    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, tensorInfo));
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, tensorInfo));
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
BOOST_AUTO_TEST_CASE(CreateLogSoftmaxFloat16Workload)
{
    NeonCreateLogSoftmaxWorkloadTest<NeonLogSoftmaxWorkload, DataType::Float16>();
}
#endif

BOOST_AUTO_TEST_CASE(CreateLogSoftmaxFloatWorkload)
{
    NeonCreateLogSoftmaxWorkloadTest<NeonLogSoftmaxWorkload, DataType::Float32>();
}

template <typename LstmWorkloadType>
static void NeonCreateLstmWorkloadTest()
{
    Graph graph;
    NeonWorkloadFactory factory =
        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateLstmWorkloadTest<LstmWorkloadType>(factory, graph);

    LstmQueueDescriptor queueDescriptor = workload->GetData();

    auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);

    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({ 2, 2 }, DataType::Float32)));
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({ 2, 4 }, DataType::Float32)));
}

BOOST_AUTO_TEST_CASE(CreateLSTMWorkloadFloatWorkload)
{
    NeonCreateLstmWorkloadTest<NeonLstmFloatWorkload>();
}

template <typename ConcatWorkloadType, armnn::DataType DataType>
static void NeonCreateConcatWorkloadTest(std::initializer_list<unsigned int> outputShape,
                                         unsigned int concatAxis)
{
    Graph graph;
    NeonWorkloadFactory factory =
        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateConcatWorkloadTest<ConcatWorkloadType, DataType>(factory, graph, outputShape, concatAxis);

    ConcatQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle0 = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto inputHandle1 = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);

    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle0, TensorInfo({ 2, 3, 2, 5 }, DataType)));
    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle1, TensorInfo({ 2, 3, 2, 5 }, DataType)));
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
}

BOOST_AUTO_TEST_CASE(CreateConcatDim0Float32Workload)
{
    NeonCreateConcatWorkloadTest<NeonConcatWorkload, armnn::DataType::Float32>({ 4, 3, 2, 5 }, 0);
}

BOOST_AUTO_TEST_CASE(CreateConcatDim1Float32Workload)
{
    NeonCreateConcatWorkloadTest<NeonConcatWorkload, armnn::DataType::Float32>({ 2, 6, 2, 5 }, 1);
}

BOOST_AUTO_TEST_CASE(CreateConcatDim3Float32Workload)
{
    NeonCreateConcatWorkloadTest<NeonConcatWorkload, armnn::DataType::Float32>({ 2, 3, 2, 10 }, 3);
}

BOOST_AUTO_TEST_CASE(CreateConcatDim0Uint8Workload)
{
    NeonCreateConcatWorkloadTest<NeonConcatWorkload, armnn::DataType::QAsymmU8>({ 4, 3, 2, 5 }, 0);
}

BOOST_AUTO_TEST_CASE(CreateConcatDim1Uint8Workload)
{
    NeonCreateConcatWorkloadTest<NeonConcatWorkload, armnn::DataType::QAsymmU8>({ 2, 6, 2, 5 }, 1);
}

BOOST_AUTO_TEST_CASE(CreateConcatDim3Uint8Workload)
{
    NeonCreateConcatWorkloadTest<NeonConcatWorkload, armnn::DataType::QAsymmU8>({ 2, 3, 2, 10 }, 3);
}

template <armnn::DataType DataType>
static void NeonCreateStackWorkloadTest(const std::initializer_list<unsigned int>& inputShape,
                                        const std::initializer_list<unsigned int>& outputShape,
                                        unsigned int axis,
                                        unsigned int numInputs)
{
    armnn::Graph graph;
    NeonWorkloadFactory factory =
        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateStackWorkloadTest<NeonStackWorkload, DataType>(factory,
                                                                         graph,
                                                                         TensorShape(inputShape),
                                                                         TensorShape(outputShape),
                                                                         axis,
                                                                         numInputs);

    // Check inputs and output are as expected
    StackQueueDescriptor queueDescriptor = workload->GetData();
    for (unsigned int i = 0; i < numInputs; ++i)
    {
        auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[i]);
        BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
    }
    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
}

BOOST_AUTO_TEST_CASE(CreateStackFloat32Workload)
{
    NeonCreateStackWorkloadTest<armnn::DataType::Float32>({ 3, 4, 5 }, { 3, 4, 2, 5 }, 2, 2);
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
BOOST_AUTO_TEST_CASE(CreateStackFloat16Workload)
{
    NeonCreateStackWorkloadTest<armnn::DataType::Float16>({ 3, 4, 5 }, { 3, 4, 2, 5 }, 2, 2);
}
#endif

BOOST_AUTO_TEST_CASE(CreateStackUint8Workload)
{
    NeonCreateStackWorkloadTest<armnn::DataType::QAsymmU8>({ 3, 4, 5 }, { 3, 4, 2, 5 }, 2, 2);
}

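// Checks the shapes and quantized data types (QASYMM8 input/output state, QSYMM16 cell state)
// of the tensors touched by the quantized LSTM workload.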
template <typename QuantizedLstmWorkloadType>
static void NeonCreateQuantizedLstmWorkloadTest()
{
    Graph graph;
    NeonWorkloadFactory factory = NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateQuantizedLstmWorkloadTest<QuantizedLstmWorkloadType>(factory, graph);

    QuantizedLstmQueueDescriptor queueDescriptor = workload->GetData();

    IAclTensorHandle* inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
    BOOST_TEST((inputHandle->GetShape() == TensorShape({2, 2})));
    BOOST_TEST((inputHandle->GetDataType() == arm_compute::DataType::QASYMM8));

    IAclTensorHandle* cellStateInHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
    BOOST_TEST((cellStateInHandle->GetShape() == TensorShape({2, 4})));
    BOOST_TEST((cellStateInHandle->GetDataType() == arm_compute::DataType::QSYMM16));

    IAclTensorHandle* outputStateInHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[2]);
    BOOST_TEST((outputStateInHandle->GetShape() == TensorShape({2, 4})));
    BOOST_TEST((outputStateInHandle->GetDataType() == arm_compute::DataType::QASYMM8));

    IAclTensorHandle* cellStateOutHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
    BOOST_TEST((cellStateOutHandle->GetShape() == TensorShape({2, 4})));
    BOOST_TEST((cellStateOutHandle->GetDataType() == arm_compute::DataType::QSYMM16));

    IAclTensorHandle* outputStateOutHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
    BOOST_TEST((outputStateOutHandle->GetShape() == TensorShape({2, 4})));
    BOOST_TEST((outputStateOutHandle->GetDataType() == arm_compute::DataType::QASYMM8));
}

BOOST_AUTO_TEST_CASE(CreateQuantizedLstmWorkload)
{
    NeonCreateQuantizedLstmWorkloadTest<NeonQuantizedLstmWorkload>();
}

template <typename QLstmWorkloadType>
static void NeonCreateQLstmWorkloadTest()
{
    Graph graph;
    NeonWorkloadFactory factory = NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateQLstmWorkloadTest<QLstmWorkloadType>(factory, graph);
    QLstmQueueDescriptor queueDescriptor = workload->GetData();

    IAclTensorHandle* inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
    BOOST_TEST((inputHandle->GetShape() == TensorShape({2, 4})));
    BOOST_TEST((inputHandle->GetDataType() == arm_compute::DataType::QASYMM8_SIGNED));

    IAclTensorHandle* cellStateOutHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
    BOOST_TEST((cellStateOutHandle->GetShape() == TensorShape({2, 4})));
    BOOST_TEST((cellStateOutHandle->GetDataType() == arm_compute::DataType::QSYMM16));

    IAclTensorHandle* outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[2]);
    BOOST_TEST((outputHandle->GetShape() == TensorShape({2, 4})));
    BOOST_TEST((outputHandle->GetDataType() == arm_compute::DataType::QASYMM8_SIGNED));
}

BOOST_AUTO_TEST_CASE(CreateQLstmWorkloadTest)
{
    NeonCreateQLstmWorkloadTest<NeonQLstmWorkload>();
}

BOOST_AUTO_TEST_SUITE_END()