// NeonCreateWorkloadTests.cpp — ArmNN 20.08 Neon backend workload-creation unit tests.
//
// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
// NOTE(review): the include block was stripped by the extraction; restored from
// the ArmNN 20.08 layout — verify against repo history.
#include "NeonWorkloadFactoryHelper.hpp"

#include <aclCommon/test/CreateWorkloadClNeon.hpp>

#include <armnn/utility/PolymorphicDowncast.hpp>

#include <backendsCommon/MemCopyWorkload.hpp>

#include <neon/NeonTensorHandle.hpp>
#include <neon/NeonWorkloadFactory.hpp>
#include <neon/workloads/NeonWorkloadUtils.hpp>
#include <neon/workloads/NeonWorkloads.hpp>

#include <boost/test/unit_test.hpp>
19 BOOST_AUTO_TEST_SUITE(CreateWorkloadNeon)
20 
21 namespace
22 {
23 
24 boost::test_tools::predicate_result CompareIAclTensorHandleShape(IAclTensorHandle* tensorHandle,
25  std::initializer_list<unsigned int> expectedDimensions)
26 {
27  return CompareTensorHandleShape<IAclTensorHandle>(tensorHandle, expectedDimensions);
28 }
29 
30 bool TestNeonTensorHandleInfo(armnn::IAclTensorHandle* handle, const armnn::TensorInfo& expectedInfo)
31 {
32  using namespace armnn::armcomputetensorutils;
33 
34  const arm_compute::ITensorInfo* handleInfo = handle->GetTensor().info();
35  const arm_compute::TensorInfo expectedAclInfo = BuildArmComputeTensorInfo(expectedInfo);
36 
37  if (handleInfo->data_type() != expectedAclInfo.data_type())
38  {
39  return false;
40  }
41 
42  if (handleInfo->num_dimensions() != expectedAclInfo.num_dimensions())
43  {
44  return false;
45  }
46 
47  if (handleInfo->quantization_info() != expectedAclInfo.quantization_info())
48  {
49  return false;
50  }
51 
52  for (std::size_t d = 0; d < expectedAclInfo.num_dimensions(); ++d)
53  {
54  if (handleInfo->dimension(d) != expectedAclInfo.dimension(d))
55  {
56  return false;
57  }
58  }
59 
60  return true;
61 }
62 
63 } // namespace
64 
65 template <typename armnn::DataType DataType>
66 static void NeonCreateActivationWorkloadTest()
67 {
68  Graph graph;
69  NeonWorkloadFactory factory =
70  NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
71 
72  auto workload = CreateActivationWorkloadTest<NeonActivationWorkload, DataType>(factory, graph);
73 
74  // Checks that inputs/outputs are as we expect them (see definition of CreateActivationWorkloadTest).
75  ActivationQueueDescriptor queueDescriptor = workload->GetData();
76  auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
77  auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
78  BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({1, 1}, DataType)));
79  BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({1, 1}, DataType)));
80 }
81 
82 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
83 BOOST_AUTO_TEST_CASE(CreateActivationFloat16Workload)
84 {
85  NeonCreateActivationWorkloadTest<DataType::Float16>();
86 }
87 #endif
88 
89 BOOST_AUTO_TEST_CASE(CreateActivationFloatWorkload)
90 {
91  NeonCreateActivationWorkloadTest<DataType::Float32>();
92 }
93 
94 template <typename WorkloadType,
95  typename DescriptorType,
96  typename LayerType,
98 static void NeonCreateElementwiseWorkloadTest()
99 {
100  Graph graph;
101  NeonWorkloadFactory factory =
102  NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
103 
104  auto workload = CreateElementwiseWorkloadTest<WorkloadType, DescriptorType, LayerType, DataType>(factory, graph);
105 
106  DescriptorType queueDescriptor = workload->GetData();
107  auto inputHandle1 = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
108  auto inputHandle2 = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
109  auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
110  BOOST_TEST(TestNeonTensorHandleInfo(inputHandle1, TensorInfo({2, 3}, DataType)));
111  BOOST_TEST(TestNeonTensorHandleInfo(inputHandle2, TensorInfo({2, 3}, DataType)));
112  BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({2, 3}, DataType)));
113 }
114 
115 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
116 BOOST_AUTO_TEST_CASE(CreateAdditionFloat16Workload)
117 {
118  NeonCreateElementwiseWorkloadTest<NeonAdditionWorkload,
122 }
123 #endif
124 
125 BOOST_AUTO_TEST_CASE(CreateAdditionFloatWorkload)
126 {
127  NeonCreateElementwiseWorkloadTest<NeonAdditionWorkload,
131 }
132 
133 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
134 BOOST_AUTO_TEST_CASE(CreateSubtractionFloat16Workload)
135 {
136  NeonCreateElementwiseWorkloadTest<NeonSubtractionWorkload,
140 }
141 #endif
142 
143 BOOST_AUTO_TEST_CASE(CreateSubtractionFloatWorkload)
144 {
145  NeonCreateElementwiseWorkloadTest<NeonSubtractionWorkload,
149 }
150 
151 BOOST_AUTO_TEST_CASE(CreateSubtractionUint8Workload)
152 {
153  NeonCreateElementwiseWorkloadTest<NeonSubtractionWorkload,
157 }
158 
159 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
160 BOOST_AUTO_TEST_CASE(CreateMultiplicationFloat16Workload)
161 {
162  NeonCreateElementwiseWorkloadTest<NeonMultiplicationWorkload,
166 }
167 #endif
168 
169 BOOST_AUTO_TEST_CASE(CreateMultiplicationFloatWorkload)
170 {
171  NeonCreateElementwiseWorkloadTest<NeonMultiplicationWorkload,
175 }
176 
177 BOOST_AUTO_TEST_CASE(CreateMultiplicationUint8Workload)
178 {
179  NeonCreateElementwiseWorkloadTest<NeonMultiplicationWorkload,
183 }
184 
185 BOOST_AUTO_TEST_CASE(CreateDivisionFloatWorkloadTest)
186 {
187  NeonCreateElementwiseWorkloadTest<NeonDivisionWorkload,
191 }
192 
193 template <typename BatchNormalizationWorkloadType, typename armnn::DataType DataType>
194 static void NeonCreateBatchNormalizationWorkloadTest(DataLayout dataLayout)
195 {
196  Graph graph;
197  NeonWorkloadFactory factory =
198  NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
199 
200  auto workload = CreateBatchNormalizationWorkloadTest<BatchNormalizationWorkloadType, DataType>
201  (factory, graph, dataLayout);
202 
203  // Checks that outputs and inputs are as we expect them (see definition of CreateBatchNormalizationWorkloadTest).
204  BatchNormalizationQueueDescriptor queueDescriptor = workload->GetData();
205  auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
206  auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
207 
208  TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 3, 4, 4} : TensorShape{2, 4, 4, 3};
209  TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 3, 4, 4} : TensorShape{2, 4, 4, 3};
210 
211  BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
212  BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
213 }
214 
215 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
216 BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloat16NchwWorkload)
217 {
218  NeonCreateBatchNormalizationWorkloadTest<NeonBatchNormalizationWorkload, DataType::Float16>(DataLayout::NCHW);
219 }
220 
221 BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloat16NhwcWorkload)
222 {
223  NeonCreateBatchNormalizationWorkloadTest<NeonBatchNormalizationWorkload, DataType::Float16>(DataLayout::NHWC);
224 }
225 #endif
226 
227 BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloatNchwWorkload)
228 {
229  NeonCreateBatchNormalizationWorkloadTest<NeonBatchNormalizationWorkload, DataType::Float32>(DataLayout::NCHW);
230 }
231 
232 BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloatNhwcWorkload)
233 {
234  NeonCreateBatchNormalizationWorkloadTest<NeonBatchNormalizationWorkload, DataType::Float32>(DataLayout::NHWC);
235 }
236 
237 template <typename armnn::DataType DataType>
238 static void NeonCreateConvolution2dWorkloadTest(DataLayout dataLayout = DataLayout::NCHW)
239 {
240  Graph graph;
241  NeonWorkloadFactory factory =
242  NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
243 
244  auto workload = CreateConvolution2dWorkloadTest<NeonConvolution2dWorkload, DataType>(factory, graph, dataLayout);
245 
246  TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 3, 8, 16} : TensorShape{2, 8, 16, 3};
247  TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 2, 2, 10} : TensorShape{2, 2, 10, 2};
248 
249  // Checks that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest).
250  Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
251  auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
252  auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
253  BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
254  BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
255 }
256 
257 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
258 BOOST_AUTO_TEST_CASE(CreateConvolution2dFloat16NchwWorkload)
259 {
260  NeonCreateConvolution2dWorkloadTest<DataType::Float16>();
261 }
262 
263 BOOST_AUTO_TEST_CASE(CreateConvolution2dFloat16NhwcWorkload)
264 {
265  NeonCreateConvolution2dWorkloadTest<DataType::Float16>(DataLayout::NHWC);
266 }
267 
268 #endif
269 BOOST_AUTO_TEST_CASE(CreateConvolution2dFloatNchwWorkload)
270 {
271  NeonCreateConvolution2dWorkloadTest<DataType::Float32>();
272 }
273 
274 BOOST_AUTO_TEST_CASE(CreateConvolution2dFloatNhwcWorkload)
275 {
276  NeonCreateConvolution2dWorkloadTest<DataType::Float32>(DataLayout::NHWC);
277 }
278 
279 template <typename armnn::DataType DataType>
280 static void NeonCreateDepthWiseConvolutionWorkloadTest(DataLayout dataLayout)
281 {
282  Graph graph;
283  NeonWorkloadFactory factory =
284  NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
285 
286  auto workload = CreateDepthwiseConvolution2dWorkloadTest<NeonDepthwiseConvolutionWorkload,
287  DataType>(factory, graph, dataLayout);
288 
289  // Checks that inputs/outputs are as we expect them (see definition of CreateNormalizationWorkloadTest).
290  DepthwiseConvolution2dQueueDescriptor queueDescriptor = workload->GetData();
291  auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
292  auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
293 
294  TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 2, 2, 5, 5 })
295  : std::initializer_list<unsigned int>({ 2, 5, 5, 2 });
296  TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 2, 2, 5, 5 })
297  : std::initializer_list<unsigned int>({ 2, 5, 5, 2 });
298 
299  BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
300  BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
301 }
302 
303 BOOST_AUTO_TEST_CASE(CreateDepthWiseConvolution2dFloat32NhwcWorkload)
304 {
305  NeonCreateDepthWiseConvolutionWorkloadTest<DataType::Float32>(DataLayout::NHWC);
306 }
307 
308 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
309 BOOST_AUTO_TEST_CASE(CreateDepthWiseConvolution2dFloat16NhwcWorkload)
310 {
311  NeonCreateDepthWiseConvolutionWorkloadTest<DataType::Float16>(DataLayout::NHWC);
312 }
313 #endif
314 
315 template <typename FullyConnectedWorkloadType, typename armnn::DataType DataType>
316 static void NeonCreateFullyConnectedWorkloadTest()
317 {
318  Graph graph;
319  NeonWorkloadFactory factory =
320  NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
321 
322  auto workload = CreateFullyConnectedWorkloadTest<FullyConnectedWorkloadType, DataType>(factory, graph);
323 
324  // Checks that outputs and inputs are as we expect them (see definition of CreateFullyConnectedWorkloadTest).
325  FullyConnectedQueueDescriptor queueDescriptor = workload->GetData();
326  auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
327  auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
328 
329  // Checks that outputs and inputs are as we expect them (see definition of CreateFullyConnectedWorkloadTest).
330  float inputsQScale = DataType == armnn::DataType::QAsymmU8 ? 1.0f : 0.0;
331  float outputQScale = DataType == armnn::DataType::QAsymmU8 ? 2.0f : 0.0;
332  BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({3, 1, 4, 5}, DataType, inputsQScale)));
333  BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({3, 7}, DataType, outputQScale)));
334 }
335 
336 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
337 BOOST_AUTO_TEST_CASE(CreateFullyConnectedFloat16Workload)
338 {
339  NeonCreateFullyConnectedWorkloadTest<NeonFullyConnectedWorkload, DataType::Float16>();
340 }
341 #endif
342 
343 BOOST_AUTO_TEST_CASE(CreateFullyConnectedFloatWorkload)
344 {
345  NeonCreateFullyConnectedWorkloadTest<NeonFullyConnectedWorkload, DataType::Float32>();
346 }
347 
348 BOOST_AUTO_TEST_CASE(CreateFullyConnectedQAsymmU8Workload)
349 {
350  NeonCreateFullyConnectedWorkloadTest<NeonFullyConnectedWorkload, DataType::QAsymmU8>();
351 }
352 
353 BOOST_AUTO_TEST_CASE(CreateFullyConnectedQAsymmS8Workload)
354 {
355  NeonCreateFullyConnectedWorkloadTest<NeonFullyConnectedWorkload, DataType::QAsymmS8>();
356 }
357 
358 template <typename NormalizationWorkloadType, typename armnn::DataType DataType>
359 static void NeonCreateNormalizationWorkloadTest(DataLayout dataLayout)
360 {
361  Graph graph;
362  NeonWorkloadFactory factory =
363  NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
364 
365  auto workload = CreateNormalizationWorkloadTest<NormalizationWorkloadType, DataType>(factory, graph, dataLayout);
366 
367  // Checks that outputs and inputs are as we expect them (see definition of CreateNormalizationWorkloadTest).
368  NormalizationQueueDescriptor queueDescriptor = workload->GetData();
369  auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
370  auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
371 
372  TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{3, 5, 5, 1} : TensorShape{3, 1, 5, 5};
373  TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{3, 5, 5, 1} : TensorShape{3, 1, 5, 5};
374 
375  BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
376  BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
377 }
378 
379 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
380 BOOST_AUTO_TEST_CASE(CreateNormalizationFloat16NchwWorkload)
381 {
382  NeonCreateNormalizationWorkloadTest<NeonNormalizationFloatWorkload, DataType::Float16>(DataLayout::NCHW);
383 }
384 
385 BOOST_AUTO_TEST_CASE(CreateNormalizationFloat16NhwcWorkload)
386 {
387  NeonCreateNormalizationWorkloadTest<NeonNormalizationFloatWorkload, DataType::Float16>(DataLayout::NHWC);
388 }
389 #endif
390 
391 BOOST_AUTO_TEST_CASE(CreateNormalizationFloatNchwWorkload)
392 {
393  NeonCreateNormalizationWorkloadTest<NeonNormalizationFloatWorkload, DataType::Float32>(DataLayout::NCHW);
394 }
395 
396 BOOST_AUTO_TEST_CASE(CreateNormalizationFloatNhwcWorkload)
397 {
398  NeonCreateNormalizationWorkloadTest<NeonNormalizationFloatWorkload, DataType::Float32>(DataLayout::NHWC);
399 }
400 
401 
402 template <typename armnn::DataType DataType>
403 static void NeonCreatePooling2dWorkloadTest(DataLayout dataLayout = DataLayout::NCHW)
404 {
405  Graph graph;
406  NeonWorkloadFactory factory =
407  NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
408 
409  auto workload = CreatePooling2dWorkloadTest<NeonPooling2dWorkload, DataType>(factory, graph, dataLayout);
410 
411  TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{3, 2, 5, 5} : TensorShape{3, 5, 5, 2};
412  TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{3, 2, 2, 4} : TensorShape{3, 2, 4, 2};
413 
414  // Checks that outputs and inputs are as we expect them (see definition of CreatePooling2dWorkloadTest).
415  Pooling2dQueueDescriptor queueDescriptor = workload->GetData();
416  auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
417  auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
418  BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
419  BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
420 }
421 
422 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
423 BOOST_AUTO_TEST_CASE(CreatePooling2dFloat16Workload)
424 {
425  NeonCreatePooling2dWorkloadTest<DataType::Float16>();
426 }
427 #endif
428 
429 BOOST_AUTO_TEST_CASE(CreatePooling2dFloatNchwWorkload)
430 {
431  NeonCreatePooling2dWorkloadTest<DataType::Float32>(DataLayout::NCHW);
432 }
433 
434 BOOST_AUTO_TEST_CASE(CreatePooling2dFloatNhwcWorkload)
435 {
436  NeonCreatePooling2dWorkloadTest<DataType::Float32>(DataLayout::NHWC);
437 }
438 
439 BOOST_AUTO_TEST_CASE(CreatePooling2dUint8NchwWorkload)
440 {
441  NeonCreatePooling2dWorkloadTest<DataType::QAsymmU8>(DataLayout::NCHW);
442 }
443 
444 BOOST_AUTO_TEST_CASE(CreatePooling2dUint8NhwcWorkload)
445 {
446  NeonCreatePooling2dWorkloadTest<DataType::QAsymmU8>(DataLayout::NHWC);
447 }
448 
449 static void NeonCreatePreluWorkloadTest(const armnn::TensorShape& inputShape,
450  const armnn::TensorShape& alphaShape,
451  const armnn::TensorShape& outputShape,
452  armnn::DataType dataType)
453 {
454  Graph graph;
455  NeonWorkloadFactory factory =
456  NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
457 
458  auto workload = CreatePreluWorkloadTest<NeonPreluWorkload>(factory,
459  graph,
460  inputShape,
461  alphaShape,
462  outputShape,
463  dataType);
464 
465  // Checks that outputs and inputs are as we expect them (see definition of CreateReshapeWorkloadTest).
466  PreluQueueDescriptor queueDescriptor = workload->GetData();
467  auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
468  auto alphaHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
469  auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
470  BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, dataType)));
471  BOOST_TEST(TestNeonTensorHandleInfo(alphaHandle, TensorInfo(alphaShape, dataType)));
472  BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, dataType)));
473 }
474 
475 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
476  BOOST_AUTO_TEST_CASE(CreatePreluFloat16Workload)
477 {
478  NeonCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::Float16);
479 }
480 #endif
481 
482 BOOST_AUTO_TEST_CASE(CreatePreluFloatWorkload)
483 {
484  NeonCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::Float32);
485 }
486 
487 BOOST_AUTO_TEST_CASE(CreatePreluUint8Workload)
488 {
489  NeonCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::QAsymmU8);
490 }
491 
492 template <typename armnn::DataType DataType>
493 static void NeonCreateReshapeWorkloadTest()
494 {
495  Graph graph;
496  NeonWorkloadFactory factory =
497  NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
498 
499  auto workload = CreateReshapeWorkloadTest<NeonReshapeWorkload, DataType>(factory, graph);
500 
501  // Checks that outputs and inputs are as we expect them (see definition of CreateReshapeWorkloadTest).
502  ReshapeQueueDescriptor queueDescriptor = workload->GetData();
503  auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
504  auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
505  BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({4, 1}, DataType)));
506  BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({1, 4}, DataType)));
507 }
508 
509 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
510 BOOST_AUTO_TEST_CASE(CreateReshapeFloat16Workload)
511 {
512  NeonCreateReshapeWorkloadTest<DataType::Float16>();
513 }
514 #endif
515 
516 BOOST_AUTO_TEST_CASE(CreateReshapeFloatWorkload)
517 {
518  NeonCreateReshapeWorkloadTest<DataType::Float32>();
519 }
520 
521 BOOST_AUTO_TEST_CASE(CreateReshapeUint8Workload)
522 {
523  NeonCreateReshapeWorkloadTest<DataType::QAsymmU8>();
524 }
525 
526 template <typename ResizeWorkloadType, armnn::DataType DataType>
527 static void NeonCreateResizeWorkloadTest(DataLayout dataLayout)
528 {
529  Graph graph;
530  NeonWorkloadFactory factory =
531  NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
532  auto workload = CreateResizeBilinearWorkloadTest<ResizeWorkloadType, DataType>(factory, graph, dataLayout);
533 
534  auto queueDescriptor = workload->GetData();
535 
536  auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
537  auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
538 
539  switch (dataLayout)
540  {
541  case DataLayout::NHWC:
542  BOOST_TEST(CompareIAclTensorHandleShape(inputHandle, { 2, 4, 4, 3 }));
543  BOOST_TEST(CompareIAclTensorHandleShape(outputHandle, { 2, 2, 2, 3 }));
544  break;
545  case DataLayout::NCHW:
546  default:
547  BOOST_TEST(CompareIAclTensorHandleShape(inputHandle, { 2, 3, 4, 4 }));
548  BOOST_TEST(CompareIAclTensorHandleShape(outputHandle, { 2, 3, 2, 2 }));
549  }
550 }
551 
552 BOOST_AUTO_TEST_CASE(CreateResizeFloat32NchwWorkload)
553 {
554  NeonCreateResizeWorkloadTest<NeonResizeWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
555 }
556 
557 BOOST_AUTO_TEST_CASE(CreateResizeUint8NchwWorkload)
558 {
559  NeonCreateResizeWorkloadTest<NeonResizeWorkload, armnn::DataType::QAsymmU8>(DataLayout::NCHW);
560 }
561 
562 BOOST_AUTO_TEST_CASE(CreateResizeFloat32NhwcWorkload)
563 {
564  NeonCreateResizeWorkloadTest<NeonResizeWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
565 }
566 
567 BOOST_AUTO_TEST_CASE(CreateResizeUint8NhwcWorkload)
568 {
569  NeonCreateResizeWorkloadTest<NeonResizeWorkload, armnn::DataType::QAsymmU8>(DataLayout::NHWC);
570 }
571 
572 template <typename SoftmaxWorkloadType, typename armnn::DataType DataType>
573 static void NeonCreateSoftmaxWorkloadTest()
574 {
575  Graph graph;
576  NeonWorkloadFactory factory =
577  NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
578 
579  auto workload = CreateSoftmaxWorkloadTest<SoftmaxWorkloadType, DataType>(factory, graph);
580 
581  // Checks that outputs and inputs are as we expect them (see definition of CreateSoftmaxWorkloadTest).
582  SoftmaxQueueDescriptor queueDescriptor = workload->GetData();
583  auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
584  auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
585  armnn::TensorInfo tensorInfo({4, 1}, DataType);
587  {
588  tensorInfo.SetQuantizationOffset(0);
589  tensorInfo.SetQuantizationScale(1.f / 256);
590  }
592  {
593  tensorInfo.SetQuantizationOffset(-128);
594  tensorInfo.SetQuantizationScale(1.f / 256);
595  }
596  BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, tensorInfo));
597  BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, tensorInfo));
598 }
599 
600 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
601 BOOST_AUTO_TEST_CASE(CreateSoftmaxFloat16Workload)
602 {
603  NeonCreateSoftmaxWorkloadTest<NeonSoftmaxWorkload, DataType::Float16>();
604 }
605 #endif
606 
607 BOOST_AUTO_TEST_CASE(CreateSoftmaxFloatWorkload)
608 {
609  NeonCreateSoftmaxWorkloadTest<NeonSoftmaxWorkload, DataType::Float32>();
610 }
611 
612 BOOST_AUTO_TEST_CASE(CreateSoftmaxQAsymmU8Workload)
613 {
614  NeonCreateSoftmaxWorkloadTest<NeonSoftmaxWorkload, DataType::QAsymmU8>();
615 }
616 
617 BOOST_AUTO_TEST_CASE(CreateSoftmaxQAsymmS8Workload)
618 {
619  NeonCreateSoftmaxWorkloadTest<NeonSoftmaxWorkload, DataType::QAsymmS8>();
620 }
621 
622 template <typename SpaceToDepthWorkloadType, typename armnn::DataType DataType>
623 static void NeonSpaceToDepthWorkloadTest()
624 {
625  Graph graph;
626  NeonWorkloadFactory factory =
627  NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
628 
629  auto workload = CreateSpaceToDepthWorkloadTest<SpaceToDepthWorkloadType, DataType>(factory, graph);
630 
631  SpaceToDepthQueueDescriptor queueDescriptor = workload->GetData();
632  auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
633  auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
634 
635  BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({ 1, 2, 2, 1 }, DataType)));
636  BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({ 1, 1, 1, 4 }, DataType)));
637 }
638 
639 BOOST_AUTO_TEST_CASE(CreateSpaceToDepthFloat32Workload)
640 {
641  NeonSpaceToDepthWorkloadTest<NeonSpaceToDepthWorkload, armnn::DataType::Float32>();
642 }
643 
644 BOOST_AUTO_TEST_CASE(CreateSpaceToDepthFloat16Workload)
645 {
646  NeonSpaceToDepthWorkloadTest<NeonSpaceToDepthWorkload, armnn::DataType::Float16>();
647 }
648 
649 BOOST_AUTO_TEST_CASE(CreateSpaceToDepthQAsymm8Workload)
650 {
651  NeonSpaceToDepthWorkloadTest<NeonSpaceToDepthWorkload, armnn::DataType::QAsymmU8>();
652 }
653 
654 BOOST_AUTO_TEST_CASE(CreateSpaceToDepthQSymm16Workload)
655 {
656  NeonSpaceToDepthWorkloadTest<NeonSpaceToDepthWorkload, armnn::DataType::QSymmS16>();
657 }
658 
659 BOOST_AUTO_TEST_CASE(CreateSplitterWorkload)
660 {
661  Graph graph;
662  NeonWorkloadFactory factory =
663  NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
664 
665  auto workload = CreateSplitterWorkloadTest<NeonSplitterWorkload, DataType::Float32>(factory, graph);
666 
667  // Checks that outputs are as we expect them (see definition of CreateSplitterWorkloadTest).
668  SplitterQueueDescriptor queueDescriptor = workload->GetData();
669  auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
670  BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({5, 7, 7}, DataType::Float32)));
671 
672  auto outputHandle0 = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
673  BOOST_TEST(TestNeonTensorHandleInfo(outputHandle0, TensorInfo({1, 7, 7}, DataType::Float32)));
674 
675  auto outputHandle1 = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
676  BOOST_TEST(TestNeonTensorHandleInfo(outputHandle1, TensorInfo({2, 7, 7}, DataType::Float32)));
677 
678  auto outputHandle2 = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[2]);
679  BOOST_TEST(TestNeonTensorHandleInfo(outputHandle2, TensorInfo({2, 7, 7}, DataType::Float32)));
680 }
681 
682 BOOST_AUTO_TEST_CASE(CreateSplitterConcat)
683 {
684  // Tests that it is possible to decide which output of the splitter layer
685  // should be lined to which input of the concat layer.
686  // We tested that is is possible to specify 0th output
687  // of the splitter to be the 1st input to the concat, and the 1st output of the splitter to be 0th input
688  // of the concat.
689 
690  Graph graph;
691  NeonWorkloadFactory factory =
692  NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
693 
694  auto workloads =
695  CreateSplitterConcatWorkloadTest<NeonSplitterWorkload, NeonConcatWorkload,
696  DataType::Float32>(factory, graph);
697 
698  auto wlSplitter = std::move(workloads.first);
699  auto wlConcat = std::move(workloads.second);
700 
701  //Checks that the index of inputs/outputs matches what we declared on InputDescriptor construction.
702  armnn::IAclTensorHandle* sOut0 = dynamic_cast<armnn::IAclTensorHandle*>(wlSplitter->GetData().m_Outputs[0]);
703  armnn::IAclTensorHandle* sOut1 = dynamic_cast<armnn::IAclTensorHandle*>(wlSplitter->GetData().m_Outputs[1]);
704  armnn::IAclTensorHandle* mIn0 = dynamic_cast<armnn::IAclTensorHandle*>(wlConcat->GetData().m_Inputs[0]);
705  armnn::IAclTensorHandle* mIn1 = dynamic_cast<armnn::IAclTensorHandle*>(wlConcat->GetData().m_Inputs[1]);
706 
707  BOOST_TEST(sOut0);
708  BOOST_TEST(sOut1);
709  BOOST_TEST(mIn0);
710  BOOST_TEST(mIn1);
711 
712  bool validDataPointers = (sOut0 == mIn1) && (sOut1 == mIn0);
713 
714  BOOST_TEST(validDataPointers);
715 }
716 
717 BOOST_AUTO_TEST_CASE(CreateSingleOutputMultipleInputs)
718 {
719  // Tests that it is possible to assign multiple (two) different layers to each of the outputs of a splitter layer.
720  // We created a splitter with two outputs. That each of those outputs is used by two different activation layers
721 
722  Graph graph;
723  NeonWorkloadFactory factory =
724  NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
725 
726  std::unique_ptr<NeonSplitterWorkload> wlSplitter;
727  std::unique_ptr<NeonActivationWorkload> wlActiv0_0;
728  std::unique_ptr<NeonActivationWorkload> wlActiv0_1;
729  std::unique_ptr<NeonActivationWorkload> wlActiv1_0;
730  std::unique_ptr<NeonActivationWorkload> wlActiv1_1;
731 
732  CreateSplitterMultipleInputsOneOutputWorkloadTest<NeonSplitterWorkload,
733  NeonActivationWorkload, DataType::Float32>(factory, graph, wlSplitter, wlActiv0_0, wlActiv0_1,
734  wlActiv1_0, wlActiv1_1);
735 
736  armnn::IAclTensorHandle* sOut0 = dynamic_cast<armnn::IAclTensorHandle*>(wlSplitter->GetData().m_Outputs[0]);
737  armnn::IAclTensorHandle* sOut1 = dynamic_cast<armnn::IAclTensorHandle*>(wlSplitter->GetData().m_Outputs[1]);
738  armnn::IAclTensorHandle* activ0_0Im = dynamic_cast<armnn::IAclTensorHandle*>(wlActiv0_0->GetData().m_Inputs[0]);
739  armnn::IAclTensorHandle* activ0_1Im = dynamic_cast<armnn::IAclTensorHandle*>(wlActiv0_1->GetData().m_Inputs[0]);
740  armnn::IAclTensorHandle* activ1_0Im = dynamic_cast<armnn::IAclTensorHandle*>(wlActiv1_0->GetData().m_Inputs[0]);
741  armnn::IAclTensorHandle* activ1_1Im = dynamic_cast<armnn::IAclTensorHandle*>(wlActiv1_1->GetData().m_Inputs[0]);
742 
743 
744  BOOST_TEST(sOut0);
745  BOOST_TEST(sOut1);
746  BOOST_TEST(activ0_0Im);
747  BOOST_TEST(activ0_1Im);
748  BOOST_TEST(activ1_0Im);
749  BOOST_TEST(activ1_1Im);
750 
751  bool validDataPointers = (sOut0 == activ0_0Im) && (sOut0 == activ0_1Im) &&
752  (sOut1 == activ1_0Im) && (sOut1 == activ1_1Im);
753 
754  BOOST_TEST(validDataPointers);
755 }
756 
#if defined(ARMNNREF_ENABLED)

// This test unit needs the reference backend, it's not available if the reference backend is not built.

BOOST_AUTO_TEST_CASE(CreateMemCopyWorkloadsNeon)
{
    NeonWorkloadFactory factory =
        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
    CreateMemCopyWorkloads<IAclTensorHandle>(factory);
}

#endif
769 
770 template <typename L2NormalizationWorkloadType, typename armnn::DataType DataType>
771 static void NeonCreateL2NormalizationWorkloadTest(DataLayout dataLayout)
772 {
773  Graph graph;
774  NeonWorkloadFactory factory =
775  NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
776 
777  auto workload =
778  CreateL2NormalizationWorkloadTest<L2NormalizationWorkloadType, DataType>(factory, graph, dataLayout);
779 
780  // Checks that inputs/outputs are as we expect them (see definition of CreateNormalizationWorkloadTest).
781  L2NormalizationQueueDescriptor queueDescriptor = workload->GetData();
782  auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
783  auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
784 
785  TensorShape inputShape = (dataLayout == DataLayout::NCHW) ?
786  TensorShape{ 5, 20, 50, 67 } : TensorShape{ 5, 50, 67, 20 };
787  TensorShape outputShape = (dataLayout == DataLayout::NCHW) ?
788  TensorShape{ 5, 20, 50, 67 } : TensorShape{ 5, 50, 67, 20 };
789 
790  BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
791  BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
792 }
793 
794 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
795 BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloat16NchwWorkload)
796 {
797  NeonCreateL2NormalizationWorkloadTest<NeonL2NormalizationFloatWorkload, DataType::Float16>(DataLayout::NCHW);
798 }
799 
800 BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloat16NhwcWorkload)
801 {
802  NeonCreateL2NormalizationWorkloadTest<NeonL2NormalizationFloatWorkload, DataType::Float16>(DataLayout::NHWC);
803 }
804 #endif
805 
806 BOOST_AUTO_TEST_CASE(CreateL2NormalizationNchwWorkload)
807 {
808  NeonCreateL2NormalizationWorkloadTest<NeonL2NormalizationFloatWorkload, DataType::Float32>(DataLayout::NCHW);
809 }
810 
811 BOOST_AUTO_TEST_CASE(CreateL2NormalizationNhwcWorkload)
812 {
813  NeonCreateL2NormalizationWorkloadTest<NeonL2NormalizationFloatWorkload, DataType::Float32>(DataLayout::NHWC);
814 }
815 
816 template <typename LogSoftmaxWorkloadType, typename armnn::DataType DataType>
817 static void NeonCreateLogSoftmaxWorkloadTest()
818 {
819  Graph graph;
820  NeonWorkloadFactory factory =
821  NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
822 
823  auto workload = CreateLogSoftmaxWorkloadTest<LogSoftmaxWorkloadType, DataType>(factory, graph);
824 
825  // Checks that outputs and inputs are as we expect them (see definition of CreateLogSoftmaxWorkloadTest).
826  LogSoftmaxQueueDescriptor queueDescriptor = workload->GetData();
827  auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
828  auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
829  armnn::TensorInfo tensorInfo({4, 1}, DataType);
830 
831  BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, tensorInfo));
832  BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, tensorInfo));
833 }
834 
835 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
836 BOOST_AUTO_TEST_CASE(CreateLogSoftmaxFloat16Workload)
837 {
838  NeonCreateLogSoftmaxWorkloadTest<NeonLogSoftmaxWorkload, DataType::Float16>();
839 }
840 #endif
841 
842 BOOST_AUTO_TEST_CASE(CreateLogSoftmaxFloatWorkload)
843 {
844  NeonCreateLogSoftmaxWorkloadTest<NeonLogSoftmaxWorkload, DataType::Float32>();
845 }
846 
847 template <typename LstmWorkloadType>
848 static void NeonCreateLstmWorkloadTest()
849 {
850  Graph graph;
851  NeonWorkloadFactory factory =
852  NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
853 
854  auto workload = CreateLstmWorkloadTest<LstmWorkloadType>(factory, graph);
855 
856  LstmQueueDescriptor queueDescriptor = workload->GetData();
857 
858  auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
859  auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
860 
861  BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({ 2, 2 }, DataType::Float32)));
862  BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({ 2, 4 }, DataType::Float32)));
863 }
864 
// Float32 LSTM: verifies the Neon LSTM workload exposes correctly-shaped handles.
BOOST_AUTO_TEST_CASE(CreateLSTMWorkloadFloatWorkload)
{
    NeonCreateLstmWorkloadTest<NeonLstmFloatWorkload>();
}
869 
870 template <typename ConcatWorkloadType, armnn::DataType DataType>
871 static void NeonCreateConcatWorkloadTest(std::initializer_list<unsigned int> outputShape,
872  unsigned int concatAxis)
873 {
874  Graph graph;
875  NeonWorkloadFactory factory =
876  NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
877 
878  auto workload = CreateConcatWorkloadTest<ConcatWorkloadType, DataType>(factory, graph, outputShape, concatAxis);
879 
880  ConcatQueueDescriptor queueDescriptor = workload->GetData();
881  auto inputHandle0 = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
882  auto inputHandle1 = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
883  auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
884 
885  BOOST_TEST(TestNeonTensorHandleInfo(inputHandle0, TensorInfo({ 2, 3, 2, 5 }, DataType)));
886  BOOST_TEST(TestNeonTensorHandleInfo(inputHandle1, TensorInfo({ 2, 3, 2, 5 }, DataType)));
887  BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
888 }
889 
890 BOOST_AUTO_TEST_CASE(CreateConcatDim0Float32Workload)
891 {
892  NeonCreateConcatWorkloadTest<NeonConcatWorkload, armnn::DataType::Float32>({ 4, 3, 2, 5 }, 0);
893 }
894 
895 BOOST_AUTO_TEST_CASE(CreateConcatDim1Float32Workload)
896 {
897  NeonCreateConcatWorkloadTest<NeonConcatWorkload, armnn::DataType::Float32>({ 2, 6, 2, 5 }, 1);
898 }
899 
900 BOOST_AUTO_TEST_CASE(CreateConcatDim3Float32Workload)
901 {
902  NeonCreateConcatWorkloadTest<NeonConcatWorkload, armnn::DataType::Float32>({ 2, 3, 2, 10 }, 3);
903 }
904 
905 BOOST_AUTO_TEST_CASE(CreateConcatDim0Uint8Workload)
906 {
907  NeonCreateConcatWorkloadTest<NeonConcatWorkload, armnn::DataType::QAsymmU8>({ 4, 3, 2, 5 }, 0);
908 }
909 
910 BOOST_AUTO_TEST_CASE(CreateConcatDim1Uint8Workload)
911 {
912  NeonCreateConcatWorkloadTest<NeonConcatWorkload, armnn::DataType::QAsymmU8>({ 2, 6, 2, 5 }, 1);
913 }
914 
915 BOOST_AUTO_TEST_CASE(CreateConcatDim3Uint8Workload)
916 {
917  NeonCreateConcatWorkloadTest<NeonConcatWorkload, armnn::DataType::QAsymmU8>({ 2, 3, 2, 10 }, 3);
918 }
919 
920 template <armnn::DataType DataType>
921 static void NeonCreateStackWorkloadTest(const std::initializer_list<unsigned int>& inputShape,
922  const std::initializer_list<unsigned int>& outputShape,
923  unsigned int axis,
924  unsigned int numInputs)
925 {
926  armnn::Graph graph;
927  NeonWorkloadFactory factory =
928  NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
929 
930  auto workload = CreateStackWorkloadTest<NeonStackWorkload, DataType>(factory,
931  graph,
932  TensorShape(inputShape),
933  TensorShape(outputShape),
934  axis,
935  numInputs);
936 
937  // Check inputs and output are as expected
938  StackQueueDescriptor queueDescriptor = workload->GetData();
939  for (unsigned int i = 0; i < numInputs; ++i)
940  {
941  auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[i]);
942  BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
943  }
944  auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
945  BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
946 }
947 
948 BOOST_AUTO_TEST_CASE(CreateStackFloat32Workload)
949 {
950  NeonCreateStackWorkloadTest<armnn::DataType::Float32>({ 3, 4, 5 }, { 3, 4, 2, 5 }, 2, 2);
951 }
952 
953 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
954 BOOST_AUTO_TEST_CASE(CreateStackFloat16Workload)
955 {
956  NeonCreateStackWorkloadTest<armnn::DataType::Float16>({ 3, 4, 5 }, { 3, 4, 2, 5 }, 2, 2);
957 }
958 #endif
959 
960 BOOST_AUTO_TEST_CASE(CreateStackUint8Workload)
961 {
962  NeonCreateStackWorkloadTest<armnn::DataType::QAsymmU8>({ 3, 4, 5 }, { 3, 4, 2, 5 }, 2, 2);
963 }
964 
965 template <typename QuantizedLstmWorkloadType>
966 static void NeonCreateQuantizedLstmWorkloadTest()
967 {
968  Graph graph;
969  NeonWorkloadFactory factory = NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
970 
971  auto workload = CreateQuantizedLstmWorkloadTest<QuantizedLstmWorkloadType>(factory, graph);
972 
973  QuantizedLstmQueueDescriptor queueDescriptor = workload->GetData();
974 
975  IAclTensorHandle* inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
976  BOOST_TEST((inputHandle->GetShape() == TensorShape({2, 2})));
977  BOOST_TEST((inputHandle->GetDataType() == arm_compute::DataType::QASYMM8));
978 
979  IAclTensorHandle* cellStateInHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
980  BOOST_TEST((cellStateInHandle->GetShape() == TensorShape({2, 4})));
981  BOOST_TEST((cellStateInHandle->GetDataType() == arm_compute::DataType::QSYMM16));
982 
983  IAclTensorHandle* outputStateInHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[2]);
984  BOOST_TEST((outputStateInHandle->GetShape() == TensorShape({2, 4})));
985  BOOST_TEST((outputStateInHandle->GetDataType() == arm_compute::DataType::QASYMM8));
986 
987  IAclTensorHandle* cellStateOutHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
988  BOOST_TEST((cellStateOutHandle->GetShape() == TensorShape({2, 4})));
989  BOOST_TEST((cellStateOutHandle->GetDataType() == arm_compute::DataType::QSYMM16));
990 
991  IAclTensorHandle* outputStateOutHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
992  BOOST_TEST((outputStateOutHandle->GetShape() == TensorShape({2, 4})));
993  BOOST_TEST((outputStateOutHandle->GetDataType() == arm_compute::DataType::QASYMM8));
994 }
995 
// Quantized LSTM: verifies handle shapes and ACL quantized data types.
BOOST_AUTO_TEST_CASE(CreateQuantizedLstmWorkload)
{
    NeonCreateQuantizedLstmWorkloadTest<NeonQuantizedLstmWorkload>();
}
1000 
1001 template <typename QLstmWorkloadType>
1002 static void NeonCreateQLstmWorkloadTest()
1003 {
1004  Graph graph;
1005  NeonWorkloadFactory factory = NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
1006 
1007  auto workload = CreateQLstmWorkloadTest<QLstmWorkloadType>(factory, graph);
1008  QLstmQueueDescriptor queueDescriptor = workload->GetData();
1009 
1010  IAclTensorHandle* inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
1011  BOOST_TEST((inputHandle->GetShape() == TensorShape({2, 4})));
1012  BOOST_TEST((inputHandle->GetDataType() == arm_compute::DataType::QASYMM8_SIGNED));
1013 
1014  IAclTensorHandle* cellStateOutHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
1015  BOOST_TEST((cellStateOutHandle->GetShape() == TensorShape({2, 4})));
1016  BOOST_TEST((cellStateOutHandle->GetDataType() == arm_compute::DataType::QSYMM16));
1017 
1018  IAclTensorHandle* outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[2]);
1019  BOOST_TEST((outputHandle->GetShape() == TensorShape({2, 4})));
1020  BOOST_TEST((outputHandle->GetDataType() == arm_compute::DataType::QASYMM8_SIGNED));
1021 }
1022 
// QLSTM: verifies handle shapes and ACL quantized data types.
BOOST_AUTO_TEST_CASE(CreateQLstmWorkloadTest)
{
    NeonCreateQLstmWorkloadTest<NeonQLstmWorkload>();
}
1027 
BOOST_AUTO_TEST_SUITE(TensorflowLiteParser)
virtual arm_compute::ITensor & GetTensor()=0
DataLayout
Definition: Types.hpp:49
virtual arm_compute::DataType GetDataType() const =0
DataType
Definition: Types.hpp:32
BOOST_AUTO_TEST_CASE(CheckConvolution2dLayer)
virtual TensorShape GetShape() const =0
Get the number of elements for each dimension ordered from slowest iterating dimension to fastest ite...
This layer represents an addition operation.
BOOST_AUTO_TEST_SUITE_END()
This layer represents a subtraction operation.
std::vector< ITensorHandle * > m_Outputs
This layer represents a division operation.
void SetQuantizationOffset(int32_t offset)
Definition: Tensor.cpp:481
std::vector< ITensorHandle * > m_Inputs
This layer represents a multiplication operation.