ArmNN
 22.05.01
NeonCreateWorkloadTests.cpp File Reference

Go to the source code of this file.

Functions

 TEST_SUITE ("CreateWorkloadNeon")
 

Function Documentation

◆ TEST_SUITE()

TEST_SUITE ( "CreateWorkloadNeon"  )

Definition at line 23 of file NeonCreateWorkloadTests.cpp.

References IAclTensorHandle::GetTensor().

24 {
25 namespace
26 {
27 
28 armnn::PredicateResult CompareIAclTensorHandleShape(IAclTensorHandle* tensorHandle,
29  std::initializer_list<unsigned int> expectedDimensions)
30 {
31  return CompareTensorHandleShape<IAclTensorHandle>(tensorHandle, expectedDimensions);
32 }
33 
34 bool TestNeonTensorHandleInfo(armnn::IAclTensorHandle* handle, const armnn::TensorInfo& expectedInfo)
35 {
36  using namespace armnn::armcomputetensorutils;
37 
38  const arm_compute::ITensorInfo* handleInfo = handle->GetTensor().info();
39  const arm_compute::TensorInfo expectedAclInfo = BuildArmComputeTensorInfo(expectedInfo);
40 
41  if (handleInfo->data_type() != expectedAclInfo.data_type())
42  {
43  return false;
44  }
45 
46  if (handleInfo->num_dimensions() != expectedAclInfo.num_dimensions())
47  {
48  return false;
49  }
50 
51  if (handleInfo->quantization_info() != expectedAclInfo.quantization_info())
52  {
53  return false;
54  }
55 
56  for (std::size_t d = 0; d < expectedAclInfo.num_dimensions(); ++d)
57  {
58  if (handleInfo->dimension(d) != expectedAclInfo.dimension(d))
59  {
60  return false;
61  }
62  }
63 
64  return true;
65 }
66 
67 } // namespace
68 
69 template <typename armnn::DataType DataType>
70 static void NeonCreateActivationWorkloadTest()
71 {
72  Graph graph;
73  NeonWorkloadFactory factory =
74  NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
75 
76  auto workload = CreateActivationWorkloadTest<NeonActivationWorkload, DataType>(factory, graph);
77 
78  // Checks that inputs/outputs are as we expect them (see definition of CreateActivationWorkloadTest).
79  ActivationQueueDescriptor queueDescriptor = workload->GetData();
80  auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
81  auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
82  CHECK(TestNeonTensorHandleInfo(inputHandle, TensorInfo({1, 1}, DataType)));
83  CHECK(TestNeonTensorHandleInfo(outputHandle, TensorInfo({1, 1}, DataType)));
84 }
85 
// FP16 activation creation is only compiled when the toolchain provides
// NEON half-precision vector arithmetic.
86 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
87 TEST_CASE("CreateActivationFloat16Workload")
88 {
89  NeonCreateActivationWorkloadTest<DataType::Float16>();
90 }
91 #endif
92 
// FP32 activation creation is always available on NEON.
93 TEST_CASE("CreateActivationFloatWorkload")
94 {
95  NeonCreateActivationWorkloadTest<DataType::Float32>();
96 }
97 
98 template <typename WorkloadType,
99  typename DescriptorType,
100  typename LayerType,
102 static void NeonCreateElementwiseWorkloadTest()
103 {
104  Graph graph;
105  NeonWorkloadFactory factory =
106  NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
107 
108  auto workload = CreateElementwiseWorkloadTest<WorkloadType, DescriptorType, LayerType, DataType>(factory, graph);
109 
110  DescriptorType queueDescriptor = workload->GetData();
111  auto inputHandle1 = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
112  auto inputHandle2 = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
113  auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
114  CHECK(TestNeonTensorHandleInfo(inputHandle1, TensorInfo({2, 3}, DataType)));
115  CHECK(TestNeonTensorHandleInfo(inputHandle2, TensorInfo({2, 3}, DataType)));
116  CHECK(TestNeonTensorHandleInfo(outputHandle, TensorInfo({2, 3}, DataType)));
117 }
118 
119 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
120 TEST_CASE("CreateAdditionFloat16Workload")
121 {
122  NeonCreateElementwiseWorkloadTest<NeonAdditionWorkload,
126 }
127 #endif
128 
129 TEST_CASE("CreateAdditionFloatWorkload")
130 {
131  NeonCreateElementwiseWorkloadTest<NeonAdditionWorkload,
135 }
136 
137 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
138 TEST_CASE("CreateSubtractionFloat16Workload")
139 {
140  NeonCreateElementwiseWorkloadTest<NeonSubtractionWorkload,
144 }
145 #endif
146 
147 TEST_CASE("CreateSubtractionFloatWorkload")
148 {
149  NeonCreateElementwiseWorkloadTest<NeonSubtractionWorkload,
153 }
154 
155 TEST_CASE("CreateSubtractionUint8Workload")
156 {
157  NeonCreateElementwiseWorkloadTest<NeonSubtractionWorkload,
161 }
162 
163 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
164 TEST_CASE("CreateMultiplicationFloat16Workload")
165 {
166  NeonCreateElementwiseWorkloadTest<NeonMultiplicationWorkload,
170 }
171 #endif
172 
173 TEST_CASE("CreateMultiplicationFloatWorkload")
174 {
175  NeonCreateElementwiseWorkloadTest<NeonMultiplicationWorkload,
179 }
180 
181 TEST_CASE("CreateMultiplicationUint8Workload")
182 {
183  NeonCreateElementwiseWorkloadTest<NeonMultiplicationWorkload,
187 }
188 
189 TEST_CASE("CreateDivisionFloatWorkloadTest")
190 {
191  NeonCreateElementwiseWorkloadTest<NeonDivisionWorkload,
195 }
196 
197 template <typename BatchNormalizationWorkloadType, typename armnn::DataType DataType>
198 static void NeonCreateBatchNormalizationWorkloadTest(DataLayout dataLayout)
199 {
200  Graph graph;
201  NeonWorkloadFactory factory =
202  NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
203 
204  auto workload = CreateBatchNormalizationWorkloadTest<BatchNormalizationWorkloadType, DataType>
205  (factory, graph, dataLayout);
206 
207  // Checks that outputs and inputs are as we expect them (see definition of CreateBatchNormalizationWorkloadTest).
208  BatchNormalizationQueueDescriptor queueDescriptor = workload->GetData();
209  auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
210  auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
211 
212  TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 3, 4, 4} : TensorShape{2, 4, 4, 3};
213  TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 3, 4, 4} : TensorShape{2, 4, 4, 3};
214 
215  CHECK(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
216  CHECK(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
217 }
218 
// Batch-normalization workload creation for both layouts; the FP16 variants
// are guarded by toolchain half-precision support.
219 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
220 TEST_CASE("CreateBatchNormalizationFloat16NchwWorkload")
221 {
222  NeonCreateBatchNormalizationWorkloadTest<NeonBatchNormalizationWorkload, DataType::Float16>(DataLayout::NCHW);
223 }
224 
225 TEST_CASE("CreateBatchNormalizationFloat16NhwcWorkload")
226 {
227  NeonCreateBatchNormalizationWorkloadTest<NeonBatchNormalizationWorkload, DataType::Float16>(DataLayout::NHWC);
228 }
229 #endif
230 
231 TEST_CASE("CreateBatchNormalizationFloatNchwWorkload")
232 {
233  NeonCreateBatchNormalizationWorkloadTest<NeonBatchNormalizationWorkload, DataType::Float32>(DataLayout::NCHW);
234 }
235 
236 TEST_CASE("CreateBatchNormalizationFloatNhwcWorkload")
237 {
238  NeonCreateBatchNormalizationWorkloadTest<NeonBatchNormalizationWorkload, DataType::Float32>(DataLayout::NHWC);
239 }
240 
241 template <typename armnn::DataType DataType>
242 static void NeonCreateConvolution2dWorkloadTest(DataLayout dataLayout = DataLayout::NCHW)
243 {
244  Graph graph;
245  NeonWorkloadFactory factory =
246  NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
247 
248  auto workload = CreateConvolution2dWorkloadTest<NeonConvolution2dWorkload, DataType>(factory, graph, dataLayout);
249 
250  TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 3, 8, 16} : TensorShape{2, 8, 16, 3};
251  TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 2, 2, 10} : TensorShape{2, 2, 10, 2};
252 
253  // Checks that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest).
254  Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
255  auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
256  auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
257  CHECK(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
258  CHECK(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
259 }
260 
// Convolution workload creation for both layouts; FP16 variants require
// toolchain half-precision support.
261 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
262 TEST_CASE("CreateConvolution2dFloat16NchwWorkload")
263 {
264  NeonCreateConvolution2dWorkloadTest<DataType::Float16>();
265 }
266 
267 TEST_CASE("CreateConvolution2dFloat16NhwcWorkload")
268 {
269  NeonCreateConvolution2dWorkloadTest<DataType::Float16>(DataLayout::NHWC);
270 }
271 
272 #endif
273 TEST_CASE("CreateConvolution2dFloatNchwWorkload")
274 {
275  NeonCreateConvolution2dWorkloadTest<DataType::Float32>();
276 }
277 
278 TEST_CASE("CreateConvolution2dFloatNhwcWorkload")
279 {
280  NeonCreateConvolution2dWorkloadTest<DataType::Float32>(DataLayout::NHWC);
281 }
282 
283 TEST_CASE("CreateConvolution2dFastMathEnabledWorkload")
284 {
285  Graph graph;
286  using ModelOptions = std::vector<BackendOptions>;
287  ModelOptions modelOptions = {};
288  BackendOptions cpuAcc("CpuAcc",
289  {
290  { "FastMathEnabled", true }
291  });
292  modelOptions.push_back(cpuAcc);
293  NeonWorkloadFactory factory =
294  NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager(), modelOptions);
295 
296  auto workload =
297  CreateConvolution2dWorkloadFastMathTest<NeonConvolution2dWorkload, armnn::DataType::Float32>(factory,
298  graph,
300  modelOptions);
301 
302  ARMNN_ASSERT(workload != nullptr);
303  auto conv2dWorkload = PolymorphicDowncast<NeonConvolution2dWorkload*>(workload.get());
304  IgnoreUnused(conv2dWorkload);
305  ARMNN_ASSERT(conv2dWorkload != nullptr);
306  ARMNN_ASSERT(conv2dWorkload->GetConvolutionMethod() == arm_compute::ConvolutionMethod::WINOGRAD);
307 }
308 
309 template <typename armnn::DataType DataType>
310 static void NeonCreateDepthWiseConvolutionWorkloadTest(DataLayout dataLayout)
311 {
312  Graph graph;
313  NeonWorkloadFactory factory =
314  NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
315 
316  auto workload = CreateDepthwiseConvolution2dWorkloadTest<NeonDepthwiseConvolutionWorkload,
317  DataType>(factory, graph, dataLayout);
318 
319  // Checks that inputs/outputs are as we expect them (see definition of CreateNormalizationWorkloadTest).
320  DepthwiseConvolution2dQueueDescriptor queueDescriptor = workload->GetData();
321  auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
322  auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
323 
324  TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 2, 2, 5, 5 })
325  : std::initializer_list<unsigned int>({ 2, 5, 5, 2 });
326  TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 2, 2, 5, 5 })
327  : std::initializer_list<unsigned int>({ 2, 5, 5, 2 });
328 
329  CHECK(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
330  CHECK(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
331 }
332 
// Depthwise convolution creation is exercised in NHWC only; the FP16 case
// requires toolchain half-precision support.
333 TEST_CASE("CreateDepthWiseConvolution2dFloat32NhwcWorkload")
334 {
335  NeonCreateDepthWiseConvolutionWorkloadTest<DataType::Float32>(DataLayout::NHWC);
336 }
337 
338 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
339 TEST_CASE("CreateDepthWiseConvolution2dFloat16NhwcWorkload")
340 {
341  NeonCreateDepthWiseConvolutionWorkloadTest<DataType::Float16>(DataLayout::NHWC);
342 }
343 #endif
344 
345 template <typename FullyConnectedWorkloadType, typename armnn::DataType DataType>
346 static void NeonCreateFullyConnectedWorkloadTest()
347 {
348  Graph graph;
349  NeonWorkloadFactory factory =
350  NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
351 
352  auto workload = CreateFullyConnectedWorkloadTest<FullyConnectedWorkloadType, DataType>(factory, graph);
353 
354  // Checks that outputs and inputs are as we expect them (see definition of CreateFullyConnectedWorkloadTest).
355  FullyConnectedQueueDescriptor queueDescriptor = workload->GetData();
356  auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
357  auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
358 
359  // Checks that outputs and inputs are as we expect them (see definition of CreateFullyConnectedWorkloadTest).
360  float inputsQScale = DataType == armnn::DataType::QAsymmU8 ? 1.0f : 0.0;
361  float outputQScale = DataType == armnn::DataType::QAsymmU8 ? 2.0f : 0.0;
362  CHECK(TestNeonTensorHandleInfo(inputHandle, TensorInfo({3, 1, 4, 5}, DataType, inputsQScale)));
363  CHECK(TestNeonTensorHandleInfo(outputHandle, TensorInfo({3, 7}, DataType, outputQScale)));
364 }
365 
// Fully-connected workload creation over the supported data types; FP16 is
// guarded by toolchain half-precision support.
366 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
367 TEST_CASE("CreateFullyConnectedFloat16Workload")
368 {
369  NeonCreateFullyConnectedWorkloadTest<NeonFullyConnectedWorkload, DataType::Float16>();
370 }
371 #endif
372 
373 TEST_CASE("CreateFullyConnectedFloatWorkload")
374 {
375  NeonCreateFullyConnectedWorkloadTest<NeonFullyConnectedWorkload, DataType::Float32>();
376 }
377 
378 TEST_CASE("CreateFullyConnectedQAsymmU8Workload")
379 {
380  NeonCreateFullyConnectedWorkloadTest<NeonFullyConnectedWorkload, DataType::QAsymmU8>();
381 }
382 
383 TEST_CASE("CreateFullyConnectedQAsymmS8Workload")
384 {
385  NeonCreateFullyConnectedWorkloadTest<NeonFullyConnectedWorkload, DataType::QAsymmS8>();
386 }
387 
388 template <typename NormalizationWorkloadType, typename armnn::DataType DataType>
389 static void NeonCreateNormalizationWorkloadTest(DataLayout dataLayout)
390 {
391  Graph graph;
392  NeonWorkloadFactory factory =
393  NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
394 
395  auto workload = CreateNormalizationWorkloadTest<NormalizationWorkloadType, DataType>(factory, graph, dataLayout);
396 
397  // Checks that outputs and inputs are as we expect them (see definition of CreateNormalizationWorkloadTest).
398  NormalizationQueueDescriptor queueDescriptor = workload->GetData();
399  auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
400  auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
401 
402  TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{3, 5, 5, 1} : TensorShape{3, 1, 5, 5};
403  TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{3, 5, 5, 1} : TensorShape{3, 1, 5, 5};
404 
405  CHECK(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
406  CHECK(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
407 }
408 
// Normalization workload creation for both layouts; FP16 variants require
// toolchain half-precision support.
409 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
410 TEST_CASE("CreateNormalizationFloat16NchwWorkload")
411 {
412  NeonCreateNormalizationWorkloadTest<NeonNormalizationFloatWorkload, DataType::Float16>(DataLayout::NCHW);
413 }
414 
415 TEST_CASE("CreateNormalizationFloat16NhwcWorkload")
416 {
417  NeonCreateNormalizationWorkloadTest<NeonNormalizationFloatWorkload, DataType::Float16>(DataLayout::NHWC);
418 }
419 #endif
420 
421 TEST_CASE("CreateNormalizationFloatNchwWorkload")
422 {
423  NeonCreateNormalizationWorkloadTest<NeonNormalizationFloatWorkload, DataType::Float32>(DataLayout::NCHW);
424 }
425 
426 TEST_CASE("CreateNormalizationFloatNhwcWorkload")
427 {
428  NeonCreateNormalizationWorkloadTest<NeonNormalizationFloatWorkload, DataType::Float32>(DataLayout::NHWC);
429 }
430 
431 
432 template <typename armnn::DataType DataType>
433 static void NeonCreatePooling2dWorkloadTest(DataLayout dataLayout = DataLayout::NCHW)
434 {
435  Graph graph;
436  NeonWorkloadFactory factory =
437  NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
438 
439  auto workload = CreatePooling2dWorkloadTest<NeonPooling2dWorkload, DataType>(factory, graph, dataLayout);
440 
441  TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{3, 2, 5, 5} : TensorShape{3, 5, 5, 2};
442  TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{3, 2, 2, 4} : TensorShape{3, 2, 4, 2};
443 
444  // Checks that outputs and inputs are as we expect them (see definition of CreatePooling2dWorkloadTest).
445  Pooling2dQueueDescriptor queueDescriptor = workload->GetData();
446  auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
447  auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
448  CHECK(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
449  CHECK(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
450 }
451 
// Pooling workload creation across data types and layouts; the FP16 case
// requires toolchain half-precision support.
452 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
453 TEST_CASE("CreatePooling2dFloat16Workload")
454 {
455  NeonCreatePooling2dWorkloadTest<DataType::Float16>();
456 }
457 #endif
458 
459 TEST_CASE("CreatePooling2dFloatNchwWorkload")
460 {
461  NeonCreatePooling2dWorkloadTest<DataType::Float32>(DataLayout::NCHW);
462 }
463 
464 TEST_CASE("CreatePooling2dFloatNhwcWorkload")
465 {
466  NeonCreatePooling2dWorkloadTest<DataType::Float32>(DataLayout::NHWC);
467 }
468 
469 TEST_CASE("CreatePooling2dUint8NchwWorkload")
470 {
471  NeonCreatePooling2dWorkloadTest<DataType::QAsymmU8>(DataLayout::NCHW);
472 }
473 
474 TEST_CASE("CreatePooling2dUint8NhwcWorkload")
475 {
476  NeonCreatePooling2dWorkloadTest<DataType::QAsymmU8>(DataLayout::NHWC);
477 }
478 
479 static void NeonCreatePreluWorkloadTest(const armnn::TensorShape& inputShape,
480  const armnn::TensorShape& alphaShape,
481  const armnn::TensorShape& outputShape,
482  armnn::DataType dataType)
483 {
484  Graph graph;
485  NeonWorkloadFactory factory =
486  NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
487 
488  auto workload = CreatePreluWorkloadTest<NeonPreluWorkload>(factory,
489  graph,
490  inputShape,
491  alphaShape,
492  outputShape,
493  dataType);
494 
495  // Checks that outputs and inputs are as we expect them (see definition of CreateReshapeWorkloadTest).
496  PreluQueueDescriptor queueDescriptor = workload->GetData();
497  auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
498  auto alphaHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
499  auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
500  CHECK(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, dataType)));
501  CHECK(TestNeonTensorHandleInfo(alphaHandle, TensorInfo(alphaShape, dataType)));
502  CHECK(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, dataType)));
503 }
504 
// PReLU creation with broadcastable input/alpha shapes; FP16 requires
// toolchain half-precision support.
505 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
506 TEST_CASE("CreatePreluFloat16Workload")
507 {
508  NeonCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::Float16);
509 }
510 #endif
511 
512 TEST_CASE("CreatePreluFloatWorkload")
513 {
514  NeonCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::Float32);
515 }
516 
517 TEST_CASE("CreatePreluUint8Workload")
518 {
519  NeonCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::QAsymmU8);
520 }
521 
522 template <typename armnn::DataType DataType>
523 static void NeonCreateReshapeWorkloadTest()
524 {
525  Graph graph;
526  NeonWorkloadFactory factory =
527  NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
528 
529  auto workload = CreateReshapeWorkloadTest<NeonReshapeWorkload, DataType>(factory, graph);
530 
531  // Checks that outputs and inputs are as we expect them (see definition of CreateReshapeWorkloadTest).
532  ReshapeQueueDescriptor queueDescriptor = workload->GetData();
533  auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
534  auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
535  CHECK(TestNeonTensorHandleInfo(inputHandle, TensorInfo({4, 1}, DataType)));
536  CHECK(TestNeonTensorHandleInfo(outputHandle, TensorInfo({1, 4}, DataType)));
537 }
538 
// Reshape workload creation over the supported data types; FP16 requires
// toolchain half-precision support.
539 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
540 TEST_CASE("CreateReshapeFloat16Workload")
541 {
542  NeonCreateReshapeWorkloadTest<DataType::Float16>();
543 }
544 #endif
545 
546 TEST_CASE("CreateReshapeFloatWorkload")
547 {
548  NeonCreateReshapeWorkloadTest<DataType::Float32>();
549 }
550 
551 TEST_CASE("CreateReshapeUint8Workload")
552 {
553  NeonCreateReshapeWorkloadTest<DataType::QAsymmU8>();
554 }
555 
556 template <typename ResizeWorkloadType, armnn::DataType DataType>
557 static void NeonCreateResizeWorkloadTest(DataLayout dataLayout)
558 {
559  Graph graph;
560  NeonWorkloadFactory factory =
561  NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
562  auto workload = CreateResizeBilinearWorkloadTest<ResizeWorkloadType, DataType>(factory, graph, dataLayout);
563 
564  auto queueDescriptor = workload->GetData();
565 
566  auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
567  auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
568 
569  armnn::PredicateResult predResult(true);
570  switch (dataLayout)
571  {
572  case DataLayout::NHWC:
573  predResult = CompareIAclTensorHandleShape(inputHandle, { 2, 4, 4, 3 });
574  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
575  predResult = CompareIAclTensorHandleShape(outputHandle, { 2, 2, 2, 3 });
576  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
577  break;
578  default: // DataLayout::NCHW
579  predResult = CompareIAclTensorHandleShape(inputHandle, { 2, 3, 4, 4 });
580  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
581  predResult = CompareIAclTensorHandleShape(outputHandle, { 2, 3, 2, 2 });
582  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
583  }
584 }
585 
// Resize workload creation for Float32/QAsymmU8 in both layouts.
586 TEST_CASE("CreateResizeFloat32NchwWorkload")
587 {
588  NeonCreateResizeWorkloadTest<NeonResizeWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
589 }
590 
591 TEST_CASE("CreateResizeUint8NchwWorkload")
592 {
593  NeonCreateResizeWorkloadTest<NeonResizeWorkload, armnn::DataType::QAsymmU8>(DataLayout::NCHW);
594 }
595 
596 TEST_CASE("CreateResizeFloat32NhwcWorkload")
597 {
598  NeonCreateResizeWorkloadTest<NeonResizeWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
599 }
600 
601 TEST_CASE("CreateResizeUint8NhwcWorkload")
602 {
603  NeonCreateResizeWorkloadTest<NeonResizeWorkload, armnn::DataType::QAsymmU8>(DataLayout::NHWC);
604 }
605 
606 template <typename SoftmaxWorkloadType, typename armnn::DataType DataType>
607 static void NeonCreateSoftmaxWorkloadTest()
608 {
609  Graph graph;
610  NeonWorkloadFactory factory =
611  NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
612 
613  auto workload = CreateSoftmaxWorkloadTest<SoftmaxWorkloadType, DataType>(factory, graph);
614 
615  // Checks that outputs and inputs are as we expect them (see definition of CreateSoftmaxWorkloadTest).
616  SoftmaxQueueDescriptor queueDescriptor = workload->GetData();
617  auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
618  auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
619  armnn::TensorInfo tensorInfo({4, 1}, DataType);
621  {
622  tensorInfo.SetQuantizationOffset(0);
623  tensorInfo.SetQuantizationScale(1.f / 256);
624  }
626  {
627  tensorInfo.SetQuantizationOffset(-128);
628  tensorInfo.SetQuantizationScale(1.f / 256);
629  }
630  CHECK(TestNeonTensorHandleInfo(inputHandle, tensorInfo));
631  CHECK(TestNeonTensorHandleInfo(outputHandle, tensorInfo));
632 }
633 
// Softmax workload creation over the supported data types; FP16 requires
// toolchain half-precision support.
634 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
635 TEST_CASE("CreateSoftmaxFloat16Workload")
636 {
637  NeonCreateSoftmaxWorkloadTest<NeonSoftmaxWorkload, DataType::Float16>();
638 }
639 #endif
640 
641 TEST_CASE("CreateSoftmaxFloatWorkload")
642 {
643  NeonCreateSoftmaxWorkloadTest<NeonSoftmaxWorkload, DataType::Float32>();
644 }
645 
646 TEST_CASE("CreateSoftmaxQAsymmU8Workload")
647 {
648  NeonCreateSoftmaxWorkloadTest<NeonSoftmaxWorkload, DataType::QAsymmU8>();
649 }
650 
651 TEST_CASE("CreateSoftmaxQAsymmS8Workload")
652 {
653  NeonCreateSoftmaxWorkloadTest<NeonSoftmaxWorkload, DataType::QAsymmS8>();
654 }
655 
656 template <typename SpaceToDepthWorkloadType, typename armnn::DataType DataType>
657 static void NeonSpaceToDepthWorkloadTest()
658 {
659  Graph graph;
660  NeonWorkloadFactory factory =
661  NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
662 
663  auto workload = CreateSpaceToDepthWorkloadTest<SpaceToDepthWorkloadType, DataType>(factory, graph);
664 
665  SpaceToDepthQueueDescriptor queueDescriptor = workload->GetData();
666  auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
667  auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
668 
669  CHECK(TestNeonTensorHandleInfo(inputHandle, TensorInfo({ 1, 2, 2, 1 }, DataType)));
670  CHECK(TestNeonTensorHandleInfo(outputHandle, TensorInfo({ 1, 1, 1, 4 }, DataType)));
671 }
672 
// Space-to-depth workload creation over the supported data types.
673 TEST_CASE("CreateSpaceToDepthFloat32Workload")
674 {
675  NeonSpaceToDepthWorkloadTest<NeonSpaceToDepthWorkload, armnn::DataType::Float32>();
676 }
677 
678 TEST_CASE("CreateSpaceToDepthFloat16Workload")
679 {
680  NeonSpaceToDepthWorkloadTest<NeonSpaceToDepthWorkload, armnn::DataType::Float16>();
681 }
682 
683 TEST_CASE("CreateSpaceToDepthQAsymm8Workload")
684 {
685  NeonSpaceToDepthWorkloadTest<NeonSpaceToDepthWorkload, armnn::DataType::QAsymmU8>();
686 }
687 
688 TEST_CASE("CreateSpaceToDepthQSymm16Workload")
689 {
690  NeonSpaceToDepthWorkloadTest<NeonSpaceToDepthWorkload, armnn::DataType::QSymmS16>();
691 }
692 
693 TEST_CASE("CreateSplitterWorkload")
694 {
695  Graph graph;
696  NeonWorkloadFactory factory =
697  NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
698 
699  auto workload = CreateSplitterWorkloadTest<NeonSplitterWorkload, DataType::Float32>(factory, graph);
700 
701  // Checks that outputs are as we expect them (see definition of CreateSplitterWorkloadTest).
702  SplitterQueueDescriptor queueDescriptor = workload->GetData();
703  auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
704  CHECK(TestNeonTensorHandleInfo(inputHandle, TensorInfo({5, 7, 7}, DataType::Float32)));
705 
706  auto outputHandle0 = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
707  CHECK(TestNeonTensorHandleInfo(outputHandle0, TensorInfo({1, 7, 7}, DataType::Float32)));
708 
709  auto outputHandle1 = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
710  CHECK(TestNeonTensorHandleInfo(outputHandle1, TensorInfo({2, 7, 7}, DataType::Float32)));
711 
712  auto outputHandle2 = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[2]);
713  CHECK(TestNeonTensorHandleInfo(outputHandle2, TensorInfo({2, 7, 7}, DataType::Float32)));
714 }
715 
716 TEST_CASE("CreateSplitterConcat")
717 {
718  // Tests that it is possible to decide which output of the splitter layer
719  // should be lined to which input of the concat layer.
720  // We tested that is is possible to specify 0th output
721  // of the splitter to be the 1st input to the concat, and the 1st output of the splitter to be 0th input
722  // of the concat.
723 
724  Graph graph;
725  NeonWorkloadFactory factory =
726  NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
727 
728  auto workloads =
729  CreateSplitterConcatWorkloadTest<NeonSplitterWorkload, NeonConcatWorkload,
730  DataType::Float32>(factory, graph);
731 
732  auto wlSplitter = std::move(workloads.first);
733  auto wlConcat = std::move(workloads.second);
734 
735  //Checks that the index of inputs/outputs matches what we declared on InputDescriptor construction.
736  armnn::IAclTensorHandle* sOut0 = dynamic_cast<armnn::IAclTensorHandle*>(wlSplitter->GetData().m_Outputs[0]);
737  armnn::IAclTensorHandle* sOut1 = dynamic_cast<armnn::IAclTensorHandle*>(wlSplitter->GetData().m_Outputs[1]);
738  armnn::IAclTensorHandle* mIn0 = dynamic_cast<armnn::IAclTensorHandle*>(wlConcat->GetData().m_Inputs[0]);
739  armnn::IAclTensorHandle* mIn1 = dynamic_cast<armnn::IAclTensorHandle*>(wlConcat->GetData().m_Inputs[1]);
740 
741  CHECK(sOut0);
742  CHECK(sOut1);
743  CHECK(mIn0);
744  CHECK(mIn1);
745 
746  bool validDataPointers = (sOut0 == mIn1) && (sOut1 == mIn0);
747 
748  CHECK(validDataPointers);
749 }
750 
751 TEST_CASE("CreateSingleOutputMultipleInputs")
752 {
753  // Tests that it is possible to assign multiple (two) different layers to each of the outputs of a splitter layer.
754  // We created a splitter with two outputs. That each of those outputs is used by two different activation layers
755 
756  Graph graph;
757  NeonWorkloadFactory factory =
758  NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
759 
760  std::unique_ptr<NeonSplitterWorkload> wlSplitter;
761  std::unique_ptr<NeonActivationWorkload> wlActiv0_0;
762  std::unique_ptr<NeonActivationWorkload> wlActiv0_1;
763  std::unique_ptr<NeonActivationWorkload> wlActiv1_0;
764  std::unique_ptr<NeonActivationWorkload> wlActiv1_1;
765 
766  CreateSplitterMultipleInputsOneOutputWorkloadTest<NeonSplitterWorkload,
767  NeonActivationWorkload, DataType::Float32>(factory, graph, wlSplitter, wlActiv0_0, wlActiv0_1,
768  wlActiv1_0, wlActiv1_1);
769 
770  armnn::IAclTensorHandle* sOut0 = dynamic_cast<armnn::IAclTensorHandle*>(wlSplitter->GetData().m_Outputs[0]);
771  armnn::IAclTensorHandle* sOut1 = dynamic_cast<armnn::IAclTensorHandle*>(wlSplitter->GetData().m_Outputs[1]);
772  armnn::IAclTensorHandle* activ0_0Im = dynamic_cast<armnn::IAclTensorHandle*>(wlActiv0_0->GetData().m_Inputs[0]);
773  armnn::IAclTensorHandle* activ0_1Im = dynamic_cast<armnn::IAclTensorHandle*>(wlActiv0_1->GetData().m_Inputs[0]);
774  armnn::IAclTensorHandle* activ1_0Im = dynamic_cast<armnn::IAclTensorHandle*>(wlActiv1_0->GetData().m_Inputs[0]);
775  armnn::IAclTensorHandle* activ1_1Im = dynamic_cast<armnn::IAclTensorHandle*>(wlActiv1_1->GetData().m_Inputs[0]);
776 
777 
778  CHECK(sOut0);
779  CHECK(sOut1);
780  CHECK(activ0_0Im);
781  CHECK(activ0_1Im);
782  CHECK(activ1_0Im);
783  CHECK(activ1_1Im);
784 
785  bool validDataPointers = (sOut0 == activ0_0Im) && (sOut0 == activ0_1Im) &&
786  (sOut1 == activ1_0Im) && (sOut1 == activ1_1Im);
787 
788  CHECK(validDataPointers);
789 }
790 
// Mem-copy workload creation exercises copies between backends, so it is only
// compiled when the reference (CpuRef) backend is built in.
791 #if defined(ARMNNREF_ENABLED)
792 
793 // This test unit needs the reference backend, it's not available if the reference backend is not built
794 
795 TEST_CASE("CreateMemCopyWorkloadsNeon")
796 {
797  NeonWorkloadFactory factory =
798  NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager())
799  CreateMemCopyWorkloads<IAclTensorHandle>(factory);
800 }
801 
802 #endif
803 
804 template <typename L2NormalizationWorkloadType, typename armnn::DataType DataType>
805 static void NeonCreateL2NormalizationWorkloadTest(DataLayout dataLayout)
806 {
807  Graph graph;
808  NeonWorkloadFactory factory =
809  NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
810 
811  auto workload =
812  CreateL2NormalizationWorkloadTest<L2NormalizationWorkloadType, DataType>(factory, graph, dataLayout);
813 
814  // Checks that inputs/outputs are as we expect them (see definition of CreateNormalizationWorkloadTest).
815  L2NormalizationQueueDescriptor queueDescriptor = workload->GetData();
816  auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
817  auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
818 
819  TensorShape inputShape = (dataLayout == DataLayout::NCHW) ?
820  TensorShape{ 5, 20, 50, 67 } : TensorShape{ 5, 50, 67, 20 };
821  TensorShape outputShape = (dataLayout == DataLayout::NCHW) ?
822  TensorShape{ 5, 20, 50, 67 } : TensorShape{ 5, 50, 67, 20 };
823 
824  CHECK(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
825  CHECK(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
826 }
827 
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
// Float16 variants are only compiled when the toolchain supports FP16 vector arithmetic.
TEST_CASE("CreateL2NormalizationFloat16NchwWorkload")
{
    NeonCreateL2NormalizationWorkloadTest<NeonL2NormalizationFloatWorkload, DataType::Float16>(DataLayout::NCHW);
}

TEST_CASE("CreateL2NormalizationFloat16NhwcWorkload")
{
    NeonCreateL2NormalizationWorkloadTest<NeonL2NormalizationFloatWorkload, DataType::Float16>(DataLayout::NHWC);
}
#endif

// Float32 L2Normalization workload creation, covering both data layouts.
TEST_CASE("CreateL2NormalizationNchwWorkload")
{
    NeonCreateL2NormalizationWorkloadTest<NeonL2NormalizationFloatWorkload, DataType::Float32>(DataLayout::NCHW);
}

TEST_CASE("CreateL2NormalizationNhwcWorkload")
{
    NeonCreateL2NormalizationWorkloadTest<NeonL2NormalizationFloatWorkload, DataType::Float32>(DataLayout::NHWC);
}
849 
850 template <typename LogSoftmaxWorkloadType, typename armnn::DataType DataType>
851 static void NeonCreateLogSoftmaxWorkloadTest()
852 {
853  Graph graph;
854  NeonWorkloadFactory factory =
855  NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
856 
857  auto workload = CreateLogSoftmaxWorkloadTest<LogSoftmaxWorkloadType, DataType>(factory, graph);
858 
859  // Checks that outputs and inputs are as we expect them (see definition of CreateLogSoftmaxWorkloadTest).
860  LogSoftmaxQueueDescriptor queueDescriptor = workload->GetData();
861  auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
862  auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
863  armnn::TensorInfo tensorInfo({4, 1}, DataType);
864 
865  CHECK(TestNeonTensorHandleInfo(inputHandle, tensorInfo));
866  CHECK(TestNeonTensorHandleInfo(outputHandle, tensorInfo));
867 }
868 
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
// Float16 variant is only compiled when the toolchain supports FP16 vector arithmetic.
TEST_CASE("CreateLogSoftmaxFloat16Workload")
{
    NeonCreateLogSoftmaxWorkloadTest<NeonLogSoftmaxWorkload, DataType::Float16>();
}
#endif

// Float32 LogSoftmax workload creation.
TEST_CASE("CreateLogSoftmaxFloatWorkload")
{
    NeonCreateLogSoftmaxWorkloadTest<NeonLogSoftmaxWorkload, DataType::Float32>();
}
880 
881 template <typename LstmWorkloadType>
882 static void NeonCreateLstmWorkloadTest()
883 {
884  Graph graph;
885  NeonWorkloadFactory factory =
886  NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
887 
888  auto workload = CreateLstmWorkloadTest<LstmWorkloadType>(factory, graph);
889 
890  LstmQueueDescriptor queueDescriptor = workload->GetData();
891 
892  auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
893  auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
894 
895  CHECK(TestNeonTensorHandleInfo(inputHandle, TensorInfo({ 2, 2 }, DataType::Float32)));
896  CHECK(TestNeonTensorHandleInfo(outputHandle, TensorInfo({ 2, 4 }, DataType::Float32)));
897 }
898 
// Float32 LSTM workload creation.
TEST_CASE("CreateLSTMWorkloadFloatWorkload")
{
    NeonCreateLstmWorkloadTest<NeonLstmFloatWorkload>();
}
903 
904 template <typename ConcatWorkloadType, armnn::DataType DataType>
905 static void NeonCreateConcatWorkloadTest(std::initializer_list<unsigned int> outputShape,
906  unsigned int concatAxis)
907 {
908  Graph graph;
909  NeonWorkloadFactory factory =
910  NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
911 
912  auto workload = CreateConcatWorkloadTest<ConcatWorkloadType, DataType>(factory, graph, outputShape, concatAxis);
913 
914  ConcatQueueDescriptor queueDescriptor = workload->GetData();
915  auto inputHandle0 = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
916  auto inputHandle1 = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
917  auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
918 
919  CHECK(TestNeonTensorHandleInfo(inputHandle0, TensorInfo({ 2, 3, 2, 5 }, DataType)));
920  CHECK(TestNeonTensorHandleInfo(inputHandle1, TensorInfo({ 2, 3, 2, 5 }, DataType)));
921  CHECK(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
922 }
923 
// Concat workload creation for Float32 and QAsymmU8, concatenating two
// {2, 3, 2, 5} inputs along axes 0, 1 and 3; the first argument to the helper
// is the resulting output shape for that axis.
TEST_CASE("CreateConcatDim0Float32Workload")
{
    NeonCreateConcatWorkloadTest<NeonConcatWorkload, armnn::DataType::Float32>({ 4, 3, 2, 5 }, 0);
}

TEST_CASE("CreateConcatDim1Float32Workload")
{
    NeonCreateConcatWorkloadTest<NeonConcatWorkload, armnn::DataType::Float32>({ 2, 6, 2, 5 }, 1);
}

TEST_CASE("CreateConcatDim3Float32Workload")
{
    NeonCreateConcatWorkloadTest<NeonConcatWorkload, armnn::DataType::Float32>({ 2, 3, 2, 10 }, 3);
}

TEST_CASE("CreateConcatDim0Uint8Workload")
{
    NeonCreateConcatWorkloadTest<NeonConcatWorkload, armnn::DataType::QAsymmU8>({ 4, 3, 2, 5 }, 0);
}

TEST_CASE("CreateConcatDim1Uint8Workload")
{
    NeonCreateConcatWorkloadTest<NeonConcatWorkload, armnn::DataType::QAsymmU8>({ 2, 6, 2, 5 }, 1);
}

TEST_CASE("CreateConcatDim3Uint8Workload")
{
    NeonCreateConcatWorkloadTest<NeonConcatWorkload, armnn::DataType::QAsymmU8>({ 2, 3, 2, 10 }, 3);
}
953 
954 template <armnn::DataType DataType>
955 static void NeonCreateStackWorkloadTest(const std::initializer_list<unsigned int>& inputShape,
956  const std::initializer_list<unsigned int>& outputShape,
957  unsigned int axis,
958  unsigned int numInputs)
959 {
960  armnn::Graph graph;
961  NeonWorkloadFactory factory =
962  NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
963 
964  auto workload = CreateStackWorkloadTest<NeonStackWorkload, DataType>(factory,
965  graph,
966  TensorShape(inputShape),
967  TensorShape(outputShape),
968  axis,
969  numInputs);
970 
971  // Check inputs and output are as expected
972  StackQueueDescriptor queueDescriptor = workload->GetData();
973  for (unsigned int i = 0; i < numInputs; ++i)
974  {
975  auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[i]);
976  CHECK(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
977  }
978  auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
979  CHECK(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
980 }
981 
// Stack workload creation: two {3, 4, 5} inputs stacked on axis 2 to give a
// {3, 4, 2, 5} output, for each supported data type.
TEST_CASE("CreateStackFloat32Workload")
{
    NeonCreateStackWorkloadTest<armnn::DataType::Float32>({ 3, 4, 5 }, { 3, 4, 2, 5 }, 2, 2);
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
// Float16 variant is only compiled when the toolchain supports FP16 vector arithmetic.
TEST_CASE("CreateStackFloat16Workload")
{
    NeonCreateStackWorkloadTest<armnn::DataType::Float16>({ 3, 4, 5 }, { 3, 4, 2, 5 }, 2, 2);
}
#endif

TEST_CASE("CreateStackUint8Workload")
{
    NeonCreateStackWorkloadTest<armnn::DataType::QAsymmU8>({ 3, 4, 5 }, { 3, 4, 2, 5 }, 2, 2);
}
998 
999 template <typename QuantizedLstmWorkloadType>
1000 static void NeonCreateQuantizedLstmWorkloadTest()
1001 {
1002  Graph graph;
1003  NeonWorkloadFactory factory = NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
1004 
1005  auto workload = CreateQuantizedLstmWorkloadTest<QuantizedLstmWorkloadType>(factory, graph);
1006 
1007  QuantizedLstmQueueDescriptor queueDescriptor = workload->GetData();
1008 
1009  IAclTensorHandle* inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
1010  CHECK((inputHandle->GetShape() == TensorShape({2, 2})));
1011  CHECK((inputHandle->GetDataType() == arm_compute::DataType::QASYMM8));
1012 
1013  IAclTensorHandle* cellStateInHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
1014  CHECK((cellStateInHandle->GetShape() == TensorShape({2, 4})));
1015  CHECK((cellStateInHandle->GetDataType() == arm_compute::DataType::QSYMM16));
1016 
1017  IAclTensorHandle* outputStateInHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[2]);
1018  CHECK((outputStateInHandle->GetShape() == TensorShape({2, 4})));
1019  CHECK((outputStateInHandle->GetDataType() == arm_compute::DataType::QASYMM8));
1020 
1021  IAclTensorHandle* cellStateOutHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
1022  CHECK((cellStateOutHandle->GetShape() == TensorShape({2, 4})));
1023  CHECK((cellStateOutHandle->GetDataType() == arm_compute::DataType::QSYMM16));
1024 
1025  IAclTensorHandle* outputStateOutHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
1026  CHECK((outputStateOutHandle->GetShape() == TensorShape({2, 4})));
1027  CHECK((outputStateOutHandle->GetDataType() == arm_compute::DataType::QASYMM8));
1028 }
1029 
// QuantizedLstm workload creation.
TEST_CASE("CreateQuantizedLstmWorkload")
{
    NeonCreateQuantizedLstmWorkloadTest<NeonQuantizedLstmWorkload>();
}
1034 
1035 template <typename QLstmWorkloadType>
1036 static void NeonCreateQLstmWorkloadTest()
1037 {
1038  Graph graph;
1039  NeonWorkloadFactory factory = NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
1040 
1041  auto workload = CreateQLstmWorkloadTest<QLstmWorkloadType>(factory, graph);
1042  QLstmQueueDescriptor queueDescriptor = workload->GetData();
1043 
1044  IAclTensorHandle* inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
1045  CHECK((inputHandle->GetShape() == TensorShape({2, 4})));
1046  CHECK((inputHandle->GetDataType() == arm_compute::DataType::QASYMM8_SIGNED));
1047 
1048  IAclTensorHandle* cellStateOutHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
1049  CHECK((cellStateOutHandle->GetShape() == TensorShape({2, 4})));
1050  CHECK((cellStateOutHandle->GetDataType() == arm_compute::DataType::QSYMM16));
1051 
1052  IAclTensorHandle* outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[2]);
1053  CHECK((outputHandle->GetShape() == TensorShape({2, 4})));
1054  CHECK((outputHandle->GetDataType() == arm_compute::DataType::QASYMM8_SIGNED));
1055 }
1056 
// QLstm workload creation.
TEST_CASE("CreateQLstmWorkloadTest")
{
    NeonCreateQLstmWorkloadTest<NeonQLstmWorkload>();
}
1061 
// Creates an Activation workload, then attempts to swap in freshly-created
// Float16 tensor handles via Replace{Input,Output}TensorHandle. Both calls are
// expected to throw UnimplementedException - this test pins down that the Neon
// activation workload does NOT currently support handle replacement.
template <armnn::DataType DataType>
static void NeonCreateActivationWorkloadReplaceFunctionsTest()
{
    shared_ptr<NeonMemoryManager> memoryManager = make_shared<NeonMemoryManager>();

    Graph graph;
    NeonWorkloadFactory factory = NeonWorkloadFactoryHelper::GetFactory(memoryManager);
    // input and output are created as armnn::TensorInfo tensorInfo({1, 1}, DataType)
    auto workloadPtr = CreateActivationWorkloadTest<NeonActivationWorkload, DataType>(factory, graph);

    // New {2, 2} Float16 input and output tensor handles, allocated so they are
    // valid candidates for replacement.
    const NeonTensorHandleFactory tensorHandleFactory(memoryManager);
    TensorInfo inputInfo({2 , 2}, DataType::Float16);
    TensorInfo outputInfo({2 , 2}, DataType::Float16);
    unique_ptr<ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputInfo);
    inputHandle->Allocate();
    unique_ptr<ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputInfo);
    outputHandle->Allocate();

    // Replacement is unimplemented for this workload, so both calls must throw.
    unsigned int slot = 0;
    CHECK_THROWS_AS(workloadPtr->ReplaceInputTensorHandle(inputHandle.get(), slot), UnimplementedException);
    CHECK_THROWS_AS(workloadPtr->ReplaceOutputTensorHandle(outputHandle.get(), slot), UnimplementedException);
}
1085 
// Handle-replacement tests; the names read as "replace <original type> handles
// with Float16 handles" - the replacement targets are Float16 in both cases.
TEST_CASE("NeonReplaceFunctionsfromFloat32toFloat16ActivationWorkload")
{
    NeonCreateActivationWorkloadReplaceFunctionsTest<armnn::DataType::Float32>();
}

TEST_CASE("NeonReplaceFunctionsfromUint8toFloat16ActivationWorkload")
{
    NeonCreateActivationWorkloadReplaceFunctionsTest<armnn::DataType::QAsymmU8>();
}
1095 
1096 }
virtual arm_compute::ITensor & GetTensor()=0
DataLayout
Definition: Types.hpp:62
std::vector< BackendOptions > ModelOptions
virtual void Allocate()=0
Indicate to the memory manager that this resource is no longer active.
void IgnoreUnused(Ts &&...)
virtual arm_compute::DataType GetDataType() const =0
DataType
Definition: Types.hpp:48
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
virtual TensorShape GetShape() const =0
Get the number of elements for each dimension ordered from slowest iterating dimension to fastest ite...
This layer represents an addition operation.
Struct for the users to pass backend specific options.
This layer represents a subtraction operation.
std::vector< ITensorHandle * > m_Outputs
This layer represents a division operation.
void SetQuantizationOffset(int32_t offset)
Definition: Tensor.cpp:489
std::vector< ITensorHandle * > m_Inputs
This layer represents a multiplication operation.
Depthwise Convolution 2D layer workload data.
LayerType
When adding a new layer, adapt also the LastLayer enum value in the enum class LayerType below...
Definition: Types.hpp:467