ArmNN
 21.02
ClCreateWorkloadTests.cpp
Go to the documentation of this file.
1 //
2 // Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
8 
15 
18 
19 #include <cl/ClTensorHandle.hpp>
20 #include <cl/ClWorkloadFactory.hpp>
23 
24 boost::test_tools::predicate_result CompareIClTensorHandleShape(IClTensorHandle* tensorHandle,
25  std::initializer_list<unsigned int> expectedDimensions)
26 {
27  return CompareTensorHandleShape<IClTensorHandle>(tensorHandle, expectedDimensions);
28 }
29 
30 BOOST_FIXTURE_TEST_SUITE(CreateWorkloadCl, ClContextControlFixture)
31 
32 template <armnn::DataType DataType>
33 static void ClCreateActivationWorkloadTest()
34 {
35  Graph graph;
36  ClWorkloadFactory factory =
37  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
38 
39  auto workload = CreateActivationWorkloadTest<ClActivationWorkload, DataType>(factory, graph);
40 
41  // Checks that inputs/outputs are as we expect them (see definition of CreateActivationWorkloadTest).
42  ActivationQueueDescriptor queueDescriptor = workload->GetData();
43  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
44  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
45 
46  BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {1, 1}));
47  BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {1, 1}));
48 }
49 
// Activation workload creation, Float32.
BOOST_AUTO_TEST_CASE(CreateActivationFloatWorkload)
{
    ClCreateActivationWorkloadTest<armnn::DataType::Float32>();
}

// Activation workload creation, Float16.
BOOST_AUTO_TEST_CASE(CreateActivationFloat16Workload)
{
    ClCreateActivationWorkloadTest<armnn::DataType::Float16>();
}
59 
// Creates a binary elementwise workload (addition/subtraction/multiplication/
// division) through the CL factory and verifies the handles it was wired
// with: two {2, 3} inputs and one {2, 3} output.
// NOTE(review): the template parameter list appears truncated in this
// excerpt — an 'armnn::DataType DataType' parameter (referenced below) is
// not visible; confirm against the original file.
template <typename WorkloadType,
          typename DescriptorType,
          typename LayerType,
static void ClCreateElementwiseWorkloadTest()
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateElementwiseWorkloadTest<WorkloadType, DescriptorType, LayerType, DataType>(factory, graph);

    // Checks that inputs/outputs are as we expect them (see definition of CreateElementwiseWorkloadTest).
    DescriptorType queueDescriptor = workload->GetData();
    auto inputHandle1 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto inputHandle2 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
    BOOST_TEST(CompareIClTensorHandleShape(inputHandle1, {2, 3}));
    BOOST_TEST(CompareIClTensorHandleShape(inputHandle2, {2, 3}));
    BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {2, 3}));
}
81 
// NOTE(review): in each test case below the ClCreateElementwiseWorkloadTest
// template-argument list appears truncated in this excerpt (the descriptor,
// layer and data-type arguments are not visible) — confirm against the
// original file. The per-case comments describe the data type indicated by
// the test name.

// Addition, Float32.
BOOST_AUTO_TEST_CASE(CreateAdditionFloatWorkload)
{
    ClCreateElementwiseWorkloadTest<ClAdditionWorkload,
}

// Addition, Float16.
BOOST_AUTO_TEST_CASE(CreateAdditionFloat16Workload)
{
    ClCreateElementwiseWorkloadTest<ClAdditionWorkload,
}

// Subtraction, Float32.
BOOST_AUTO_TEST_CASE(CreateSubtractionFloatWorkload)
{
    ClCreateElementwiseWorkloadTest<ClSubtractionWorkload,
}

// Subtraction, Float16.
BOOST_AUTO_TEST_CASE(CreateSubtractionFloat16Workload)
{
    ClCreateElementwiseWorkloadTest<ClSubtractionWorkload,
}

// Multiplication, Float32.
BOOST_AUTO_TEST_CASE(CreateMultiplicationFloatWorkloadTest)
{
    ClCreateElementwiseWorkloadTest<ClMultiplicationWorkload,
}

// Multiplication, Float16.
BOOST_AUTO_TEST_CASE(CreateMultiplicationFloat16WorkloadTest)
{
    ClCreateElementwiseWorkloadTest<ClMultiplicationWorkload,
}

// Multiplication, 8-bit unsigned quantized.
BOOST_AUTO_TEST_CASE(CreateMultiplicationUint8WorkloadTest)
{
    ClCreateElementwiseWorkloadTest<ClMultiplicationWorkload,
}

// Division, Float32.
BOOST_AUTO_TEST_CASE(CreateDivisionFloatWorkloadTest)
{
    ClCreateElementwiseWorkloadTest<ClDivisionFloatWorkload,
}

// Division, Float16.
BOOST_AUTO_TEST_CASE(CreateDivisionFloat16WorkloadTest)
{
    ClCreateElementwiseWorkloadTest<ClDivisionFloatWorkload,
}
153 
// Creates an elementwise-unary workload for the given operation and verifies
// its input/output CL tensor handles both have shape {2, 3}.
// NOTE(review): the template parameter list appears truncated in this
// excerpt — an 'armnn::DataType DataType' parameter (referenced below) is
// not visible; confirm against the original file.
template <typename WorkloadType,
          typename DescriptorType,
static void ClCreateElementwiseUnaryWorkloadTest(armnn::UnaryOperation op)
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateElementwiseUnaryWorkloadTest<WorkloadType, DescriptorType, DataType>(factory, graph, op);

    DescriptorType queueDescriptor = workload->GetData();

    auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {2, 3}));
    BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {2, 3}));
}
173 
// Rsqrt (reciprocal square root) unary workload creation, Float32.
BOOST_AUTO_TEST_CASE(CreateRsqrtFloat32WorkloadTest)
{
    ClCreateElementwiseUnaryWorkloadTest<ClRsqrtWorkload, RsqrtQueueDescriptor, armnn::DataType::Float32>(
        UnaryOperation::Rsqrt);
}
179 
180 template <typename BatchNormalizationWorkloadType, armnn::DataType DataType>
181 static void ClCreateBatchNormalizationWorkloadTest(DataLayout dataLayout)
182 {
183  Graph graph;
184  ClWorkloadFactory factory =
185  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
186 
187  auto workload = CreateBatchNormalizationWorkloadTest<BatchNormalizationWorkloadType, DataType>
188  (factory, graph, dataLayout);
189 
190  // Checks that inputs/outputs are as we expect them (see definition of CreateBatchNormalizationWorkloadTest).
191  BatchNormalizationQueueDescriptor queueDescriptor = workload->GetData();
192  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
193  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
194 
195  switch (dataLayout)
196  {
197  case DataLayout::NHWC:
198  BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 2, 4, 4, 3 }));
199  BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 2, 4, 4, 3 }));
200  break;
201  default: // NCHW
202  BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 2, 3, 4, 4 }));
203  BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 2, 3, 4, 4 }));
204  }
205 }
206 
// Batch normalization, Float32, NCHW.
BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloatNchwWorkload)
{
    ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload,
                                           armnn::DataType::Float32>(DataLayout::NCHW);
}

// Batch normalization, Float16, NCHW.
BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloat16NchwWorkload)
{
    ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload,
                                           armnn::DataType::Float16>(DataLayout::NCHW);
}

// Batch normalization, Float32, NHWC.
BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloatNhwcWorkload)
{
    ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload,
                                           armnn::DataType::Float32>(DataLayout::NHWC);
}

// Batch normalization, Float16, NHWC.
BOOST_AUTO_TEST_CASE(CreateBatchNormalizationNhwcFloat16NhwcWorkload)
{
    ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload,
                                           armnn::DataType::Float16>(DataLayout::NHWC);
}
230 
231 BOOST_AUTO_TEST_CASE(CreateConvertFp16ToFp32Workload)
232 {
233  Graph graph;
234  ClWorkloadFactory factory =
235  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
236 
237  auto workload = CreateConvertFp16ToFp32WorkloadTest<ClConvertFp16ToFp32Workload>(factory, graph);
238 
239  ConvertFp16ToFp32QueueDescriptor queueDescriptor = workload->GetData();
240  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
241  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
242 
243  BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {1, 3, 2, 3}));
244  BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {1, 3, 2, 3}));
245  BOOST_TEST((inputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F16));
246  BOOST_TEST((outputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F32));
247 }
248 
249 BOOST_AUTO_TEST_CASE(CreateConvertFp32ToFp16Workload)
250 {
251  Graph graph;
252  ClWorkloadFactory factory =
253  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
254 
255  auto workload = CreateConvertFp32ToFp16WorkloadTest<ClConvertFp32ToFp16Workload>(factory, graph);
256 
257  ConvertFp32ToFp16QueueDescriptor queueDescriptor = workload->GetData();
258  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
259  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
260 
261  BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {1, 3, 2, 3}));
262  BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {1, 3, 2, 3}));
263  BOOST_TEST((inputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F32));
264  BOOST_TEST((outputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F16));
265 }
266 
267 template <typename Convolution2dWorkloadType, typename armnn::DataType DataType>
268 static void ClConvolution2dWorkloadTest(DataLayout dataLayout)
269 {
270  Graph graph;
271  ClWorkloadFactory factory =
272  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
273 
274  auto workload = CreateConvolution2dWorkloadTest<ClConvolution2dWorkload, DataType>(factory,
275  graph,
276  dataLayout);
277 
278  TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({2, 3, 8, 16})
279  : std::initializer_list<unsigned int>({2, 8, 16, 3});
280  TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({2, 2, 2, 10})
281  : std::initializer_list<unsigned int>({2, 2, 10, 2});
282 
283  // Checks that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest).
284  Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
285  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
286  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
287  BOOST_TEST((inputHandle->GetShape() == inputShape));
288  BOOST_TEST((outputHandle->GetShape() == outputShape));
289 }
290 
// Convolution2d, Float32, NCHW.
BOOST_AUTO_TEST_CASE(CreateConvolution2dFloatNchwWorkload)
{
    ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
}

// Convolution2d, Float32, NHWC.
BOOST_AUTO_TEST_CASE(CreateConvolution2dFloatNhwcWorkload)
{
    ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
}

// Convolution2d, Float16, NCHW.
BOOST_AUTO_TEST_CASE(CreateConvolution2dFloat16NchwWorkload)
{
    ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float16>(DataLayout::NCHW);
}

// Convolution2d, Float16, NHWC.
BOOST_AUTO_TEST_CASE(CreateConvolution2dFloat16NhwcWorkload)
{
    ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
}
310 
311 BOOST_AUTO_TEST_CASE(CreateConvolution2dFastMathEnabledWorkload)
312 {
313  Graph graph;
314 
315  using ModelOptions = std::vector<BackendOptions>;
316  ModelOptions modelOptions = {};
317  BackendOptions gpuAcc("GpuAcc",
318  {
319  { "FastMathEnabled", true }
320  });
321  modelOptions.push_back(gpuAcc);
322 
323  ClWorkloadFactory factory =
324  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager(), modelOptions);
325 
326  auto workload =
327  CreateConvolution2dWorkloadFastMathTest<ClConvolution2dWorkload, armnn::DataType::Float32>(factory,
328  graph,
329  DataLayout::NCHW,
330  modelOptions);
331 
332  ARMNN_ASSERT(workload != nullptr);
333  auto conv2dWorkload = PolymorphicDowncast<ClConvolution2dWorkload*>(workload.get());
334  IgnoreUnused(conv2dWorkload);
335  ARMNN_ASSERT(conv2dWorkload != nullptr);
336  ARMNN_ASSERT(conv2dWorkload->GetConvolutionMethod() == arm_compute::ConvolutionMethod::WINOGRAD);
337 }
338 
// Constructs a per-axis quantized convolution workload directly (without the
// factory) against an explicitly created CLCompileContext, and checks that
// workload construction populates the context's built-program cache.
BOOST_AUTO_TEST_CASE(CreateConvolution2dClCompiledContextWorkload)
{
    using namespace armnn;

    // QAsymmU8 input/output, QSymmS8 weights with per-channel (axis 0) scales,
    // Signed32 bias.
    const DataType inputType  = DataType::QAsymmU8;
    const DataType kernelType = DataType::QSymmS8;
    const DataType biasType   = DataType::Signed32;

    TensorInfo inputInfo ({ 1, 3, 1, 2 }, inputType, 0.5f, 128);
    TensorInfo outputInfo({ 1, 3, 1, 3 }, inputType, 1.0f, 128);

    const std::vector<float> quantScales{ 0.5f, 0.75f, 1.0f };
    constexpr unsigned int quantDimension = 0;

    TensorInfo kernelInfo({ 3, 1, 1, 2 }, kernelType, quantScales, quantDimension);

    // Bias scales are input scale (0.5) times each kernel channel scale.
    const std::vector<float> biasQuantScales{ 0.25f, 0.375f, 0.5f };
    TensorInfo biasInfo({ 3 }, biasType, biasQuantScales, quantDimension);

    // NOTE(review): inputData and expectedOutputData are declared but not
    // consumed below — this test only checks kernel compilation, not results.
    std::vector<uint8_t> inputData =
    {
        138, 108, 138, 108, 138, 108
    };

    std::vector<int8_t> kernelData =
    {
        1, 2, 1, 2, 1, 2
    };

    std::vector<int32_t> biasData =
    {
        4, 4, 4
    };

    std::vector<uint8_t> expectedOutputData =
    {
        121, 118, 115, 121, 118, 115, 121, 118, 115
    };

    // 1x1 stride, no padding, bias enabled, NHWC layout.
    Convolution2dDescriptor descriptor;
    descriptor.m_StrideX = 1;
    descriptor.m_StrideY = 1;
    descriptor.m_PadLeft = 0;
    descriptor.m_PadRight = 0;
    descriptor.m_PadTop = 0;
    descriptor.m_PadBottom = 0;
    descriptor.m_BiasEnabled = true;
    descriptor.m_DataLayout = DataLayout::NHWC;

    auto memoryManager = ClWorkloadFactoryHelper::GetMemoryManager();
    auto clMemoryManager = armnn::PolymorphicPointerDowncast<armnn::ClMemoryManager>(memoryManager);
    auto tensorHandleFactory = ClWorkloadFactoryHelper::GetTensorHandleFactory(memoryManager);

    std::unique_ptr<ITensorHandle> inputHandle  = tensorHandleFactory.CreateTensorHandle(inputInfo);
    std::unique_ptr<ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputInfo);

    // Weights and bias live in scoped CPU tensors referenced by the descriptor.
    WorkloadInfo workloadInfo;
    ScopedCpuTensorHandle weightTensor(kernelInfo);
    ScopedCpuTensorHandle biasTensor(biasInfo);

    AllocateAndCopyDataToITensorHandle(&weightTensor, kernelData.data());
    AllocateAndCopyDataToITensorHandle(&biasTensor, biasData.data());

    Convolution2dQueueDescriptor queueDescriptor;
    queueDescriptor.m_Parameters = descriptor;
    queueDescriptor.m_Weight = &weightTensor;
    queueDescriptor.m_Bias = &biasTensor;

    AddInputToWorkload(queueDescriptor, workloadInfo, inputInfo, inputHandle.get());
    AddOutputToWorkload(queueDescriptor, workloadInfo, outputInfo, outputHandle.get());

    // Initialize our m_CLCompileContext using default device and context
    auto context = arm_compute::CLKernelLibrary::get().context();
    auto device = arm_compute::CLKernelLibrary::get().get_device();
    auto clCompileContext = arm_compute::CLCompileContext(context, device);

    // Check built programs are empty in context
    BOOST_TEST(clCompileContext.get_built_programs().empty());

    // Constructing the workload compiles the CL kernels into the context.
    auto workload = std::make_unique<ClConvolution2dWorkload>(queueDescriptor,
                                                              workloadInfo,
                                                              clMemoryManager->GetIntraLayerManager(),
                                                              clCompileContext);
    ARMNN_ASSERT(workload != nullptr);
    // Check built programs are not empty in context
    BOOST_TEST(!clCompileContext.get_built_programs().empty());
}
430 
431 template <typename DepthwiseConvolutionWorkloadType, typename armnn::DataType DataType>
432 static void ClDepthwiseConvolutionWorkloadTest(DataLayout dataLayout)
433 {
434  Graph graph;
435  ClWorkloadFactory factory =
436  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
437 
438  auto workload = CreateDepthwiseConvolution2dWorkloadTest<DepthwiseConvolutionWorkloadType, DataType>
439  (factory, graph, dataLayout);
440 
441  // Checks that inputs/outputs are as we expect them (see definition of CreateDepthwiseConvolution2dWorkloadTest).
442  DepthwiseConvolution2dQueueDescriptor queueDescriptor = workload->GetData();
443  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
444  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
445 
446  TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 2, 2, 5, 5 })
447  : std::initializer_list<unsigned int>({ 2, 5, 5, 2 });
448  TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 2, 2, 5, 5 })
449  : std::initializer_list<unsigned int>({ 2, 5, 5, 2 });
450 
451  BOOST_TEST((inputHandle->GetShape() == inputShape));
452  BOOST_TEST((outputHandle->GetShape() == outputShape));
453 }
454 
// Depthwise convolution, Float32, NHWC.
BOOST_AUTO_TEST_CASE(CreateDepthwiseConvolutionFloat32NhwcWorkload)
{
    ClDepthwiseConvolutionWorkloadTest<ClDepthwiseConvolutionWorkload, DataType::Float32>(DataLayout::NHWC);
}
459 
460 template <typename Convolution2dWorkloadType, typename armnn::DataType DataType>
461 static void ClDirectConvolution2dWorkloadTest()
462 {
463  Graph graph;
464  ClWorkloadFactory factory =
465  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
466 
467  auto workload = CreateDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, DataType>(factory, graph);
468 
469  // Checks that outputs and inputs are as we expect them (see definition of CreateDirectConvolution2dWorkloadTest).
470  Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
471  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
472  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
473  BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {2, 3, 6, 6}));
474  BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {2, 2, 6, 6}));
475 }
476 
// Direct convolution, Float32.
BOOST_AUTO_TEST_CASE(CreateDirectConvolution2dFloatWorkload)
{
    ClDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float32>();
}

// Direct convolution, Float16.
BOOST_AUTO_TEST_CASE(CreateDirectConvolution2dFloat16Workload)
{
    ClDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float16>();
}

// Direct convolution, 8-bit unsigned quantized.
BOOST_AUTO_TEST_CASE(CreateDirectConvolution2dUint8Workload)
{
    ClDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::QAsymmU8>();
}
491 
492 template <typename FullyConnectedWorkloadType, typename armnn::DataType DataType>
493 static void ClCreateFullyConnectedWorkloadTest()
494 {
495  Graph graph;
496  ClWorkloadFactory factory =
497  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
498 
499  auto workload =
500  CreateFullyConnectedWorkloadTest<FullyConnectedWorkloadType, DataType>(factory, graph);
501 
502  // Checks that outputs and inputs are as we expect them (see definition of CreateFullyConnectedWorkloadTest).
503  FullyConnectedQueueDescriptor queueDescriptor = workload->GetData();
504  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
505  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
506  BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {3, 1, 4, 5}));
507  BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {3, 7}));
508 }
509 
510 
// Fully connected, Float32.
BOOST_AUTO_TEST_CASE(CreateFullyConnectedFloatWorkloadTest)
{
    ClCreateFullyConnectedWorkloadTest<ClFullyConnectedWorkload, armnn::DataType::Float32>();
}

// Fully connected, Float16.
BOOST_AUTO_TEST_CASE(CreateFullyConnectedFloat16WorkloadTest)
{
    ClCreateFullyConnectedWorkloadTest<ClFullyConnectedWorkload, armnn::DataType::Float16>();
}
520 
521 template <typename NormalizationWorkloadType, typename armnn::DataType DataType>
522 static void ClNormalizationWorkloadTest(DataLayout dataLayout)
523 {
524  Graph graph;
525  ClWorkloadFactory factory =
526  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
527 
528  auto workload = CreateNormalizationWorkloadTest<NormalizationWorkloadType, DataType>(factory, graph, dataLayout);
529 
530  // Checks that inputs/outputs are as we expect them (see definition of CreateNormalizationWorkloadTest).
531  NormalizationQueueDescriptor queueDescriptor = workload->GetData();
532  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
533  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
534 
535  TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({3, 5, 5, 1})
536  : std::initializer_list<unsigned int>({3, 1, 5, 5});
537  TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({3, 5, 5, 1})
538  : std::initializer_list<unsigned int>({3, 1, 5, 5});
539 
540  BOOST_TEST((inputHandle->GetShape() == inputShape));
541  BOOST_TEST((outputHandle->GetShape() == outputShape));
542 }
543 
// Normalization, Float32, NCHW.
BOOST_AUTO_TEST_CASE(CreateNormalizationFloat32NchwWorkload)
{
    ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
}

// Normalization, Float16, NCHW.
BOOST_AUTO_TEST_CASE(CreateNormalizationFloat16NchwWorkload)
{
    ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float16>(DataLayout::NCHW);
}

// Normalization, Float32, NHWC.
BOOST_AUTO_TEST_CASE(CreateNormalizationFloat32NhwcWorkload)
{
    ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
}

// Normalization, Float16, NHWC.
BOOST_AUTO_TEST_CASE(CreateNormalizationFloat16NhwcWorkload)
{
    ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
}
563 
564 template <typename armnn::DataType DataType>
565 static void ClPooling2dWorkloadTest(DataLayout dataLayout)
566 {
567  Graph graph;
568  ClWorkloadFactory factory =
569  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
570 
571  auto workload = CreatePooling2dWorkloadTest<ClPooling2dWorkload, DataType>(factory, graph, dataLayout);
572 
573  TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({3, 2, 5, 5})
574  : std::initializer_list<unsigned int>({3, 5, 5, 2});
575  TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({3, 2, 2, 4})
576  : std::initializer_list<unsigned int>({3, 2, 4, 2});
577 
578  // Check that inputs/outputs are as we expect them (see definition of CreatePooling2dWorkloadTest).
579  Pooling2dQueueDescriptor queueDescriptor = workload->GetData();
580  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
581  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
582 
583  BOOST_TEST((inputHandle->GetShape() == inputShape));
584  BOOST_TEST((outputHandle->GetShape() == outputShape));
585 }
586 
// Pooling2d, Float32, NCHW.
BOOST_AUTO_TEST_CASE(CreatePooling2dFloatNchwWorkload)
{
    ClPooling2dWorkloadTest<armnn::DataType::Float32>(DataLayout::NCHW);
}

// Pooling2d, Float32, NHWC.
BOOST_AUTO_TEST_CASE(CreatePooling2dFloatNhwcWorkload)
{
    ClPooling2dWorkloadTest<armnn::DataType::Float32>(DataLayout::NHWC);
}

// Pooling2d, Float16, NCHW.
BOOST_AUTO_TEST_CASE(CreatePooling2dFloat16NchwWorkload)
{
    ClPooling2dWorkloadTest<armnn::DataType::Float16>(DataLayout::NCHW);
}

// Pooling2d, Float16, NHWC.
BOOST_AUTO_TEST_CASE(CreatePooling2dFloat16NhwcWorkload)
{
    ClPooling2dWorkloadTest<armnn::DataType::Float16>(DataLayout::NHWC);
}
606 
607 static void ClCreatePreluWorkloadTest(const armnn::TensorShape& inputShape,
608  const armnn::TensorShape& alphaShape,
609  const armnn::TensorShape& outputShape,
610  armnn::DataType dataType)
611 {
612  Graph graph;
613  ClWorkloadFactory factory =
614  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
615 
616  auto workload = CreatePreluWorkloadTest<ClPreluWorkload>(factory,
617  graph,
618  inputShape,
619  alphaShape,
620  outputShape,
621  dataType);
622 
623  // Checks that outputs and inputs are as we expect them (see definition of CreatePreluWorkloadTest).
624  PreluQueueDescriptor queueDescriptor = workload->GetData();
625  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
626  auto alphaHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]);
627  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
628 
629  BOOST_TEST((inputHandle->GetShape() == inputShape));
630  BOOST_TEST((alphaHandle->GetShape() == alphaShape));
631  BOOST_TEST((outputHandle->GetShape() == outputShape));
632 }
633 
// PReLU with broadcastable input/alpha shapes, Float16.
BOOST_AUTO_TEST_CASE(CreatePreluFloat16Workload)
{
    ClCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::Float16);
}

// PReLU with broadcastable input/alpha shapes, Float32.
BOOST_AUTO_TEST_CASE(CreatePreluFloatWorkload)
{
    ClCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::Float32);
}

// PReLU with broadcastable input/alpha shapes, 8-bit unsigned quantized.
BOOST_AUTO_TEST_CASE(CreatePreluUint8Workload)
{
    ClCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::QAsymmU8);
}
648 
649 template <typename armnn::DataType DataType>
650 static void ClCreateReshapeWorkloadTest()
651 {
652  Graph graph;
653  ClWorkloadFactory factory =
654  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
655 
656  auto workload = CreateReshapeWorkloadTest<ClReshapeWorkload, DataType>(factory, graph);
657 
658  // Checks that outputs and inputs are as we expect them (see definition of CreateReshapeWorkloadTest).
659  ReshapeQueueDescriptor queueDescriptor = workload->GetData();
660  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
661  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
662 
663  BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {4, 1}));
664  BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {1, 4}));
665 }
666 
// Reshape, Float32.
BOOST_AUTO_TEST_CASE(CreateReshapeFloatWorkload)
{
    ClCreateReshapeWorkloadTest<armnn::DataType::Float32>();
}

// Reshape, Float16.
BOOST_AUTO_TEST_CASE(CreateReshapeFloat16Workload)
{
    ClCreateReshapeWorkloadTest<armnn::DataType::Float16>();
}

// Reshape, 8-bit unsigned quantized.
BOOST_AUTO_TEST_CASE(CreateReshapeUint8Workload)
{
    ClCreateReshapeWorkloadTest<armnn::DataType::QAsymmU8>();
}
681 
// Creates a softmax workload through the CL factory and verifies the CL
// tensor handles it was wired with (both {4, 1}).
template <typename SoftmaxWorkloadType, typename armnn::DataType DataType>
static void ClSoftmaxWorkloadTest()
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateSoftmaxWorkloadTest<SoftmaxWorkloadType, DataType>(factory, graph);

    // Checks that inputs/outputs are as we expect them (see definition of ClSoftmaxFloatWorkload).
    SoftmaxQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    armnn::TensorInfo tensorInfo({4, 1}, DataType);
    // NOTE(review): the conditions guarding the two blocks below are missing
    // from this excerpt (two source lines are not visible). Presumably they
    // select quantization parameters per data type — offset 0 for unsigned
    // and -128 for signed asymmetric quantization; confirm against the
    // original file.
    {
        tensorInfo.SetQuantizationOffset(0);
        tensorInfo.SetQuantizationScale(1.f / 256);
    }
    {
        tensorInfo.SetQuantizationOffset(-128);
        tensorInfo.SetQuantizationScale(1.f / 256);
    }

    BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {4, 1}));
    BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {4, 1}));
}
711 
712 
// Softmax, Float32.
BOOST_AUTO_TEST_CASE(CreateSoftmaxFloat32WorkloadTest)
{
    ClSoftmaxWorkloadTest<ClSoftmaxWorkload, armnn::DataType::Float32>();
}

// Softmax, Float16.
BOOST_AUTO_TEST_CASE(CreateSoftmaxFloat16WorkloadTest)
{
    ClSoftmaxWorkloadTest<ClSoftmaxWorkload, armnn::DataType::Float16>();
}

// Softmax, 8-bit unsigned asymmetric quantized.
BOOST_AUTO_TEST_CASE(CreateSoftmaxQAsymmU8Workload)
{
    ClSoftmaxWorkloadTest<ClSoftmaxWorkload, armnn::DataType::QAsymmU8>();
}

// Softmax, 8-bit signed asymmetric quantized.
BOOST_AUTO_TEST_CASE(CreateSoftmaxQAsymmS8Workload)
{
    ClSoftmaxWorkloadTest<ClSoftmaxWorkload, armnn::DataType::QAsymmS8>();
}
732 
733 template <typename armnn::DataType DataType>
734 static void ClSplitterWorkloadTest()
735 {
736  Graph graph;
737  ClWorkloadFactory factory =
738  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
739 
740  auto workload = CreateSplitterWorkloadTest<ClSplitterWorkload, DataType>(factory, graph);
741 
742  // Checks that outputs are as we expect them (see definition of CreateSplitterWorkloadTest).
743  SplitterQueueDescriptor queueDescriptor = workload->GetData();
744  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
745  BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {5, 7, 7}));
746 
747  auto outputHandle1 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[1]);
748  BOOST_TEST(CompareIClTensorHandleShape(outputHandle1, {2, 7, 7}));
749 
750  auto outputHandle2 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[2]);
751  BOOST_TEST(CompareIClTensorHandleShape(outputHandle2, {2, 7, 7}));
752 
753  auto outputHandle0 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
754  BOOST_TEST(CompareIClTensorHandleShape(outputHandle0, {1, 7, 7}));
755 }
756 
// Splitter, Float32.
BOOST_AUTO_TEST_CASE(CreateSplitterFloatWorkload)
{
    ClSplitterWorkloadTest<armnn::DataType::Float32>();
}

// Splitter, Float16.
BOOST_AUTO_TEST_CASE(CreateSplitterFloat16Workload)
{
    ClSplitterWorkloadTest<armnn::DataType::Float16>();
}
766 
767 template <typename armnn::DataType DataType>
768 static void ClSplitterConcatTest()
769 {
770  // Tests that it is possible to decide which output of the splitter layer
771  // should be lined to which input of the concat layer.
772  // We test that is is possible to specify 0th output
773  // of the splitter to be the 1st input to the concat and the 1st output of the splitter to be 0th input
774  // of the concat.
775 
776  Graph graph;
777  ClWorkloadFactory factory =
778  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
779 
780  auto workloads =
781  CreateSplitterConcatWorkloadTest<ClSplitterWorkload, ClConcatWorkload, DataType>
782  (factory, graph);
783 
784  auto wlSplitter = std::move(workloads.first);
785  auto wlConcat = std::move(workloads.second);
786 
787  //Checks that the index of inputs/outputs matches what we declared on InputDescriptor construction.
788  armnn::ClSubTensorHandle* sOut0 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[0]);
789  armnn::ClSubTensorHandle* sOut1 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[1]);
790  armnn::ClSubTensorHandle* mIn0 = dynamic_cast<armnn::ClSubTensorHandle*>(wlConcat->GetData().m_Inputs[0]);
791  armnn::ClSubTensorHandle* mIn1 = dynamic_cast<armnn::ClSubTensorHandle*>(wlConcat->GetData().m_Inputs[1]);
792 
793  BOOST_TEST(sOut0);
794  BOOST_TEST(sOut1);
795  BOOST_TEST(mIn0);
796  BOOST_TEST(mIn1);
797 
798  //Fliped order of inputs/outputs.
799  bool validDataPointers = (sOut0 == mIn1) && (sOut1 == mIn0);
800  BOOST_TEST(validDataPointers);
801 
802 
803  //Also make sure that the inputs are subtensors of one tensor and outputs are sub tensors of another tensor.
804  bool validSubTensorParents = (mIn0->GetTensor().parent() == mIn1->GetTensor().parent())
805  && (sOut0->GetTensor().parent() == sOut1->GetTensor().parent());
806 
807  BOOST_TEST(validSubTensorParents);
808 }
809 
// Splitter followed by concat with reordered connections, Float32.
BOOST_AUTO_TEST_CASE(CreateSplitterConcatFloatWorkload)
{
    ClSplitterConcatTest<armnn::DataType::Float32>();
}

// Splitter followed by concat with reordered connections, Float16.
BOOST_AUTO_TEST_CASE(CreateSplitterConcatFloat16Workload)
{
    ClSplitterConcatTest<armnn::DataType::Float16>();
}
819 
820 
// Tests that each output of a splitter can feed more than one consumer: a
// two-output splitter whose outputs each drive two activation layers, all
// sharing the same sub-tensor handle.
BOOST_AUTO_TEST_CASE(CreateSingleOutputMultipleInputs)
{
    // Test that it is possible to assign multiple (two) different layers to each of the outputs of a splitter layer.
    // We create a splitter with two outputs. That each of those outputs is used by two different activation layers.

    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    std::unique_ptr<ClSplitterWorkload> wlSplitter;
    std::unique_ptr<ClActivationWorkload> wlActiv0_0;
    std::unique_ptr<ClActivationWorkload> wlActiv0_1;
    std::unique_ptr<ClActivationWorkload> wlActiv1_0;
    std::unique_ptr<ClActivationWorkload> wlActiv1_1;

    // Populates all five workload pointers from the constructed graph.
    CreateSplitterMultipleInputsOneOutputWorkloadTest<ClSplitterWorkload,
        ClActivationWorkload, armnn::DataType::Float32>(factory, graph, wlSplitter, wlActiv0_0, wlActiv0_1,
                                                        wlActiv1_0, wlActiv1_1);

    //Checks that the index of inputs/outputs matches what we declared on InputDescriptor construction.
    armnn::ClSubTensorHandle* sOut0 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[0]);
    armnn::ClSubTensorHandle* sOut1 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[1]);
    armnn::ClSubTensorHandle* activ0_0Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv0_0->GetData().m_Inputs[0]);
    armnn::ClSubTensorHandle* activ0_1Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv0_1->GetData().m_Inputs[0]);
    armnn::ClSubTensorHandle* activ1_0Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv1_0->GetData().m_Inputs[0]);
    armnn::ClSubTensorHandle* activ1_1Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv1_1->GetData().m_Inputs[0]);

    // All handles must be valid sub-tensor handles.
    BOOST_TEST(sOut0);
    BOOST_TEST(sOut1);
    BOOST_TEST(activ0_0Im);
    BOOST_TEST(activ0_1Im);
    BOOST_TEST(activ1_0Im);
    BOOST_TEST(activ1_1Im);

    // Each splitter output is shared by both of its activation consumers.
    bool validDataPointers = (sOut0 == activ0_0Im) && (sOut0 == activ0_1Im) &&
                             (sOut1 == activ1_0Im) && (sOut1 == activ1_1Im);

    BOOST_TEST(validDataPointers);
}
861 
#if defined(ARMNNREF_ENABLED)

// This test unit needs the reference backend, it's not available if the reference backend is not built

// Checks that memory-copy workloads can be created by the CL workload factory.
BOOST_AUTO_TEST_CASE(CreateMemCopyWorkloadsCl)
{
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    CreateMemCopyWorkloads<IClTensorHandle>(factory);
}

#endif
875 
876 template <typename L2NormalizationWorkloadType, typename armnn::DataType DataType>
877 static void ClL2NormalizationWorkloadTest(DataLayout dataLayout)
878 {
879  Graph graph;
880  ClWorkloadFactory factory =
881  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
882 
883  auto workload =
884  CreateL2NormalizationWorkloadTest<L2NormalizationWorkloadType, DataType>(factory, graph, dataLayout);
885 
886  // Checks that inputs/outputs are as we expect them (see definition of CreateNormalizationWorkloadTest).
887  L2NormalizationQueueDescriptor queueDescriptor = workload->GetData();
888  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
889  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
890 
891  TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 5, 20, 50, 67 })
892  : std::initializer_list<unsigned int>({ 5, 50, 67, 20 });
893  TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 5, 20, 50, 67 })
894  : std::initializer_list<unsigned int>({ 5, 50, 67, 20 });
895 
896  BOOST_TEST((inputHandle->GetShape() == inputShape));
897  BOOST_TEST((outputHandle->GetShape() == outputShape));
898 }
899 
// L2Normalization workload creation across the float type / data layout matrix.
BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloatNchwWorkload)
{
    ClL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloatNhwcWorkload)
{
    ClL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
}

BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloat16NchwWorkload)
{
    ClL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float16>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloat16NhwcWorkload)
{
    ClL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
}
919 
920 template <typename LogSoftmaxWorkloadType, typename armnn::DataType DataType>
921 static void ClCreateLogSoftmaxWorkloadTest()
922 {
923  Graph graph;
924  ClWorkloadFactory factory =
925  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
926 
927  auto workload = CreateLogSoftmaxWorkloadTest<LogSoftmaxWorkloadType, DataType>(factory, graph);
928 
929  // Checks that outputs and inputs are as we expect them (see definition of CreateLogSoftmaxWorkloadTest).
930  LogSoftmaxQueueDescriptor queueDescriptor = workload->GetData();
931  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
932  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
933 
934  BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {4, 1}));
935  BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {4, 1}));
936 }
937 
// LogSoftmax workload creation for Float32 data.
BOOST_AUTO_TEST_CASE(CreateLogSoftmaxFloat32WorkloadTest)
{
    ClCreateLogSoftmaxWorkloadTest<ClLogSoftmaxWorkload, armnn::DataType::Float32>();
}
942 
943 template <typename LstmWorkloadType>
944 static void ClCreateLstmWorkloadTest()
945 {
946  Graph graph;
947  ClWorkloadFactory factory =
948  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
949 
950  auto workload = CreateLstmWorkloadTest<LstmWorkloadType>(factory, graph);
951 
952  LstmQueueDescriptor queueDescriptor = workload->GetData();
953  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
954  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[1]);
955  BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 2, 2 }));
956  BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 2, 4 }));
957 }
958 
// LSTM workload creation for the float workload type.
BOOST_AUTO_TEST_CASE(CreateLSTMWorkloadFloatWorkload)
{
    ClCreateLstmWorkloadTest<ClLstmFloatWorkload>();
}
963 
964 template <typename ResizeWorkloadType, typename armnn::DataType DataType>
965 static void ClResizeWorkloadTest(DataLayout dataLayout)
966 {
967  Graph graph;
968  ClWorkloadFactory factory =
969  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
970 
971  auto workload = CreateResizeBilinearWorkloadTest<ResizeWorkloadType, DataType>(factory, graph, dataLayout);
972 
973  auto queueDescriptor = workload->GetData();
974 
975  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
976  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
977 
978  switch (dataLayout)
979  {
980  case DataLayout::NHWC:
981  BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 2, 4, 4, 3 }));
982  BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 2, 2, 2, 3 }));
983  break;
984  case DataLayout::NCHW:
985  default:
986  BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 2, 3, 4, 4 }));
987  BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 2, 3, 2, 2 }));
988  }
989 }
990 
// Resize workload creation across data type (Float32/Float16/QAsymmU8) and layout (NCHW/NHWC).
BOOST_AUTO_TEST_CASE(CreateResizeFloat32NchwWorkload)
{
    ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateResizeFloat16NchwWorkload)
{
    ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::Float16>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateResizeUint8NchwWorkload)
{
    ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::QAsymmU8>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateResizeFloat32NhwcWorkload)
{
    ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
}

BOOST_AUTO_TEST_CASE(CreateResizeFloat16NhwcWorkload)
{
    ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
}

BOOST_AUTO_TEST_CASE(CreateResizeUint8NhwcWorkload)
{
    ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::QAsymmU8>(DataLayout::NHWC);
}
1020 
1021 template <typename MeanWorkloadType, typename armnn::DataType DataType>
1022 static void ClMeanWorkloadTest()
1023 {
1024  Graph graph;
1025  ClWorkloadFactory factory =
1026  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
1027 
1028  auto workload = CreateMeanWorkloadTest<MeanWorkloadType, DataType>(factory, graph);
1029 
1030  // Checks that inputs/outputs are as we expect them (see definition of CreateMeanWorkloadTest).
1031  MeanQueueDescriptor queueDescriptor = workload->GetData();
1032  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
1033  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
1034 
1035  // The first dimension (batch size) in both input and output is singular thus it has been reduced by ACL.
1036  BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 1, 3, 7, 4 }));
1037  BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 1, 4 }));
1038 }
1039 
// Mean workload creation for each supported data type.
BOOST_AUTO_TEST_CASE(CreateMeanFloat32Workload)
{
    ClMeanWorkloadTest<ClMeanWorkload, armnn::DataType::Float32>();
}

BOOST_AUTO_TEST_CASE(CreateMeanFloat16Workload)
{
    ClMeanWorkloadTest<ClMeanWorkload, armnn::DataType::Float16>();
}

BOOST_AUTO_TEST_CASE(CreateMeanUint8Workload)
{
    ClMeanWorkloadTest<ClMeanWorkload, armnn::DataType::QAsymmU8>();
}
1054 
1055 template <typename ConcatWorkloadType, armnn::DataType DataType>
1056 static void ClCreateConcatWorkloadTest(std::initializer_list<unsigned int> outputShape,
1057  unsigned int concatAxis)
1058 {
1059  Graph graph;
1060  ClWorkloadFactory factory =
1061  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
1062 
1063  auto workload = CreateConcatWorkloadTest<ConcatWorkloadType, DataType>(factory, graph, outputShape, concatAxis);
1064 
1065  ConcatQueueDescriptor queueDescriptor = workload->GetData();
1066  auto inputHandle0 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
1067  auto inputHandle1 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]);
1068  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
1069 
1070  BOOST_TEST(CompareIClTensorHandleShape(inputHandle0, { 2, 3, 2, 5 }));
1071  BOOST_TEST(CompareIClTensorHandleShape(inputHandle1, { 2, 3, 2, 5 }));
1072  BOOST_TEST(CompareIClTensorHandleShape(outputHandle, outputShape));
1073 }
1074 
// Concat workload creation for Float32 and QAsymmU8 along axes 0, 1 and 3.
BOOST_AUTO_TEST_CASE(CreateConcatDim0Float32Workload)
{
    ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::Float32>({ 4, 3, 2, 5 }, 0);
}

BOOST_AUTO_TEST_CASE(CreateConcatDim1Float32Workload)
{
    ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::Float32>({ 2, 6, 2, 5 }, 1);
}

BOOST_AUTO_TEST_CASE(CreateConcatDim3Float32Workload)
{
    ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::Float32>({ 2, 3, 2, 10 }, 3);
}

BOOST_AUTO_TEST_CASE(CreateConcatDim0Uint8Workload)
{
    ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::QAsymmU8>({ 4, 3, 2, 5 }, 0);
}

BOOST_AUTO_TEST_CASE(CreateConcatDim1Uint8Workload)
{
    ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::QAsymmU8>({ 2, 6, 2, 5 }, 1);
}

BOOST_AUTO_TEST_CASE(CreateConcatDim3Uint8Workload)
{
    ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::QAsymmU8>({ 2, 3, 2, 10 }, 3);
}
1104 
1105 template <typename SpaceToDepthWorkloadType, typename armnn::DataType DataType>
1106 static void ClSpaceToDepthWorkloadTest()
1107 {
1108  Graph graph;
1109  ClWorkloadFactory factory =
1110  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
1111 
1112  auto workload = CreateSpaceToDepthWorkloadTest<SpaceToDepthWorkloadType, DataType>(factory, graph);
1113 
1114  SpaceToDepthQueueDescriptor queueDescriptor = workload->GetData();
1115  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
1116  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
1117 
1118  BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 1, 2, 2, 1 }));
1119  BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 1, 1, 1, 4 }));
1120 }
1121 
// SpaceToDepth workload creation for each supported data type.
BOOST_AUTO_TEST_CASE(CreateSpaceToDepthFloat32Workload)
{
    ClSpaceToDepthWorkloadTest<ClSpaceToDepthWorkload, armnn::DataType::Float32>();
}

BOOST_AUTO_TEST_CASE(CreateSpaceToDepthFloat16Workload)
{
    ClSpaceToDepthWorkloadTest<ClSpaceToDepthWorkload, armnn::DataType::Float16>();
}

BOOST_AUTO_TEST_CASE(CreateSpaceToDepthQAsymm8Workload)
{
    ClSpaceToDepthWorkloadTest<ClSpaceToDepthWorkload, armnn::DataType::QAsymmU8>();
}

BOOST_AUTO_TEST_CASE(CreateSpaceToDepthQSymm16Workload)
{
    ClSpaceToDepthWorkloadTest<ClSpaceToDepthWorkload, armnn::DataType::QSymmS16>();
}
1141 
1142 template <armnn::DataType DataType>
1143 static void ClCreateStackWorkloadTest(const std::initializer_list<unsigned int>& inputShape,
1144  const std::initializer_list<unsigned int>& outputShape,
1145  unsigned int axis,
1146  unsigned int numInputs)
1147 {
1148  armnn::Graph graph;
1149  ClWorkloadFactory factory =
1150  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
1151 
1152  auto workload = CreateStackWorkloadTest<ClStackWorkload, DataType>(factory,
1153  graph,
1154  TensorShape(inputShape),
1155  TensorShape(outputShape),
1156  axis,
1157  numInputs);
1158 
1159  // Check inputs and output are as expected
1160  StackQueueDescriptor queueDescriptor = workload->GetData();
1161  for (unsigned int i = 0; i < numInputs; ++i)
1162  {
1163  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[i]);
1164  BOOST_TEST(CompareIClTensorHandleShape(inputHandle, inputShape));
1165  }
1166  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
1167  BOOST_TEST(CompareIClTensorHandleShape(outputHandle, outputShape));
1168 }
1169 
// Stack workload creation: two 3x4x5 inputs stacked on axis 2 into 3x4x2x5.
BOOST_AUTO_TEST_CASE(CreateStackFloat32Workload)
{
    ClCreateStackWorkloadTest<armnn::DataType::Float32>({ 3, 4, 5 }, { 3, 4, 2, 5 }, 2, 2);
}

BOOST_AUTO_TEST_CASE(CreateStackFloat16Workload)
{
    ClCreateStackWorkloadTest<armnn::DataType::Float16>({ 3, 4, 5 }, { 3, 4, 2, 5 }, 2, 2);
}

BOOST_AUTO_TEST_CASE(CreateStackUint8Workload)
{
    ClCreateStackWorkloadTest<armnn::DataType::QAsymmU8>({ 3, 4, 5 }, { 3, 4, 2, 5 }, 2, 2);
}
1184 
1185 
1186 template <typename QLstmWorkloadType>
1187 static void ClCreateQLstmWorkloadTest()
1188 {
1189  Graph graph;
1190  ClWorkloadFactory factory = ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
1191 
1192  auto workload = CreateQLstmWorkloadTest<QLstmWorkloadType>(factory, graph);
1193  QLstmQueueDescriptor queueDescriptor = workload->GetData();
1194 
1195  IAclTensorHandle* inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
1196  BOOST_TEST((inputHandle->GetShape() == TensorShape({2, 4})));
1197  BOOST_TEST((inputHandle->GetDataType() == arm_compute::DataType::QASYMM8_SIGNED));
1198 
1199  IAclTensorHandle* cellStateOutHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
1200  BOOST_TEST((cellStateOutHandle->GetShape() == TensorShape({2, 4})));
1201  BOOST_TEST((cellStateOutHandle->GetDataType() == arm_compute::DataType::QSYMM16));
1202 
1203  IAclTensorHandle* outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[2]);
1204  BOOST_TEST((outputHandle->GetShape() == TensorShape({2, 4})));
1205  BOOST_TEST((outputHandle->GetDataType() == arm_compute::DataType::QASYMM8_SIGNED));
1206 }
1207 
// QLstm workload creation for the CL backend.
BOOST_AUTO_TEST_CASE(CreateQLstmWorkloadTest)
{
    ClCreateQLstmWorkloadTest<ClQLstmWorkload>();
}
1212 
// Creates a QuantizedLstm workload and checks shape and quantized ACL data type
// of all three inputs (input, cell state in, output state in) and both outputs
// (cell state out, output state out).
template <typename QuantizedLstmWorkloadType>
static void ClCreateQuantizedLstmWorkloadTest()
{
    using namespace armnn::armcomputetensorutils;

    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateQuantizedLstmWorkloadTest<QuantizedLstmWorkloadType>(factory, graph);

    QuantizedLstmQueueDescriptor queueDescriptor = workload->GetData();

    // Input: unsigned asymmetric 8-bit, shape 2x2.
    IAclTensorHandle* inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
    BOOST_TEST((inputHandle->GetShape() == TensorShape({2, 2})));
    BOOST_TEST((inputHandle->GetDataType() == arm_compute::DataType::QASYMM8));

    // Cell state in: symmetric 16-bit, shape 2x4.
    IAclTensorHandle* cellStateInHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
    BOOST_TEST((cellStateInHandle->GetShape() == TensorShape({2, 4})));
    BOOST_TEST((cellStateInHandle->GetDataType() == arm_compute::DataType::QSYMM16));

    // Output state in: unsigned asymmetric 8-bit, shape 2x4.
    IAclTensorHandle* outputStateInHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[2]);
    BOOST_TEST((outputStateInHandle->GetShape() == TensorShape({2, 4})));
    BOOST_TEST((outputStateInHandle->GetDataType() == arm_compute::DataType::QASYMM8));

    // Cell state out: symmetric 16-bit, shape 2x4.
    IAclTensorHandle* cellStateOutHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
    BOOST_TEST((cellStateOutHandle->GetShape() == TensorShape({2, 4})));
    BOOST_TEST((cellStateOutHandle->GetDataType() == arm_compute::DataType::QSYMM16));

    // Output state out: unsigned asymmetric 8-bit, shape 2x4.
    IAclTensorHandle* outputStateOutHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
    BOOST_TEST((outputStateOutHandle->GetShape() == TensorShape({2, 4})));
    BOOST_TEST((outputStateOutHandle->GetDataType() == arm_compute::DataType::QASYMM8));
}
1246 
// QuantizedLstm workload creation for the CL backend.
BOOST_AUTO_TEST_CASE(CreateQuantizedLstmWorkload)
{
    ClCreateQuantizedLstmWorkloadTest<ClQuantizedLstmWorkload>();
}
1251 
uint32_t m_PadBottom
Padding bottom value in the height dimension.
bool m_BiasEnabled
Enable/disable bias.
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
DataLayout
Definition: Types.hpp:50
std::vector< BackendOptions > ModelOptions
BOOST_AUTO_TEST_CASE(CreateActivationFloatWorkload)
A Convolution2dDescriptor for the Convolution2dLayer.
uint32_t m_PadRight
Padding right value in the width dimension.
Copyright (c) 2021 ARM Limited and Contributors.
void IgnoreUnused(Ts &&...)
virtual arm_compute::DataType GetDataType() const =0
uint32_t m_PadTop
Padding top value in the height dimension.
uint32_t m_StrideX
Stride value when proceeding through input for the width dimension.
DataType
Definition: Types.hpp:32
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
void AllocateAndCopyDataToITensorHandle(armnn::ITensorHandle *tensorHandle, const void *memory)
BOOST_AUTO_TEST_CASE(CheckConvolution2dLayer)
virtual TensorShape GetShape() const =0
Get the number of elements for each dimension ordered from slowest iterating dimension to fastest ite...
uint32_t m_StrideY
Stride value when proceeding through input for the height dimension.
This layer represents an addition operation.
Struct for the users to pass backend specific options.
BOOST_AUTO_TEST_SUITE_END()
This layer represents a subtraction operation.
std::vector< ITensorHandle * > m_Outputs
boost::test_tools::predicate_result CompareIClTensorHandleShape(IClTensorHandle *tensorHandle, std::initializer_list< unsigned int > expectedDimensions)
This layer represents a division operation.
UnaryOperation
Definition: Types.hpp:94
Contains information about inputs and outputs to a layer.
void SetQuantizationOffset(int32_t offset)
Definition: Tensor.cpp:480
std::vector< ITensorHandle * > m_Inputs
This layer represents a multiplication operation.
arm_compute::CLSubTensor & GetTensor() override
uint32_t m_PadLeft
Padding left value in the width dimension.
LayerType
When adding a new layer, adapt also the LastLayer enum value in the enum class LayerType below...
Definition: Types.hpp:419