ArmNN
 21.08
ClCreateWorkloadTests.cpp
Go to the documentation of this file.
1 //
2 // Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
8 
15 
18 
19 #include <cl/ClTensorHandle.hpp>
20 #include <cl/ClWorkloadFactory.hpp>
23 
24 #include <doctest/doctest.h>
25 
27  std::initializer_list<unsigned int> expectedDimensions)
28 {
29  return CompareTensorHandleShape<IClTensorHandle>(tensorHandle, expectedDimensions);
30 }
31 
32 TEST_SUITE("CreateWorkloadCl")
33 {
34 template <armnn::DataType DataType>
35 static void ClCreateActivationWorkloadTest()
36 {
37  Graph graph;
38  ClWorkloadFactory factory =
39  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
40 
41  auto workload = CreateActivationWorkloadTest<ClActivationWorkload, DataType>(factory, graph);
42 
43  // Checks that inputs/outputs are as we expect them (see definition of CreateActivationWorkloadTest).
44  ActivationQueueDescriptor queueDescriptor = workload->GetData();
45  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
46  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
47 
48  auto predResult = CompareIClTensorHandleShape(inputHandle, {1, 1});
49  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
50 
51  predResult = CompareIClTensorHandleShape(outputHandle, {1, 1});
52  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
53 }
54 
// Runs ClCreateActivationWorkloadTest for DataType::Float32 under ClContextControlFixture.
55 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateActivationFloatWorkload")
56 {
57  ClCreateActivationWorkloadTest<armnn::DataType::Float32>();
58 }
59 
// Runs ClCreateActivationWorkloadTest for DataType::Float16 under ClContextControlFixture.
60 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateActivationFloat16Workload")
61 {
62  ClCreateActivationWorkloadTest<armnn::DataType::Float16>();
63 }
64 
// Shared helper for binary elementwise workloads: creates the workload through
// CreateElementwiseWorkloadTest and checks that both inputs and the output
// tensor handles have shape {2, 3}.
// NOTE(review): the final template parameter (listing line 68) is missing from
// this view; the body uses a `DataType` template argument, so it is presumably
// an armnn::DataType non-type parameter — confirm against the real source.
65 template <typename WorkloadType,
66  typename DescriptorType,
67  typename LayerType,
69 static void ClCreateElementwiseWorkloadTest()
70 {
71  Graph graph;
72  ClWorkloadFactory factory =
73  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
74 
75  auto workload = CreateElementwiseWorkloadTest<WorkloadType, DescriptorType, LayerType, DataType>(factory, graph);
76 
77  // Checks that inputs/outputs are as we expect them (see definition of CreateElementwiseWorkloadTest).
78  DescriptorType queueDescriptor = workload->GetData();
79  auto inputHandle1 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
80  auto inputHandle2 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]);
81  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
82  auto predResult = CompareIClTensorHandleShape(inputHandle1, {2, 3});
83  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
84  predResult = CompareIClTensorHandleShape(inputHandle2, {2, 3});
85  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
86  predResult = CompareIClTensorHandleShape(outputHandle, {2, 3});
87  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
88 }
89 
// Runs the shared elementwise helper with ClAdditionWorkload.
// NOTE(review): the remaining template arguments (listing lines 93-95) are
// missing from this view; the test name suggests Float32 — confirm.
90 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateAdditionFloatWorkload")
91 {
92  ClCreateElementwiseWorkloadTest<ClAdditionWorkload,
96 }
97 
// Runs the shared elementwise helper with ClAdditionWorkload.
// NOTE(review): the remaining template arguments (listing lines 101-103) are
// missing from this view; the test name suggests Float16 — confirm.
98 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateAdditionFloat16Workload")
99 {
100  ClCreateElementwiseWorkloadTest<ClAdditionWorkload,
104 }
105 
// Runs the shared elementwise helper with ClSubtractionWorkload.
// NOTE(review): the remaining template arguments (listing lines 109-111) are
// missing from this view; the test name suggests Float32 — confirm.
106 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSubtractionFloatWorkload")
107 {
108  ClCreateElementwiseWorkloadTest<ClSubtractionWorkload,
112 }
113 
// Runs the shared elementwise helper with ClSubtractionWorkload.
// NOTE(review): the remaining template arguments (listing lines 117-119) are
// missing from this view; the test name suggests Float16 — confirm.
114 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSubtractionFloat16Workload")
115 {
116  ClCreateElementwiseWorkloadTest<ClSubtractionWorkload,
120 }
121 
// Runs the shared elementwise helper with ClMultiplicationWorkload.
// NOTE(review): the remaining template arguments (listing lines 125-127) are
// missing from this view; the test name suggests Float32 — confirm.
122 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateMultiplicationFloatWorkloadTest")
123 {
124  ClCreateElementwiseWorkloadTest<ClMultiplicationWorkload,
128 }
129 
// Runs the shared elementwise helper with ClMultiplicationWorkload.
// NOTE(review): the remaining template arguments (listing lines 133-135) are
// missing from this view; the test name suggests Float16 — confirm.
130 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateMultiplicationFloat16WorkloadTest")
131 {
132  ClCreateElementwiseWorkloadTest<ClMultiplicationWorkload,
136 }
137 
// Runs the shared elementwise helper with ClMultiplicationWorkload.
// NOTE(review): the remaining template arguments (listing lines 141-143) are
// missing from this view; the test name suggests an 8-bit unsigned type — confirm.
138 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateMultiplicationUint8WorkloadTest")
139 {
140  ClCreateElementwiseWorkloadTest<ClMultiplicationWorkload,
144 }
145 
// Runs the shared elementwise helper with ClDivisionWorkload.
// NOTE(review): the remaining template arguments (listing lines 149-151) are
// missing from this view; the test name suggests Float32 — confirm.
146 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateDivisionFloatWorkloadTest")
147 {
148  ClCreateElementwiseWorkloadTest<ClDivisionWorkload,
152 }
153 
// Runs the shared elementwise helper with ClDivisionWorkload.
// NOTE(review): the remaining template arguments (listing lines 157-159) are
// missing from this view; the test name suggests Float16 — confirm.
154 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateDivisionFloat16WorkloadTest")
155 {
156  ClCreateElementwiseWorkloadTest<ClDivisionWorkload,
160 }
161 
// Shared helper for unary elementwise workloads: creates the workload for the
// given UnaryOperation through CreateElementwiseUnaryWorkloadTest and checks
// that the input and output tensor handles both have shape {2, 3}.
// NOTE(review): the final template parameter (listing line 164) is missing from
// this view; the body uses a `DataType` template argument, so it is presumably
// an armnn::DataType non-type parameter — confirm against the real source.
162 template <typename WorkloadType,
163  typename DescriptorType,
165 static void ClCreateElementwiseUnaryWorkloadTest(armnn::UnaryOperation op)
166 {
167  Graph graph;
168  ClWorkloadFactory factory =
169  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
170 
171  auto workload = CreateElementwiseUnaryWorkloadTest<WorkloadType, DescriptorType, DataType>(factory, graph, op);
172 
173  DescriptorType queueDescriptor = workload->GetData();
174 
175  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
176  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
177 
178  auto predResult = CompareIClTensorHandleShape(inputHandle, {2, 3});
179  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
180 
181  predResult = CompareIClTensorHandleShape(outputHandle, {2, 3});
182  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
183 }
184 
// Runs the unary elementwise helper with ClRsqrtWorkload / RsqrtQueueDescriptor,
// Float32 data and UnaryOperation::Rsqrt.
185 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateRsqrtFloat32WorkloadTest")
186 {
187  ClCreateElementwiseUnaryWorkloadTest<ClRsqrtWorkload, RsqrtQueueDescriptor, armnn::DataType::Float32>(
188  UnaryOperation::Rsqrt);
189 }
190 
191 template <typename BatchNormalizationWorkloadType, armnn::DataType DataType>
192 static void ClCreateBatchNormalizationWorkloadTest(DataLayout dataLayout)
193 {
194  Graph graph;
195  ClWorkloadFactory factory =
196  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
197 
198  auto workload = CreateBatchNormalizationWorkloadTest<BatchNormalizationWorkloadType, DataType>
199  (factory, graph, dataLayout);
200 
201  // Checks that inputs/outputs are as we expect them (see definition of CreateBatchNormalizationWorkloadTest).
202  BatchNormalizationQueueDescriptor queueDescriptor = workload->GetData();
203  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
204  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
205 
206  armnn::PredicateResult predResult(true);
207  switch (dataLayout)
208  {
209  case DataLayout::NHWC:
210  predResult = CompareIClTensorHandleShape(inputHandle, { 2, 4, 4, 3 });
211  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
212  predResult = CompareIClTensorHandleShape(outputHandle, { 2, 4, 4, 3 });
213  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
214  break;
215  default: // NCHW
216  predResult = CompareIClTensorHandleShape(inputHandle, { 2, 3, 4, 4 });
217  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
218  predResult = CompareIClTensorHandleShape(outputHandle, { 2, 3, 4, 4 });
219  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
220  }
221 }
222 
// Runs the batch normalization helper with ClBatchNormalizationFloatWorkload, Float32 data, NCHW layout.
223 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateBatchNormalizationFloatNchwWorkload")
224 {
225  ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload,
226  armnn::DataType::Float32>(DataLayout::NCHW);
227 }
228 
// Runs the batch normalization helper with ClBatchNormalizationFloatWorkload, Float16 data, NCHW layout.
229 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateBatchNormalizationFloat16NchwWorkload")
230 {
231  ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload,
232  armnn::DataType::Float16>(DataLayout::NCHW);
233 }
234 
// Runs the batch normalization helper with ClBatchNormalizationFloatWorkload, Float32 data, NHWC layout.
235 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateBatchNormalizationFloatNhwcWorkload")
236 {
237  ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload,
238  armnn::DataType::Float32>(DataLayout::NHWC);
239 }
240 
// Runs the batch normalization helper with ClBatchNormalizationFloatWorkload, Float16 data, NHWC layout.
// NOTE(review): the test name repeats "Nhwc" ("NhwcFloat16Nhwc") while the sibling tests use
// "<Type><Layout>"; consider renaming once any test filters that depend on the name are checked.
241 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateBatchNormalizationNhwcFloat16NhwcWorkload")
242 {
243  ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload,
244  armnn::DataType::Float16>(DataLayout::NHWC);
245 }
246 
247 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConvertFp16ToFp32Workload")
248 {
249  Graph graph;
250  ClWorkloadFactory factory =
251  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
252 
253  auto workload = CreateConvertFp16ToFp32WorkloadTest<ClConvertFp16ToFp32Workload>(factory, graph);
254 
255  ConvertFp16ToFp32QueueDescriptor queueDescriptor = workload->GetData();
256  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
257  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
258  auto predResult = CompareIClTensorHandleShape(inputHandle, {1, 3, 2, 3});
259  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
260  predResult = CompareIClTensorHandleShape(outputHandle, {1, 3, 2, 3});
261  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
262  CHECK((inputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F16));
263  CHECK((outputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F32));
264 }
265 
266 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConvertFp32ToFp16Workload")
267 {
268  Graph graph;
269  ClWorkloadFactory factory =
270  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
271 
272  auto workload = CreateConvertFp32ToFp16WorkloadTest<ClConvertFp32ToFp16Workload>(factory, graph);
273 
274  ConvertFp32ToFp16QueueDescriptor queueDescriptor = workload->GetData();
275  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
276  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
277 
278  auto predResult = CompareIClTensorHandleShape(inputHandle, {1, 3, 2, 3});
279  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
280  predResult = CompareIClTensorHandleShape(outputHandle, {1, 3, 2, 3});
281  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
282  CHECK((inputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F32));
283  CHECK((outputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F16));
284 }
285 
286 template <typename Convolution2dWorkloadType, typename armnn::DataType DataType>
287 static void ClConvolution2dWorkloadTest(DataLayout dataLayout)
288 {
289  Graph graph;
290  ClWorkloadFactory factory =
291  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
292 
293  auto workload = CreateConvolution2dWorkloadTest<ClConvolution2dWorkload, DataType>(factory,
294  graph,
295  dataLayout);
296 
297  TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({2, 3, 8, 16})
298  : std::initializer_list<unsigned int>({2, 8, 16, 3});
299  TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({2, 2, 2, 10})
300  : std::initializer_list<unsigned int>({2, 2, 10, 2});
301 
302  // Checks that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest).
303  Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
304  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
305  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
306  CHECK((inputHandle->GetShape() == inputShape));
307  CHECK((outputHandle->GetShape() == outputShape));
308 }
309 
// Runs the 2D convolution helper with ClConvolution2dWorkload, Float32 data, NCHW layout.
310 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConvolution2dFloatNchwWorkload")
311 {
312  ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
313 }
314 
// Runs the 2D convolution helper with ClConvolution2dWorkload, Float32 data, NHWC layout.
315 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConvolution2dFloatNhwcWorkload")
316 {
317  ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
318 }
319 
// Runs the 2D convolution helper with ClConvolution2dWorkload, Float16 data, NCHW layout.
320 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConvolution2dFloat16NchwWorkload")
321 {
322  ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float16>(DataLayout::NCHW);
323 }
324 
// Runs the 2D convolution helper with ClConvolution2dWorkload, Float16 data, NHWC layout.
325 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConvolution2dFloat16NhwcWorkload")
326 {
327  ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
328 }
329 
330 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConvolution2dFastMathEnabledWorkload")
331 {
332  Graph graph;
333 
334  using ModelOptions = std::vector<BackendOptions>;
335  ModelOptions modelOptions = {};
336  BackendOptions gpuAcc("GpuAcc",
337  {
338  { "FastMathEnabled", true }
339  });
340  modelOptions.push_back(gpuAcc);
341 
342  ClWorkloadFactory factory =
343  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager(), modelOptions);
344 
345  auto workload =
346  CreateConvolution2dWorkloadFastMathTest<ClConvolution2dWorkload, armnn::DataType::Float32>(factory,
347  graph,
348  DataLayout::NCHW,
349  modelOptions);
350 
351  ARMNN_ASSERT(workload != nullptr);
352  auto conv2dWorkload = PolymorphicDowncast<ClConvolution2dWorkload*>(workload.get());
353  IgnoreUnused(conv2dWorkload);
354  ARMNN_ASSERT(conv2dWorkload != nullptr);
355  ARMNN_ASSERT(conv2dWorkload->GetConvolutionMethod() == arm_compute::ConvolutionMethod::WINOGRAD);
356 }
357 
358 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConvolution2dClCompiledContextWorkload")
359 {
360  using namespace armnn;
361 
362  const DataType inputType = DataType::QAsymmU8;
363  const DataType kernelType = DataType::QSymmS8;
364  const DataType biasType = DataType::Signed32;
365 
366  TensorInfo inputInfo ({ 1, 3, 1, 2 }, inputType, 0.5f, 128);
367  TensorInfo outputInfo({ 1, 3, 1, 3 }, inputType, 1.0f, 128);
368 
369  const std::vector<float> quantScales{ 0.5f, 0.75f, 1.0f };
370  constexpr unsigned int quantDimension = 0;
371 
372  TensorInfo kernelInfo({ 3, 1, 1, 2 }, kernelType, quantScales, quantDimension);
373 
374  const std::vector<float> biasQuantScales{ 0.25f, 0.375f, 0.5f };
375  TensorInfo biasInfo({ 3 }, biasType, biasQuantScales, quantDimension);
376 
377  std::vector<uint8_t> inputData =
378  {
379  138, 108, 138, 108, 138, 108
380  };
381 
382  std::vector<int8_t> kernelData =
383  {
384  1, 2, 1, 2, 1, 2
385  };
386 
387  std::vector<int32_t> biasData =
388  {
389  4, 4, 4
390  };
391 
392  std::vector<uint8_t> expectedOutputData =
393  {
394  121, 118, 115, 121, 118, 115, 121, 118, 115
395  };
396 
397 
398  Convolution2dDescriptor descriptor;
399  descriptor.m_StrideX = 1;
400  descriptor.m_StrideY = 1;
401  descriptor.m_PadLeft = 0;
402  descriptor.m_PadRight = 0;
403  descriptor.m_PadTop = 0;
404  descriptor.m_PadBottom = 0;
405  descriptor.m_BiasEnabled = true;
406  descriptor.m_DataLayout = DataLayout::NHWC;
407 
408  auto memoryManager = ClWorkloadFactoryHelper::GetMemoryManager();
409  auto clMemoryManager = armnn::PolymorphicPointerDowncast<armnn::ClMemoryManager>(memoryManager);
410  auto tensorHandleFactory = ClWorkloadFactoryHelper::GetTensorHandleFactory(memoryManager);
411 
412  std::unique_ptr<ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputInfo);
413  std::unique_ptr<ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputInfo);
414 
415 
416  WorkloadInfo workloadInfo;
417  ScopedTensorHandle weightTensor(kernelInfo);
418  ScopedTensorHandle biasTensor(biasInfo);
419 
420  AllocateAndCopyDataToITensorHandle(&weightTensor, kernelData.data());
421  AllocateAndCopyDataToITensorHandle(&biasTensor, biasData.data());
422 
423  Convolution2dQueueDescriptor queueDescriptor;
424  queueDescriptor.m_Parameters = descriptor;
425  queueDescriptor.m_Weight = &weightTensor;
426  queueDescriptor.m_Bias = &biasTensor;
427 
428  AddInputToWorkload(queueDescriptor, workloadInfo, inputInfo, inputHandle.get());
429  AddOutputToWorkload(queueDescriptor, workloadInfo, outputInfo, outputHandle.get());
430 
431  // Initialize our m_CLCompileContext using default device and context
432  auto context = arm_compute::CLKernelLibrary::get().context();
433  auto device = arm_compute::CLKernelLibrary::get().get_device();
434  auto clCompileContext = arm_compute::CLCompileContext(context, device);
435 
436 
437 
438  // Check built programs are empty in context
439  CHECK(clCompileContext.get_built_programs().empty());
440 
441  auto workload = std::make_unique<ClConvolution2dWorkload>(queueDescriptor,
442  workloadInfo,
443  clMemoryManager->GetIntraLayerManager(),
444  clCompileContext);
445  ARMNN_ASSERT(workload != nullptr);
446  // Check built programs are not empty in context
447  CHECK(!clCompileContext.get_built_programs().empty());
448 }
449 
450 template <typename DepthwiseConvolutionWorkloadType, typename armnn::DataType DataType>
451 static void ClDepthwiseConvolutionWorkloadTest(DataLayout dataLayout)
452 {
453  Graph graph;
454  ClWorkloadFactory factory =
455  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
456 
457  auto workload = CreateDepthwiseConvolution2dWorkloadTest<DepthwiseConvolutionWorkloadType, DataType>
458  (factory, graph, dataLayout);
459 
460  // Checks that inputs/outputs are as we expect them (see definition of CreateDepthwiseConvolution2dWorkloadTest).
461  DepthwiseConvolution2dQueueDescriptor queueDescriptor = workload->GetData();
462  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
463  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
464 
465  TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 2, 2, 5, 5 })
466  : std::initializer_list<unsigned int>({ 2, 5, 5, 2 });
467  TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 2, 2, 5, 5 })
468  : std::initializer_list<unsigned int>({ 2, 5, 5, 2 });
469 
470  CHECK((inputHandle->GetShape() == inputShape));
471  CHECK((outputHandle->GetShape() == outputShape));
472 }
473 
// Runs the depthwise convolution helper with ClDepthwiseConvolutionWorkload, Float32 data, NHWC layout.
474 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateDepthwiseConvolutionFloat32NhwcWorkload")
475 {
476  ClDepthwiseConvolutionWorkloadTest<ClDepthwiseConvolutionWorkload, DataType::Float32>(DataLayout::NHWC);
477 }
478 
479 template <typename Convolution2dWorkloadType, typename armnn::DataType DataType>
480 static void ClDirectConvolution2dWorkloadTest()
481 {
482  Graph graph;
483  ClWorkloadFactory factory =
484  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
485 
486  auto workload = CreateDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, DataType>(factory, graph);
487 
488  // Checks that outputs and inputs are as we expect them (see definition of CreateDirectConvolution2dWorkloadTest).
489  Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
490  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
491  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
492  auto predResult = CompareIClTensorHandleShape(inputHandle, {2, 3, 6, 6});
493  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
494  predResult = CompareIClTensorHandleShape(outputHandle, {2, 2, 6, 6});
495  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
496 }
497 
// Runs the direct convolution helper with ClConvolution2dWorkload and Float32 data.
498 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateDirectConvolution2dFloatWorkload")
499 {
500  ClDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float32>();
501 }
502 
// Runs the direct convolution helper with ClConvolution2dWorkload and Float16 data.
503 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateDirectConvolution2dFloat16Workload")
504 {
505  ClDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float16>();
506 }
507 
// Runs the direct convolution helper with ClConvolution2dWorkload and QAsymmU8 data.
508 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateDirectConvolution2dUint8Workload")
509 {
510  ClDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::QAsymmU8>();
511 }
512 
513 template <typename FullyConnectedWorkloadType, typename armnn::DataType DataType>
514 static void ClCreateFullyConnectedWorkloadTest()
515 {
516  Graph graph;
517  ClWorkloadFactory factory =
518  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
519 
520  auto workload =
521  CreateFullyConnectedWorkloadTest<FullyConnectedWorkloadType, DataType>(factory, graph);
522 
523  // Checks that outputs and inputs are as we expect them (see definition of CreateFullyConnectedWorkloadTest).
524  FullyConnectedQueueDescriptor queueDescriptor = workload->GetData();
525  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
526  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
527  auto predResult = CompareIClTensorHandleShape(inputHandle, {3, 1, 4, 5});
528  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
529  predResult = CompareIClTensorHandleShape(outputHandle, {3, 7});
530  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
531 }
532 
533 
// Runs the fully connected helper with ClFullyConnectedWorkload and Float32 data.
534 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateFullyConnectedFloatWorkloadTest")
535 {
536  ClCreateFullyConnectedWorkloadTest<ClFullyConnectedWorkload, armnn::DataType::Float32>();
537 }
538 
// Runs the fully connected helper with ClFullyConnectedWorkload and Float16 data.
539 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateFullyConnectedFloat16WorkloadTest")
540 {
541  ClCreateFullyConnectedWorkloadTest<ClFullyConnectedWorkload, armnn::DataType::Float16>();
542 }
543 
544 template <typename NormalizationWorkloadType, typename armnn::DataType DataType>
545 static void ClNormalizationWorkloadTest(DataLayout dataLayout)
546 {
547  Graph graph;
548  ClWorkloadFactory factory =
549  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
550 
551  auto workload = CreateNormalizationWorkloadTest<NormalizationWorkloadType, DataType>(factory, graph, dataLayout);
552 
553  // Checks that inputs/outputs are as we expect them (see definition of CreateNormalizationWorkloadTest).
554  NormalizationQueueDescriptor queueDescriptor = workload->GetData();
555  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
556  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
557 
558  TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({3, 5, 5, 1})
559  : std::initializer_list<unsigned int>({3, 1, 5, 5});
560  TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({3, 5, 5, 1})
561  : std::initializer_list<unsigned int>({3, 1, 5, 5});
562 
563  CHECK((inputHandle->GetShape() == inputShape));
564  CHECK((outputHandle->GetShape() == outputShape));
565 }
566 
// Runs the normalization helper with ClNormalizationFloatWorkload, Float32 data, NCHW layout.
567 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateNormalizationFloat32NchwWorkload")
568 {
569  ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
570 }
571 
// Runs the normalization helper with ClNormalizationFloatWorkload, Float16 data, NCHW layout.
572 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateNormalizationFloat16NchwWorkload")
573 {
574  ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float16>(DataLayout::NCHW);
575 }
576 
// Runs the normalization helper with ClNormalizationFloatWorkload, Float32 data, NHWC layout.
577 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateNormalizationFloat32NhwcWorkload")
578 {
579  ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
580 }
581 
// Runs the normalization helper with ClNormalizationFloatWorkload, Float16 data, NHWC layout.
582 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateNormalizationFloat16NhwcWorkload")
583 {
584  ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
585 }
586 
587 template <typename armnn::DataType DataType>
588 static void ClPooling2dWorkloadTest(DataLayout dataLayout)
589 {
590  Graph graph;
591  ClWorkloadFactory factory =
592  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
593 
594  auto workload = CreatePooling2dWorkloadTest<ClPooling2dWorkload, DataType>(factory, graph, dataLayout);
595 
596  TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({3, 2, 5, 5})
597  : std::initializer_list<unsigned int>({3, 5, 5, 2});
598  TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({3, 2, 2, 4})
599  : std::initializer_list<unsigned int>({3, 2, 4, 2});
600 
601  // Check that inputs/outputs are as we expect them (see definition of CreatePooling2dWorkloadTest).
602  Pooling2dQueueDescriptor queueDescriptor = workload->GetData();
603  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
604  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
605 
606  CHECK((inputHandle->GetShape() == inputShape));
607  CHECK((outputHandle->GetShape() == outputShape));
608 }
609 
// Runs the pooling helper with Float32 data and NCHW layout.
610 TEST_CASE_FIXTURE(ClContextControlFixture, "CreatePooling2dFloatNchwWorkload")
611 {
612  ClPooling2dWorkloadTest<armnn::DataType::Float32>(DataLayout::NCHW);
613 }
614 
// Runs the pooling helper with Float32 data and NHWC layout.
615 TEST_CASE_FIXTURE(ClContextControlFixture, "CreatePooling2dFloatNhwcWorkload")
616 {
617  ClPooling2dWorkloadTest<armnn::DataType::Float32>(DataLayout::NHWC);
618 }
619 
// Runs the pooling helper with Float16 data and NCHW layout.
620 TEST_CASE_FIXTURE(ClContextControlFixture, "CreatePooling2dFloat16NchwWorkload")
621 {
622  ClPooling2dWorkloadTest<armnn::DataType::Float16>(DataLayout::NCHW);
623 }
624 
// Runs the pooling helper with Float16 data and NHWC layout.
625 TEST_CASE_FIXTURE(ClContextControlFixture, "CreatePooling2dFloat16NhwcWorkload")
626 {
627  ClPooling2dWorkloadTest<armnn::DataType::Float16>(DataLayout::NHWC);
628 }
629 
630 static void ClCreatePreluWorkloadTest(const armnn::TensorShape& inputShape,
631  const armnn::TensorShape& alphaShape,
632  const armnn::TensorShape& outputShape,
633  armnn::DataType dataType)
634 {
635  Graph graph;
636  ClWorkloadFactory factory =
637  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
638 
639  auto workload = CreatePreluWorkloadTest<ClPreluWorkload>(factory,
640  graph,
641  inputShape,
642  alphaShape,
643  outputShape,
644  dataType);
645 
646  // Checks that outputs and inputs are as we expect them (see definition of CreatePreluWorkloadTest).
647  PreluQueueDescriptor queueDescriptor = workload->GetData();
648  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
649  auto alphaHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]);
650  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
651 
652  CHECK((inputHandle->GetShape() == inputShape));
653  CHECK((alphaHandle->GetShape() == alphaShape));
654  CHECK((outputHandle->GetShape() == outputShape));
655 }
656 
// Runs the PReLU helper with input {1,4,1,2}, alpha {5,4,3,1}, output {5,4,3,2} and Float16 data.
657 TEST_CASE_FIXTURE(ClContextControlFixture, "CreatePreluFloat16Workload")
658 {
659  ClCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::Float16);
660 }
661 
// Runs the PReLU helper with input {1,4,1,2}, alpha {5,4,3,1}, output {5,4,3,2} and Float32 data.
662 TEST_CASE_FIXTURE(ClContextControlFixture, "CreatePreluFloatWorkload")
663 {
664  ClCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::Float32);
665 }
666 
// Runs the PReLU helper with input {1,4,1,2}, alpha {5,4,3,1}, output {5,4,3,2} and QAsymmU8 data.
667 TEST_CASE_FIXTURE(ClContextControlFixture, "CreatePreluUint8Workload")
668 {
669  ClCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::QAsymmU8);
670 }
671 
672 template <typename armnn::DataType DataType>
673 static void ClCreateReshapeWorkloadTest()
674 {
675  Graph graph;
676  ClWorkloadFactory factory =
677  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
678 
679  auto workload = CreateReshapeWorkloadTest<ClReshapeWorkload, DataType>(factory, graph);
680 
681  // Checks that outputs and inputs are as we expect them (see definition of CreateReshapeWorkloadTest).
682  ReshapeQueueDescriptor queueDescriptor = workload->GetData();
683  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
684  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
685 
686  auto predResult = CompareIClTensorHandleShape(inputHandle, {4, 1});
687  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
688  predResult = CompareIClTensorHandleShape(outputHandle, {1, 4});
689  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
690 }
691 
// Runs the reshape helper with Float32 data.
692 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateReshapeFloatWorkload")
693 {
694  ClCreateReshapeWorkloadTest<armnn::DataType::Float32>();
695 }
696 
// Runs the reshape helper with Float16 data.
697 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateReshapeFloat16Workload")
698 {
699  ClCreateReshapeWorkloadTest<armnn::DataType::Float16>();
700 }
701 
// Runs the reshape helper with QAsymmU8 data.
702 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateReshapeUint8Workload")
703 {
704  ClCreateReshapeWorkloadTest<armnn::DataType::QAsymmU8>();
705 }
706 
// Shared helper for softmax workloads: creates the workload through
// CreateSoftmaxWorkloadTest and checks that the input and output tensor
// handles both have shape {4, 1}.
// NOTE(review): listing lines 722 and 727 are missing from this view; they
// presumably hold the data-type conditions guarding the two brace blocks that
// set the quantization offset/scale — confirm against the real source.
// NOTE(review): `tensorInfo` is populated but never read in the visible code.
707 template <typename SoftmaxWorkloadType, typename armnn::DataType DataType>
708 static void ClSoftmaxWorkloadTest()
709 {
710  Graph graph;
711  ClWorkloadFactory factory =
712  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
713 
714  auto workload = CreateSoftmaxWorkloadTest<SoftmaxWorkloadType, DataType>(factory, graph);
715 
716  // Checks that inputs/outputs are as we expect them (see definition of CreateSoftmaxWorkloadTest).
717  SoftmaxQueueDescriptor queueDescriptor = workload->GetData();
718  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
719  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
720 
721  armnn::TensorInfo tensorInfo({4, 1}, DataType);
723  {
724  tensorInfo.SetQuantizationOffset(0);
725  tensorInfo.SetQuantizationScale(1.f / 256);
726  }
728  {
729  tensorInfo.SetQuantizationOffset(-128);
730  tensorInfo.SetQuantizationScale(1.f / 256);
731  }
732 
733  auto predResult = CompareIClTensorHandleShape(inputHandle, {4, 1});
734  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
735  predResult = CompareIClTensorHandleShape(outputHandle, {4, 1});
736  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
737 }
738 
739 
// Runs the softmax helper with ClSoftmaxWorkload and Float32 data.
740 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSoftmaxFloat32WorkloadTest")
741 {
742  ClSoftmaxWorkloadTest<ClSoftmaxWorkload, armnn::DataType::Float32>();
743 }
744 
// Runs the softmax helper with ClSoftmaxWorkload and Float16 data.
745 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSoftmaxFloat16WorkloadTest")
746 {
747  ClSoftmaxWorkloadTest<ClSoftmaxWorkload, armnn::DataType::Float16>();
748 }
749 
// Runs the softmax helper with ClSoftmaxWorkload and QAsymmU8 data.
750 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSoftmaxQAsymmU8Workload")
751 {
752  ClSoftmaxWorkloadTest<ClSoftmaxWorkload, armnn::DataType::QAsymmU8>();
753 }
754 
// Runs the softmax helper with ClSoftmaxWorkload and QAsymmS8 data.
755 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSoftmaxQAsymmS8Workload")
756 {
757  ClSoftmaxWorkloadTest<ClSoftmaxWorkload, armnn::DataType::QAsymmS8>();
758 }
759 
760 template <typename armnn::DataType DataType>
761 static void ClSplitterWorkloadTest()
762 {
763  Graph graph;
764  ClWorkloadFactory factory =
765  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
766 
767  auto workload = CreateSplitterWorkloadTest<ClSplitterWorkload, DataType>(factory, graph);
768 
769  // Checks that outputs are as we expect them (see definition of CreateSplitterWorkloadTest).
770  SplitterQueueDescriptor queueDescriptor = workload->GetData();
771  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
772  auto predResult = CompareIClTensorHandleShape(inputHandle, {5, 7, 7});
773  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
774 
775  auto outputHandle1 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[1]);
776  predResult = CompareIClTensorHandleShape(outputHandle1, {2, 7, 7});
777  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
778 
779  auto outputHandle2 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[2]);
780  predResult = CompareIClTensorHandleShape(outputHandle2, {2, 7, 7});
781  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
782 
783  auto outputHandle0 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
784  predResult = CompareIClTensorHandleShape(outputHandle0, {1, 7, 7});
785  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
786 }
787 
788 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSplitterFloatWorkload")
789 {
790  ClSplitterWorkloadTest<armnn::DataType::Float32>();
791 }
792 
793 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSplitterFloat16Workload")
794 {
795  ClSplitterWorkloadTest<armnn::DataType::Float16>();
796 }
797 
798 template <typename armnn::DataType DataType>
799 static void ClSplitterConcatTest()
800 {
801  // Tests that it is possible to decide which output of the splitter layer
802  // should be lined to which input of the concat layer.
803  // We test that is is possible to specify 0th output
804  // of the splitter to be the 1st input to the concat and the 1st output of the splitter to be 0th input
805  // of the concat.
806 
807  Graph graph;
808  ClWorkloadFactory factory =
809  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
810 
811  auto workloads =
812  CreateSplitterConcatWorkloadTest<ClSplitterWorkload, ClConcatWorkload, DataType>
813  (factory, graph);
814 
815  auto wlSplitter = std::move(workloads.first);
816  auto wlConcat = std::move(workloads.second);
817 
818  //Checks that the index of inputs/outputs matches what we declared on InputDescriptor construction.
819  armnn::ClSubTensorHandle* sOut0 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[0]);
820  armnn::ClSubTensorHandle* sOut1 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[1]);
821  armnn::ClSubTensorHandle* mIn0 = dynamic_cast<armnn::ClSubTensorHandle*>(wlConcat->GetData().m_Inputs[0]);
822  armnn::ClSubTensorHandle* mIn1 = dynamic_cast<armnn::ClSubTensorHandle*>(wlConcat->GetData().m_Inputs[1]);
823 
824  CHECK(sOut0);
825  CHECK(sOut1);
826  CHECK(mIn0);
827  CHECK(mIn1);
828 
829  //Fliped order of inputs/outputs.
830  bool validDataPointers = (sOut0 == mIn1) && (sOut1 == mIn0);
831  CHECK(validDataPointers);
832 
833 
834  //Also make sure that the inputs are subtensors of one tensor and outputs are sub tensors of another tensor.
835  bool validSubTensorParents = (mIn0->GetTensor().parent() == mIn1->GetTensor().parent())
836  && (sOut0->GetTensor().parent() == sOut1->GetTensor().parent());
837 
838  CHECK(validSubTensorParents);
839 }
840 
841 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSplitterConcatFloatWorkload")
842 {
843  ClSplitterConcatTest<armnn::DataType::Float32>();
844 }
845 
846 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSplitterConcatFloat16Workload")
847 {
848  ClSplitterConcatTest<armnn::DataType::Float16>();
849 }
850 
851 
852 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSingleOutputMultipleInputs")
853 {
854  // Test that it is possible to assign multiple (two) different layers to each of the outputs of a splitter layer.
855  // We create a splitter with two outputs. That each of those outputs is used by two different activation layers.
856 
857  Graph graph;
858  ClWorkloadFactory factory =
859  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
860 
861  std::unique_ptr<ClSplitterWorkload> wlSplitter;
862  std::unique_ptr<ClActivationWorkload> wlActiv0_0;
863  std::unique_ptr<ClActivationWorkload> wlActiv0_1;
864  std::unique_ptr<ClActivationWorkload> wlActiv1_0;
865  std::unique_ptr<ClActivationWorkload> wlActiv1_1;
866 
867  CreateSplitterMultipleInputsOneOutputWorkloadTest<ClSplitterWorkload,
868  ClActivationWorkload, armnn::DataType::Float32>(factory, graph, wlSplitter, wlActiv0_0, wlActiv0_1,
869  wlActiv1_0, wlActiv1_1);
870 
871  //Checks that the index of inputs/outputs matches what we declared on InputDescriptor construction.
872  armnn::ClSubTensorHandle* sOut0 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[0]);
873  armnn::ClSubTensorHandle* sOut1 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[1]);
874  armnn::ClSubTensorHandle* activ0_0Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv0_0->GetData().m_Inputs[0]);
875  armnn::ClSubTensorHandle* activ0_1Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv0_1->GetData().m_Inputs[0]);
876  armnn::ClSubTensorHandle* activ1_0Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv1_0->GetData().m_Inputs[0]);
877  armnn::ClSubTensorHandle* activ1_1Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv1_1->GetData().m_Inputs[0]);
878 
879 
880  CHECK(sOut0);
881  CHECK(sOut1);
882  CHECK(activ0_0Im);
883  CHECK(activ0_1Im);
884  CHECK(activ1_0Im);
885  CHECK(activ1_1Im);
886 
887  bool validDataPointers = (sOut0 == activ0_0Im) && (sOut0 == activ0_1Im) &&
888  (sOut1 == activ1_0Im) && (sOut1 == activ1_1Im);
889 
890  CHECK(validDataPointers);
891 }
892 
#if defined(ARMNNREF_ENABLED)

// Only compiled when the reference backend is built: this test needs it.

// Runs the shared CreateMemCopyWorkloads check against a CL workload factory,
// using IClTensorHandle as the tensor handle type.
TEST_CASE_FIXTURE(ClContextControlFixture, "CreateMemCopyWorkloadsCl")
{
    auto memoryManager = ClWorkloadFactoryHelper::GetMemoryManager();
    ClWorkloadFactory factory = ClWorkloadFactoryHelper::GetFactory(memoryManager);

    CreateMemCopyWorkloads<IClTensorHandle>(factory);
}

#endif // defined(ARMNNREF_ENABLED)
906 
907 template <typename L2NormalizationWorkloadType, typename armnn::DataType DataType>
908 static void ClL2NormalizationWorkloadTest(DataLayout dataLayout)
909 {
910  Graph graph;
911  ClWorkloadFactory factory =
912  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
913 
914  auto workload =
915  CreateL2NormalizationWorkloadTest<L2NormalizationWorkloadType, DataType>(factory, graph, dataLayout);
916 
917  // Checks that inputs/outputs are as we expect them (see definition of CreateNormalizationWorkloadTest).
918  L2NormalizationQueueDescriptor queueDescriptor = workload->GetData();
919  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
920  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
921 
922  TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 5, 20, 50, 67 })
923  : std::initializer_list<unsigned int>({ 5, 50, 67, 20 });
924  TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 5, 20, 50, 67 })
925  : std::initializer_list<unsigned int>({ 5, 50, 67, 20 });
926 
927  CHECK((inputHandle->GetShape() == inputShape));
928  CHECK((outputHandle->GetShape() == outputShape));
929 }
930 
931 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateL2NormalizationFloatNchwWorkload")
932 {
933  ClL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
934 }
935 
936 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateL2NormalizationFloatNhwcWorkload")
937 {
938  ClL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
939 }
940 
941 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateL2NormalizationFloat16NchwWorkload")
942 {
943  ClL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float16>(DataLayout::NCHW);
944 }
945 
946 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateL2NormalizationFloat16NhwcWorkload")
947 {
948  ClL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
949 }
950 
951 template <typename LogSoftmaxWorkloadType, typename armnn::DataType DataType>
952 static void ClCreateLogSoftmaxWorkloadTest()
953 {
954  Graph graph;
955  ClWorkloadFactory factory =
956  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
957 
958  auto workload = CreateLogSoftmaxWorkloadTest<LogSoftmaxWorkloadType, DataType>(factory, graph);
959 
960  // Checks that outputs and inputs are as we expect them (see definition of CreateLogSoftmaxWorkloadTest).
961  LogSoftmaxQueueDescriptor queueDescriptor = workload->GetData();
962  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
963  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
964 
965  auto predResult = CompareIClTensorHandleShape(inputHandle, {4, 1});
966  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
967  predResult = CompareIClTensorHandleShape(outputHandle, {4, 1});
968  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
969 }
970 
971 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateLogSoftmaxFloat32WorkloadTest")
972 {
973  ClCreateLogSoftmaxWorkloadTest<ClLogSoftmaxWorkload, armnn::DataType::Float32>();
974 }
975 
976 template <typename LstmWorkloadType>
977 static void ClCreateLstmWorkloadTest()
978 {
979  Graph graph;
980  ClWorkloadFactory factory =
981  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
982 
983  auto workload = CreateLstmWorkloadTest<LstmWorkloadType>(factory, graph);
984 
985  LstmQueueDescriptor queueDescriptor = workload->GetData();
986  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
987  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[1]);
988  auto predResult = CompareIClTensorHandleShape(inputHandle, {2, 2});
989  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
990  predResult = CompareIClTensorHandleShape(outputHandle, {2, 4});
991  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
992 }
993 
994 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateLSTMWorkloadFloatWorkload")
995 {
996  ClCreateLstmWorkloadTest<ClLstmFloatWorkload>();
997 }
998 
999 template <typename ResizeWorkloadType, typename armnn::DataType DataType>
1000 static void ClResizeWorkloadTest(DataLayout dataLayout)
1001 {
1002  Graph graph;
1003  ClWorkloadFactory factory =
1004  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
1005 
1006  auto workload = CreateResizeBilinearWorkloadTest<ResizeWorkloadType, DataType>(factory, graph, dataLayout);
1007 
1008  auto queueDescriptor = workload->GetData();
1009 
1010  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
1011  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
1012 
1013  armnn::PredicateResult predResult(true);
1014  switch (dataLayout)
1015  {
1016  case DataLayout::NHWC:
1017  predResult = CompareIClTensorHandleShape(inputHandle, { 2, 4, 4, 3 });
1018  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
1019  predResult = CompareIClTensorHandleShape(outputHandle, { 2, 2, 2, 3 });
1020  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
1021  break;
1022  default: // DataLayout::NCHW
1023  predResult = CompareIClTensorHandleShape(inputHandle, { 2, 3, 4, 4 });
1024  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
1025  predResult = CompareIClTensorHandleShape(outputHandle, { 2, 3, 2, 2 });
1026  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
1027  }
1028 }
1029 
1030 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateResizeFloat32NchwWorkload")
1031 {
1032  ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
1033 }
1034 
1035 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateResizeFloat16NchwWorkload")
1036 {
1037  ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::Float16>(DataLayout::NCHW);
1038 }
1039 
1040 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateResizeUint8NchwWorkload")
1041 {
1042  ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::QAsymmU8>(DataLayout::NCHW);
1043 }
1044 
1045 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateResizeFloat32NhwcWorkload")
1046 {
1047  ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
1048 }
1049 
1050 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateResizeFloat16NhwcWorkload")
1051 {
1052  ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
1053 }
1054 
1055 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateResizeUint8NhwcWorkload")
1056 {
1057  ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::QAsymmU8>(DataLayout::NHWC);
1058 }
1059 
1060 template <typename MeanWorkloadType, typename armnn::DataType DataType>
1061 static void ClMeanWorkloadTest()
1062 {
1063  Graph graph;
1064  ClWorkloadFactory factory =
1065  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
1066 
1067  auto workload = CreateMeanWorkloadTest<MeanWorkloadType, DataType>(factory, graph);
1068 
1069  // Checks that inputs/outputs are as we expect them (see definition of CreateMeanWorkloadTest).
1070  MeanQueueDescriptor queueDescriptor = workload->GetData();
1071  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
1072  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
1073 
1074  // The first dimension (batch size) in both input and output is singular thus it has been reduced by ACL.
1075  auto predResult = CompareIClTensorHandleShape(inputHandle, { 1, 3, 7, 4 });
1076  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
1077  predResult = CompareIClTensorHandleShape(outputHandle, { 1, 4 });
1078  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
1079 }
1080 
1081 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateMeanFloat32Workload")
1082 {
1083  ClMeanWorkloadTest<ClMeanWorkload, armnn::DataType::Float32>();
1084 }
1085 
1086 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateMeanFloat16Workload")
1087 {
1088  ClMeanWorkloadTest<ClMeanWorkload, armnn::DataType::Float16>();
1089 }
1090 
1091 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateMeanUint8Workload")
1092 {
1093  ClMeanWorkloadTest<ClMeanWorkload, armnn::DataType::QAsymmU8>();
1094 }
1095 
1096 template <typename ConcatWorkloadType, armnn::DataType DataType>
1097 static void ClCreateConcatWorkloadTest(std::initializer_list<unsigned int> outputShape,
1098  unsigned int concatAxis)
1099 {
1100  Graph graph;
1101  ClWorkloadFactory factory =
1102  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
1103 
1104  auto workload = CreateConcatWorkloadTest<ConcatWorkloadType, DataType>(factory, graph, outputShape, concatAxis);
1105 
1106  ConcatQueueDescriptor queueDescriptor = workload->GetData();
1107  auto inputHandle0 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
1108  auto inputHandle1 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]);
1109  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
1110 
1111  auto predResult = CompareIClTensorHandleShape(inputHandle0, { 2, 3, 2, 5 });
1112  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
1113  predResult = CompareIClTensorHandleShape(inputHandle1, { 2, 3, 2, 5 });
1114  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
1115  predResult = CompareIClTensorHandleShape(outputHandle, outputShape);
1116  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
1117 }
1118 
1119 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConcatDim0Float32Workload")
1120 {
1121  ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::Float32>({ 4, 3, 2, 5 }, 0);
1122 }
1123 
1124 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConcatDim1Float32Workload")
1125 {
1126  ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::Float32>({ 2, 6, 2, 5 }, 1);
1127 }
1128 
1129 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConcatDim3Float32Workload")
1130 {
1131  ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::Float32>({ 2, 3, 2, 10 }, 3);
1132 }
1133 
1134 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConcatDim0Uint8Workload")
1135 {
1136  ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::QAsymmU8>({ 4, 3, 2, 5 }, 0);
1137 }
1138 
1139 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConcatDim1Uint8Workload")
1140 {
1141  ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::QAsymmU8>({ 2, 6, 2, 5 }, 1);
1142 }
1143 
1144 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConcatDim3Uint8Workload")
1145 {
1146  ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::QAsymmU8>({ 2, 3, 2, 10 }, 3);
1147 }
1148 
1149 template <typename SpaceToDepthWorkloadType, typename armnn::DataType DataType>
1150 static void ClSpaceToDepthWorkloadTest()
1151 {
1152  Graph graph;
1153  ClWorkloadFactory factory =
1154  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
1155 
1156  auto workload = CreateSpaceToDepthWorkloadTest<SpaceToDepthWorkloadType, DataType>(factory, graph);
1157 
1158  SpaceToDepthQueueDescriptor queueDescriptor = workload->GetData();
1159  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
1160  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
1161 
1162  auto predResult = CompareIClTensorHandleShape(inputHandle, { 1, 2, 2, 1 });
1163  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
1164  predResult = CompareIClTensorHandleShape(outputHandle, { 1, 1, 1, 4 });
1165  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
1166 }
1167 
1168 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSpaceToDepthFloat32Workload")
1169 {
1170  ClSpaceToDepthWorkloadTest<ClSpaceToDepthWorkload, armnn::DataType::Float32>();
1171 }
1172 
1173 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSpaceToDepthFloat16Workload")
1174 {
1175  ClSpaceToDepthWorkloadTest<ClSpaceToDepthWorkload, armnn::DataType::Float16>();
1176 }
1177 
1178 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSpaceToDepthQAsymm8Workload")
1179 {
1180  ClSpaceToDepthWorkloadTest<ClSpaceToDepthWorkload, armnn::DataType::QAsymmU8>();
1181 }
1182 
1183 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSpaceToDepthQSymm16Workload")
1184 {
1185  ClSpaceToDepthWorkloadTest<ClSpaceToDepthWorkload, armnn::DataType::QSymmS16>();
1186 }
1187 
1188 template <armnn::DataType DataType>
1189 static void ClCreateStackWorkloadTest(const std::initializer_list<unsigned int>& inputShape,
1190  const std::initializer_list<unsigned int>& outputShape,
1191  unsigned int axis,
1192  unsigned int numInputs)
1193 {
1194  armnn::Graph graph;
1195  ClWorkloadFactory factory =
1196  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
1197 
1198  auto workload = CreateStackWorkloadTest<ClStackWorkload, DataType>(factory,
1199  graph,
1200  TensorShape(inputShape),
1201  TensorShape(outputShape),
1202  axis,
1203  numInputs);
1204 
1205  // Check inputs and output are as expected
1206  StackQueueDescriptor queueDescriptor = workload->GetData();
1207  for (unsigned int i = 0; i < numInputs; ++i)
1208  {
1209  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[i]);
1210  auto predResult1 = CompareIClTensorHandleShape(inputHandle, inputShape);
1211  CHECK_MESSAGE(predResult1.m_Result, predResult1.m_Message.str());
1212  }
1213  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
1214  auto predResult2 = CompareIClTensorHandleShape(outputHandle, outputShape);
1215  CHECK_MESSAGE(predResult2.m_Result, predResult2.m_Message.str());
1216 }
1217 
1218 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateStackFloat32Workload")
1219 {
1220  ClCreateStackWorkloadTest<armnn::DataType::Float32>({ 3, 4, 5 }, { 3, 4, 2, 5 }, 2, 2);
1221 }
1222 
1223 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateStackFloat16Workload")
1224 {
1225  ClCreateStackWorkloadTest<armnn::DataType::Float16>({ 3, 4, 5 }, { 3, 4, 2, 5 }, 2, 2);
1226 }
1227 
1228 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateStackUint8Workload")
1229 {
1230  ClCreateStackWorkloadTest<armnn::DataType::QAsymmU8>({ 3, 4, 5 }, { 3, 4, 2, 5 }, 2, 2);
1231 }
1232 
1233 
1234 template <typename QLstmWorkloadType>
1235 static void ClCreateQLstmWorkloadTest()
1236 {
1237  Graph graph;
1238  ClWorkloadFactory factory = ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
1239 
1240  auto workload = CreateQLstmWorkloadTest<QLstmWorkloadType>(factory, graph);
1241  QLstmQueueDescriptor queueDescriptor = workload->GetData();
1242 
1243  IAclTensorHandle* inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
1244  CHECK((inputHandle->GetShape() == TensorShape({2, 4})));
1245  CHECK((inputHandle->GetDataType() == arm_compute::DataType::QASYMM8_SIGNED));
1246 
1247  IAclTensorHandle* cellStateOutHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
1248  CHECK((cellStateOutHandle->GetShape() == TensorShape({2, 4})));
1249  CHECK((cellStateOutHandle->GetDataType() == arm_compute::DataType::QSYMM16));
1250 
1251  IAclTensorHandle* outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[2]);
1252  CHECK((outputHandle->GetShape() == TensorShape({2, 4})));
1253  CHECK((outputHandle->GetDataType() == arm_compute::DataType::QASYMM8_SIGNED));
1254 }
1255 
1256 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateQLstmWorkloadTest")
1257 {
1258  ClCreateQLstmWorkloadTest<ClQLstmWorkload>();
1259 }
1260 
1261 template <typename QuantizedLstmWorkloadType>
1262 static void ClCreateQuantizedLstmWorkloadTest()
1263 {
1264  using namespace armnn::armcomputetensorutils;
1265 
1266  Graph graph;
1267  ClWorkloadFactory factory =
1268  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
1269 
1270  auto workload = CreateQuantizedLstmWorkloadTest<QuantizedLstmWorkloadType>(factory, graph);
1271 
1272  QuantizedLstmQueueDescriptor queueDescriptor = workload->GetData();
1273 
1274  IAclTensorHandle* inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
1275  CHECK((inputHandle->GetShape() == TensorShape({2, 2})));
1276  CHECK((inputHandle->GetDataType() == arm_compute::DataType::QASYMM8));
1277 
1278  IAclTensorHandle* cellStateInHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
1279  CHECK((cellStateInHandle->GetShape() == TensorShape({2, 4})));
1280  CHECK((cellStateInHandle->GetDataType() == arm_compute::DataType::QSYMM16));
1281 
1282  IAclTensorHandle* outputStateInHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[2]);
1283  CHECK((outputStateInHandle->GetShape() == TensorShape({2, 4})));
1284  CHECK((outputStateInHandle->GetDataType() == arm_compute::DataType::QASYMM8));
1285 
1286  IAclTensorHandle* cellStateOutHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
1287  CHECK((cellStateOutHandle->GetShape() == TensorShape({2, 4})));
1288  CHECK((cellStateOutHandle->GetDataType() == arm_compute::DataType::QSYMM16));
1289 
1290  IAclTensorHandle* outputStateOutHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
1291  CHECK((outputStateOutHandle->GetShape() == TensorShape({2, 4})));
1292  CHECK((outputStateOutHandle->GetDataType() == arm_compute::DataType::QASYMM8));
1293 }
1294 
1295 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateQuantizedLstmWorkload")
1296 {
1297  ClCreateQuantizedLstmWorkloadTest<ClQuantizedLstmWorkload>();
1298 }
1299 
1300 }
uint32_t m_PadBottom
Padding bottom value in the height dimension.
bool m_BiasEnabled
Enable/disable bias.
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
DataLayout
Definition: Types.hpp:53
armnn::PredicateResult CompareIClTensorHandleShape(IClTensorHandle *tensorHandle, std::initializer_list< unsigned int > expectedDimensions)
TEST_SUITE("CreateWorkloadCl")
std::vector< BackendOptions > ModelOptions
A Convolution2dDescriptor for the Convolution2dLayer.
uint32_t m_PadRight
Padding right value in the width dimension.
Copyright (c) 2021 ARM Limited and Contributors.
void IgnoreUnused(Ts &&...)
virtual arm_compute::DataType GetDataType() const =0
uint32_t m_PadTop
Padding top value in the height dimension.
TEST_CASE_FIXTURE(ClContextControlFixture, "CopyBetweenNeonAndGpu")
uint32_t m_StrideX
Stride value when proceeding through input for the width dimension.
DataType
Definition: Types.hpp:35
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
void AllocateAndCopyDataToITensorHandle(armnn::ITensorHandle *tensorHandle, const void *memory)
virtual TensorShape GetShape() const =0
Get the number of elements for each dimension ordered from slowest iterating dimension to fastest iterating dimension.
uint32_t m_StrideY
Stride value when proceeding through input for the height dimension.
This layer represents an addition operation.
Struct for the users to pass backend specific options.
This layer represents a subtraction operation.
std::vector< ITensorHandle * > m_Outputs
This layer represents a division operation.
UnaryOperation
Definition: Types.hpp:104
Contains information about TensorInfos of a layer.
void SetQuantizationOffset(int32_t offset)
Definition: Tensor.cpp:491
std::vector< ITensorHandle * > m_Inputs
This layer represents a multiplication operation.
arm_compute::CLSubTensor & GetTensor() override
Depthwise Convolution 2D layer workload data.
uint32_t m_PadLeft
Padding left value in the width dimension.
LayerType
When adding a new layer, adapt also the LastLayer enum value in the enum class LayerType below...
Definition: Types.hpp:405