ArmNN
 22.08
ClCreateWorkloadTests.cpp
Go to the documentation of this file.
1 //
2 // Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
8 
16 
19 
22 #include <cl/ClTensorHandle.hpp>
23 #include <cl/ClWorkloadFactory.hpp>
26 
27 #include <doctest/doctest.h>
28 
30  std::initializer_list<unsigned int> expectedDimensions)
31 {
32  return CompareTensorHandleShape<IClTensorHandle>(tensorHandle, expectedDimensions);
33 }
34 
35 TEST_SUITE("CreateWorkloadCl")
36 {
37 template <armnn::DataType DataType>
38 static void ClCreateActivationWorkloadTest()
39 {
40  Graph graph;
41  ClWorkloadFactory factory =
42  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
43 
44  auto workload = CreateActivationWorkloadTest<ClActivationWorkload, DataType>(factory, graph);
45 
46  // Checks that inputs/outputs are as we expect them (see definition of CreateActivationWorkloadTest).
47  ActivationQueueDescriptor queueDescriptor = workload->GetData();
48  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
49  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
50 
51  auto predResult = CompareIClTensorHandleShape(inputHandle, {1, 1});
52  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
53 
54  predResult = CompareIClTensorHandleShape(outputHandle, {1, 1});
55  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
56 }
57 
58 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateActivationFloatWorkload")
59 {
60  ClCreateActivationWorkloadTest<armnn::DataType::Float32>();
61 }
62 
63 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateActivationFloat16Workload")
64 {
65  ClCreateActivationWorkloadTest<armnn::DataType::Float16>();
66 }
67 
68 template <typename WorkloadType,
69  typename DescriptorType,
70  typename LayerType,
72 static void ClCreateElementwiseWorkloadTest()
73 {
74  Graph graph;
75  ClWorkloadFactory factory =
76  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
77 
78  auto workload = CreateElementwiseWorkloadTest<WorkloadType, DescriptorType, LayerType, DataType>(factory, graph);
79 
80  // Checks that inputs/outputs are as we expect them (see definition of CreateElementwiseWorkloadTest).
81  DescriptorType queueDescriptor = workload->GetData();
82  auto inputHandle1 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
83  auto inputHandle2 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]);
84  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
85  auto predResult = CompareIClTensorHandleShape(inputHandle1, {2, 3});
86  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
87  predResult = CompareIClTensorHandleShape(inputHandle2, {2, 3});
88  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
89  predResult = CompareIClTensorHandleShape(outputHandle, {2, 3});
90  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
91 }
92 
93 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateAdditionFloatWorkload")
94 {
95  ClCreateElementwiseWorkloadTest<ClAdditionWorkload,
99 }
100 
101 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateAdditionFloat16Workload")
102 {
103  ClCreateElementwiseWorkloadTest<ClAdditionWorkload,
107 }
108 
109 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSubtractionFloatWorkload")
110 {
111  ClCreateElementwiseWorkloadTest<ClSubtractionWorkload,
115 }
116 
117 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSubtractionFloat16Workload")
118 {
119  ClCreateElementwiseWorkloadTest<ClSubtractionWorkload,
123 }
124 
125 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateMultiplicationFloatWorkloadTest")
126 {
127  ClCreateElementwiseWorkloadTest<ClMultiplicationWorkload,
131 }
132 
133 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateMultiplicationFloat16WorkloadTest")
134 {
135  ClCreateElementwiseWorkloadTest<ClMultiplicationWorkload,
139 }
140 
141 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateMultiplicationUint8WorkloadTest")
142 {
143  ClCreateElementwiseWorkloadTest<ClMultiplicationWorkload,
147 }
148 
149 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateDivisionFloatWorkloadTest")
150 {
151  ClCreateElementwiseWorkloadTest<ClDivisionWorkload,
155 }
156 
157 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateDivisionFloat16WorkloadTest")
158 {
159  ClCreateElementwiseWorkloadTest<ClDivisionWorkload,
163 }
164 
165 template <typename WorkloadType,
166  typename DescriptorType,
168 static void ClCreateElementwiseUnaryWorkloadTest(armnn::UnaryOperation op)
169 {
170  Graph graph;
171  ClWorkloadFactory factory =
172  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
173 
174  auto workload = CreateElementwiseUnaryWorkloadTest<WorkloadType, DescriptorType, DataType>(factory, graph, op);
175 
176  DescriptorType queueDescriptor = workload->GetData();
177 
178  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
179  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
180 
181  auto predResult = CompareIClTensorHandleShape(inputHandle, {2, 3});
182  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
183 
184  predResult = CompareIClTensorHandleShape(outputHandle, {2, 3});
185  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
186 }
187 
188 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateRsqrtFloat32WorkloadTest")
189 {
190  ClCreateElementwiseUnaryWorkloadTest<ClRsqrtWorkload, RsqrtQueueDescriptor, armnn::DataType::Float32>(
191  UnaryOperation::Rsqrt);
192 }
193 
194 template <typename BatchNormalizationWorkloadType, armnn::DataType DataType>
195 static void ClCreateBatchNormalizationWorkloadTest(DataLayout dataLayout)
196 {
197  Graph graph;
198  ClWorkloadFactory factory =
199  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
200 
201  auto workload = CreateBatchNormalizationWorkloadTest<BatchNormalizationWorkloadType, DataType>
202  (factory, graph, dataLayout);
203 
204  // Checks that inputs/outputs are as we expect them (see definition of CreateBatchNormalizationWorkloadTest).
205  BatchNormalizationQueueDescriptor queueDescriptor = workload->GetData();
206  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
207  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
208 
209  armnn::PredicateResult predResult(true);
210  switch (dataLayout)
211  {
212  case DataLayout::NHWC:
213  predResult = CompareIClTensorHandleShape(inputHandle, { 2, 4, 4, 3 });
214  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
215  predResult = CompareIClTensorHandleShape(outputHandle, { 2, 4, 4, 3 });
216  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
217  break;
218  default: // NCHW
219  predResult = CompareIClTensorHandleShape(inputHandle, { 2, 3, 4, 4 });
220  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
221  predResult = CompareIClTensorHandleShape(outputHandle, { 2, 3, 4, 4 });
222  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
223  }
224 }
225 
226 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateBatchNormalizationFloatNchwWorkload")
227 {
228  ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload,
229  armnn::DataType::Float32>(DataLayout::NCHW);
230 }
231 
232 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateBatchNormalizationFloat16NchwWorkload")
233 {
234  ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload,
235  armnn::DataType::Float16>(DataLayout::NCHW);
236 }
237 
238 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateBatchNormalizationFloatNhwcWorkload")
239 {
240  ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload,
241  armnn::DataType::Float32>(DataLayout::NHWC);
242 }
243 
244 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateBatchNormalizationNhwcFloat16NhwcWorkload")
245 {
246  ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload,
247  armnn::DataType::Float16>(DataLayout::NHWC);
248 }
249 
250 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConvertFp16ToFp32Workload")
251 {
252  Graph graph;
253  ClWorkloadFactory factory =
254  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
255 
256  auto workload = CreateConvertFp16ToFp32WorkloadTest<ClConvertFp16ToFp32Workload>(factory, graph);
257 
258  ConvertFp16ToFp32QueueDescriptor queueDescriptor = workload->GetData();
259  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
260  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
261  auto predResult = CompareIClTensorHandleShape(inputHandle, {1, 3, 2, 3});
262  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
263  predResult = CompareIClTensorHandleShape(outputHandle, {1, 3, 2, 3});
264  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
265  CHECK((inputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F16));
266  CHECK((outputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F32));
267 }
268 
269 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConvertFp32ToFp16Workload")
270 {
271  Graph graph;
272  ClWorkloadFactory factory =
273  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
274 
275  auto workload = CreateConvertFp32ToFp16WorkloadTest<ClConvertFp32ToFp16Workload>(factory, graph);
276 
277  ConvertFp32ToFp16QueueDescriptor queueDescriptor = workload->GetData();
278  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
279  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
280 
281  auto predResult = CompareIClTensorHandleShape(inputHandle, {1, 3, 2, 3});
282  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
283  predResult = CompareIClTensorHandleShape(outputHandle, {1, 3, 2, 3});
284  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
285  CHECK((inputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F32));
286  CHECK((outputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F16));
287 }
288 
289 template <typename Convolution2dWorkloadType, typename armnn::DataType DataType>
290 static void ClConvolution2dWorkloadTest(DataLayout dataLayout)
291 {
292  Graph graph;
293  ClWorkloadFactory factory =
294  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
295 
296  auto workload = CreateConvolution2dWorkloadTest<ClConvolution2dWorkload, DataType>(factory,
297  graph,
298  dataLayout);
299 
300  TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({2, 3, 8, 16})
301  : std::initializer_list<unsigned int>({2, 8, 16, 3});
302  TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({2, 2, 2, 10})
303  : std::initializer_list<unsigned int>({2, 2, 10, 2});
304 
305  // Checks that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest).
306  Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
307  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
308  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
309  CHECK((inputHandle->GetShape() == inputShape));
310  CHECK((outputHandle->GetShape() == outputShape));
311 }
312 
313 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConvolution2dFloatNchwWorkload")
314 {
315  ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
316 }
317 
318 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConvolution2dFloatNhwcWorkload")
319 {
320  ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
321 }
322 
323 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConvolution2dFloat16NchwWorkload")
324 {
325  ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float16>(DataLayout::NCHW);
326 }
327 
328 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConvolution2dFloat16NhwcWorkload")
329 {
330  ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
331 }
332 
333 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConvolution2dFastMathEnabledWorkload")
334 {
335  Graph graph;
336 
337  using ModelOptions = std::vector<BackendOptions>;
338  ModelOptions modelOptions = {};
339  BackendOptions gpuAcc("GpuAcc",
340  {
341  { "FastMathEnabled", true }
342  });
343  modelOptions.push_back(gpuAcc);
344 
345  ClWorkloadFactory factory =
346  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager(), modelOptions);
347 
348  auto workload =
349  CreateConvolution2dWorkloadFastMathTest<ClConvolution2dWorkload, armnn::DataType::Float32>(factory,
350  graph,
351  DataLayout::NCHW,
352  modelOptions);
353 
354  ARMNN_ASSERT(workload != nullptr);
355  auto conv2dWorkload = PolymorphicDowncast<ClConvolution2dWorkload*>(workload.get());
356  IgnoreUnused(conv2dWorkload);
357  ARMNN_ASSERT(conv2dWorkload != nullptr);
358  ARMNN_ASSERT(conv2dWorkload->GetConvolutionMethod() == arm_compute::ConvolutionMethod::WINOGRAD);
359 }
360 
361 TEST_CASE_FIXTURE(ClContextControlFixture, "ClReplaceInputOutputConvolution2dWorkload")
362 {
363  // Create Convolution2dWorkload with ClTensorHandle input and output
364  // Then replace the input and output with ClImportTensorHandle
365  Graph graph;
366  ClWorkloadFactory factory =
367  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
368 
369  auto workload =
370  CreateConvolution2dWorkloadTest<ClConvolution2dWorkload, DataType::Float32>(factory,
371  graph,
372  DataLayout::NHWC);
373 
374  TensorShape inputShape = std::initializer_list<unsigned int>({2, 8, 16, 3});
375  TensorShape outputShape = std::initializer_list<unsigned int>({2, 2, 10, 2});
376 
377  // Checks that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest).
378  Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
379  auto inputHandle = PolymorphicDowncast<ITensorHandle*>(queueDescriptor.m_Inputs[0]);
380  auto outputHandle = PolymorphicDowncast<ITensorHandle*>(queueDescriptor.m_Outputs[0]);
381  CHECK((inputHandle->GetShape() == inputShape));
382  CHECK((outputHandle->GetShape() == outputShape));
383  // The input and output handles are created correctly as ClTensorHandle
384  CHECK((dynamic_cast<ClTensorHandle*>(inputHandle) != nullptr));
385  CHECK((dynamic_cast<ClTensorHandle*>(outputHandle) != nullptr));
386 
387  // Replace with ImportTensorHandle
388  ClImportTensorHandleFactory importFactory(static_cast<MemorySourceFlags>(MemorySource::Malloc),
389  static_cast<MemorySourceFlags>(MemorySource::Malloc));
390 
391  TensorInfo inputInfo({ 2, 8, 16, 3 }, DataType::Float32);
392  TensorInfo outputInfo({ 2, 2, 10, 2 }, DataType::Float32);
393 
394  // create TensorHandle for memory import
395  auto inputImportHandle = importFactory.CreateTensorHandle(inputInfo);
396  auto outputImportHandle = importFactory.CreateTensorHandle(outputInfo);
397 
398  // Calling ReplaceInputTensorHandle and ReplaceOutputTensorHandle does not throw exception
399  // as Reconfigure function is implemented
400  workload->ReplaceInputTensorHandle(inputImportHandle.get(), 0);
401  workload->ReplaceOutputTensorHandle(outputImportHandle.get(), 0);
402 
403  // Correctly replaced with the import handles with correct information
404  queueDescriptor = workload->GetData();
405  auto replacedInputHandle = PolymorphicDowncast<ITensorHandle*>(queueDescriptor.m_Inputs[0]);
406  auto replacedOutputHandle = PolymorphicDowncast<ITensorHandle*>(queueDescriptor.m_Outputs[0]);
407  CHECK((replacedInputHandle->GetShape() == inputShape));
408  CHECK((replacedOutputHandle->GetShape() == outputShape));
409 
410  CHECK((inputImportHandle.get() == replacedInputHandle));
411  CHECK((inputImportHandle.get() == replacedInputHandle));
412 
413  CHECK((dynamic_cast<ClTensorHandle*>(replacedInputHandle) == nullptr));
414  CHECK((dynamic_cast<ClImportTensorHandle*>(replacedInputHandle) != nullptr));
415  CHECK((dynamic_cast<ClTensorHandle*>(replacedOutputHandle) == nullptr));
416  CHECK((dynamic_cast<ClImportTensorHandle*>(replacedOutputHandle) != nullptr));
417 }
418 
419 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConvolution2dClCompiledContextWorkload")
420 {
421  using namespace armnn;
422 
423  const DataType inputType = DataType::QAsymmU8;
424  const DataType kernelType = DataType::QSymmS8;
425  const DataType biasType = DataType::Signed32;
426 
427  TensorInfo inputInfo ({ 1, 3, 1, 2 }, inputType, 0.5f, 128);
428  TensorInfo outputInfo({ 1, 3, 1, 3 }, inputType, 1.0f, 128);
429 
430  const std::vector<float> quantScales{ 0.5f, 0.75f, 1.0f };
431  constexpr unsigned int quantDimension = 0;
432 
433  TensorInfo kernelInfo({ 3, 1, 1, 2 }, kernelType, quantScales, quantDimension);
434 
435  const std::vector<float> biasQuantScales{ 0.25f, 0.375f, 0.5f };
436  TensorInfo biasInfo({ 3 }, biasType, biasQuantScales, quantDimension);
437 
438  std::vector<uint8_t> inputData =
439  {
440  138, 108, 138, 108, 138, 108
441  };
442 
443  std::vector<int8_t> kernelData =
444  {
445  1, 2, 1, 2, 1, 2
446  };
447 
448  std::vector<int32_t> biasData =
449  {
450  4, 4, 4
451  };
452 
453  std::vector<uint8_t> expectedOutputData =
454  {
455  121, 118, 115, 121, 118, 115, 121, 118, 115
456  };
457 
458 
459  Convolution2dDescriptor descriptor;
460  descriptor.m_StrideX = 1;
461  descriptor.m_StrideY = 1;
462  descriptor.m_PadLeft = 0;
463  descriptor.m_PadRight = 0;
464  descriptor.m_PadTop = 0;
465  descriptor.m_PadBottom = 0;
466  descriptor.m_BiasEnabled = true;
467  descriptor.m_DataLayout = DataLayout::NHWC;
468 
469  auto memoryManager = ClWorkloadFactoryHelper::GetMemoryManager();
470  auto clMemoryManager = armnn::PolymorphicPointerDowncast<armnn::ClMemoryManager>(memoryManager);
471  auto tensorHandleFactory = ClWorkloadFactoryHelper::GetTensorHandleFactory(memoryManager);
472 
473  std::unique_ptr<ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputInfo);
474  std::unique_ptr<armnn::ITensorHandle> weightsHandle = tensorHandleFactory.CreateTensorHandle(kernelInfo);
475  std::unique_ptr<armnn::ITensorHandle> biasHandle = tensorHandleFactory.CreateTensorHandle(biasInfo);
476  std::unique_ptr<ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputInfo);
477 
478 
479  WorkloadInfo workloadInfo;
480  ScopedTensorHandle weightTensor(kernelInfo);
481  ScopedTensorHandle biasTensor(biasInfo);
482 
483  AllocateAndCopyDataToITensorHandle(&weightTensor, kernelData.data());
484  AllocateAndCopyDataToITensorHandle(&biasTensor, biasData.data());
485 
486  Convolution2dQueueDescriptor queueDescriptor;
487  queueDescriptor.m_Parameters = descriptor;
488  queueDescriptor.m_Weight = &weightTensor;
489  queueDescriptor.m_Bias = &biasTensor;
490 
491  AddInputToWorkload(queueDescriptor, workloadInfo, inputInfo, inputHandle.get());
492  AddInputToWorkload(queueDescriptor, workloadInfo, kernelInfo, weightsHandle.get());
493  AddInputToWorkload(queueDescriptor, workloadInfo, biasInfo, biasHandle.get());
494  AddOutputToWorkload(queueDescriptor, workloadInfo, outputInfo, outputHandle.get());
495 
496  // Initialize our m_CLCompileContext using default device and context
497  auto context = arm_compute::CLKernelLibrary::get().context();
498  auto device = arm_compute::CLKernelLibrary::get().get_device();
499  auto clCompileContext = arm_compute::CLCompileContext(context, device);
500 
501 
502 
503  // Check built programs are empty in context
504  CHECK(clCompileContext.get_built_programs().empty());
505 
506  auto workload = std::make_unique<ClConvolution2dWorkload>(queueDescriptor,
507  workloadInfo,
508  clMemoryManager->GetIntraLayerManager(),
509  clCompileContext);
510  ARMNN_ASSERT(workload != nullptr);
511  // Check built programs are not empty in context
512  CHECK(!clCompileContext.get_built_programs().empty());
513 }
514 
515 template <typename DepthwiseConvolutionWorkloadType, typename armnn::DataType DataType>
516 static void ClDepthwiseConvolutionWorkloadTest(DataLayout dataLayout)
517 {
518  Graph graph;
519  ClWorkloadFactory factory =
520  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
521 
522  auto workload = CreateDepthwiseConvolution2dWorkloadTest<DepthwiseConvolutionWorkloadType, DataType>
523  (factory, graph, dataLayout);
524 
525  // Checks that inputs/outputs are as we expect them (see definition of CreateDepthwiseConvolution2dWorkloadTest).
526  DepthwiseConvolution2dQueueDescriptor queueDescriptor = workload->GetData();
527  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
528  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
529 
530  TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 2, 2, 5, 5 })
531  : std::initializer_list<unsigned int>({ 2, 5, 5, 2 });
532  TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 2, 2, 5, 5 })
533  : std::initializer_list<unsigned int>({ 2, 5, 5, 2 });
534 
535  CHECK((inputHandle->GetShape() == inputShape));
536  CHECK((outputHandle->GetShape() == outputShape));
537 }
538 
539 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateDepthwiseConvolutionFloat32NhwcWorkload")
540 {
541  ClDepthwiseConvolutionWorkloadTest<ClDepthwiseConvolutionWorkload, DataType::Float32>(DataLayout::NHWC);
542 }
543 
544 template <typename Convolution2dWorkloadType, typename armnn::DataType DataType>
545 static void ClDirectConvolution2dWorkloadTest()
546 {
547  Graph graph;
548  ClWorkloadFactory factory =
549  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
550 
551  auto workload = CreateDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, DataType>(factory, graph);
552 
553  // Checks that outputs and inputs are as we expect them (see definition of CreateDirectConvolution2dWorkloadTest).
554  Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
555  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
556  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
557  auto predResult = CompareIClTensorHandleShape(inputHandle, {2, 3, 6, 6});
558  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
559  predResult = CompareIClTensorHandleShape(outputHandle, {2, 2, 6, 6});
560  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
561 }
562 
563 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateDirectConvolution2dFloatWorkload")
564 {
565  ClDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float32>();
566 }
567 
568 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateDirectConvolution2dFloat16Workload")
569 {
570  ClDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float16>();
571 }
572 
573 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateDirectConvolution2dUint8Workload")
574 {
575  ClDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::QAsymmU8>();
576 }
577 
578 template <typename FullyConnectedWorkloadType, typename armnn::DataType DataType>
579 static void ClCreateFullyConnectedWorkloadTest()
580 {
581  Graph graph;
582  ClWorkloadFactory factory =
583  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
584 
585  auto workload =
586  CreateFullyConnectedWorkloadTest<FullyConnectedWorkloadType, DataType>(factory, graph);
587 
588  // Checks that outputs and inputs are as we expect them (see definition of CreateFullyConnectedWorkloadTest).
589  FullyConnectedQueueDescriptor queueDescriptor = workload->GetData();
590  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
591  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
592  auto predResult = CompareIClTensorHandleShape(inputHandle, {3, 1, 4, 5});
593  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
594  predResult = CompareIClTensorHandleShape(outputHandle, {3, 7});
595  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
596 }
597 
598 
599 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateFullyConnectedFloatWorkloadTest")
600 {
601  ClCreateFullyConnectedWorkloadTest<ClFullyConnectedWorkload, armnn::DataType::Float32>();
602 }
603 
604 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateFullyConnectedFloat16WorkloadTest")
605 {
606  ClCreateFullyConnectedWorkloadTest<ClFullyConnectedWorkload, armnn::DataType::Float16>();
607 }
608 
609 template <typename NormalizationWorkloadType, typename armnn::DataType DataType>
610 static void ClNormalizationWorkloadTest(DataLayout dataLayout)
611 {
612  Graph graph;
613  ClWorkloadFactory factory =
614  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
615 
616  auto workload = CreateNormalizationWorkloadTest<NormalizationWorkloadType, DataType>(factory, graph, dataLayout);
617 
618  // Checks that inputs/outputs are as we expect them (see definition of CreateNormalizationWorkloadTest).
619  NormalizationQueueDescriptor queueDescriptor = workload->GetData();
620  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
621  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
622 
623  TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({3, 5, 5, 1})
624  : std::initializer_list<unsigned int>({3, 1, 5, 5});
625  TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({3, 5, 5, 1})
626  : std::initializer_list<unsigned int>({3, 1, 5, 5});
627 
628  CHECK((inputHandle->GetShape() == inputShape));
629  CHECK((outputHandle->GetShape() == outputShape));
630 }
631 
632 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateNormalizationFloat32NchwWorkload")
633 {
634  ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
635 }
636 
637 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateNormalizationFloat16NchwWorkload")
638 {
639  ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float16>(DataLayout::NCHW);
640 }
641 
642 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateNormalizationFloat32NhwcWorkload")
643 {
644  ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
645 }
646 
647 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateNormalizationFloat16NhwcWorkload")
648 {
649  ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
650 }
651 
652 template <typename armnn::DataType DataType>
653 static void ClPooling2dWorkloadTest(DataLayout dataLayout)
654 {
655  Graph graph;
656  ClWorkloadFactory factory =
657  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
658 
659  auto workload = CreatePooling2dWorkloadTest<ClPooling2dWorkload, DataType>(factory, graph, dataLayout);
660 
661  TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({3, 2, 5, 5})
662  : std::initializer_list<unsigned int>({3, 5, 5, 2});
663  TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({3, 2, 2, 4})
664  : std::initializer_list<unsigned int>({3, 2, 4, 2});
665 
666  // Check that inputs/outputs are as we expect them (see definition of CreatePooling2dWorkloadTest).
667  Pooling2dQueueDescriptor queueDescriptor = workload->GetData();
668  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
669  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
670 
671  CHECK((inputHandle->GetShape() == inputShape));
672  CHECK((outputHandle->GetShape() == outputShape));
673 }
674 
675 TEST_CASE_FIXTURE(ClContextControlFixture, "CreatePooling2dFloatNchwWorkload")
676 {
677  ClPooling2dWorkloadTest<armnn::DataType::Float32>(DataLayout::NCHW);
678 }
679 
680 TEST_CASE_FIXTURE(ClContextControlFixture, "CreatePooling2dFloatNhwcWorkload")
681 {
682  ClPooling2dWorkloadTest<armnn::DataType::Float32>(DataLayout::NHWC);
683 }
684 
685 TEST_CASE_FIXTURE(ClContextControlFixture, "CreatePooling2dFloat16NchwWorkload")
686 {
687  ClPooling2dWorkloadTest<armnn::DataType::Float16>(DataLayout::NCHW);
688 }
689 
690 TEST_CASE_FIXTURE(ClContextControlFixture, "CreatePooling2dFloat16NhwcWorkload")
691 {
692  ClPooling2dWorkloadTest<armnn::DataType::Float16>(DataLayout::NHWC);
693 }
694 
695 static void ClCreatePreluWorkloadTest(const armnn::TensorShape& inputShape,
696  const armnn::TensorShape& alphaShape,
697  const armnn::TensorShape& outputShape,
698  armnn::DataType dataType)
699 {
700  Graph graph;
701  ClWorkloadFactory factory =
702  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
703 
704  auto workload = CreatePreluWorkloadTest<ClPreluWorkload>(factory,
705  graph,
706  inputShape,
707  alphaShape,
708  outputShape,
709  dataType);
710 
711  // Checks that outputs and inputs are as we expect them (see definition of CreatePreluWorkloadTest).
712  PreluQueueDescriptor queueDescriptor = workload->GetData();
713  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
714  auto alphaHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]);
715  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
716 
717  CHECK((inputHandle->GetShape() == inputShape));
718  CHECK((alphaHandle->GetShape() == alphaShape));
719  CHECK((outputHandle->GetShape() == outputShape));
720 }
721 
722 TEST_CASE_FIXTURE(ClContextControlFixture, "CreatePreluFloat16Workload")
723 {
724  ClCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::Float16);
725 }
726 
727 TEST_CASE_FIXTURE(ClContextControlFixture, "CreatePreluFloatWorkload")
728 {
729  ClCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::Float32);
730 }
731 
732 TEST_CASE_FIXTURE(ClContextControlFixture, "CreatePreluUint8Workload")
733 {
734  ClCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::QAsymmU8);
735 }
736 
737 template <typename armnn::DataType DataType>
738 static void ClCreateReshapeWorkloadTest()
739 {
740  Graph graph;
741  ClWorkloadFactory factory =
742  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
743 
744  auto workload = CreateReshapeWorkloadTest<ClReshapeWorkload, DataType>(factory, graph);
745 
746  // Checks that outputs and inputs are as we expect them (see definition of CreateReshapeWorkloadTest).
747  ReshapeQueueDescriptor queueDescriptor = workload->GetData();
748  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
749  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
750 
751  auto predResult = CompareIClTensorHandleShape(inputHandle, {4, 1});
752  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
753  predResult = CompareIClTensorHandleShape(outputHandle, {1, 4});
754  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
755 }
756 
757 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateReshapeFloatWorkload")
758 {
759  ClCreateReshapeWorkloadTest<armnn::DataType::Float32>();
760 }
761 
762 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateReshapeFloat16Workload")
763 {
764  ClCreateReshapeWorkloadTest<armnn::DataType::Float16>();
765 }
766 
767 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateReshapeUint8Workload")
768 {
769  ClCreateReshapeWorkloadTest<armnn::DataType::QAsymmU8>();
770 }
771 
772 template <typename SoftmaxWorkloadType, typename armnn::DataType DataType>
773 static void ClSoftmaxWorkloadTest()
774 {
775  Graph graph;
776  ClWorkloadFactory factory =
777  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
778 
779  auto workload = CreateSoftmaxWorkloadTest<SoftmaxWorkloadType, DataType>(factory, graph);
780 
781  // Checks that inputs/outputs are as we expect them (see definition of ClSoftmaxFloatWorkload).
782  SoftmaxQueueDescriptor queueDescriptor = workload->GetData();
783  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
784  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
785 
786  armnn::TensorInfo tensorInfo({4, 1}, DataType);
788  {
789  tensorInfo.SetQuantizationOffset(0);
790  tensorInfo.SetQuantizationScale(1.f / 256);
791  }
793  {
794  tensorInfo.SetQuantizationOffset(-128);
795  tensorInfo.SetQuantizationScale(1.f / 256);
796  }
797 
798  auto predResult = CompareIClTensorHandleShape(inputHandle, {4, 1});
799  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
800  predResult = CompareIClTensorHandleShape(outputHandle, {4, 1});
801  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
802 }
803 
804 
805 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSoftmaxFloat32WorkloadTest")
806 {
807  ClSoftmaxWorkloadTest<ClSoftmaxWorkload, armnn::DataType::Float32>();
808 }
809 
810 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSoftmaxFloat16WorkloadTest")
811 {
812  ClSoftmaxWorkloadTest<ClSoftmaxWorkload, armnn::DataType::Float16>();
813 }
814 
815 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSoftmaxQAsymmU8Workload")
816 {
817  ClSoftmaxWorkloadTest<ClSoftmaxWorkload, armnn::DataType::QAsymmU8>();
818 }
819 
820 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSoftmaxQAsymmS8Workload")
821 {
822  ClSoftmaxWorkloadTest<ClSoftmaxWorkload, armnn::DataType::QAsymmS8>();
823 }
824 
825 template <typename armnn::DataType DataType>
826 static void ClSplitterWorkloadTest()
827 {
828  Graph graph;
829  ClWorkloadFactory factory =
830  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
831 
832  auto workload = CreateSplitterWorkloadTest<ClSplitterWorkload, DataType>(factory, graph);
833 
834  // Checks that outputs are as we expect them (see definition of CreateSplitterWorkloadTest).
835  SplitterQueueDescriptor queueDescriptor = workload->GetData();
836  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
837  auto predResult = CompareIClTensorHandleShape(inputHandle, {5, 7, 7});
838  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
839 
840  auto outputHandle1 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[1]);
841  predResult = CompareIClTensorHandleShape(outputHandle1, {2, 7, 7});
842  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
843 
844  auto outputHandle2 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[2]);
845  predResult = CompareIClTensorHandleShape(outputHandle2, {2, 7, 7});
846  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
847 
848  auto outputHandle0 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
849  predResult = CompareIClTensorHandleShape(outputHandle0, {1, 7, 7});
850  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
851 }
852 
853 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSplitterFloatWorkload")
854 {
855  ClSplitterWorkloadTest<armnn::DataType::Float32>();
856 }
857 
858 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSplitterFloat16Workload")
859 {
860  ClSplitterWorkloadTest<armnn::DataType::Float16>();
861 }
862 
863 template <typename armnn::DataType DataType>
864 static void ClSplitterConcatTest()
865 {
866  // Tests that it is possible to decide which output of the splitter layer
867  // should be lined to which input of the concat layer.
868  // We test that is is possible to specify 0th output
869  // of the splitter to be the 1st input to the concat and the 1st output of the splitter to be 0th input
870  // of the concat.
871 
872  Graph graph;
873  ClWorkloadFactory factory =
874  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
875 
876  auto workloads =
877  CreateSplitterConcatWorkloadTest<ClSplitterWorkload, ClConcatWorkload, DataType>
878  (factory, graph);
879 
880  auto wlSplitter = std::move(workloads.first);
881  auto wlConcat = std::move(workloads.second);
882 
883  //Checks that the index of inputs/outputs matches what we declared on InputDescriptor construction.
884  armnn::ClSubTensorHandle* sOut0 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[0]);
885  armnn::ClSubTensorHandle* sOut1 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[1]);
886  armnn::ClSubTensorHandle* mIn0 = dynamic_cast<armnn::ClSubTensorHandle*>(wlConcat->GetData().m_Inputs[0]);
887  armnn::ClSubTensorHandle* mIn1 = dynamic_cast<armnn::ClSubTensorHandle*>(wlConcat->GetData().m_Inputs[1]);
888 
889  CHECK(sOut0);
890  CHECK(sOut1);
891  CHECK(mIn0);
892  CHECK(mIn1);
893 
894  //Fliped order of inputs/outputs.
895  bool validDataPointers = (sOut0 == mIn1) && (sOut1 == mIn0);
896  CHECK(validDataPointers);
897 
898 
899  //Also make sure that the inputs are subtensors of one tensor and outputs are sub tensors of another tensor.
900  bool validSubTensorParents = (mIn0->GetTensor().parent() == mIn1->GetTensor().parent())
901  && (sOut0->GetTensor().parent() == sOut1->GetTensor().parent());
902 
903  CHECK(validSubTensorParents);
904 }
905 
906 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSplitterConcatFloatWorkload")
907 {
908  ClSplitterConcatTest<armnn::DataType::Float32>();
909 }
910 
911 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSplitterConcatFloat16Workload")
912 {
913  ClSplitterConcatTest<armnn::DataType::Float16>();
914 }
915 
916 
917 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSingleOutputMultipleInputs")
918 {
919  // Test that it is possible to assign multiple (two) different layers to each of the outputs of a splitter layer.
920  // We create a splitter with two outputs. That each of those outputs is used by two different activation layers.
921 
922  Graph graph;
923  ClWorkloadFactory factory =
924  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
925 
926  std::unique_ptr<ClSplitterWorkload> wlSplitter;
927  std::unique_ptr<ClActivationWorkload> wlActiv0_0;
928  std::unique_ptr<ClActivationWorkload> wlActiv0_1;
929  std::unique_ptr<ClActivationWorkload> wlActiv1_0;
930  std::unique_ptr<ClActivationWorkload> wlActiv1_1;
931 
932  CreateSplitterMultipleInputsOneOutputWorkloadTest<ClSplitterWorkload,
933  ClActivationWorkload, armnn::DataType::Float32>(factory, graph, wlSplitter, wlActiv0_0, wlActiv0_1,
934  wlActiv1_0, wlActiv1_1);
935 
936  //Checks that the index of inputs/outputs matches what we declared on InputDescriptor construction.
937  armnn::ClSubTensorHandle* sOut0 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[0]);
938  armnn::ClSubTensorHandle* sOut1 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[1]);
939  armnn::ClSubTensorHandle* activ0_0Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv0_0->GetData().m_Inputs[0]);
940  armnn::ClSubTensorHandle* activ0_1Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv0_1->GetData().m_Inputs[0]);
941  armnn::ClSubTensorHandle* activ1_0Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv1_0->GetData().m_Inputs[0]);
942  armnn::ClSubTensorHandle* activ1_1Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv1_1->GetData().m_Inputs[0]);
943 
944 
945  CHECK(sOut0);
946  CHECK(sOut1);
947  CHECK(activ0_0Im);
948  CHECK(activ0_1Im);
949  CHECK(activ1_0Im);
950  CHECK(activ1_1Im);
951 
952  bool validDataPointers = (sOut0 == activ0_0Im) && (sOut0 == activ0_1Im) &&
953  (sOut1 == activ1_0Im) && (sOut1 == activ1_1Im);
954 
955  CHECK(validDataPointers);
956 }
957 
958 #if defined(ARMNNREF_ENABLED)
959 
960 // This test unit needs the reference backend, it's not available if the reference backend is not built
961 
962 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateMemCopyWorkloadsCl")
963 {
964  ClWorkloadFactory factory =
965  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
966 
967  CreateMemCopyWorkloads<IClTensorHandle>(factory);
968 }
969 
970 #endif
971 
972 template <typename L2NormalizationWorkloadType, typename armnn::DataType DataType>
973 static void ClL2NormalizationWorkloadTest(DataLayout dataLayout)
974 {
975  Graph graph;
976  ClWorkloadFactory factory =
977  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
978 
979  auto workload =
980  CreateL2NormalizationWorkloadTest<L2NormalizationWorkloadType, DataType>(factory, graph, dataLayout);
981 
982  // Checks that inputs/outputs are as we expect them (see definition of CreateNormalizationWorkloadTest).
983  L2NormalizationQueueDescriptor queueDescriptor = workload->GetData();
984  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
985  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
986 
987  TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 5, 20, 50, 67 })
988  : std::initializer_list<unsigned int>({ 5, 50, 67, 20 });
989  TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 5, 20, 50, 67 })
990  : std::initializer_list<unsigned int>({ 5, 50, 67, 20 });
991 
992  CHECK((inputHandle->GetShape() == inputShape));
993  CHECK((outputHandle->GetShape() == outputShape));
994 }
995 
996 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateL2NormalizationFloatNchwWorkload")
997 {
998  ClL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
999 }
1000 
1001 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateL2NormalizationFloatNhwcWorkload")
1002 {
1003  ClL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
1004 }
1005 
1006 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateL2NormalizationFloat16NchwWorkload")
1007 {
1008  ClL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float16>(DataLayout::NCHW);
1009 }
1010 
1011 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateL2NormalizationFloat16NhwcWorkload")
1012 {
1013  ClL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
1014 }
1015 
1016 template <typename LogSoftmaxWorkloadType, typename armnn::DataType DataType>
1017 static void ClCreateLogSoftmaxWorkloadTest()
1018 {
1019  Graph graph;
1020  ClWorkloadFactory factory =
1021  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
1022 
1023  auto workload = CreateLogSoftmaxWorkloadTest<LogSoftmaxWorkloadType, DataType>(factory, graph);
1024 
1025  // Checks that outputs and inputs are as we expect them (see definition of CreateLogSoftmaxWorkloadTest).
1026  LogSoftmaxQueueDescriptor queueDescriptor = workload->GetData();
1027  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
1028  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
1029 
1030  auto predResult = CompareIClTensorHandleShape(inputHandle, {4, 1});
1031  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
1032  predResult = CompareIClTensorHandleShape(outputHandle, {4, 1});
1033  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
1034 }
1035 
1036 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateLogSoftmaxFloat32WorkloadTest")
1037 {
1038  ClCreateLogSoftmaxWorkloadTest<ClLogSoftmaxWorkload, armnn::DataType::Float32>();
1039 }
1040 
1041 template <typename LstmWorkloadType>
1042 static void ClCreateLstmWorkloadTest()
1043 {
1044  Graph graph;
1045  ClWorkloadFactory factory =
1046  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
1047 
1048  auto workload = CreateLstmWorkloadTest<LstmWorkloadType>(factory, graph);
1049 
1050  LstmQueueDescriptor queueDescriptor = workload->GetData();
1051  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
1052  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[1]);
1053  auto predResult = CompareIClTensorHandleShape(inputHandle, {2, 2});
1054  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
1055  predResult = CompareIClTensorHandleShape(outputHandle, {2, 4});
1056  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
1057 }
1058 
1059 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateLSTMWorkloadFloatWorkload")
1060 {
1061  ClCreateLstmWorkloadTest<ClLstmFloatWorkload>();
1062 }
1063 
1064 template <typename ResizeWorkloadType, typename armnn::DataType DataType>
1065 static void ClResizeWorkloadTest(DataLayout dataLayout)
1066 {
1067  Graph graph;
1068  ClWorkloadFactory factory =
1069  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
1070 
1071  auto workload = CreateResizeBilinearWorkloadTest<ResizeWorkloadType, DataType>(factory, graph, dataLayout);
1072 
1073  auto queueDescriptor = workload->GetData();
1074 
1075  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
1076  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
1077 
1078  armnn::PredicateResult predResult(true);
1079  switch (dataLayout)
1080  {
1081  case DataLayout::NHWC:
1082  predResult = CompareIClTensorHandleShape(inputHandle, { 2, 4, 4, 3 });
1083  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
1084  predResult = CompareIClTensorHandleShape(outputHandle, { 2, 2, 2, 3 });
1085  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
1086  break;
1087  default: // DataLayout::NCHW
1088  predResult = CompareIClTensorHandleShape(inputHandle, { 2, 3, 4, 4 });
1089  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
1090  predResult = CompareIClTensorHandleShape(outputHandle, { 2, 3, 2, 2 });
1091  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
1092  }
1093 }
1094 
1095 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateResizeFloat32NchwWorkload")
1096 {
1097  ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
1098 }
1099 
1100 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateResizeFloat16NchwWorkload")
1101 {
1102  ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::Float16>(DataLayout::NCHW);
1103 }
1104 
1105 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateResizeUint8NchwWorkload")
1106 {
1107  ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::QAsymmU8>(DataLayout::NCHW);
1108 }
1109 
1110 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateResizeFloat32NhwcWorkload")
1111 {
1112  ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
1113 }
1114 
1115 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateResizeFloat16NhwcWorkload")
1116 {
1117  ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
1118 }
1119 
1120 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateResizeUint8NhwcWorkload")
1121 {
1122  ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::QAsymmU8>(DataLayout::NHWC);
1123 }
1124 
1125 template <typename MeanWorkloadType, typename armnn::DataType DataType>
1126 static void ClMeanWorkloadTest()
1127 {
1128  Graph graph;
1129  ClWorkloadFactory factory =
1130  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
1131 
1132  auto workload = CreateMeanWorkloadTest<MeanWorkloadType, DataType>(factory, graph);
1133 
1134  // Checks that inputs/outputs are as we expect them (see definition of CreateMeanWorkloadTest).
1135  MeanQueueDescriptor queueDescriptor = workload->GetData();
1136  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
1137  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
1138 
1139  // The first dimension (batch size) in both input and output is singular thus it has been reduced by ACL.
1140  auto predResult = CompareIClTensorHandleShape(inputHandle, { 1, 3, 7, 4 });
1141  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
1142  predResult = CompareIClTensorHandleShape(outputHandle, { 1, 4 });
1143  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
1144 }
1145 
1146 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateMeanFloat32Workload")
1147 {
1148  ClMeanWorkloadTest<ClMeanWorkload, armnn::DataType::Float32>();
1149 }
1150 
1151 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateMeanFloat16Workload")
1152 {
1153  ClMeanWorkloadTest<ClMeanWorkload, armnn::DataType::Float16>();
1154 }
1155 
1156 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateMeanUint8Workload")
1157 {
1158  ClMeanWorkloadTest<ClMeanWorkload, armnn::DataType::QAsymmU8>();
1159 }
1160 
1161 template <typename ConcatWorkloadType, armnn::DataType DataType>
1162 static void ClCreateConcatWorkloadTest(std::initializer_list<unsigned int> outputShape,
1163  unsigned int concatAxis)
1164 {
1165  Graph graph;
1166  ClWorkloadFactory factory =
1167  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
1168 
1169  auto workload = CreateConcatWorkloadTest<ConcatWorkloadType, DataType>(factory, graph, outputShape, concatAxis);
1170 
1171  ConcatQueueDescriptor queueDescriptor = workload->GetData();
1172  auto inputHandle0 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
1173  auto inputHandle1 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]);
1174  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
1175 
1176  auto predResult = CompareIClTensorHandleShape(inputHandle0, { 2, 3, 2, 5 });
1177  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
1178  predResult = CompareIClTensorHandleShape(inputHandle1, { 2, 3, 2, 5 });
1179  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
1180  predResult = CompareIClTensorHandleShape(outputHandle, outputShape);
1181  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
1182 }
1183 
1184 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConcatDim0Float32Workload")
1185 {
1186  ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::Float32>({ 4, 3, 2, 5 }, 0);
1187 }
1188 
1189 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConcatDim1Float32Workload")
1190 {
1191  ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::Float32>({ 2, 6, 2, 5 }, 1);
1192 }
1193 
1194 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConcatDim3Float32Workload")
1195 {
1196  ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::Float32>({ 2, 3, 2, 10 }, 3);
1197 }
1198 
1199 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConcatDim0Uint8Workload")
1200 {
1201  ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::QAsymmU8>({ 4, 3, 2, 5 }, 0);
1202 }
1203 
1204 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConcatDim1Uint8Workload")
1205 {
1206  ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::QAsymmU8>({ 2, 6, 2, 5 }, 1);
1207 }
1208 
1209 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConcatDim3Uint8Workload")
1210 {
1211  ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::QAsymmU8>({ 2, 3, 2, 10 }, 3);
1212 }
1213 
1214 template <typename SpaceToDepthWorkloadType, typename armnn::DataType DataType>
1215 static void ClSpaceToDepthWorkloadTest()
1216 {
1217  Graph graph;
1218  ClWorkloadFactory factory =
1219  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
1220 
1221  auto workload = CreateSpaceToDepthWorkloadTest<SpaceToDepthWorkloadType, DataType>(factory, graph);
1222 
1223  SpaceToDepthQueueDescriptor queueDescriptor = workload->GetData();
1224  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
1225  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
1226 
1227  auto predResult = CompareIClTensorHandleShape(inputHandle, { 1, 2, 2, 1 });
1228  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
1229  predResult = CompareIClTensorHandleShape(outputHandle, { 1, 1, 1, 4 });
1230  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
1231 }
1232 
1233 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSpaceToDepthFloat32Workload")
1234 {
1235  ClSpaceToDepthWorkloadTest<ClSpaceToDepthWorkload, armnn::DataType::Float32>();
1236 }
1237 
1238 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSpaceToDepthFloat16Workload")
1239 {
1240  ClSpaceToDepthWorkloadTest<ClSpaceToDepthWorkload, armnn::DataType::Float16>();
1241 }
1242 
1243 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSpaceToDepthQAsymm8Workload")
1244 {
1245  ClSpaceToDepthWorkloadTest<ClSpaceToDepthWorkload, armnn::DataType::QAsymmU8>();
1246 }
1247 
1248 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSpaceToDepthQSymm16Workload")
1249 {
1250  ClSpaceToDepthWorkloadTest<ClSpaceToDepthWorkload, armnn::DataType::QSymmS16>();
1251 }
1252 
1253 template <armnn::DataType DataType>
1254 static void ClCreateStackWorkloadTest(const std::initializer_list<unsigned int>& inputShape,
1255  const std::initializer_list<unsigned int>& outputShape,
1256  unsigned int axis,
1257  unsigned int numInputs)
1258 {
1259  armnn::Graph graph;
1260  ClWorkloadFactory factory =
1261  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
1262 
1263  auto workload = CreateStackWorkloadTest<ClStackWorkload, DataType>(factory,
1264  graph,
1265  TensorShape(inputShape),
1266  TensorShape(outputShape),
1267  axis,
1268  numInputs);
1269 
1270  // Check inputs and output are as expected
1271  StackQueueDescriptor queueDescriptor = workload->GetData();
1272  for (unsigned int i = 0; i < numInputs; ++i)
1273  {
1274  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[i]);
1275  auto predResult1 = CompareIClTensorHandleShape(inputHandle, inputShape);
1276  CHECK_MESSAGE(predResult1.m_Result, predResult1.m_Message.str());
1277  }
1278  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
1279  auto predResult2 = CompareIClTensorHandleShape(outputHandle, outputShape);
1280  CHECK_MESSAGE(predResult2.m_Result, predResult2.m_Message.str());
1281 }
1282 
1283 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateStackFloat32Workload")
1284 {
1285  ClCreateStackWorkloadTest<armnn::DataType::Float32>({ 3, 4, 5 }, { 3, 4, 2, 5 }, 2, 2);
1286 }
1287 
1288 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateStackFloat16Workload")
1289 {
1290  ClCreateStackWorkloadTest<armnn::DataType::Float16>({ 3, 4, 5 }, { 3, 4, 2, 5 }, 2, 2);
1291 }
1292 
1293 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateStackUint8Workload")
1294 {
1295  ClCreateStackWorkloadTest<armnn::DataType::QAsymmU8>({ 3, 4, 5 }, { 3, 4, 2, 5 }, 2, 2);
1296 }
1297 
1298 
1299 template <typename QLstmWorkloadType>
1300 static void ClCreateQLstmWorkloadTest()
1301 {
1302  Graph graph;
1303  ClWorkloadFactory factory = ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
1304 
1305  auto workload = CreateQLstmWorkloadTest<QLstmWorkloadType>(factory, graph);
1306  QLstmQueueDescriptor queueDescriptor = workload->GetData();
1307 
1308  IAclTensorHandle* inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
1309  CHECK((inputHandle->GetShape() == TensorShape({2, 4})));
1310  CHECK((inputHandle->GetDataType() == arm_compute::DataType::QASYMM8_SIGNED));
1311 
1312  IAclTensorHandle* cellStateOutHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
1313  CHECK((cellStateOutHandle->GetShape() == TensorShape({2, 4})));
1314  CHECK((cellStateOutHandle->GetDataType() == arm_compute::DataType::QSYMM16));
1315 
1316  IAclTensorHandle* outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[2]);
1317  CHECK((outputHandle->GetShape() == TensorShape({2, 4})));
1318  CHECK((outputHandle->GetDataType() == arm_compute::DataType::QASYMM8_SIGNED));
1319 }
1320 
1321 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateQLstmWorkloadTest")
1322 {
1323  ClCreateQLstmWorkloadTest<ClQLstmWorkload>();
1324 }
1325 
1326 template <typename QuantizedLstmWorkloadType>
1327 static void ClCreateQuantizedLstmWorkloadTest()
1328 {
1329  using namespace armnn::armcomputetensorutils;
1330 
1331  Graph graph;
1332  ClWorkloadFactory factory =
1333  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
1334 
1335  auto workload = CreateQuantizedLstmWorkloadTest<QuantizedLstmWorkloadType>(factory, graph);
1336 
1337  QuantizedLstmQueueDescriptor queueDescriptor = workload->GetData();
1338 
1339  IAclTensorHandle* inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
1340  CHECK((inputHandle->GetShape() == TensorShape({2, 2})));
1341  CHECK((inputHandle->GetDataType() == arm_compute::DataType::QASYMM8));
1342 
1343  IAclTensorHandle* cellStateInHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
1344  CHECK((cellStateInHandle->GetShape() == TensorShape({2, 4})));
1345  CHECK((cellStateInHandle->GetDataType() == arm_compute::DataType::QSYMM16));
1346 
1347  IAclTensorHandle* outputStateInHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[2]);
1348  CHECK((outputStateInHandle->GetShape() == TensorShape({2, 4})));
1349  CHECK((outputStateInHandle->GetDataType() == arm_compute::DataType::QASYMM8));
1350 
1351  IAclTensorHandle* cellStateOutHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
1352  CHECK((cellStateOutHandle->GetShape() == TensorShape({2, 4})));
1353  CHECK((cellStateOutHandle->GetDataType() == arm_compute::DataType::QSYMM16));
1354 
1355  IAclTensorHandle* outputStateOutHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
1356  CHECK((outputStateOutHandle->GetShape() == TensorShape({2, 4})));
1357  CHECK((outputStateOutHandle->GetDataType() == arm_compute::DataType::QASYMM8));
1358 }
1359 
1360 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateQuantizedLstmWorkload")
1361 {
1362  ClCreateQuantizedLstmWorkloadTest<ClQuantizedLstmWorkload>();
1363 }
1364 
1365 template <armnn::DataType DataType>
1366 static void ClCreateActivationWorkloadReplaceFunctionsTest()
1367 {
1368  std::shared_ptr<ClMemoryManager> memoryManager = std::make_shared<ClMemoryManager>(
1369  std::make_unique<arm_compute::CLBufferAllocator>());
1370 
1371  Graph graph;
1372  ClWorkloadFactory factory = ClWorkloadFactoryHelper::GetFactory(memoryManager);
1373  // input and output are created as armnn::TensorInfo tensorInfo({1, 1}, DataType)
1374  auto workloadPtr = CreateActivationWorkloadTest<ClActivationWorkload, DataType>(factory, graph);
1375 
1376  // new input and output tensor handlers are created and then replace in the workload
1377  const ClTensorHandleFactory tensorHandleFactory(memoryManager);
1378  TensorInfo inputInfo({2 , 2}, DataType::Float16);
1379  TensorInfo outputInfo({2 , 2}, DataType::Float16);
1380  unique_ptr<ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputInfo, true);
1381  inputHandle->Manage();
1382  inputHandle->Allocate();
1383  unique_ptr<ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputInfo, true);
1384  outputHandle->Manage();
1385  outputHandle->Allocate();
1386 
1387  unsigned int slot = 0;
1388  CHECK_THROWS_AS(workloadPtr->ReplaceInputTensorHandle(inputHandle.get(), slot), UnimplementedException);
1389  CHECK_THROWS_AS(workloadPtr->ReplaceOutputTensorHandle(outputHandle.get(), slot), UnimplementedException);
1390 }
1391 
1392 TEST_CASE("ClReplaceFunctionsfromFloat32toFloat16ActivationWorkload")
1393 {
1394  ClCreateActivationWorkloadReplaceFunctionsTest<armnn::DataType::Float32>();
1395 }
1396 
1397 }
uint32_t m_PadBottom
Padding bottom value in the height dimension.
bool m_BiasEnabled
Enable/disable bias.
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
DataLayout
Definition: Types.hpp:62
armnn::PredicateResult CompareIClTensorHandleShape(IClTensorHandle *tensorHandle, std::initializer_list< unsigned int > expectedDimensions)
TEST_SUITE("CreateWorkloadCl")
std::vector< BackendOptions > ModelOptions
A Convolution2dDescriptor for the Convolution2dLayer.
uint32_t m_PadRight
Padding right value in the width dimension.
Copyright (c) 2021 ARM Limited and Contributors.
void IgnoreUnused(Ts &&...)
void AllocateAndCopyDataToITensorHandle(armnn::ITensorHandle *tensorHandle, const void *memory)
virtual arm_compute::DataType GetDataType() const =0
uint32_t m_PadTop
Padding top value in the height dimension.
TEST_CASE_FIXTURE(ClContextControlFixture, "CopyBetweenNeonAndGpu")
uint32_t m_StrideX
Stride value when proceeding through input for the width dimension.
DataType
Definition: Types.hpp:48
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
This factory creates ClImportTensorHandles that refer to imported memory tensors. ...
virtual TensorShape GetShape() const =0
Get the number of elements for each dimension ordered from slowest iterating dimension to fastest ite...
uint32_t m_StrideY
Stride value when proceeding through input for the height dimension.
This layer represents an addition operation.
Struct for the users to pass backend specific options.
This layer represents a subtraction operation.
std::vector< ITensorHandle * > m_Outputs
This layer represents a division operation.
UnaryOperation
Definition: Types.hpp:124
Contains information about TensorInfos of a layer.
void SetQuantizationOffset(int32_t offset)
Definition: Tensor.cpp:489
std::vector< ITensorHandle * > m_Inputs
std::unique_ptr< ITensorHandle > CreateTensorHandle(const TensorInfo &tensorInfo) const override
This layer represents a multiplication operation.
arm_compute::CLSubTensor & GetTensor() override
Depthwise Convolution 2D layer workload data.
uint32_t m_PadLeft
Padding left value in the width dimension.
LayerType
When adding a new layer, adapt also the LastLayer enum value in the enum class LayerType below...
Definition: Types.hpp:468