ArmNN
 22.02
ClCreateWorkloadTests.cpp
Go to the documentation of this file.
1 //
2 // Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
8 
16 
19 
22 #include <cl/ClTensorHandle.hpp>
23 #include <cl/ClWorkloadFactory.hpp>
26 
27 #include <doctest/doctest.h>
28 
30  std::initializer_list<unsigned int> expectedDimensions)
31 {
32  return CompareTensorHandleShape<IClTensorHandle>(tensorHandle, expectedDimensions);
33 }
34 
35 TEST_SUITE("CreateWorkloadCl")
36 {
37 template <armnn::DataType DataType>
38 static void ClCreateActivationWorkloadTest()
39 {
40  Graph graph;
41  ClWorkloadFactory factory =
42  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
43 
44  auto workload = CreateActivationWorkloadTest<ClActivationWorkload, DataType>(factory, graph);
45 
46  // Checks that inputs/outputs are as we expect them (see definition of CreateActivationWorkloadTest).
47  ActivationQueueDescriptor queueDescriptor = workload->GetData();
48  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
49  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
50 
51  auto predResult = CompareIClTensorHandleShape(inputHandle, {1, 1});
52  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
53 
54  predResult = CompareIClTensorHandleShape(outputHandle, {1, 1});
55  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
56 }
57 
// Instantiates the activation workload creation test for 32-bit floats.
TEST_CASE_FIXTURE(ClContextControlFixture, "CreateActivationFloatWorkload")
{
    ClCreateActivationWorkloadTest<armnn::DataType::Float32>();
}

// Instantiates the activation workload creation test for 16-bit (half) floats.
TEST_CASE_FIXTURE(ClContextControlFixture, "CreateActivationFloat16Workload")
{
    ClCreateActivationWorkloadTest<armnn::DataType::Float16>();
}
67 
68 template <typename WorkloadType,
69  typename DescriptorType,
70  typename LayerType,
72 static void ClCreateElementwiseWorkloadTest()
73 {
74  Graph graph;
75  ClWorkloadFactory factory =
76  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
77 
78  auto workload = CreateElementwiseWorkloadTest<WorkloadType, DescriptorType, LayerType, DataType>(factory, graph);
79 
80  // Checks that inputs/outputs are as we expect them (see definition of CreateElementwiseWorkloadTest).
81  DescriptorType queueDescriptor = workload->GetData();
82  auto inputHandle1 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
83  auto inputHandle2 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]);
84  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
85  auto predResult = CompareIClTensorHandleShape(inputHandle1, {2, 3});
86  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
87  predResult = CompareIClTensorHandleShape(inputHandle2, {2, 3});
88  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
89  predResult = CompareIClTensorHandleShape(outputHandle, {2, 3});
90  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
91 }
92 
93 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateAdditionFloatWorkload")
94 {
95  ClCreateElementwiseWorkloadTest<ClAdditionWorkload,
99 }
100 
101 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateAdditionFloat16Workload")
102 {
103  ClCreateElementwiseWorkloadTest<ClAdditionWorkload,
107 }
108 
109 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSubtractionFloatWorkload")
110 {
111  ClCreateElementwiseWorkloadTest<ClSubtractionWorkload,
115 }
116 
117 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSubtractionFloat16Workload")
118 {
119  ClCreateElementwiseWorkloadTest<ClSubtractionWorkload,
123 }
124 
125 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateMultiplicationFloatWorkloadTest")
126 {
127  ClCreateElementwiseWorkloadTest<ClMultiplicationWorkload,
131 }
132 
133 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateMultiplicationFloat16WorkloadTest")
134 {
135  ClCreateElementwiseWorkloadTest<ClMultiplicationWorkload,
139 }
140 
141 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateMultiplicationUint8WorkloadTest")
142 {
143  ClCreateElementwiseWorkloadTest<ClMultiplicationWorkload,
147 }
148 
149 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateDivisionFloatWorkloadTest")
150 {
151  ClCreateElementwiseWorkloadTest<ClDivisionWorkload,
155 }
156 
157 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateDivisionFloat16WorkloadTest")
158 {
159  ClCreateElementwiseWorkloadTest<ClDivisionWorkload,
163 }
164 
165 template <typename WorkloadType,
166  typename DescriptorType,
168 static void ClCreateElementwiseUnaryWorkloadTest(armnn::UnaryOperation op)
169 {
170  Graph graph;
171  ClWorkloadFactory factory =
172  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
173 
174  auto workload = CreateElementwiseUnaryWorkloadTest<WorkloadType, DescriptorType, DataType>(factory, graph, op);
175 
176  DescriptorType queueDescriptor = workload->GetData();
177 
178  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
179  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
180 
181  auto predResult = CompareIClTensorHandleShape(inputHandle, {2, 3});
182  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
183 
184  predResult = CompareIClTensorHandleShape(outputHandle, {2, 3});
185  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
186 }
187 
// Verifies an elementwise-unary (Rsqrt) workload can be created for Float32.
TEST_CASE_FIXTURE(ClContextControlFixture, "CreateRsqrtFloat32WorkloadTest")
{
    ClCreateElementwiseUnaryWorkloadTest<ClRsqrtWorkload, RsqrtQueueDescriptor, armnn::DataType::Float32>(
        UnaryOperation::Rsqrt);
}
193 
194 template <typename BatchNormalizationWorkloadType, armnn::DataType DataType>
195 static void ClCreateBatchNormalizationWorkloadTest(DataLayout dataLayout)
196 {
197  Graph graph;
198  ClWorkloadFactory factory =
199  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
200 
201  auto workload = CreateBatchNormalizationWorkloadTest<BatchNormalizationWorkloadType, DataType>
202  (factory, graph, dataLayout);
203 
204  // Checks that inputs/outputs are as we expect them (see definition of CreateBatchNormalizationWorkloadTest).
205  BatchNormalizationQueueDescriptor queueDescriptor = workload->GetData();
206  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
207  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
208 
209  armnn::PredicateResult predResult(true);
210  switch (dataLayout)
211  {
212  case DataLayout::NHWC:
213  predResult = CompareIClTensorHandleShape(inputHandle, { 2, 4, 4, 3 });
214  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
215  predResult = CompareIClTensorHandleShape(outputHandle, { 2, 4, 4, 3 });
216  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
217  break;
218  default: // NCHW
219  predResult = CompareIClTensorHandleShape(inputHandle, { 2, 3, 4, 4 });
220  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
221  predResult = CompareIClTensorHandleShape(outputHandle, { 2, 3, 4, 4 });
222  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
223  }
224 }
225 
// Batch normalization creation tests: Float32/Float16 in both NCHW and NHWC.
TEST_CASE_FIXTURE(ClContextControlFixture, "CreateBatchNormalizationFloatNchwWorkload")
{
    ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload,
                                           armnn::DataType::Float32>(DataLayout::NCHW);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateBatchNormalizationFloat16NchwWorkload")
{
    ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload,
                                           armnn::DataType::Float16>(DataLayout::NCHW);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateBatchNormalizationFloatNhwcWorkload")
{
    ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload,
                                           armnn::DataType::Float32>(DataLayout::NHWC);
}

// NOTE(review): "Nhwc" appears twice in this registered test name; renaming it
// would change the externally visible test id, so it is left untouched.
TEST_CASE_FIXTURE(ClContextControlFixture, "CreateBatchNormalizationNhwcFloat16NhwcWorkload")
{
    ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload,
                                           armnn::DataType::Float16>(DataLayout::NHWC);
}
249 
250 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConvertFp16ToFp32Workload")
251 {
252  Graph graph;
253  ClWorkloadFactory factory =
254  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
255 
256  auto workload = CreateConvertFp16ToFp32WorkloadTest<ClConvertFp16ToFp32Workload>(factory, graph);
257 
258  ConvertFp16ToFp32QueueDescriptor queueDescriptor = workload->GetData();
259  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
260  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
261  auto predResult = CompareIClTensorHandleShape(inputHandle, {1, 3, 2, 3});
262  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
263  predResult = CompareIClTensorHandleShape(outputHandle, {1, 3, 2, 3});
264  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
265  CHECK((inputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F16));
266  CHECK((outputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F32));
267 }
268 
269 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConvertFp32ToFp16Workload")
270 {
271  Graph graph;
272  ClWorkloadFactory factory =
273  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
274 
275  auto workload = CreateConvertFp32ToFp16WorkloadTest<ClConvertFp32ToFp16Workload>(factory, graph);
276 
277  ConvertFp32ToFp16QueueDescriptor queueDescriptor = workload->GetData();
278  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
279  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
280 
281  auto predResult = CompareIClTensorHandleShape(inputHandle, {1, 3, 2, 3});
282  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
283  predResult = CompareIClTensorHandleShape(outputHandle, {1, 3, 2, 3});
284  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
285  CHECK((inputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F32));
286  CHECK((outputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F16));
287 }
288 
289 template <typename Convolution2dWorkloadType, typename armnn::DataType DataType>
290 static void ClConvolution2dWorkloadTest(DataLayout dataLayout)
291 {
292  Graph graph;
293  ClWorkloadFactory factory =
294  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
295 
296  auto workload = CreateConvolution2dWorkloadTest<ClConvolution2dWorkload, DataType>(factory,
297  graph,
298  dataLayout);
299 
300  TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({2, 3, 8, 16})
301  : std::initializer_list<unsigned int>({2, 8, 16, 3});
302  TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({2, 2, 2, 10})
303  : std::initializer_list<unsigned int>({2, 2, 10, 2});
304 
305  // Checks that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest).
306  Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
307  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
308  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
309  CHECK((inputHandle->GetShape() == inputShape));
310  CHECK((outputHandle->GetShape() == outputShape));
311 }
312 
// Convolution2d creation tests: Float32/Float16 in both NCHW and NHWC.
TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConvolution2dFloatNchwWorkload")
{
    ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConvolution2dFloatNhwcWorkload")
{
    ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConvolution2dFloat16NchwWorkload")
{
    ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float16>(DataLayout::NCHW);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConvolution2dFloat16NhwcWorkload")
{
    ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
}
332 
333 TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConvolution2dFastMathEnabledWorkload")
334 {
335  Graph graph;
336 
337  using ModelOptions = std::vector<BackendOptions>;
338  ModelOptions modelOptions = {};
339  BackendOptions gpuAcc("GpuAcc",
340  {
341  { "FastMathEnabled", true }
342  });
343  modelOptions.push_back(gpuAcc);
344 
345  ClWorkloadFactory factory =
346  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager(), modelOptions);
347 
348  auto workload =
349  CreateConvolution2dWorkloadFastMathTest<ClConvolution2dWorkload, armnn::DataType::Float32>(factory,
350  graph,
351  DataLayout::NCHW,
352  modelOptions);
353 
354  ARMNN_ASSERT(workload != nullptr);
355  auto conv2dWorkload = PolymorphicDowncast<ClConvolution2dWorkload*>(workload.get());
356  IgnoreUnused(conv2dWorkload);
357  ARMNN_ASSERT(conv2dWorkload != nullptr);
358  ARMNN_ASSERT(conv2dWorkload->GetConvolutionMethod() == arm_compute::ConvolutionMethod::WINOGRAD);
359 }
360 
361 TEST_CASE_FIXTURE(ClContextControlFixture, "ClReplaceInputOutputConvolution2dWorkload")
362 {
363  // Create Convolution2dWorkload with ClTensorHandle input and output
364  // Then replace the input and output with ClImportTensorHandle
365  Graph graph;
366  ClWorkloadFactory factory =
367  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
368 
369  auto workload =
370  CreateConvolution2dWorkloadTest<ClConvolution2dWorkload, DataType::Float32>(factory,
371  graph,
372  DataLayout::NHWC);
373 
374  TensorShape inputShape = std::initializer_list<unsigned int>({2, 8, 16, 3});
375  TensorShape outputShape = std::initializer_list<unsigned int>({2, 2, 10, 2});
376 
377  // Checks that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest).
378  Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
379  auto inputHandle = PolymorphicDowncast<ITensorHandle*>(queueDescriptor.m_Inputs[0]);
380  auto outputHandle = PolymorphicDowncast<ITensorHandle*>(queueDescriptor.m_Outputs[0]);
381  CHECK((inputHandle->GetShape() == inputShape));
382  CHECK((outputHandle->GetShape() == outputShape));
383  // The input and output handles are created correctly as ClTensorHandle
384  CHECK((dynamic_cast<ClTensorHandle*>(inputHandle) != nullptr));
385  CHECK((dynamic_cast<ClTensorHandle*>(outputHandle) != nullptr));
386 
387  // Replace with ImportTensorHandle
388  ClImportTensorHandleFactory importFactory(static_cast<MemorySourceFlags>(MemorySource::Malloc),
389  static_cast<MemorySourceFlags>(MemorySource::Malloc));
390 
391  TensorInfo inputInfo({ 2, 8, 16, 3 }, DataType::Float32);
392  TensorInfo outputInfo({ 2, 2, 10, 2 }, DataType::Float32);
393 
394  // create TensorHandle for memory import
395  auto inputImportHandle = importFactory.CreateTensorHandle(inputInfo);
396  auto outputImportHandle = importFactory.CreateTensorHandle(outputInfo);
397 
398  // Calling ReplaceInputTensorHandle and ReplaceOutputTensorHandle does not throw exception
399  // as Reconfigure function is implemented
400  workload->ReplaceInputTensorHandle(inputImportHandle.get(), 0);
401  workload->ReplaceOutputTensorHandle(outputImportHandle.get(), 0);
402 
403  // Correctly replaced with the import handles with correct information
404  queueDescriptor = workload->GetData();
405  auto replacedInputHandle = PolymorphicDowncast<ITensorHandle*>(queueDescriptor.m_Inputs[0]);
406  auto replacedOutputHandle = PolymorphicDowncast<ITensorHandle*>(queueDescriptor.m_Outputs[0]);
407  CHECK((replacedInputHandle->GetShape() == inputShape));
408  CHECK((replacedOutputHandle->GetShape() == outputShape));
409 
410  CHECK((inputImportHandle.get() == replacedInputHandle));
411  CHECK((inputImportHandle.get() == replacedInputHandle));
412 
413  CHECK((dynamic_cast<ClTensorHandle*>(replacedInputHandle) == nullptr));
414  CHECK((dynamic_cast<ClImportTensorHandle*>(replacedInputHandle) != nullptr));
415  CHECK((dynamic_cast<ClTensorHandle*>(replacedOutputHandle) == nullptr));
416  CHECK((dynamic_cast<ClImportTensorHandle*>(replacedOutputHandle) != nullptr));
417 }
418 
// Builds a quantized (QAsymmU8 input, per-channel QSymmS8 weight) Convolution2d
// workload directly against an explicitly constructed CLCompileContext and
// verifies that constructing the workload populates the context's built-program
// cache. The workload is only constructed here, never executed.
TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConvolution2dClCompiledContextWorkload")
{
    using namespace armnn;

    const DataType inputType  = DataType::QAsymmU8;
    const DataType kernelType = DataType::QSymmS8;
    const DataType biasType   = DataType::Signed32;

    TensorInfo inputInfo ({ 1, 3, 1, 2 }, inputType, 0.5f, 128);
    TensorInfo outputInfo({ 1, 3, 1, 3 }, inputType, 1.0f, 128);

    // Per-channel quantization: one scale per output channel (axis 0).
    const std::vector<float> quantScales{ 0.5f, 0.75f, 1.0f };
    constexpr unsigned int quantDimension = 0;

    TensorInfo kernelInfo({ 3, 1, 1, 2 }, kernelType, quantScales, quantDimension);

    const std::vector<float> biasQuantScales{ 0.25f, 0.375f, 0.5f };
    TensorInfo biasInfo({ 3 }, biasType, biasQuantScales, quantDimension);

    // NOTE(review): inputData and expectedOutputData are set up but never
    // referenced again in this test - only kernelData/biasData are copied into
    // tensors below. Presumably retained from an execution variant of the test.
    std::vector<uint8_t> inputData =
    {
        138, 108, 138, 108, 138, 108
    };

    std::vector<int8_t> kernelData =
    {
        1, 2, 1, 2, 1, 2
    };

    std::vector<int32_t> biasData =
    {
        4, 4, 4
    };

    std::vector<uint8_t> expectedOutputData =
    {
        121, 118, 115, 121, 118, 115, 121, 118, 115
    };


    // 1x1-stride, unpadded, biased convolution in NHWC layout.
    Convolution2dDescriptor descriptor;
    descriptor.m_StrideX = 1;
    descriptor.m_StrideY = 1;
    descriptor.m_PadLeft = 0;
    descriptor.m_PadRight = 0;
    descriptor.m_PadTop = 0;
    descriptor.m_PadBottom = 0;
    descriptor.m_BiasEnabled = true;
    descriptor.m_DataLayout = DataLayout::NHWC;

    // Factory plumbing: memory manager and CL tensor handle factory.
    auto memoryManager = ClWorkloadFactoryHelper::GetMemoryManager();
    auto clMemoryManager = armnn::PolymorphicPointerDowncast<armnn::ClMemoryManager>(memoryManager);
    auto tensorHandleFactory = ClWorkloadFactoryHelper::GetTensorHandleFactory(memoryManager);

    std::unique_ptr<ITensorHandle> inputHandle  = tensorHandleFactory.CreateTensorHandle(inputInfo);
    std::unique_ptr<ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputInfo);


    // Weights and bias are owned by scoped handles and copied in up front.
    WorkloadInfo workloadInfo;
    ScopedTensorHandle weightTensor(kernelInfo);
    ScopedTensorHandle biasTensor(biasInfo);

    AllocateAndCopyDataToITensorHandle(&weightTensor, kernelData.data());
    AllocateAndCopyDataToITensorHandle(&biasTensor, biasData.data());

    Convolution2dQueueDescriptor queueDescriptor;
    queueDescriptor.m_Parameters = descriptor;
    queueDescriptor.m_Weight = &weightTensor;
    queueDescriptor.m_Bias = &biasTensor;

    AddInputToWorkload(queueDescriptor, workloadInfo, inputInfo, inputHandle.get());
    AddOutputToWorkload(queueDescriptor, workloadInfo, outputInfo, outputHandle.get());

    // Initialize our m_CLCompileContext using default device and context
    auto context = arm_compute::CLKernelLibrary::get().context();
    auto device = arm_compute::CLKernelLibrary::get().get_device();
    auto clCompileContext = arm_compute::CLCompileContext(context, device);



    // Check built programs are empty in context
    CHECK(clCompileContext.get_built_programs().empty());

    auto workload = std::make_unique<ClConvolution2dWorkload>(queueDescriptor,
                                                              workloadInfo,
                                                              clMemoryManager->GetIntraLayerManager(),
                                                              clCompileContext);
    ARMNN_ASSERT(workload != nullptr);
    // Check built programs are not empty in context: constructing the workload
    // must have compiled at least one OpenCL program into this context.
    CHECK(!clCompileContext.get_built_programs().empty());
}
510 
511 template <typename DepthwiseConvolutionWorkloadType, typename armnn::DataType DataType>
512 static void ClDepthwiseConvolutionWorkloadTest(DataLayout dataLayout)
513 {
514  Graph graph;
515  ClWorkloadFactory factory =
516  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
517 
518  auto workload = CreateDepthwiseConvolution2dWorkloadTest<DepthwiseConvolutionWorkloadType, DataType>
519  (factory, graph, dataLayout);
520 
521  // Checks that inputs/outputs are as we expect them (see definition of CreateDepthwiseConvolution2dWorkloadTest).
522  DepthwiseConvolution2dQueueDescriptor queueDescriptor = workload->GetData();
523  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
524  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
525 
526  TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 2, 2, 5, 5 })
527  : std::initializer_list<unsigned int>({ 2, 5, 5, 2 });
528  TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 2, 2, 5, 5 })
529  : std::initializer_list<unsigned int>({ 2, 5, 5, 2 });
530 
531  CHECK((inputHandle->GetShape() == inputShape));
532  CHECK((outputHandle->GetShape() == outputShape));
533 }
534 
// Depthwise convolution creation test (Float32, NHWC only).
TEST_CASE_FIXTURE(ClContextControlFixture, "CreateDepthwiseConvolutionFloat32NhwcWorkload")
{
    ClDepthwiseConvolutionWorkloadTest<ClDepthwiseConvolutionWorkload, DataType::Float32>(DataLayout::NHWC);
}
539 
540 template <typename Convolution2dWorkloadType, typename armnn::DataType DataType>
541 static void ClDirectConvolution2dWorkloadTest()
542 {
543  Graph graph;
544  ClWorkloadFactory factory =
545  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
546 
547  auto workload = CreateDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, DataType>(factory, graph);
548 
549  // Checks that outputs and inputs are as we expect them (see definition of CreateDirectConvolution2dWorkloadTest).
550  Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
551  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
552  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
553  auto predResult = CompareIClTensorHandleShape(inputHandle, {2, 3, 6, 6});
554  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
555  predResult = CompareIClTensorHandleShape(outputHandle, {2, 2, 6, 6});
556  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
557 }
558 
// Direct convolution creation tests for Float32, Float16 and QAsymmU8.
TEST_CASE_FIXTURE(ClContextControlFixture, "CreateDirectConvolution2dFloatWorkload")
{
    ClDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float32>();
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateDirectConvolution2dFloat16Workload")
{
    ClDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float16>();
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateDirectConvolution2dUint8Workload")
{
    ClDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::QAsymmU8>();
}
573 
574 template <typename FullyConnectedWorkloadType, typename armnn::DataType DataType>
575 static void ClCreateFullyConnectedWorkloadTest()
576 {
577  Graph graph;
578  ClWorkloadFactory factory =
579  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
580 
581  auto workload =
582  CreateFullyConnectedWorkloadTest<FullyConnectedWorkloadType, DataType>(factory, graph);
583 
584  // Checks that outputs and inputs are as we expect them (see definition of CreateFullyConnectedWorkloadTest).
585  FullyConnectedQueueDescriptor queueDescriptor = workload->GetData();
586  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
587  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
588  auto predResult = CompareIClTensorHandleShape(inputHandle, {3, 1, 4, 5});
589  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
590  predResult = CompareIClTensorHandleShape(outputHandle, {3, 7});
591  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
592 }
593 
594 
// Fully-connected creation tests for Float32 and Float16.
TEST_CASE_FIXTURE(ClContextControlFixture, "CreateFullyConnectedFloatWorkloadTest")
{
    ClCreateFullyConnectedWorkloadTest<ClFullyConnectedWorkload, armnn::DataType::Float32>();
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateFullyConnectedFloat16WorkloadTest")
{
    ClCreateFullyConnectedWorkloadTest<ClFullyConnectedWorkload, armnn::DataType::Float16>();
}
604 
605 template <typename NormalizationWorkloadType, typename armnn::DataType DataType>
606 static void ClNormalizationWorkloadTest(DataLayout dataLayout)
607 {
608  Graph graph;
609  ClWorkloadFactory factory =
610  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
611 
612  auto workload = CreateNormalizationWorkloadTest<NormalizationWorkloadType, DataType>(factory, graph, dataLayout);
613 
614  // Checks that inputs/outputs are as we expect them (see definition of CreateNormalizationWorkloadTest).
615  NormalizationQueueDescriptor queueDescriptor = workload->GetData();
616  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
617  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
618 
619  TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({3, 5, 5, 1})
620  : std::initializer_list<unsigned int>({3, 1, 5, 5});
621  TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({3, 5, 5, 1})
622  : std::initializer_list<unsigned int>({3, 1, 5, 5});
623 
624  CHECK((inputHandle->GetShape() == inputShape));
625  CHECK((outputHandle->GetShape() == outputShape));
626 }
627 
// Normalization creation tests: Float32/Float16 in both NCHW and NHWC.
TEST_CASE_FIXTURE(ClContextControlFixture, "CreateNormalizationFloat32NchwWorkload")
{
    ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateNormalizationFloat16NchwWorkload")
{
    ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float16>(DataLayout::NCHW);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateNormalizationFloat32NhwcWorkload")
{
    ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateNormalizationFloat16NhwcWorkload")
{
    ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
}
647 
648 template <typename armnn::DataType DataType>
649 static void ClPooling2dWorkloadTest(DataLayout dataLayout)
650 {
651  Graph graph;
652  ClWorkloadFactory factory =
653  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
654 
655  auto workload = CreatePooling2dWorkloadTest<ClPooling2dWorkload, DataType>(factory, graph, dataLayout);
656 
657  TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({3, 2, 5, 5})
658  : std::initializer_list<unsigned int>({3, 5, 5, 2});
659  TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({3, 2, 2, 4})
660  : std::initializer_list<unsigned int>({3, 2, 4, 2});
661 
662  // Check that inputs/outputs are as we expect them (see definition of CreatePooling2dWorkloadTest).
663  Pooling2dQueueDescriptor queueDescriptor = workload->GetData();
664  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
665  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
666 
667  CHECK((inputHandle->GetShape() == inputShape));
668  CHECK((outputHandle->GetShape() == outputShape));
669 }
670 
// Pooling2d creation tests: Float32/Float16 in both NCHW and NHWC.
TEST_CASE_FIXTURE(ClContextControlFixture, "CreatePooling2dFloatNchwWorkload")
{
    ClPooling2dWorkloadTest<armnn::DataType::Float32>(DataLayout::NCHW);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreatePooling2dFloatNhwcWorkload")
{
    ClPooling2dWorkloadTest<armnn::DataType::Float32>(DataLayout::NHWC);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreatePooling2dFloat16NchwWorkload")
{
    ClPooling2dWorkloadTest<armnn::DataType::Float16>(DataLayout::NCHW);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreatePooling2dFloat16NhwcWorkload")
{
    ClPooling2dWorkloadTest<armnn::DataType::Float16>(DataLayout::NHWC);
}
690 
691 static void ClCreatePreluWorkloadTest(const armnn::TensorShape& inputShape,
692  const armnn::TensorShape& alphaShape,
693  const armnn::TensorShape& outputShape,
694  armnn::DataType dataType)
695 {
696  Graph graph;
697  ClWorkloadFactory factory =
698  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
699 
700  auto workload = CreatePreluWorkloadTest<ClPreluWorkload>(factory,
701  graph,
702  inputShape,
703  alphaShape,
704  outputShape,
705  dataType);
706 
707  // Checks that outputs and inputs are as we expect them (see definition of CreatePreluWorkloadTest).
708  PreluQueueDescriptor queueDescriptor = workload->GetData();
709  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
710  auto alphaHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]);
711  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
712 
713  CHECK((inputHandle->GetShape() == inputShape));
714  CHECK((alphaHandle->GetShape() == alphaShape));
715  CHECK((outputHandle->GetShape() == outputShape));
716 }
717 
// PReLU creation tests for Float16, Float32 and QAsymmU8; the alpha tensor
// broadcasts against the input to produce the 5x4x3x2 output.
TEST_CASE_FIXTURE(ClContextControlFixture, "CreatePreluFloat16Workload")
{
    ClCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::Float16);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreatePreluFloatWorkload")
{
    ClCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::Float32);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreatePreluUint8Workload")
{
    ClCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::QAsymmU8);
}
732 
733 template <typename armnn::DataType DataType>
734 static void ClCreateReshapeWorkloadTest()
735 {
736  Graph graph;
737  ClWorkloadFactory factory =
738  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
739 
740  auto workload = CreateReshapeWorkloadTest<ClReshapeWorkload, DataType>(factory, graph);
741 
742  // Checks that outputs and inputs are as we expect them (see definition of CreateReshapeWorkloadTest).
743  ReshapeQueueDescriptor queueDescriptor = workload->GetData();
744  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
745  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
746 
747  auto predResult = CompareIClTensorHandleShape(inputHandle, {4, 1});
748  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
749  predResult = CompareIClTensorHandleShape(outputHandle, {1, 4});
750  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
751 }
752 
// Reshape creation tests for Float32, Float16 and QAsymmU8.
TEST_CASE_FIXTURE(ClContextControlFixture, "CreateReshapeFloatWorkload")
{
    ClCreateReshapeWorkloadTest<armnn::DataType::Float32>();
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateReshapeFloat16Workload")
{
    ClCreateReshapeWorkloadTest<armnn::DataType::Float16>();
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateReshapeUint8Workload")
{
    ClCreateReshapeWorkloadTest<armnn::DataType::QAsymmU8>();
}
767 
768 template <typename SoftmaxWorkloadType, typename armnn::DataType DataType>
769 static void ClSoftmaxWorkloadTest()
770 {
771  Graph graph;
772  ClWorkloadFactory factory =
773  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
774 
775  auto workload = CreateSoftmaxWorkloadTest<SoftmaxWorkloadType, DataType>(factory, graph);
776 
777  // Checks that inputs/outputs are as we expect them (see definition of ClSoftmaxFloatWorkload).
778  SoftmaxQueueDescriptor queueDescriptor = workload->GetData();
779  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
780  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
781 
782  armnn::TensorInfo tensorInfo({4, 1}, DataType);
784  {
785  tensorInfo.SetQuantizationOffset(0);
786  tensorInfo.SetQuantizationScale(1.f / 256);
787  }
789  {
790  tensorInfo.SetQuantizationOffset(-128);
791  tensorInfo.SetQuantizationScale(1.f / 256);
792  }
793 
794  auto predResult = CompareIClTensorHandleShape(inputHandle, {4, 1});
795  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
796  predResult = CompareIClTensorHandleShape(outputHandle, {4, 1});
797  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
798 }
799 
800 
// Softmax {4, 1} shape checks per data type; the quantized variants exercise the
// fixed softmax quantization parameters inside ClSoftmaxWorkloadTest.
TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSoftmaxFloat32WorkloadTest")
{
    ClSoftmaxWorkloadTest<ClSoftmaxWorkload, armnn::DataType::Float32>();
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSoftmaxFloat16WorkloadTest")
{
    ClSoftmaxWorkloadTest<ClSoftmaxWorkload, armnn::DataType::Float16>();
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSoftmaxQAsymmU8Workload")
{
    ClSoftmaxWorkloadTest<ClSoftmaxWorkload, armnn::DataType::QAsymmU8>();
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSoftmaxQAsymmS8Workload")
{
    ClSoftmaxWorkloadTest<ClSoftmaxWorkload, armnn::DataType::QAsymmS8>();
}
820 
821 template <typename armnn::DataType DataType>
822 static void ClSplitterWorkloadTest()
823 {
824  Graph graph;
825  ClWorkloadFactory factory =
826  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
827 
828  auto workload = CreateSplitterWorkloadTest<ClSplitterWorkload, DataType>(factory, graph);
829 
830  // Checks that outputs are as we expect them (see definition of CreateSplitterWorkloadTest).
831  SplitterQueueDescriptor queueDescriptor = workload->GetData();
832  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
833  auto predResult = CompareIClTensorHandleShape(inputHandle, {5, 7, 7});
834  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
835 
836  auto outputHandle1 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[1]);
837  predResult = CompareIClTensorHandleShape(outputHandle1, {2, 7, 7});
838  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
839 
840  auto outputHandle2 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[2]);
841  predResult = CompareIClTensorHandleShape(outputHandle2, {2, 7, 7});
842  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
843 
844  auto outputHandle0 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
845  predResult = CompareIClTensorHandleShape(outputHandle0, {1, 7, 7});
846  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
847 }
848 
// Splitter shape checks for the float data types.
TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSplitterFloatWorkload")
{
    ClSplitterWorkloadTest<armnn::DataType::Float32>();
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSplitterFloat16Workload")
{
    ClSplitterWorkloadTest<armnn::DataType::Float16>();
}
858 
859 template <typename armnn::DataType DataType>
860 static void ClSplitterConcatTest()
861 {
862  // Tests that it is possible to decide which output of the splitter layer
863  // should be lined to which input of the concat layer.
864  // We test that is is possible to specify 0th output
865  // of the splitter to be the 1st input to the concat and the 1st output of the splitter to be 0th input
866  // of the concat.
867 
868  Graph graph;
869  ClWorkloadFactory factory =
870  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
871 
872  auto workloads =
873  CreateSplitterConcatWorkloadTest<ClSplitterWorkload, ClConcatWorkload, DataType>
874  (factory, graph);
875 
876  auto wlSplitter = std::move(workloads.first);
877  auto wlConcat = std::move(workloads.second);
878 
879  //Checks that the index of inputs/outputs matches what we declared on InputDescriptor construction.
880  armnn::ClSubTensorHandle* sOut0 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[0]);
881  armnn::ClSubTensorHandle* sOut1 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[1]);
882  armnn::ClSubTensorHandle* mIn0 = dynamic_cast<armnn::ClSubTensorHandle*>(wlConcat->GetData().m_Inputs[0]);
883  armnn::ClSubTensorHandle* mIn1 = dynamic_cast<armnn::ClSubTensorHandle*>(wlConcat->GetData().m_Inputs[1]);
884 
885  CHECK(sOut0);
886  CHECK(sOut1);
887  CHECK(mIn0);
888  CHECK(mIn1);
889 
890  //Fliped order of inputs/outputs.
891  bool validDataPointers = (sOut0 == mIn1) && (sOut1 == mIn0);
892  CHECK(validDataPointers);
893 
894 
895  //Also make sure that the inputs are subtensors of one tensor and outputs are sub tensors of another tensor.
896  bool validSubTensorParents = (mIn0->GetTensor().parent() == mIn1->GetTensor().parent())
897  && (sOut0->GetTensor().parent() == sOut1->GetTensor().parent());
898 
899  CHECK(validSubTensorParents);
900 }
901 
// Splitter-to-concat wiring checks for the float data types.
TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSplitterConcatFloatWorkload")
{
    ClSplitterConcatTest<armnn::DataType::Float32>();
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSplitterConcatFloat16Workload")
{
    ClSplitterConcatTest<armnn::DataType::Float16>();
}
911 
912 
TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSingleOutputMultipleInputs")
{
    // Test that it is possible to assign multiple (two) different layers to each of
    // the outputs of a splitter layer. We create a splitter with two outputs; each
    // of those outputs is then used by two different activation layers.

    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    std::unique_ptr<ClSplitterWorkload> wlSplitter;
    std::unique_ptr<ClActivationWorkload> wlActiv0_0;
    std::unique_ptr<ClActivationWorkload> wlActiv0_1;
    std::unique_ptr<ClActivationWorkload> wlActiv1_0;
    std::unique_ptr<ClActivationWorkload> wlActiv1_1;

    // The helper populates the splitter workload and the four activation workloads.
    CreateSplitterMultipleInputsOneOutputWorkloadTest<ClSplitterWorkload,
        ClActivationWorkload, armnn::DataType::Float32>(factory, graph, wlSplitter, wlActiv0_0, wlActiv0_1,
                                                        wlActiv1_0, wlActiv1_1);

    // Checks that the index of inputs/outputs matches what we declared on InputDescriptor construction.
    armnn::ClSubTensorHandle* sOut0 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[0]);
    armnn::ClSubTensorHandle* sOut1 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[1]);
    armnn::ClSubTensorHandle* activ0_0Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv0_0->GetData().m_Inputs[0]);
    armnn::ClSubTensorHandle* activ0_1Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv0_1->GetData().m_Inputs[0]);
    armnn::ClSubTensorHandle* activ1_0Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv1_0->GetData().m_Inputs[0]);
    armnn::ClSubTensorHandle* activ1_1Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv1_1->GetData().m_Inputs[0]);

    CHECK(sOut0);
    CHECK(sOut1);
    CHECK(activ0_0Im);
    CHECK(activ0_1Im);
    CHECK(activ1_0Im);
    CHECK(activ1_1Im);

    // Each splitter output handle must be shared by both of its activation consumers.
    bool validDataPointers = (sOut0 == activ0_0Im) && (sOut0 == activ0_1Im) &&
                             (sOut1 == activ1_0Im) && (sOut1 == activ1_1Im);

    CHECK(validDataPointers);
}
953 
#if defined(ARMNNREF_ENABLED)

// This test unit needs the reference backend; it's not available if the reference
// backend is not built. CreateMemCopyWorkloads copies tensors between the CL
// backend and the reference (CPU) backend, hence the guard.

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateMemCopyWorkloadsCl")
{
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    CreateMemCopyWorkloads<IClTensorHandle>(factory);
}

#endif
967 
968 template <typename L2NormalizationWorkloadType, typename armnn::DataType DataType>
969 static void ClL2NormalizationWorkloadTest(DataLayout dataLayout)
970 {
971  Graph graph;
972  ClWorkloadFactory factory =
973  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
974 
975  auto workload =
976  CreateL2NormalizationWorkloadTest<L2NormalizationWorkloadType, DataType>(factory, graph, dataLayout);
977 
978  // Checks that inputs/outputs are as we expect them (see definition of CreateNormalizationWorkloadTest).
979  L2NormalizationQueueDescriptor queueDescriptor = workload->GetData();
980  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
981  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
982 
983  TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 5, 20, 50, 67 })
984  : std::initializer_list<unsigned int>({ 5, 50, 67, 20 });
985  TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 5, 20, 50, 67 })
986  : std::initializer_list<unsigned int>({ 5, 50, 67, 20 });
987 
988  CHECK((inputHandle->GetShape() == inputShape));
989  CHECK((outputHandle->GetShape() == outputShape));
990 }
991 
// L2 normalization shape checks across data types and layouts.
TEST_CASE_FIXTURE(ClContextControlFixture, "CreateL2NormalizationFloatNchwWorkload")
{
    ClL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateL2NormalizationFloatNhwcWorkload")
{
    ClL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateL2NormalizationFloat16NchwWorkload")
{
    ClL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float16>(DataLayout::NCHW);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateL2NormalizationFloat16NhwcWorkload")
{
    ClL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
}
1011 
1012 template <typename LogSoftmaxWorkloadType, typename armnn::DataType DataType>
1013 static void ClCreateLogSoftmaxWorkloadTest()
1014 {
1015  Graph graph;
1016  ClWorkloadFactory factory =
1017  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
1018 
1019  auto workload = CreateLogSoftmaxWorkloadTest<LogSoftmaxWorkloadType, DataType>(factory, graph);
1020 
1021  // Checks that outputs and inputs are as we expect them (see definition of CreateLogSoftmaxWorkloadTest).
1022  LogSoftmaxQueueDescriptor queueDescriptor = workload->GetData();
1023  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
1024  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
1025 
1026  auto predResult = CompareIClTensorHandleShape(inputHandle, {4, 1});
1027  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
1028  predResult = CompareIClTensorHandleShape(outputHandle, {4, 1});
1029  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
1030 }
1031 
// Log-softmax shape check, Float32 only.
TEST_CASE_FIXTURE(ClContextControlFixture, "CreateLogSoftmaxFloat32WorkloadTest")
{
    ClCreateLogSoftmaxWorkloadTest<ClLogSoftmaxWorkload, armnn::DataType::Float32>();
}
1036 
1037 template <typename LstmWorkloadType>
1038 static void ClCreateLstmWorkloadTest()
1039 {
1040  Graph graph;
1041  ClWorkloadFactory factory =
1042  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
1043 
1044  auto workload = CreateLstmWorkloadTest<LstmWorkloadType>(factory, graph);
1045 
1046  LstmQueueDescriptor queueDescriptor = workload->GetData();
1047  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
1048  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[1]);
1049  auto predResult = CompareIClTensorHandleShape(inputHandle, {2, 2});
1050  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
1051  predResult = CompareIClTensorHandleShape(outputHandle, {2, 4});
1052  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
1053 }
1054 
// LSTM shape check with the float workload.
TEST_CASE_FIXTURE(ClContextControlFixture, "CreateLSTMWorkloadFloatWorkload")
{
    ClCreateLstmWorkloadTest<ClLstmFloatWorkload>();
}
1059 
1060 template <typename ResizeWorkloadType, typename armnn::DataType DataType>
1061 static void ClResizeWorkloadTest(DataLayout dataLayout)
1062 {
1063  Graph graph;
1064  ClWorkloadFactory factory =
1065  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
1066 
1067  auto workload = CreateResizeBilinearWorkloadTest<ResizeWorkloadType, DataType>(factory, graph, dataLayout);
1068 
1069  auto queueDescriptor = workload->GetData();
1070 
1071  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
1072  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
1073 
1074  armnn::PredicateResult predResult(true);
1075  switch (dataLayout)
1076  {
1077  case DataLayout::NHWC:
1078  predResult = CompareIClTensorHandleShape(inputHandle, { 2, 4, 4, 3 });
1079  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
1080  predResult = CompareIClTensorHandleShape(outputHandle, { 2, 2, 2, 3 });
1081  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
1082  break;
1083  default: // DataLayout::NCHW
1084  predResult = CompareIClTensorHandleShape(inputHandle, { 2, 3, 4, 4 });
1085  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
1086  predResult = CompareIClTensorHandleShape(outputHandle, { 2, 3, 2, 2 });
1087  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
1088  }
1089 }
1090 
// Resize-bilinear shape checks across data types and layouts.
TEST_CASE_FIXTURE(ClContextControlFixture, "CreateResizeFloat32NchwWorkload")
{
    ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateResizeFloat16NchwWorkload")
{
    ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::Float16>(DataLayout::NCHW);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateResizeUint8NchwWorkload")
{
    ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::QAsymmU8>(DataLayout::NCHW);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateResizeFloat32NhwcWorkload")
{
    ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateResizeFloat16NhwcWorkload")
{
    ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateResizeUint8NhwcWorkload")
{
    ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::QAsymmU8>(DataLayout::NHWC);
}
1120 
1121 template <typename MeanWorkloadType, typename armnn::DataType DataType>
1122 static void ClMeanWorkloadTest()
1123 {
1124  Graph graph;
1125  ClWorkloadFactory factory =
1126  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
1127 
1128  auto workload = CreateMeanWorkloadTest<MeanWorkloadType, DataType>(factory, graph);
1129 
1130  // Checks that inputs/outputs are as we expect them (see definition of CreateMeanWorkloadTest).
1131  MeanQueueDescriptor queueDescriptor = workload->GetData();
1132  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
1133  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
1134 
1135  // The first dimension (batch size) in both input and output is singular thus it has been reduced by ACL.
1136  auto predResult = CompareIClTensorHandleShape(inputHandle, { 1, 3, 7, 4 });
1137  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
1138  predResult = CompareIClTensorHandleShape(outputHandle, { 1, 4 });
1139  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
1140 }
1141 
// Mean-reduction shape checks per data type.
TEST_CASE_FIXTURE(ClContextControlFixture, "CreateMeanFloat32Workload")
{
    ClMeanWorkloadTest<ClMeanWorkload, armnn::DataType::Float32>();
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateMeanFloat16Workload")
{
    ClMeanWorkloadTest<ClMeanWorkload, armnn::DataType::Float16>();
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateMeanUint8Workload")
{
    ClMeanWorkloadTest<ClMeanWorkload, armnn::DataType::QAsymmU8>();
}
1156 
1157 template <typename ConcatWorkloadType, armnn::DataType DataType>
1158 static void ClCreateConcatWorkloadTest(std::initializer_list<unsigned int> outputShape,
1159  unsigned int concatAxis)
1160 {
1161  Graph graph;
1162  ClWorkloadFactory factory =
1163  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
1164 
1165  auto workload = CreateConcatWorkloadTest<ConcatWorkloadType, DataType>(factory, graph, outputShape, concatAxis);
1166 
1167  ConcatQueueDescriptor queueDescriptor = workload->GetData();
1168  auto inputHandle0 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
1169  auto inputHandle1 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]);
1170  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
1171 
1172  auto predResult = CompareIClTensorHandleShape(inputHandle0, { 2, 3, 2, 5 });
1173  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
1174  predResult = CompareIClTensorHandleShape(inputHandle1, { 2, 3, 2, 5 });
1175  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
1176  predResult = CompareIClTensorHandleShape(outputHandle, outputShape);
1177  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
1178 }
1179 
// Concat shape checks: two { 2, 3, 2, 5 } inputs joined along axes 0, 1 and 3,
// for Float32 and QAsymmU8.
TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConcatDim0Float32Workload")
{
    ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::Float32>({ 4, 3, 2, 5 }, 0);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConcatDim1Float32Workload")
{
    ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::Float32>({ 2, 6, 2, 5 }, 1);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConcatDim3Float32Workload")
{
    ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::Float32>({ 2, 3, 2, 10 }, 3);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConcatDim0Uint8Workload")
{
    ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::QAsymmU8>({ 4, 3, 2, 5 }, 0);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConcatDim1Uint8Workload")
{
    ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::QAsymmU8>({ 2, 6, 2, 5 }, 1);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConcatDim3Uint8Workload")
{
    ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::QAsymmU8>({ 2, 3, 2, 10 }, 3);
}
1209 
1210 template <typename SpaceToDepthWorkloadType, typename armnn::DataType DataType>
1211 static void ClSpaceToDepthWorkloadTest()
1212 {
1213  Graph graph;
1214  ClWorkloadFactory factory =
1215  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
1216 
1217  auto workload = CreateSpaceToDepthWorkloadTest<SpaceToDepthWorkloadType, DataType>(factory, graph);
1218 
1219  SpaceToDepthQueueDescriptor queueDescriptor = workload->GetData();
1220  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
1221  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
1222 
1223  auto predResult = CompareIClTensorHandleShape(inputHandle, { 1, 2, 2, 1 });
1224  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
1225  predResult = CompareIClTensorHandleShape(outputHandle, { 1, 1, 1, 4 });
1226  CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
1227 }
1228 
// Space-to-depth shape checks per data type.
TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSpaceToDepthFloat32Workload")
{
    ClSpaceToDepthWorkloadTest<ClSpaceToDepthWorkload, armnn::DataType::Float32>();
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSpaceToDepthFloat16Workload")
{
    ClSpaceToDepthWorkloadTest<ClSpaceToDepthWorkload, armnn::DataType::Float16>();
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSpaceToDepthQAsymm8Workload")
{
    ClSpaceToDepthWorkloadTest<ClSpaceToDepthWorkload, armnn::DataType::QAsymmU8>();
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSpaceToDepthQSymm16Workload")
{
    ClSpaceToDepthWorkloadTest<ClSpaceToDepthWorkload, armnn::DataType::QSymmS16>();
}
1248 
1249 template <armnn::DataType DataType>
1250 static void ClCreateStackWorkloadTest(const std::initializer_list<unsigned int>& inputShape,
1251  const std::initializer_list<unsigned int>& outputShape,
1252  unsigned int axis,
1253  unsigned int numInputs)
1254 {
1255  armnn::Graph graph;
1256  ClWorkloadFactory factory =
1257  ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
1258 
1259  auto workload = CreateStackWorkloadTest<ClStackWorkload, DataType>(factory,
1260  graph,
1261  TensorShape(inputShape),
1262  TensorShape(outputShape),
1263  axis,
1264  numInputs);
1265 
1266  // Check inputs and output are as expected
1267  StackQueueDescriptor queueDescriptor = workload->GetData();
1268  for (unsigned int i = 0; i < numInputs; ++i)
1269  {
1270  auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[i]);
1271  auto predResult1 = CompareIClTensorHandleShape(inputHandle, inputShape);
1272  CHECK_MESSAGE(predResult1.m_Result, predResult1.m_Message.str());
1273  }
1274  auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
1275  auto predResult2 = CompareIClTensorHandleShape(outputHandle, outputShape);
1276  CHECK_MESSAGE(predResult2.m_Result, predResult2.m_Message.str());
1277 }
1278 
// Stack two { 3, 4, 5 } tensors along axis 2 into { 3, 4, 2, 5 }, per data type.
TEST_CASE_FIXTURE(ClContextControlFixture, "CreateStackFloat32Workload")
{
    ClCreateStackWorkloadTest<armnn::DataType::Float32>({ 3, 4, 5 }, { 3, 4, 2, 5 }, 2, 2);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateStackFloat16Workload")
{
    ClCreateStackWorkloadTest<armnn::DataType::Float16>({ 3, 4, 5 }, { 3, 4, 2, 5 }, 2, 2);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateStackUint8Workload")
{
    ClCreateStackWorkloadTest<armnn::DataType::QAsymmU8>({ 3, 4, 5 }, { 3, 4, 2, 5 }, 2, 2);
}
1293 
1294 
1295 template <typename QLstmWorkloadType>
1296 static void ClCreateQLstmWorkloadTest()
1297 {
1298  Graph graph;
1299  ClWorkloadFactory factory = ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
1300 
1301  auto workload = CreateQLstmWorkloadTest<QLstmWorkloadType>(factory, graph);
1302  QLstmQueueDescriptor queueDescriptor = workload->GetData();
1303 
1304  IAclTensorHandle* inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
1305  CHECK((inputHandle->GetShape() == TensorShape({2, 4})));
1306  CHECK((inputHandle->GetDataType() == arm_compute::DataType::QASYMM8_SIGNED));
1307 
1308  IAclTensorHandle* cellStateOutHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
1309  CHECK((cellStateOutHandle->GetShape() == TensorShape({2, 4})));
1310  CHECK((cellStateOutHandle->GetDataType() == arm_compute::DataType::QSYMM16));
1311 
1312  IAclTensorHandle* outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[2]);
1313  CHECK((outputHandle->GetShape() == TensorShape({2, 4})));
1314  CHECK((outputHandle->GetDataType() == arm_compute::DataType::QASYMM8_SIGNED));
1315 }
1316 
// QLSTM shape and data-type checks.
TEST_CASE_FIXTURE(ClContextControlFixture, "CreateQLstmWorkloadTest")
{
    ClCreateQLstmWorkloadTest<ClQLstmWorkload>();
}
1321 
// Creates a quantized LSTM workload and verifies shape and Arm Compute data type
// of every input and output tensor handle.
template <typename QuantizedLstmWorkloadType>
static void ClCreateQuantizedLstmWorkloadTest()
{
    using namespace armnn::armcomputetensorutils;

    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateQuantizedLstmWorkloadTest<QuantizedLstmWorkloadType>(factory, graph);

    QuantizedLstmQueueDescriptor queueDescriptor = workload->GetData();

    // Input: {2, 2}, QASYMM8.
    IAclTensorHandle* inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
    CHECK((inputHandle->GetShape() == TensorShape({2, 2})));
    CHECK((inputHandle->GetDataType() == arm_compute::DataType::QASYMM8));

    // Cell state in: {2, 4}, QSYMM16.
    IAclTensorHandle* cellStateInHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
    CHECK((cellStateInHandle->GetShape() == TensorShape({2, 4})));
    CHECK((cellStateInHandle->GetDataType() == arm_compute::DataType::QSYMM16));

    // Output state in: {2, 4}, QASYMM8.
    IAclTensorHandle* outputStateInHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[2]);
    CHECK((outputStateInHandle->GetShape() == TensorShape({2, 4})));
    CHECK((outputStateInHandle->GetDataType() == arm_compute::DataType::QASYMM8));

    // Cell state out: {2, 4}, QSYMM16.
    IAclTensorHandle* cellStateOutHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
    CHECK((cellStateOutHandle->GetShape() == TensorShape({2, 4})));
    CHECK((cellStateOutHandle->GetDataType() == arm_compute::DataType::QSYMM16));

    // Output state out: {2, 4}, QASYMM8.
    IAclTensorHandle* outputStateOutHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
    CHECK((outputStateOutHandle->GetShape() == TensorShape({2, 4})));
    CHECK((outputStateOutHandle->GetDataType() == arm_compute::DataType::QASYMM8));
}
1355 
// Quantized LSTM shape and data-type checks.
TEST_CASE_FIXTURE(ClContextControlFixture, "CreateQuantizedLstmWorkload")
{
    ClCreateQuantizedLstmWorkloadTest<ClQuantizedLstmWorkload>();
}
1360 
// Verifies that ClActivationWorkload does not support swapping its tensor handles
// after creation: ReplaceInputTensorHandle / ReplaceOutputTensorHandle must throw
// UnimplementedException.
template <armnn::DataType DataType>
static void ClCreateActivationWorkloadReplaceFunctionsTest()
{
    std::shared_ptr<ClMemoryManager> memoryManager = std::make_shared<ClMemoryManager>(
            std::make_unique<arm_compute::CLBufferAllocator>());

    Graph graph;
    ClWorkloadFactory factory = ClWorkloadFactoryHelper::GetFactory(memoryManager);
    // input and output are created as armnn::TensorInfo tensorInfo({1, 1}, DataType)
    auto workloadPtr = CreateActivationWorkloadTest<ClActivationWorkload, DataType>(factory, graph);

    // New input and output tensor handles are created here and then offered to the
    // workload as replacements.
    const ClTensorHandleFactory tensorHandleFactory(memoryManager);
    TensorInfo inputInfo({2 , 2}, DataType::Float16);
    TensorInfo outputInfo({2 , 2}, DataType::Float16);
    unique_ptr<ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputInfo, true);
    inputHandle->Manage();
    inputHandle->Allocate();
    unique_ptr<ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputInfo, true);
    outputHandle->Manage();
    outputHandle->Allocate();

    unsigned int slot = 0;
    // Replacement is unimplemented for this workload, so both calls must throw.
    CHECK_THROWS_AS(workloadPtr->ReplaceInputTensorHandle(inputHandle.get(), slot), UnimplementedException);
    CHECK_THROWS_AS(workloadPtr->ReplaceOutputTensorHandle(outputHandle.get(), slot), UnimplementedException);
}
1387 
// Handle-replacement must throw for the activation workload (Float32 instantiation).
TEST_CASE("ClReplaceFunctionsfromFloat32toFloat16ActivationWorkload")
{
    ClCreateActivationWorkloadReplaceFunctionsTest<armnn::DataType::Float32>();
}
1392 
1393 }
uint32_t m_PadBottom
Padding bottom value in the height dimension.
bool m_BiasEnabled
Enable/disable bias.
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
DataLayout
Definition: Types.hpp:49
armnn::PredicateResult CompareIClTensorHandleShape(IClTensorHandle *tensorHandle, std::initializer_list< unsigned int > expectedDimensions)
TEST_SUITE("CreateWorkloadCl")
std::vector< BackendOptions > ModelOptions
A Convolution2dDescriptor for the Convolution2dLayer.
uint32_t m_PadRight
Padding right value in the width dimension.
Copyright (c) 2021 ARM Limited and Contributors.
void IgnoreUnused(Ts &&...)
void AllocateAndCopyDataToITensorHandle(armnn::ITensorHandle *tensorHandle, const void *memory)
virtual arm_compute::DataType GetDataType() const =0
uint32_t m_PadTop
Padding top value in the height dimension.
TEST_CASE_FIXTURE(ClContextControlFixture, "CopyBetweenNeonAndGpu")
uint32_t m_StrideX
Stride value when proceeding through input for the width dimension.
DataType
Definition: Types.hpp:35
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
This factory creates ClImportTensorHandles that refer to imported memory tensors. ...
virtual TensorShape GetShape() const =0
Get the number of elements for each dimension ordered from slowest iterating dimension to fastest ite...
uint32_t m_StrideY
Stride value when proceeding through input for the height dimension.
This layer represents an addition operation.
Struct for the users to pass backend specific options.
This layer represents a subtraction operation.
std::vector< ITensorHandle * > m_Outputs
This layer represents a division operation.
UnaryOperation
Definition: Types.hpp:111
Contains information about TensorInfos of a layer.
void SetQuantizationOffset(int32_t offset)
Definition: Tensor.cpp:491
std::vector< ITensorHandle * > m_Inputs
std::unique_ptr< ITensorHandle > CreateTensorHandle(const TensorInfo &tensorInfo) const override
This layer represents a multiplication operation.
arm_compute::CLSubTensor & GetTensor() override
Depthwise Convolution 2D layer workload data.
uint32_t m_PadLeft
Padding left value in the width dimension.
LayerType
When adding a new layer, adapt also the LastLayer enum value in the enum class LayerType below...
Definition: Types.hpp:458