ArmNN 20.11: ClCreateWorkloadTests.cpp
//
// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include <cl/ClTensorHandle.hpp>
#include <cl/ClWorkloadFactory.hpp>

boost::test_tools::predicate_result CompareIClTensorHandleShape(IClTensorHandle* tensorHandle,
                                                                std::initializer_list<unsigned int> expectedDimensions)
{
    return CompareTensorHandleShape<IClTensorHandle>(tensorHandle, expectedDimensions);
}

BOOST_FIXTURE_TEST_SUITE(CreateWorkloadCl, ClContextControlFixture)

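// The tests below all follow the same pattern: build a Graph and a ClWorkloadFactory, create the
// workload through the shared Create*WorkloadTest helper, downcast the queue descriptor's tensor
// handles to IClTensorHandle, and check that their shapes match what the helper set up.
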
template <armnn::DataType DataType>
static void ClCreateActivationWorkloadTest()
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateActivationWorkloadTest<ClActivationWorkload, DataType>(factory, graph);

    // Checks that inputs/outputs are as we expect them (see definition of CreateActivationWorkloadTest).
    ActivationQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {1, 1}));
    BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {1, 1}));
}

BOOST_AUTO_TEST_CASE(CreateActivationFloatWorkload)
{
    ClCreateActivationWorkloadTest<armnn::DataType::Float32>();
}

BOOST_AUTO_TEST_CASE(CreateActivationFloat16Workload)
{
    ClCreateActivationWorkloadTest<armnn::DataType::Float16>();
}

template <typename WorkloadType,
          typename DescriptorType,
          typename LayerType,
          armnn::DataType DataType>
static void ClCreateElementwiseWorkloadTest()
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateElementwiseWorkloadTest<WorkloadType, DescriptorType, LayerType, DataType>(factory, graph);

    // Checks that inputs/outputs are as we expect them (see definition of CreateElementwiseWorkloadTest).
    DescriptorType queueDescriptor = workload->GetData();
    auto inputHandle1 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto inputHandle2 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
    BOOST_TEST(CompareIClTensorHandleShape(inputHandle1, {2, 3}));
    BOOST_TEST(CompareIClTensorHandleShape(inputHandle2, {2, 3}));
    BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {2, 3}));
}

BOOST_AUTO_TEST_CASE(CreateAdditionFloatWorkload)
{
    ClCreateElementwiseWorkloadTest<ClAdditionWorkload,
                                    AdditionQueueDescriptor,
                                    AdditionLayer,
                                    armnn::DataType::Float32>();
}

BOOST_AUTO_TEST_CASE(CreateAdditionFloat16Workload)
{
    ClCreateElementwiseWorkloadTest<ClAdditionWorkload,
                                    AdditionQueueDescriptor,
                                    AdditionLayer,
                                    armnn::DataType::Float16>();
}

BOOST_AUTO_TEST_CASE(CreateSubtractionFloatWorkload)
{
    ClCreateElementwiseWorkloadTest<ClSubtractionWorkload,
                                    SubtractionQueueDescriptor,
                                    SubtractionLayer,
                                    armnn::DataType::Float32>();
}

BOOST_AUTO_TEST_CASE(CreateSubtractionFloat16Workload)
{
    ClCreateElementwiseWorkloadTest<ClSubtractionWorkload,
                                    SubtractionQueueDescriptor,
                                    SubtractionLayer,
                                    armnn::DataType::Float16>();
}

BOOST_AUTO_TEST_CASE(CreateMultiplicationFloatWorkloadTest)
{
    ClCreateElementwiseWorkloadTest<ClMultiplicationWorkload,
                                    MultiplicationQueueDescriptor,
                                    MultiplicationLayer,
                                    armnn::DataType::Float32>();
}

BOOST_AUTO_TEST_CASE(CreateMultiplicationFloat16WorkloadTest)
{
    ClCreateElementwiseWorkloadTest<ClMultiplicationWorkload,
                                    MultiplicationQueueDescriptor,
                                    MultiplicationLayer,
                                    armnn::DataType::Float16>();
}

BOOST_AUTO_TEST_CASE(CreateMultiplicationUint8WorkloadTest)
{
    ClCreateElementwiseWorkloadTest<ClMultiplicationWorkload,
                                    MultiplicationQueueDescriptor,
                                    MultiplicationLayer,
                                    armnn::DataType::QAsymmU8>();
}

BOOST_AUTO_TEST_CASE(CreateDivisionFloatWorkloadTest)
{
    ClCreateElementwiseWorkloadTest<ClDivisionFloatWorkload,
                                    DivisionQueueDescriptor,
                                    DivisionLayer,
                                    armnn::DataType::Float32>();
}

BOOST_AUTO_TEST_CASE(CreateDivisionFloat16WorkloadTest)
{
    ClCreateElementwiseWorkloadTest<ClDivisionFloatWorkload,
                                    DivisionQueueDescriptor,
                                    DivisionLayer,
                                    armnn::DataType::Float16>();
}

template <typename WorkloadType,
          typename DescriptorType,
          armnn::DataType DataType>
static void ClCreateElementwiseUnaryWorkloadTest(armnn::UnaryOperation op)
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateElementwiseUnaryWorkloadTest<WorkloadType, DescriptorType, DataType>(factory, graph, op);

    DescriptorType queueDescriptor = workload->GetData();

    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {2, 3}));
    BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {2, 3}));
}

BOOST_AUTO_TEST_CASE(CreateRsqrtFloat32WorkloadTest)
{
    ClCreateElementwiseUnaryWorkloadTest<ClRsqrtWorkload, RsqrtQueueDescriptor, armnn::DataType::Float32>(
        UnaryOperation::Rsqrt);
}

template <typename BatchNormalizationWorkloadType, armnn::DataType DataType>
static void ClCreateBatchNormalizationWorkloadTest(DataLayout dataLayout)
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateBatchNormalizationWorkloadTest<BatchNormalizationWorkloadType, DataType>
                    (factory, graph, dataLayout);

    // Checks that inputs/outputs are as we expect them (see definition of CreateBatchNormalizationWorkloadTest).
    BatchNormalizationQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    switch (dataLayout)
    {
        case DataLayout::NHWC:
            BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 2, 4, 4, 3 }));
            BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 2, 4, 4, 3 }));
            break;
        default: // NCHW
            BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 2, 3, 4, 4 }));
            BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 2, 3, 4, 4 }));
    }
}

BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloatNchwWorkload)
{
    ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload,
                                           armnn::DataType::Float32>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloat16NchwWorkload)
{
    ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload,
                                           armnn::DataType::Float16>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloatNhwcWorkload)
{
    ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload,
                                           armnn::DataType::Float32>(DataLayout::NHWC);
}

BOOST_AUTO_TEST_CASE(CreateBatchNormalizationNhwcFloat16NhwcWorkload)
{
    ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload,
                                           armnn::DataType::Float16>(DataLayout::NHWC);
}

BOOST_AUTO_TEST_CASE(CreateConvertFp16ToFp32Workload)
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateConvertFp16ToFp32WorkloadTest<ClConvertFp16ToFp32Workload>(factory, graph);

    ConvertFp16ToFp32QueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {1, 3, 2, 3}));
    BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {1, 3, 2, 3}));
    BOOST_TEST((inputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F16));
    BOOST_TEST((outputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F32));
}

BOOST_AUTO_TEST_CASE(CreateConvertFp32ToFp16Workload)
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateConvertFp32ToFp16WorkloadTest<ClConvertFp32ToFp16Workload>(factory, graph);

    ConvertFp32ToFp16QueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {1, 3, 2, 3}));
    BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {1, 3, 2, 3}));
    BOOST_TEST((inputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F32));
    BOOST_TEST((outputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F16));
}

template <typename Convolution2dWorkloadType, typename armnn::DataType DataType>
static void ClConvolution2dWorkloadTest(DataLayout dataLayout)
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateConvolution2dWorkloadTest<ClConvolution2dWorkload, DataType>(factory,
                                                                                       graph,
                                                                                       dataLayout);

    TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({2, 3, 8, 16})
                                                               : std::initializer_list<unsigned int>({2, 8, 16, 3});
    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({2, 2, 2, 10})
                                                               : std::initializer_list<unsigned int>({2, 2, 10, 2});

    // Checks that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest).
    Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
    BOOST_TEST((inputHandle->GetShape() == inputShape));
    BOOST_TEST((outputHandle->GetShape() == outputShape));
}

BOOST_AUTO_TEST_CASE(CreateConvolution2dFloatNchwWorkload)
{
    ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateConvolution2dFloatNhwcWorkload)
{
    ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
}

BOOST_AUTO_TEST_CASE(CreateConvolution2dFloat16NchwWorkload)
{
    ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float16>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateConvolution2dFloat16NhwcWorkload)
{
    ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
}

BOOST_AUTO_TEST_CASE(CreateConvolution2dFastMathEnabledWorkload)
{
    Graph graph;

    using ModelOptions = std::vector<BackendOptions>;
    ModelOptions modelOptions = {};
    BackendOptions gpuAcc("GpuAcc",
    {
        { "FastMathEnabled", true }
    });
    modelOptions.push_back(gpuAcc);

    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager(), modelOptions);

    auto workload =
        CreateConvolution2dWorkloadFastMathTest<ClConvolution2dWorkload, armnn::DataType::Float32>(factory,
                                                                                                   graph,
                                                                                                   DataLayout::NCHW,
                                                                                                   modelOptions);

    ARMNN_ASSERT(workload != nullptr);
    auto conv2dWorkload = PolymorphicDowncast<ClConvolution2dWorkload*>(workload.get());
    IgnoreUnused(conv2dWorkload);
    ARMNN_ASSERT(conv2dWorkload != nullptr);
    ARMNN_ASSERT(conv2dWorkload->GetConvolutionMethod() == arm_compute::ConvolutionMethod::WINOGRAD);
}

template <typename DepthwiseConvolutionWorkloadType, typename armnn::DataType DataType>
static void ClDepthwiseConvolutionWorkloadTest(DataLayout dataLayout)
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateDepthwiseConvolution2dWorkloadTest<DepthwiseConvolutionWorkloadType, DataType>
                    (factory, graph, dataLayout);

    // Checks that inputs/outputs are as we expect them (see definition of CreateDepthwiseConvolution2dWorkloadTest).
    DepthwiseConvolution2dQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 2, 2, 5, 5 })
                                                               : std::initializer_list<unsigned int>({ 2, 5, 5, 2 });
    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 2, 2, 5, 5 })
                                                               : std::initializer_list<unsigned int>({ 2, 5, 5, 2 });

    BOOST_TEST((inputHandle->GetShape() == inputShape));
    BOOST_TEST((outputHandle->GetShape() == outputShape));
}

BOOST_AUTO_TEST_CASE(CreateDepthwiseConvolutionFloat32NhwcWorkload)
{
    ClDepthwiseConvolutionWorkloadTest<ClDepthwiseConvolutionWorkload, DataType::Float32>(DataLayout::NHWC);
}

template <typename Convolution2dWorkloadType, typename armnn::DataType DataType>
static void ClDirectConvolution2dWorkloadTest()
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, DataType>(factory, graph);

    // Checks that outputs and inputs are as we expect them (see definition of CreateDirectConvolution2dWorkloadTest).
    Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
    BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {2, 3, 6, 6}));
    BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {2, 2, 6, 6}));
}

BOOST_AUTO_TEST_CASE(CreateDirectConvolution2dFloatWorkload)
{
    ClDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float32>();
}

BOOST_AUTO_TEST_CASE(CreateDirectConvolution2dFloat16Workload)
{
    ClDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float16>();
}

BOOST_AUTO_TEST_CASE(CreateDirectConvolution2dUint8Workload)
{
    ClDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::QAsymmU8>();
}

template <typename FullyConnectedWorkloadType, typename armnn::DataType DataType>
static void ClCreateFullyConnectedWorkloadTest()
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload =
        CreateFullyConnectedWorkloadTest<FullyConnectedWorkloadType, DataType>(factory, graph);

    // Checks that outputs and inputs are as we expect them (see definition of CreateFullyConnectedWorkloadTest).
    FullyConnectedQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
    BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {3, 1, 4, 5}));
    BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {3, 7}));
}

BOOST_AUTO_TEST_CASE(CreateFullyConnectedFloatWorkloadTest)
{
    ClCreateFullyConnectedWorkloadTest<ClFullyConnectedWorkload, armnn::DataType::Float32>();
}

BOOST_AUTO_TEST_CASE(CreateFullyConnectedFloat16WorkloadTest)
{
    ClCreateFullyConnectedWorkloadTest<ClFullyConnectedWorkload, armnn::DataType::Float16>();
}

template <typename NormalizationWorkloadType, typename armnn::DataType DataType>
static void ClNormalizationWorkloadTest(DataLayout dataLayout)
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateNormalizationWorkloadTest<NormalizationWorkloadType, DataType>(factory, graph, dataLayout);

    // Checks that inputs/outputs are as we expect them (see definition of CreateNormalizationWorkloadTest).
    NormalizationQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({3, 5, 5, 1})
                                                               : std::initializer_list<unsigned int>({3, 1, 5, 5});
    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({3, 5, 5, 1})
                                                               : std::initializer_list<unsigned int>({3, 1, 5, 5});

    BOOST_TEST((inputHandle->GetShape() == inputShape));
    BOOST_TEST((outputHandle->GetShape() == outputShape));
}

BOOST_AUTO_TEST_CASE(CreateNormalizationFloat32NchwWorkload)
{
    ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateNormalizationFloat16NchwWorkload)
{
    ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float16>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateNormalizationFloat32NhwcWorkload)
{
    ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
}

BOOST_AUTO_TEST_CASE(CreateNormalizationFloat16NhwcWorkload)
{
    ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
}

template <typename armnn::DataType DataType>
static void ClPooling2dWorkloadTest(DataLayout dataLayout)
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreatePooling2dWorkloadTest<ClPooling2dWorkload, DataType>(factory, graph, dataLayout);

    TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({3, 2, 5, 5})
                                                               : std::initializer_list<unsigned int>({3, 5, 5, 2});
    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({3, 2, 2, 4})
                                                               : std::initializer_list<unsigned int>({3, 2, 4, 2});

    // Check that inputs/outputs are as we expect them (see definition of CreatePooling2dWorkloadTest).
    Pooling2dQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    BOOST_TEST((inputHandle->GetShape() == inputShape));
    BOOST_TEST((outputHandle->GetShape() == outputShape));
}

BOOST_AUTO_TEST_CASE(CreatePooling2dFloatNchwWorkload)
{
    ClPooling2dWorkloadTest<armnn::DataType::Float32>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreatePooling2dFloatNhwcWorkload)
{
    ClPooling2dWorkloadTest<armnn::DataType::Float32>(DataLayout::NHWC);
}

BOOST_AUTO_TEST_CASE(CreatePooling2dFloat16NchwWorkload)
{
    ClPooling2dWorkloadTest<armnn::DataType::Float16>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreatePooling2dFloat16NhwcWorkload)
{
    ClPooling2dWorkloadTest<armnn::DataType::Float16>(DataLayout::NHWC);
}

static void ClCreatePreluWorkloadTest(const armnn::TensorShape& inputShape,
                                      const armnn::TensorShape& alphaShape,
                                      const armnn::TensorShape& outputShape,
                                      armnn::DataType dataType)
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreatePreluWorkloadTest<ClPreluWorkload>(factory,
                                                             graph,
                                                             inputShape,
                                                             alphaShape,
                                                             outputShape,
                                                             dataType);

    // Checks that outputs and inputs are as we expect them (see definition of CreatePreluWorkloadTest).
    PreluQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto alphaHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    BOOST_TEST((inputHandle->GetShape() == inputShape));
    BOOST_TEST((alphaHandle->GetShape() == alphaShape));
    BOOST_TEST((outputHandle->GetShape() == outputShape));
}

BOOST_AUTO_TEST_CASE(CreatePreluFloat16Workload)
{
    ClCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::Float16);
}

BOOST_AUTO_TEST_CASE(CreatePreluFloatWorkload)
{
    ClCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::Float32);
}

BOOST_AUTO_TEST_CASE(CreatePreluUint8Workload)
{
    ClCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::QAsymmU8);
}

template <typename armnn::DataType DataType>
static void ClCreateReshapeWorkloadTest()
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateReshapeWorkloadTest<ClReshapeWorkload, DataType>(factory, graph);

    // Checks that outputs and inputs are as we expect them (see definition of CreateReshapeWorkloadTest).
    ReshapeQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {4, 1}));
    BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {1, 4}));
}

BOOST_AUTO_TEST_CASE(CreateReshapeFloatWorkload)
{
    ClCreateReshapeWorkloadTest<armnn::DataType::Float32>();
}

BOOST_AUTO_TEST_CASE(CreateReshapeFloat16Workload)
{
    ClCreateReshapeWorkloadTest<armnn::DataType::Float16>();
}

BOOST_AUTO_TEST_CASE(CreateReshapeUint8Workload)
{
    ClCreateReshapeWorkloadTest<armnn::DataType::QAsymmU8>();
}

template <typename SoftmaxWorkloadType, typename armnn::DataType DataType>
static void ClSoftmaxWorkloadTest()
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateSoftmaxWorkloadTest<SoftmaxWorkloadType, DataType>(factory, graph);

    // Checks that inputs/outputs are as we expect them (see definition of ClSoftmaxFloatWorkload).
    SoftmaxQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    armnn::TensorInfo tensorInfo({4, 1}, DataType);
    if (DataType == armnn::DataType::QAsymmU8)
    {
        tensorInfo.SetQuantizationOffset(0);
        tensorInfo.SetQuantizationScale(1.f / 256);
    }
    else if (DataType == armnn::DataType::QAsymmS8)
    {
        tensorInfo.SetQuantizationOffset(-128);
        tensorInfo.SetQuantizationScale(1.f / 256);
    }

    BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {4, 1}));
    BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {4, 1}));
}

BOOST_AUTO_TEST_CASE(CreateSoftmaxFloat32WorkloadTest)
{
    ClSoftmaxWorkloadTest<ClSoftmaxWorkload, armnn::DataType::Float32>();
}

BOOST_AUTO_TEST_CASE(CreateSoftmaxFloat16WorkloadTest)
{
    ClSoftmaxWorkloadTest<ClSoftmaxWorkload, armnn::DataType::Float16>();
}

BOOST_AUTO_TEST_CASE(CreateSoftmaxQAsymmU8Workload)
{
    ClSoftmaxWorkloadTest<ClSoftmaxWorkload, armnn::DataType::QAsymmU8>();
}

BOOST_AUTO_TEST_CASE(CreateSoftmaxQAsymmS8Workload)
{
    ClSoftmaxWorkloadTest<ClSoftmaxWorkload, armnn::DataType::QAsymmS8>();
}

template <typename armnn::DataType DataType>
static void ClSplitterWorkloadTest()
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateSplitterWorkloadTest<ClSplitterWorkload, DataType>(factory, graph);

    // Checks that outputs are as we expect them (see definition of CreateSplitterWorkloadTest).
    SplitterQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {5, 7, 7}));

    auto outputHandle1 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[1]);
    BOOST_TEST(CompareIClTensorHandleShape(outputHandle1, {2, 7, 7}));

    auto outputHandle2 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[2]);
    BOOST_TEST(CompareIClTensorHandleShape(outputHandle2, {2, 7, 7}));

    auto outputHandle0 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
    BOOST_TEST(CompareIClTensorHandleShape(outputHandle0, {1, 7, 7}));
}

BOOST_AUTO_TEST_CASE(CreateSplitterFloatWorkload)
{
    ClSplitterWorkloadTest<armnn::DataType::Float32>();
}

BOOST_AUTO_TEST_CASE(CreateSplitterFloat16Workload)
{
    ClSplitterWorkloadTest<armnn::DataType::Float16>();
}

template <typename armnn::DataType DataType>
static void ClSplitterConcatTest()
{
    // Tests that it is possible to decide which output of the splitter layer
    // should be linked to which input of the concat layer.
    // We test that it is possible to specify the 0th output of the splitter to be the 1st input
    // to the concat, and the 1st output of the splitter to be the 0th input of the concat.

    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workloads =
        CreateSplitterConcatWorkloadTest<ClSplitterWorkload, ClConcatWorkload, DataType>
        (factory, graph);

    auto wlSplitter = std::move(workloads.first);
    auto wlConcat   = std::move(workloads.second);

    // Checks that the index of inputs/outputs matches what we declared on InputDescriptor construction.
    armnn::ClSubTensorHandle* sOut0 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[0]);
    armnn::ClSubTensorHandle* sOut1 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[1]);
    armnn::ClSubTensorHandle* mIn0  = dynamic_cast<armnn::ClSubTensorHandle*>(wlConcat->GetData().m_Inputs[0]);
    armnn::ClSubTensorHandle* mIn1  = dynamic_cast<armnn::ClSubTensorHandle*>(wlConcat->GetData().m_Inputs[1]);

    BOOST_TEST(sOut0);
    BOOST_TEST(sOut1);
    BOOST_TEST(mIn0);
    BOOST_TEST(mIn1);

    // Flipped order of inputs/outputs.
    bool validDataPointers = (sOut0 == mIn1) && (sOut1 == mIn0);
    BOOST_TEST(validDataPointers);

    // Also make sure that the inputs are sub-tensors of one tensor and the outputs are sub-tensors of another tensor.
    bool validSubTensorParents = (mIn0->GetTensor().parent() == mIn1->GetTensor().parent())
                              && (sOut0->GetTensor().parent() == sOut1->GetTensor().parent());

    BOOST_TEST(validSubTensorParents);
}

BOOST_AUTO_TEST_CASE(CreateSplitterConcatFloatWorkload)
{
    ClSplitterConcatTest<armnn::DataType::Float32>();
}

BOOST_AUTO_TEST_CASE(CreateSplitterConcatFloat16Workload)
{
    ClSplitterConcatTest<armnn::DataType::Float16>();
}

BOOST_AUTO_TEST_CASE(CreateSingleOutputMultipleInputs)
{
    // Tests that it is possible to assign multiple (two) different layers to each of the outputs of a splitter layer.
    // We create a splitter with two outputs and check that each of those outputs is used by two
    // different activation layers.

    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    std::unique_ptr<ClSplitterWorkload>   wlSplitter;
    std::unique_ptr<ClActivationWorkload> wlActiv0_0;
    std::unique_ptr<ClActivationWorkload> wlActiv0_1;
    std::unique_ptr<ClActivationWorkload> wlActiv1_0;
    std::unique_ptr<ClActivationWorkload> wlActiv1_1;

    CreateSplitterMultipleInputsOneOutputWorkloadTest<ClSplitterWorkload,
        ClActivationWorkload, armnn::DataType::Float32>(factory, graph, wlSplitter, wlActiv0_0, wlActiv0_1,
                                                        wlActiv1_0, wlActiv1_1);

    // Checks that the index of inputs/outputs matches what we declared on InputDescriptor construction.
    armnn::ClSubTensorHandle* sOut0 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[0]);
    armnn::ClSubTensorHandle* sOut1 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[1]);
    armnn::ClSubTensorHandle* activ0_0Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv0_0->GetData().m_Inputs[0]);
    armnn::ClSubTensorHandle* activ0_1Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv0_1->GetData().m_Inputs[0]);
    armnn::ClSubTensorHandle* activ1_0Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv1_0->GetData().m_Inputs[0]);
    armnn::ClSubTensorHandle* activ1_1Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv1_1->GetData().m_Inputs[0]);

    BOOST_TEST(sOut0);
    BOOST_TEST(sOut1);
    BOOST_TEST(activ0_0Im);
    BOOST_TEST(activ0_1Im);
    BOOST_TEST(activ1_0Im);
    BOOST_TEST(activ1_1Im);

    bool validDataPointers = (sOut0 == activ0_0Im) && (sOut0 == activ0_1Im) &&
                             (sOut1 == activ1_0Im) && (sOut1 == activ1_1Im);

    BOOST_TEST(validDataPointers);
}

#if defined(ARMNNREF_ENABLED)

// This test unit needs the reference backend; it's not available if the reference backend is not built.

BOOST_AUTO_TEST_CASE(CreateMemCopyWorkloadsCl)
{
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    CreateMemCopyWorkloads<IClTensorHandle>(factory);
}

#endif

template <typename L2NormalizationWorkloadType, typename armnn::DataType DataType>
static void ClL2NormalizationWorkloadTest(DataLayout dataLayout)
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload =
        CreateL2NormalizationWorkloadTest<L2NormalizationWorkloadType, DataType>(factory, graph, dataLayout);

    // Checks that inputs/outputs are as we expect them (see definition of CreateL2NormalizationWorkloadTest).
    L2NormalizationQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 5, 20, 50, 67 })
                                                                : std::initializer_list<unsigned int>({ 5, 50, 67, 20 });
    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 5, 20, 50, 67 })
                                                                : std::initializer_list<unsigned int>({ 5, 50, 67, 20 });

    BOOST_TEST((inputHandle->GetShape() == inputShape));
    BOOST_TEST((outputHandle->GetShape() == outputShape));
}

BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloatNchwWorkload)
{
    ClL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloatNhwcWorkload)
{
    ClL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
}

BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloat16NchwWorkload)
{
    ClL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float16>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloat16NhwcWorkload)
{
    ClL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
}

template <typename LogSoftmaxWorkloadType, typename armnn::DataType DataType>
static void ClCreateLogSoftmaxWorkloadTest()
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateLogSoftmaxWorkloadTest<LogSoftmaxWorkloadType, DataType>(factory, graph);

    // Checks that outputs and inputs are as we expect them (see definition of CreateLogSoftmaxWorkloadTest).
    LogSoftmaxQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {4, 1}));
    BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {4, 1}));
}

BOOST_AUTO_TEST_CASE(CreateLogSoftmaxFloat32WorkloadTest)
{
    ClCreateLogSoftmaxWorkloadTest<ClLogSoftmaxWorkload, armnn::DataType::Float32>();
}

template <typename LstmWorkloadType>
static void ClCreateLstmWorkloadTest()
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateLstmWorkloadTest<LstmWorkloadType>(factory, graph);

    LstmQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[1]);
    BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 2, 2 }));
    BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 2, 4 }));
}

BOOST_AUTO_TEST_CASE(CreateLSTMWorkloadFloatWorkload)
{
    ClCreateLstmWorkloadTest<ClLstmFloatWorkload>();
}

template <typename ResizeWorkloadType, typename armnn::DataType DataType>
static void ClResizeWorkloadTest(DataLayout dataLayout)
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateResizeBilinearWorkloadTest<ResizeWorkloadType, DataType>(factory, graph, dataLayout);

    auto queueDescriptor = workload->GetData();

    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    switch (dataLayout)
    {
        case DataLayout::NHWC:
            BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 2, 4, 4, 3 }));
            BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 2, 2, 2, 3 }));
            break;
        case DataLayout::NCHW:
        default:
            BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 2, 3, 4, 4 }));
            BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 2, 3, 2, 2 }));
    }
}

BOOST_AUTO_TEST_CASE(CreateResizeFloat32NchwWorkload)
{
    ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateResizeFloat16NchwWorkload)
{
    ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::Float16>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateResizeUint8NchwWorkload)
{
    ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::QAsymmU8>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateResizeFloat32NhwcWorkload)
{
    ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
}

BOOST_AUTO_TEST_CASE(CreateResizeFloat16NhwcWorkload)
{
    ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
}

BOOST_AUTO_TEST_CASE(CreateResizeUint8NhwcWorkload)
{
    ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::QAsymmU8>(DataLayout::NHWC);
}

template <typename MeanWorkloadType, typename armnn::DataType DataType>
static void ClMeanWorkloadTest()
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateMeanWorkloadTest<MeanWorkloadType, DataType>(factory, graph);

    // Checks that inputs/outputs are as we expect them (see definition of CreateMeanWorkloadTest).
    MeanQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    // The first dimension (batch size) in both input and output is singular thus it has been reduced by ACL.
    BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 1, 3, 7, 4 }));
    BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 1, 4 }));
}

BOOST_AUTO_TEST_CASE(CreateMeanFloat32Workload)
{
    ClMeanWorkloadTest<ClMeanWorkload, armnn::DataType::Float32>();
}

BOOST_AUTO_TEST_CASE(CreateMeanFloat16Workload)
{
    ClMeanWorkloadTest<ClMeanWorkload, armnn::DataType::Float16>();
}

BOOST_AUTO_TEST_CASE(CreateMeanUint8Workload)
{
    ClMeanWorkloadTest<ClMeanWorkload, armnn::DataType::QAsymmU8>();
}

template <typename ConcatWorkloadType, armnn::DataType DataType>
static void ClCreateConcatWorkloadTest(std::initializer_list<unsigned int> outputShape,
                                       unsigned int concatAxis)
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateConcatWorkloadTest<ConcatWorkloadType, DataType>(factory, graph, outputShape, concatAxis);

    ConcatQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle0 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto inputHandle1 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    BOOST_TEST(CompareIClTensorHandleShape(inputHandle0, { 2, 3, 2, 5 }));
    BOOST_TEST(CompareIClTensorHandleShape(inputHandle1, { 2, 3, 2, 5 }));
    BOOST_TEST(CompareIClTensorHandleShape(outputHandle, outputShape));
}

BOOST_AUTO_TEST_CASE(CreateConcatDim0Float32Workload)
{
    ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::Float32>({ 4, 3, 2, 5 }, 0);
}

BOOST_AUTO_TEST_CASE(CreateConcatDim1Float32Workload)
{
    ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::Float32>({ 2, 6, 2, 5 }, 1);
}

BOOST_AUTO_TEST_CASE(CreateConcatDim3Float32Workload)
{
    ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::Float32>({ 2, 3, 2, 10 }, 3);
}

BOOST_AUTO_TEST_CASE(CreateConcatDim0Uint8Workload)
{
    ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::QAsymmU8>({ 4, 3, 2, 5 }, 0);
}

BOOST_AUTO_TEST_CASE(CreateConcatDim1Uint8Workload)
{
    ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::QAsymmU8>({ 2, 6, 2, 5 }, 1);
}

BOOST_AUTO_TEST_CASE(CreateConcatDim3Uint8Workload)
{
    ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::QAsymmU8>({ 2, 3, 2, 10 }, 3);
}

template <typename SpaceToDepthWorkloadType, typename armnn::DataType DataType>
static void ClSpaceToDepthWorkloadTest()
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateSpaceToDepthWorkloadTest<SpaceToDepthWorkloadType, DataType>(factory, graph);

    SpaceToDepthQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 1, 2, 2, 1 }));
    BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 1, 1, 1, 4 }));
}

BOOST_AUTO_TEST_CASE(CreateSpaceToDepthFloat32Workload)
{
    ClSpaceToDepthWorkloadTest<ClSpaceToDepthWorkload, armnn::DataType::Float32>();
}

BOOST_AUTO_TEST_CASE(CreateSpaceToDepthFloat16Workload)
{
    ClSpaceToDepthWorkloadTest<ClSpaceToDepthWorkload, armnn::DataType::Float16>();
}

BOOST_AUTO_TEST_CASE(CreateSpaceToDepthQAsymm8Workload)
{
    ClSpaceToDepthWorkloadTest<ClSpaceToDepthWorkload, armnn::DataType::QAsymmU8>();
}

BOOST_AUTO_TEST_CASE(CreateSpaceToDepthQSymm16Workload)
{
    ClSpaceToDepthWorkloadTest<ClSpaceToDepthWorkload, armnn::DataType::QSymmS16>();
}

template <armnn::DataType DataType>
static void ClCreateStackWorkloadTest(const std::initializer_list<unsigned int>& inputShape,
                                      const std::initializer_list<unsigned int>& outputShape,
                                      unsigned int axis,
                                      unsigned int numInputs)
{
    armnn::Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateStackWorkloadTest<ClStackWorkload, DataType>(factory,
                                                                       graph,
                                                                       TensorShape(inputShape),
                                                                       TensorShape(outputShape),
                                                                       axis,
                                                                       numInputs);

    // Check inputs and output are as expected
    StackQueueDescriptor queueDescriptor = workload->GetData();
    for (unsigned int i = 0; i < numInputs; ++i)
    {
        auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[i]);
        BOOST_TEST(CompareIClTensorHandleShape(inputHandle, inputShape));
    }
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
    BOOST_TEST(CompareIClTensorHandleShape(outputHandle, outputShape));
}

BOOST_AUTO_TEST_CASE(CreateStackFloat32Workload)
{
    ClCreateStackWorkloadTest<armnn::DataType::Float32>({ 3, 4, 5 }, { 3, 4, 2, 5 }, 2, 2);
}

BOOST_AUTO_TEST_CASE(CreateStackFloat16Workload)
{
    ClCreateStackWorkloadTest<armnn::DataType::Float16>({ 3, 4, 5 }, { 3, 4, 2, 5 }, 2, 2);
}

BOOST_AUTO_TEST_CASE(CreateStackUint8Workload)
{
    ClCreateStackWorkloadTest<armnn::DataType::QAsymmU8>({ 3, 4, 5 }, { 3, 4, 2, 5 }, 2, 2);
}

template <typename QLstmWorkloadType>
static void ClCreateQLstmWorkloadTest()
{
    Graph graph;
    ClWorkloadFactory factory = ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateQLstmWorkloadTest<QLstmWorkloadType>(factory, graph);
    QLstmQueueDescriptor queueDescriptor = workload->GetData();

    IAclTensorHandle* inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
    BOOST_TEST((inputHandle->GetShape() == TensorShape({2, 4})));
    BOOST_TEST((inputHandle->GetDataType() == arm_compute::DataType::QASYMM8_SIGNED));

    IAclTensorHandle* cellStateOutHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
    BOOST_TEST((cellStateOutHandle->GetShape() == TensorShape({2, 4})));
    BOOST_TEST((cellStateOutHandle->GetDataType() == arm_compute::DataType::QSYMM16));

    IAclTensorHandle* outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[2]);
    BOOST_TEST((outputHandle->GetShape() == TensorShape({2, 4})));
    BOOST_TEST((outputHandle->GetDataType() == arm_compute::DataType::QASYMM8_SIGNED));
}

BOOST_AUTO_TEST_CASE(CreateQLstmWorkloadTest)
{
    ClCreateQLstmWorkloadTest<ClQLstmWorkload>();
}

template <typename QuantizedLstmWorkloadType>
static void ClCreateQuantizedLstmWorkloadTest()
{
    using namespace armnn::armcomputetensorutils;

    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateQuantizedLstmWorkloadTest<QuantizedLstmWorkloadType>(factory, graph);

    QuantizedLstmQueueDescriptor queueDescriptor = workload->GetData();

    IAclTensorHandle* inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
    BOOST_TEST((inputHandle->GetShape() == TensorShape({2, 2})));
    BOOST_TEST((inputHandle->GetDataType() == arm_compute::DataType::QASYMM8));

    IAclTensorHandle* cellStateInHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
    BOOST_TEST((cellStateInHandle->GetShape() == TensorShape({2, 4})));
    BOOST_TEST((cellStateInHandle->GetDataType() == arm_compute::DataType::QSYMM16));

    IAclTensorHandle* outputStateInHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[2]);
    BOOST_TEST((outputStateInHandle->GetShape() == TensorShape({2, 4})));
    BOOST_TEST((outputStateInHandle->GetDataType() == arm_compute::DataType::QASYMM8));

    IAclTensorHandle* cellStateOutHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
    BOOST_TEST((cellStateOutHandle->GetShape() == TensorShape({2, 4})));
    BOOST_TEST((cellStateOutHandle->GetDataType() == arm_compute::DataType::QSYMM16));

    IAclTensorHandle* outputStateOutHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
    BOOST_TEST((outputStateOutHandle->GetShape() == TensorShape({2, 4})));
    BOOST_TEST((outputStateOutHandle->GetDataType() == arm_compute::DataType::QASYMM8));
}

BOOST_AUTO_TEST_CASE(CreateQuantizedLstmWorkload)
{
    ClCreateQuantizedLstmWorkloadTest<ClQuantizedLstmWorkload>();
}

BOOST_AUTO_TEST_SUITE_END()