27 #include <doctest/doctest.h> 30 std::initializer_list<unsigned int> expectedDimensions)
32 return CompareTensorHandleShape<IClTensorHandle>(tensorHandle, expectedDimensions);
37 template <armnn::DataType DataType>
38 static void ClCreateActivationWorkloadTest()
42 ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
44 auto workload = CreateActivationWorkloadTest<ClActivationWorkload, DataType>(factory, graph);
48 auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
49 auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
52 CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
55 CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
60 ClCreateActivationWorkloadTest<armnn::DataType::Float32>();
65 ClCreateActivationWorkloadTest<armnn::DataType::Float16>();
68 template <
typename WorkloadType,
69 typename DescriptorType,
72 static void ClCreateElementwiseWorkloadTest()
76 ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
78 auto workload = CreateElementwiseWorkloadTest<WorkloadType, DescriptorType, LayerType, DataType>(factory, graph);
81 DescriptorType queueDescriptor = workload->GetData();
82 auto inputHandle1 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
83 auto inputHandle2 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]);
84 auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
86 CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
88 CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
90 CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
165 template <
typename WorkloadType,
166 typename DescriptorType,
172 ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
174 auto workload = CreateElementwiseUnaryWorkloadTest<WorkloadType, DescriptorType, DataType>(factory, graph, op);
176 DescriptorType queueDescriptor = workload->GetData();
178 auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
179 auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
182 CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
185 CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
190 ClCreateElementwiseUnaryWorkloadTest<ClRsqrtWorkload, RsqrtQueueDescriptor, armnn::DataType::Float32>(
191 UnaryOperation::Rsqrt);
194 template <
typename BatchNormalizationWorkloadType, armnn::DataType DataType>
195 static void ClCreateBatchNormalizationWorkloadTest(
DataLayout dataLayout)
199 ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
201 auto workload = CreateBatchNormalizationWorkloadTest<BatchNormalizationWorkloadType, DataType>
202 (factory, graph, dataLayout);
206 auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
207 auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
212 case DataLayout::NHWC:
214 CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
216 CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
220 CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
222 CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
254 ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
256 auto workload = CreateConvertFp16ToFp32WorkloadTest<ClConvertFp16ToFp32Workload>(factory, graph);
259 auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
260 auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
262 CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
264 CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
265 CHECK((inputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F16));
266 CHECK((outputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F32));
273 ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
275 auto workload = CreateConvertFp32ToFp16WorkloadTest<ClConvertFp32ToFp16Workload>(factory, graph);
278 auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
279 auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
282 CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
284 CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
285 CHECK((inputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F32));
286 CHECK((outputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F16));
289 template <
typename Convolution2dWorkloadType,
typename armnn::DataType DataType>
290 static void ClConvolution2dWorkloadTest(
DataLayout dataLayout)
294 ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
296 auto workload = CreateConvolution2dWorkloadTest<ClConvolution2dWorkload, DataType>(factory,
300 TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({2, 3, 8, 16})
301 : std::initializer_list<unsigned int>({2, 8, 16, 3});
302 TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({2, 2, 2, 10})
303 : std::initializer_list<unsigned int>({2, 2, 10, 2});
307 auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.
m_Inputs[0]);
308 auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.
m_Outputs[0]);
309 CHECK((inputHandle->GetShape() == inputShape));
310 CHECK((outputHandle->GetShape() == outputShape));
315 ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
320 ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
325 ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float16>(DataLayout::NCHW);
330 ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
341 {
"FastMathEnabled",
true }
343 modelOptions.push_back(gpuAcc);
346 ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager(), modelOptions);
349 CreateConvolution2dWorkloadFastMathTest<ClConvolution2dWorkload, armnn::DataType::Float32>(factory,
355 auto conv2dWorkload = PolymorphicDowncast<ClConvolution2dWorkload*>(workload.get());
358 ARMNN_ASSERT(conv2dWorkload->GetConvolutionMethod() == arm_compute::ConvolutionMethod::WINOGRAD);
367 ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
370 CreateConvolution2dWorkloadTest<ClConvolution2dWorkload, DataType::Float32>(factory,
374 TensorShape inputShape = std::initializer_list<unsigned int>({2, 8, 16, 3});
375 TensorShape outputShape = std::initializer_list<unsigned int>({2, 2, 10, 2});
379 auto inputHandle = PolymorphicDowncast<ITensorHandle*>(queueDescriptor.m_Inputs[0]);
380 auto outputHandle = PolymorphicDowncast<ITensorHandle*>(queueDescriptor.m_Outputs[0]);
381 CHECK((inputHandle->GetShape() == inputShape));
382 CHECK((outputHandle->GetShape() == outputShape));
384 CHECK((dynamic_cast<ClTensorHandle*>(inputHandle) !=
nullptr));
385 CHECK((dynamic_cast<ClTensorHandle*>(outputHandle) !=
nullptr));
389 static_cast<MemorySourceFlags>(MemorySource::Malloc));
391 TensorInfo inputInfo({ 2, 8, 16, 3 }, DataType::Float32);
392 TensorInfo outputInfo({ 2, 2, 10, 2 }, DataType::Float32);
400 workload->ReplaceInputTensorHandle(inputImportHandle.get(), 0);
401 workload->ReplaceOutputTensorHandle(outputImportHandle.get(), 0);
404 queueDescriptor = workload->GetData();
405 auto replacedInputHandle = PolymorphicDowncast<ITensorHandle*>(queueDescriptor.m_Inputs[0]);
406 auto replacedOutputHandle = PolymorphicDowncast<ITensorHandle*>(queueDescriptor.m_Outputs[0]);
407 CHECK((replacedInputHandle->GetShape() == inputShape));
408 CHECK((replacedOutputHandle->GetShape() == outputShape));
410 CHECK((inputImportHandle.get() == replacedInputHandle));
411 CHECK((inputImportHandle.get() == replacedInputHandle));
413 CHECK((dynamic_cast<ClTensorHandle*>(replacedInputHandle) ==
nullptr));
414 CHECK((dynamic_cast<ClImportTensorHandle*>(replacedInputHandle) !=
nullptr));
415 CHECK((dynamic_cast<ClTensorHandle*>(replacedOutputHandle) ==
nullptr));
416 CHECK((dynamic_cast<ClImportTensorHandle*>(replacedOutputHandle) !=
nullptr));
421 using namespace armnn;
427 TensorInfo inputInfo ({ 1, 3, 1, 2 }, inputType, 0.5f, 128);
428 TensorInfo outputInfo({ 1, 3, 1, 3 }, inputType, 1.0f, 128);
430 const std::vector<float> quantScales{ 0.5f, 0.75f, 1.0f };
431 constexpr
unsigned int quantDimension = 0;
433 TensorInfo kernelInfo({ 3, 1, 1, 2 }, kernelType, quantScales, quantDimension);
435 const std::vector<float> biasQuantScales{ 0.25f, 0.375f, 0.5f };
436 TensorInfo biasInfo({ 3 }, biasType, biasQuantScales, quantDimension);
438 std::vector<uint8_t> inputData =
440 138, 108, 138, 108, 138, 108
443 std::vector<int8_t> kernelData =
448 std::vector<int32_t> biasData =
453 std::vector<uint8_t> expectedOutputData =
455 121, 118, 115, 121, 118, 115, 121, 118, 115
469 auto memoryManager = ClWorkloadFactoryHelper::GetMemoryManager();
470 auto clMemoryManager = armnn::PolymorphicPointerDowncast<armnn::ClMemoryManager>(memoryManager);
471 auto tensorHandleFactory = ClWorkloadFactoryHelper::GetTensorHandleFactory(memoryManager);
473 std::unique_ptr<ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputInfo);
474 std::unique_ptr<ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputInfo);
486 queueDescriptor.m_Weight = &weightTensor;
487 queueDescriptor.m_Bias = &biasTensor;
489 AddInputToWorkload(queueDescriptor, workloadInfo, inputInfo, inputHandle.get());
490 AddOutputToWorkload(queueDescriptor, workloadInfo, outputInfo, outputHandle.get());
493 auto context = arm_compute::CLKernelLibrary::get().context();
494 auto device = arm_compute::CLKernelLibrary::get().get_device();
495 auto clCompileContext = arm_compute::CLCompileContext(context, device);
500 CHECK(clCompileContext.get_built_programs().empty());
502 auto workload = std::make_unique<ClConvolution2dWorkload>(queueDescriptor,
504 clMemoryManager->GetIntraLayerManager(),
508 CHECK(!clCompileContext.get_built_programs().empty());
511 template <
typename DepthwiseConvolutionWorkloadType,
typename armnn::DataType DataType>
512 static void ClDepthwiseConvolutionWorkloadTest(
DataLayout dataLayout)
516 ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
518 auto workload = CreateDepthwiseConvolution2dWorkloadTest<DepthwiseConvolutionWorkloadType, DataType>
519 (factory, graph, dataLayout);
523 auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
524 auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
527 : std::initializer_list<unsigned int>({ 2, 5, 5, 2 });
529 : std::initializer_list<unsigned int>({ 2, 5, 5, 2 });
531 CHECK((inputHandle->GetShape() == inputShape));
532 CHECK((outputHandle->GetShape() == outputShape));
537 ClDepthwiseConvolutionWorkloadTest<ClDepthwiseConvolutionWorkload, DataType::Float32>(
DataLayout::NHWC);
540 template <
typename Convolution2dWorkloadType,
typename armnn::DataType DataType>
541 static void ClDirectConvolution2dWorkloadTest()
545 ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
547 auto workload = CreateDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, DataType>(factory, graph);
551 auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
552 auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
554 CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
556 CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
561 ClDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float32>();
566 ClDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float16>();
571 ClDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::QAsymmU8>();
574 template <
typename FullyConnectedWorkloadType,
typename armnn::DataType DataType>
575 static void ClCreateFullyConnectedWorkloadTest()
579 ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
582 CreateFullyConnectedWorkloadTest<FullyConnectedWorkloadType, DataType>(factory, graph);
586 auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
587 auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
589 CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
591 CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
597 ClCreateFullyConnectedWorkloadTest<ClFullyConnectedWorkload, armnn::DataType::Float32>();
602 ClCreateFullyConnectedWorkloadTest<ClFullyConnectedWorkload, armnn::DataType::Float16>();
605 template <
typename NormalizationWorkloadType,
typename armnn::DataType DataType>
606 static void ClNormalizationWorkloadTest(
DataLayout dataLayout)
610 ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
612 auto workload = CreateNormalizationWorkloadTest<NormalizationWorkloadType, DataType>(factory, graph, dataLayout);
616 auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
617 auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
620 : std::initializer_list<unsigned int>({3, 1, 5, 5});
622 : std::initializer_list<unsigned int>({3, 1, 5, 5});
624 CHECK((inputHandle->GetShape() == inputShape));
625 CHECK((outputHandle->GetShape() == outputShape));
630 ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float32>(
DataLayout::NCHW);
635 ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float16>(
DataLayout::NCHW);
640 ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float32>(
DataLayout::NHWC);
645 ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float16>(
DataLayout::NHWC);
648 template <
typename armnn::DataType DataType>
649 static void ClPooling2dWorkloadTest(
DataLayout dataLayout)
653 ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
655 auto workload = CreatePooling2dWorkloadTest<ClPooling2dWorkload, DataType>(factory, graph, dataLayout);
658 : std::initializer_list<unsigned int>({3, 5, 5, 2});
660 : std::initializer_list<unsigned int>({3, 2, 4, 2});
664 auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.
m_Inputs[0]);
665 auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.
m_Outputs[0]);
667 CHECK((inputHandle->GetShape() == inputShape));
668 CHECK((outputHandle->GetShape() == outputShape));
698 ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
700 auto workload = CreatePreluWorkloadTest<ClPreluWorkload>(factory,
709 auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
710 auto alphaHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]);
711 auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
713 CHECK((inputHandle->GetShape() == inputShape));
714 CHECK((alphaHandle->GetShape() == alphaShape));
715 CHECK((outputHandle->GetShape() == outputShape));
720 ClCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 },
DataType::Float16);
725 ClCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 },
DataType::Float32);
730 ClCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 },
DataType::QAsymmU8);
733 template <
typename armnn::DataType DataType>
734 static void ClCreateReshapeWorkloadTest()
738 ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
740 auto workload = CreateReshapeWorkloadTest<ClReshapeWorkload, DataType>(factory, graph);
744 auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
745 auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
748 CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
750 CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
755 ClCreateReshapeWorkloadTest<armnn::DataType::Float32>();
760 ClCreateReshapeWorkloadTest<armnn::DataType::Float16>();
765 ClCreateReshapeWorkloadTest<armnn::DataType::QAsymmU8>();
768 template <
typename SoftmaxWorkloadType,
typename armnn::DataType DataType>
769 static void ClSoftmaxWorkloadTest()
773 ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
775 auto workload = CreateSoftmaxWorkloadTest<SoftmaxWorkloadType, DataType>(factory, graph);
779 auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
780 auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
786 tensorInfo.SetQuantizationScale(1.f / 256);
790 tensorInfo.SetQuantizationOffset(-128);
791 tensorInfo.SetQuantizationScale(1.f / 256);
795 CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
797 CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
803 ClSoftmaxWorkloadTest<ClSoftmaxWorkload, armnn::DataType::Float32>();
808 ClSoftmaxWorkloadTest<ClSoftmaxWorkload, armnn::DataType::Float16>();
813 ClSoftmaxWorkloadTest<ClSoftmaxWorkload, armnn::DataType::QAsymmU8>();
818 ClSoftmaxWorkloadTest<ClSoftmaxWorkload, armnn::DataType::QAsymmS8>();
821 template <
typename armnn::DataType DataType>
822 static void ClSplitterWorkloadTest()
826 ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
828 auto workload = CreateSplitterWorkloadTest<ClSplitterWorkload, DataType>(factory, graph);
832 auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
834 CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
836 auto outputHandle1 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[1]);
838 CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
840 auto outputHandle2 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[2]);
842 CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
844 auto outputHandle0 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
846 CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
851 ClSplitterWorkloadTest<armnn::DataType::Float32>();
856 ClSplitterWorkloadTest<armnn::DataType::Float16>();
859 template <
typename armnn::DataType DataType>
860 static void ClSplitterConcatTest()
870 ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
873 CreateSplitterConcatWorkloadTest<ClSplitterWorkload, ClConcatWorkload, DataType>
876 auto wlSplitter = std::move(workloads.first);
877 auto wlConcat = std::move(workloads.second);
891 bool validDataPointers = (sOut0 == mIn1) && (sOut1 == mIn0);
892 CHECK(validDataPointers);
896 bool validSubTensorParents = (mIn0->
GetTensor().parent() == mIn1->
GetTensor().parent())
899 CHECK(validSubTensorParents);
904 ClSplitterConcatTest<armnn::DataType::Float32>();
909 ClSplitterConcatTest<armnn::DataType::Float16>();
920 ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
922 std::unique_ptr<ClSplitterWorkload> wlSplitter;
923 std::unique_ptr<ClActivationWorkload> wlActiv0_0;
924 std::unique_ptr<ClActivationWorkload> wlActiv0_1;
925 std::unique_ptr<ClActivationWorkload> wlActiv1_0;
926 std::unique_ptr<ClActivationWorkload> wlActiv1_1;
930 wlActiv1_0, wlActiv1_1);
948 bool validDataPointers = (sOut0 == activ0_0Im) && (sOut0 == activ0_1Im) &&
949 (sOut1 == activ1_0Im) && (sOut1 == activ1_1Im);
951 CHECK(validDataPointers);
954 #if defined(ARMNNREF_ENABLED) 961 ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
963 CreateMemCopyWorkloads<IClTensorHandle>(factory);
968 template <
typename L2NormalizationWorkloadType,
typename armnn::DataType DataType>
969 static void ClL2NormalizationWorkloadTest(
DataLayout dataLayout)
973 ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
976 CreateL2NormalizationWorkloadTest<L2NormalizationWorkloadType, DataType>(factory, graph, dataLayout);
980 auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
981 auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
984 : std::initializer_list<unsigned int>({ 5, 50, 67, 20 });
986 : std::initializer_list<unsigned int>({ 5, 50, 67, 20 });
988 CHECK((inputHandle->GetShape() == inputShape));
989 CHECK((outputHandle->GetShape() == outputShape));
994 ClL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float32>(
DataLayout::NCHW);
999 ClL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float32>(
DataLayout::NHWC);
1004 ClL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float16>(
DataLayout::NCHW);
1009 ClL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float16>(
DataLayout::NHWC);
1012 template <
typename LogSoftmaxWorkloadType,
typename armnn::DataType DataType>
1013 static void ClCreateLogSoftmaxWorkloadTest()
1017 ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
1019 auto workload = CreateLogSoftmaxWorkloadTest<LogSoftmaxWorkloadType, DataType>(factory, graph);
1023 auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
1024 auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
1027 CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
1029 CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
1034 ClCreateLogSoftmaxWorkloadTest<ClLogSoftmaxWorkload, armnn::DataType::Float32>();
1037 template <
typename LstmWorkloadType>
1038 static void ClCreateLstmWorkloadTest()
1042 ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
1044 auto workload = CreateLstmWorkloadTest<LstmWorkloadType>(factory, graph);
1047 auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
1048 auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[1]);
1050 CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
1052 CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
1057 ClCreateLstmWorkloadTest<ClLstmFloatWorkload>();
1060 template <
typename ResizeWorkloadType,
typename armnn::DataType DataType>
1061 static void ClResizeWorkloadTest(
DataLayout dataLayout)
1065 ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
1067 auto workload = CreateResizeBilinearWorkloadTest<ResizeWorkloadType, DataType>(factory, graph, dataLayout);
1069 auto queueDescriptor = workload->GetData();
1071 auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
1072 auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
1079 CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
1081 CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
1085 CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
1087 CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
1093 ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::Float32>(
DataLayout::NCHW);
1098 ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::Float16>(
DataLayout::NCHW);
1103 ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::QAsymmU8>(
DataLayout::NCHW);
1108 ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::Float32>(
DataLayout::NHWC);
1113 ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::Float16>(
DataLayout::NHWC);
1118 ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::QAsymmU8>(
DataLayout::NHWC);
1121 template <
typename MeanWorkloadType,
typename armnn::DataType DataType>
1122 static void ClMeanWorkloadTest()
1126 ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
1128 auto workload = CreateMeanWorkloadTest<MeanWorkloadType, DataType>(factory, graph);
1132 auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
1133 auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
1137 CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
1139 CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
1144 ClMeanWorkloadTest<ClMeanWorkload, armnn::DataType::Float32>();
1149 ClMeanWorkloadTest<ClMeanWorkload, armnn::DataType::Float16>();
1154 ClMeanWorkloadTest<ClMeanWorkload, armnn::DataType::QAsymmU8>();
1157 template <
typename ConcatWorkloadType, armnn::DataType DataType>
1158 static void ClCreateConcatWorkloadTest(std::initializer_list<unsigned int> outputShape,
1159 unsigned int concatAxis)
1163 ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
1165 auto workload = CreateConcatWorkloadTest<ConcatWorkloadType, DataType>(factory, graph, outputShape, concatAxis);
1168 auto inputHandle0 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
1169 auto inputHandle1 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]);
1170 auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
1173 CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
1175 CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
1177 CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
1182 ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::Float32>({ 4, 3, 2, 5 }, 0);
1187 ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::Float32>({ 2, 6, 2, 5 }, 1);
1192 ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::Float32>({ 2, 3, 2, 10 }, 3);
1197 ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::QAsymmU8>({ 4, 3, 2, 5 }, 0);
1202 ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::QAsymmU8>({ 2, 6, 2, 5 }, 1);
1207 ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::QAsymmU8>({ 2, 3, 2, 10 }, 3);
1210 template <
typename SpaceToDepthWorkloadType,
typename armnn::DataType DataType>
1211 static void ClSpaceToDepthWorkloadTest()
1215 ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
1217 auto workload = CreateSpaceToDepthWorkloadTest<SpaceToDepthWorkloadType, DataType>(factory, graph);
1220 auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
1221 auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
1224 CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
1226 CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
1231 ClSpaceToDepthWorkloadTest<ClSpaceToDepthWorkload, armnn::DataType::Float32>();
1236 ClSpaceToDepthWorkloadTest<ClSpaceToDepthWorkload, armnn::DataType::Float16>();
1241 ClSpaceToDepthWorkloadTest<ClSpaceToDepthWorkload, armnn::DataType::QAsymmU8>();
1246 ClSpaceToDepthWorkloadTest<ClSpaceToDepthWorkload, armnn::DataType::QSymmS16>();
1249 template <armnn::DataType DataType>
1250 static void ClCreateStackWorkloadTest(
const std::initializer_list<unsigned int>& inputShape,
1251 const std::initializer_list<unsigned int>& outputShape,
1253 unsigned int numInputs)
1257 ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
1259 auto workload = CreateStackWorkloadTest<ClStackWorkload, DataType>(factory,
1268 for (
unsigned int i = 0; i < numInputs; ++i)
1270 auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[i]);
1272 CHECK_MESSAGE(predResult1.m_Result, predResult1.m_Message.str());
1274 auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
1276 CHECK_MESSAGE(predResult2.m_Result, predResult2.m_Message.str());
1281 ClCreateStackWorkloadTest<armnn::DataType::Float32>({ 3, 4, 5 }, { 3, 4, 2, 5 }, 2, 2);
1286 ClCreateStackWorkloadTest<armnn::DataType::Float16>({ 3, 4, 5 }, { 3, 4, 2, 5 }, 2, 2);
1291 ClCreateStackWorkloadTest<armnn::DataType::QAsymmU8>({ 3, 4, 5 }, { 3, 4, 2, 5 }, 2, 2);
1295 template <
typename QLstmWorkloadType>
1296 static void ClCreateQLstmWorkloadTest()
1299 ClWorkloadFactory factory = ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
1301 auto workload = CreateQLstmWorkloadTest<QLstmWorkloadType>(factory, graph);
1304 IAclTensorHandle* inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
1306 CHECK((inputHandle->
GetDataType() == arm_compute::DataType::QASYMM8_SIGNED));
1308 IAclTensorHandle* cellStateOutHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
1310 CHECK((cellStateOutHandle->
GetDataType() == arm_compute::DataType::QSYMM16));
1312 IAclTensorHandle* outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[2]);
1314 CHECK((outputHandle->
GetDataType() == arm_compute::DataType::QASYMM8_SIGNED));
1319 ClCreateQLstmWorkloadTest<ClQLstmWorkload>();
1322 template <
typename QuantizedLstmWorkloadType>
1323 static void ClCreateQuantizedLstmWorkloadTest()
1329 ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
1331 auto workload = CreateQuantizedLstmWorkloadTest<QuantizedLstmWorkloadType>(factory, graph);
1335 IAclTensorHandle* inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
1337 CHECK((inputHandle->
GetDataType() == arm_compute::DataType::QASYMM8));
1339 IAclTensorHandle* cellStateInHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
1341 CHECK((cellStateInHandle->
GetDataType() == arm_compute::DataType::QSYMM16));
1343 IAclTensorHandle* outputStateInHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[2]);
1345 CHECK((outputStateInHandle->
GetDataType() == arm_compute::DataType::QASYMM8));
1347 IAclTensorHandle* cellStateOutHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
1349 CHECK((cellStateOutHandle->
GetDataType() == arm_compute::DataType::QSYMM16));
1351 IAclTensorHandle* outputStateOutHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
1353 CHECK((outputStateOutHandle->
GetDataType() == arm_compute::DataType::QASYMM8));
1358 ClCreateQuantizedLstmWorkloadTest<ClQuantizedLstmWorkload>();
1361 template <armnn::DataType DataType>
1362 static void ClCreateActivationWorkloadReplaceFunctionsTest()
1364 std::shared_ptr<ClMemoryManager> memoryManager = std::make_shared<ClMemoryManager>(
1365 std::make_unique<arm_compute::CLBufferAllocator>());
1370 auto workloadPtr = CreateActivationWorkloadTest<ClActivationWorkload, DataType>(factory, graph);
1376 unique_ptr<ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputInfo,
true);
1377 inputHandle->Manage();
1378 inputHandle->Allocate();
1379 unique_ptr<ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputInfo,
true);
1380 outputHandle->Manage();
1381 outputHandle->Allocate();
1383 unsigned int slot = 0;
1385 CHECK_THROWS_AS(workloadPtr->ReplaceOutputTensorHandle(outputHandle.get(), slot),
UnimplementedException);
1388 TEST_CASE(
"ClReplaceFunctionsfromFloat32toFloat16ActivationWorkload")
1390 ClCreateActivationWorkloadReplaceFunctionsTest<armnn::DataType::Float32>();
uint32_t m_PadBottom
Padding bottom value in the height dimension.
bool m_BiasEnabled
Enable/disable bias.
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
armnn::PredicateResult CompareIClTensorHandleShape(IClTensorHandle *tensorHandle, std::initializer_list< unsigned int > expectedDimensions)
TEST_SUITE("CreateWorkloadCl")
std::vector< BackendOptions > ModelOptions
A Convolution2dDescriptor for the Convolution2dLayer.
uint32_t m_PadRight
Padding right value in the width dimension.
Copyright (c) 2021 ARM Limited and Contributors.
void IgnoreUnused(Ts &&...)
LayerDescriptor m_Parameters
void AllocateAndCopyDataToITensorHandle(armnn::ITensorHandle *tensorHandle, const void *memory)
virtual arm_compute::DataType GetDataType() const =0
uint32_t m_PadTop
Padding top value in the height dimension.
TEST_CASE_FIXTURE(ClContextControlFixture, "CopyBetweenNeonAndGpu")
uint32_t m_StrideX
Stride value when proceeding through input for the width dimension.
#define ARMNN_ASSERT(COND)
This factory creates ClImportTensorHandles that refer to imported memory tensors. ...
virtual TensorShape GetShape() const =0
Get the number of elements for each dimension ordered from slowest iterating dimension to fastest iterating dimension.
uint32_t m_StrideY
Stride value when proceeding through input for the height dimension.
This layer represents an addition operation.
Struct for the users to pass backend specific options.
This layer represents a subtraction operation.
std::vector< ITensorHandle * > m_Outputs
This layer represents a division operation.
Contains information about TensorInfos of a layer.
void SetQuantizationOffset(int32_t offset)
std::vector< ITensorHandle * > m_Inputs
std::unique_ptr< ITensorHandle > CreateTensorHandle(const TensorInfo &tensorInfo) const override
This layer represents a multiplication operation.
arm_compute::CLSubTensor & GetTensor() override
Depthwise Convolution 2D layer workload data.
uint32_t m_PadLeft
Padding left value in the width dimension.
LayerType
When adding a new layer, adapt also the LastLayer enum value in the enum class LayerType below...