//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "ClContextControlFixture.hpp"
#include "ClWorkloadFactoryHelper.hpp"

#include <aclCommon/ArmComputeTensorUtils.hpp>
#include <aclCommon/test/CreateWorkloadClNeon.hpp>

#include <backendsCommon/MemCopyWorkload.hpp>

#include <cl/ClTensorHandle.hpp>
#include <cl/ClWorkloadFactory.hpp>
#include <cl/workloads/ClWorkloads.hpp>
#include <cl/workloads/ClWorkloadUtils.hpp>

boost::test_tools::predicate_result CompareIClTensorHandleShape(IClTensorHandle* tensorHandle,
                                                                std::initializer_list<unsigned int> expectedDimensions)
{
    return CompareTensorHandleShape<IClTensorHandle>(tensorHandle, expectedDimensions);
}

BOOST_FIXTURE_TEST_SUITE(CreateWorkloadCl, ClContextControlFixture)

template <armnn::DataType DataType>
static void ClCreateActivationWorkloadTest()
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateActivationWorkloadTest<ClActivationWorkload, DataType>(factory, graph);

    // Checks that inputs/outputs are as we expect them (see definition of CreateActivationWorkloadTest).
    ActivationQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {1, 1}));
    BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {1, 1}));
}

BOOST_AUTO_TEST_CASE(CreateActivationFloatWorkload)
{
    ClCreateActivationWorkloadTest<armnn::DataType::Float32>();
}

BOOST_AUTO_TEST_CASE(CreateActivationFloat16Workload)
{
    ClCreateActivationWorkloadTest<armnn::DataType::Float16>();
}

template <typename WorkloadType,
          typename DescriptorType,
          typename LayerType,
          armnn::DataType DataType>
static void ClCreateElementwiseWorkloadTest()
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateElementwiseWorkloadTest<WorkloadType, DescriptorType, LayerType, DataType>(factory, graph);

    // Checks that inputs/outputs are as we expect them (see definition of CreateElementwiseWorkloadTest).
    DescriptorType queueDescriptor = workload->GetData();
    auto inputHandle1 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto inputHandle2 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]);
    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    BOOST_TEST(CompareIClTensorHandleShape(inputHandle1, {2, 3}));
    BOOST_TEST(CompareIClTensorHandleShape(inputHandle2, {2, 3}));
    BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {2, 3}));
}

BOOST_AUTO_TEST_CASE(CreateAdditionFloatWorkload)
{
    ClCreateElementwiseWorkloadTest<ClAdditionWorkload, AdditionQueueDescriptor, AdditionLayer,
                                    armnn::DataType::Float32>();
}

BOOST_AUTO_TEST_CASE(CreateAdditionFloat16Workload)
{
    ClCreateElementwiseWorkloadTest<ClAdditionWorkload, AdditionQueueDescriptor, AdditionLayer,
                                    armnn::DataType::Float16>();
}

BOOST_AUTO_TEST_CASE(CreateSubtractionFloatWorkload)
{
    ClCreateElementwiseWorkloadTest<ClSubtractionWorkload, SubtractionQueueDescriptor, SubtractionLayer,
                                    armnn::DataType::Float32>();
}

BOOST_AUTO_TEST_CASE(CreateSubtractionFloat16Workload)
{
    ClCreateElementwiseWorkloadTest<ClSubtractionWorkload, SubtractionQueueDescriptor, SubtractionLayer,
                                    armnn::DataType::Float16>();
}

BOOST_AUTO_TEST_CASE(CreateMultiplicationFloatWorkloadTest)
{
    ClCreateElementwiseWorkloadTest<ClMultiplicationWorkload, MultiplicationQueueDescriptor, MultiplicationLayer,
                                    armnn::DataType::Float32>();
}

BOOST_AUTO_TEST_CASE(CreateMultiplicationFloat16WorkloadTest)
{
    ClCreateElementwiseWorkloadTest<ClMultiplicationWorkload, MultiplicationQueueDescriptor, MultiplicationLayer,
                                    armnn::DataType::Float16>();
}

BOOST_AUTO_TEST_CASE(CreateMultiplicationUint8WorkloadTest)
{
    ClCreateElementwiseWorkloadTest<ClMultiplicationWorkload, MultiplicationQueueDescriptor, MultiplicationLayer,
                                    armnn::DataType::QuantisedAsymm8>();
}

BOOST_AUTO_TEST_CASE(CreateDivisionFloatWorkloadTest)
{
    ClCreateElementwiseWorkloadTest<ClDivisionFloatWorkload, DivisionQueueDescriptor, DivisionLayer,
                                    armnn::DataType::Float32>();
}

BOOST_AUTO_TEST_CASE(CreateDivisionFloat16WorkloadTest)
{
    ClCreateElementwiseWorkloadTest<ClDivisionFloatWorkload, DivisionQueueDescriptor, DivisionLayer,
                                    armnn::DataType::Float16>();
}

template <typename BatchNormalizationWorkloadType, armnn::DataType DataType>
static void ClCreateBatchNormalizationWorkloadTest(DataLayout dataLayout)
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateBatchNormalizationWorkloadTest<BatchNormalizationWorkloadType, DataType>
                    (factory, graph, dataLayout);

    // Checks that inputs/outputs are as we expect them (see definition of CreateBatchNormalizationWorkloadTest).
    BatchNormalizationQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    switch (dataLayout)
    {
        case DataLayout::NHWC:
            BOOST_TEST(CompareIClTensorHandleShape(inputHandle,  { 2, 4, 4, 3 }));
            BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 2, 4, 4, 3 }));
            break;
        default: // NCHW
            BOOST_TEST(CompareIClTensorHandleShape(inputHandle,  { 2, 3, 4, 4 }));
            BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 2, 3, 4, 4 }));
    }
}

BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloatNchwWorkload)
{
    ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload, armnn::DataType::Float32>
        (DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloat16NchwWorkload)
{
    ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload, armnn::DataType::Float16>
        (DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloatNhwcWorkload)
{
    ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload, armnn::DataType::Float32>
        (DataLayout::NHWC);
}

BOOST_AUTO_TEST_CASE(CreateBatchNormalizationNhwcFloat16NhwcWorkload)
{
    ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload, armnn::DataType::Float16>
        (DataLayout::NHWC);
}

BOOST_AUTO_TEST_CASE(CreateConvertFp16ToFp32Workload)
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateConvertFp16ToFp32WorkloadTest<ClConvertFp16ToFp32Workload>(factory, graph);

    ConvertFp16ToFp32QueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {1, 3, 2, 3}));
    BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {1, 3, 2, 3}));
    BOOST_TEST((inputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F16));
    BOOST_TEST((outputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F32));
}

BOOST_AUTO_TEST_CASE(CreateConvertFp32ToFp16Workload)
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateConvertFp32ToFp16WorkloadTest<ClConvertFp32ToFp16Workload>(factory, graph);

    ConvertFp32ToFp16QueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {1, 3, 2, 3}));
    BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {1, 3, 2, 3}));
    BOOST_TEST((inputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F32));
    BOOST_TEST((outputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F16));
}

template <typename Convolution2dWorkloadType, armnn::DataType DataType>
static void ClConvolution2dWorkloadTest(DataLayout dataLayout)
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateConvolution2dWorkloadTest<Convolution2dWorkloadType, DataType>(factory, graph, dataLayout);

    TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({2, 3, 8, 16})
                                                               : std::initializer_list<unsigned int>({2, 8, 16, 3});
    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({2, 2, 2, 10})
                                                               : std::initializer_list<unsigned int>({2, 2, 10, 2});

    // Checks that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest).
    Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    BOOST_TEST((inputHandle->GetShape() == inputShape));
    BOOST_TEST((outputHandle->GetShape() == outputShape));
}

BOOST_AUTO_TEST_CASE(CreateConvolution2dFloatNchwWorkload)
{
    ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateConvolution2dFloatNhwcWorkload)
{
    ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
}

BOOST_AUTO_TEST_CASE(CreateConvolution2dFloat16NchwWorkload)
{
    ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float16>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateConvolution2dFloat16NhwcWorkload)
{
    ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
}

template <typename DepthwiseConvolutionWorkloadType, armnn::DataType DataType>
static void ClDepthwiseConvolutionWorkloadTest(DataLayout dataLayout)
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateDepthwiseConvolution2dWorkloadTest<DepthwiseConvolutionWorkloadType, DataType>
                    (factory, graph, dataLayout);

    // Checks that inputs/outputs are as we expect them (see definition of CreateDepthwiseConvolution2dWorkloadTest).
    DepthwiseConvolution2dQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 2, 2, 5, 5 })
                                                               : std::initializer_list<unsigned int>({ 2, 5, 5, 2 });
    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 2, 2, 5, 5 })
                                                               : std::initializer_list<unsigned int>({ 2, 5, 5, 2 });

    BOOST_TEST((inputHandle->GetShape() == inputShape));
    BOOST_TEST((outputHandle->GetShape() == outputShape));
}

BOOST_AUTO_TEST_CASE(CreateDepthwiseConvolutionFloat32NhwcWorkload)
{
    ClDepthwiseConvolutionWorkloadTest<ClDepthwiseConvolutionWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
}

template <armnn::DataType DataType>
static void ClDirectConvolution2dWorkloadTest()
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, DataType>(factory, graph);

    // Checks that outputs and inputs are as we expect them (see definition of CreateDirectConvolution2dWorkloadTest).
    Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {2, 3, 6, 6}));
    BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {2, 2, 6, 6}));
}

BOOST_AUTO_TEST_CASE(CreateDirectConvolution2dFloatWorkload)
{
    ClDirectConvolution2dWorkloadTest<armnn::DataType::Float32>();
}

BOOST_AUTO_TEST_CASE(CreateDirectConvolution2dFloat16Workload)
{
    ClDirectConvolution2dWorkloadTest<armnn::DataType::Float16>();
}

BOOST_AUTO_TEST_CASE(CreateDirectConvolution2dUint8Workload)
{
    ClDirectConvolution2dWorkloadTest<armnn::DataType::QuantisedAsymm8>();
}

template <typename FullyConnectedWorkloadType, armnn::DataType DataType>
static void ClCreateFullyConnectedWorkloadTest()
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateFullyConnectedWorkloadTest<FullyConnectedWorkloadType, DataType>(factory, graph);

    // Checks that outputs and inputs are as we expect them (see definition of CreateFullyConnectedWorkloadTest).
    FullyConnectedQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {3, 1, 4, 5}));
    BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {3, 7}));
}

BOOST_AUTO_TEST_CASE(CreateFullyConnectedFloatWorkloadTest)
{
    ClCreateFullyConnectedWorkloadTest<ClFullyConnectedWorkload, armnn::DataType::Float32>();
}

BOOST_AUTO_TEST_CASE(CreateFullyConnectedFloat16WorkloadTest)
{
    ClCreateFullyConnectedWorkloadTest<ClFullyConnectedWorkload, armnn::DataType::Float16>();
}

template <typename NormalizationWorkloadType, armnn::DataType DataType>
static void ClNormalizationWorkloadTest(DataLayout dataLayout)
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateNormalizationWorkloadTest<NormalizationWorkloadType, DataType>(factory, graph, dataLayout);

    // Checks that inputs/outputs are as we expect them (see definition of CreateNormalizationWorkloadTest).
    NormalizationQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({3, 5, 5, 1})
                                                               : std::initializer_list<unsigned int>({3, 1, 5, 5});
    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({3, 5, 5, 1})
                                                               : std::initializer_list<unsigned int>({3, 1, 5, 5});

    BOOST_TEST((inputHandle->GetShape() == inputShape));
    BOOST_TEST((outputHandle->GetShape() == outputShape));
}

BOOST_AUTO_TEST_CASE(CreateNormalizationFloat32NchwWorkload)
{
    ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateNormalizationFloat16NchwWorkload)
{
    ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float16>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateNormalizationFloat32NhwcWorkload)
{
    ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
}

BOOST_AUTO_TEST_CASE(CreateNormalizationFloat16NhwcWorkload)
{
    ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
}

template <armnn::DataType DataType>
static void ClPooling2dWorkloadTest(DataLayout dataLayout)
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreatePooling2dWorkloadTest<ClPooling2dWorkload, DataType>(factory, graph, dataLayout);

    TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({3, 2, 5, 5})
                                                               : std::initializer_list<unsigned int>({3, 5, 5, 2});
    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({3, 2, 2, 4})
                                                               : std::initializer_list<unsigned int>({3, 2, 4, 2});

    // Checks that inputs/outputs are as we expect them (see definition of CreatePooling2dWorkloadTest).
    Pooling2dQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    BOOST_TEST((inputHandle->GetShape() == inputShape));
    BOOST_TEST((outputHandle->GetShape() == outputShape));
}

BOOST_AUTO_TEST_CASE(CreatePooling2dFloatNchwWorkload)
{
    ClPooling2dWorkloadTest<armnn::DataType::Float32>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreatePooling2dFloatNhwcWorkload)
{
    ClPooling2dWorkloadTest<armnn::DataType::Float32>(DataLayout::NHWC);
}

BOOST_AUTO_TEST_CASE(CreatePooling2dFloat16NchwWorkload)
{
    ClPooling2dWorkloadTest<armnn::DataType::Float16>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreatePooling2dFloat16NhwcWorkload)
{
    ClPooling2dWorkloadTest<armnn::DataType::Float16>(DataLayout::NHWC);
}

static void ClCreatePreluWorkloadTest(const armnn::TensorShape& inputShape,
                                      const armnn::TensorShape& alphaShape,
                                      const armnn::TensorShape& outputShape,
                                      armnn::DataType dataType)
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreatePreluWorkloadTest<ClPreluWorkload>(factory, graph, inputShape, alphaShape, outputShape,
                                                             dataType);

    // Checks that outputs and inputs are as we expect them (see definition of CreatePreluWorkloadTest).
    PreluQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto alphaHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]);
    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    BOOST_TEST((inputHandle->GetShape() == inputShape));
    BOOST_TEST((alphaHandle->GetShape() == alphaShape));
    BOOST_TEST((outputHandle->GetShape() == outputShape));
}

BOOST_AUTO_TEST_CASE(CreatePreluFloat16Workload)
{
    ClCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::Float16);
}

BOOST_AUTO_TEST_CASE(CreatePreluFloatWorkload)
{
    ClCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::Float32);
}

BOOST_AUTO_TEST_CASE(CreatePreluUint8Workload)
{
    ClCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::QuantisedAsymm8);
}

template <armnn::DataType DataType>
static void ClCreateReshapeWorkloadTest()
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateReshapeWorkloadTest<ClReshapeWorkload, DataType>(factory, graph);

    // Checks that outputs and inputs are as we expect them (see definition of CreateReshapeWorkloadTest).
    ReshapeQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {4, 1}));
    BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {1, 4}));
}

BOOST_AUTO_TEST_CASE(CreateReshapeFloatWorkload)
{
    ClCreateReshapeWorkloadTest<armnn::DataType::Float32>();
}

BOOST_AUTO_TEST_CASE(CreateReshapeFloat16Workload)
{
    ClCreateReshapeWorkloadTest<armnn::DataType::Float16>();
}

BOOST_AUTO_TEST_CASE(CreateReshapeUint8Workload)
{
    ClCreateReshapeWorkloadTest<armnn::DataType::QuantisedAsymm8>();
}

template <typename SoftmaxWorkloadType, armnn::DataType DataType>
static void ClSoftmaxWorkloadTest()
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateSoftmaxWorkloadTest<SoftmaxWorkloadType, DataType>(factory, graph);

    // Checks that inputs/outputs are as we expect them (see definition of ClSoftmaxFloatWorkload).
    SoftmaxQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {4, 1}));
    BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {4, 1}));
}

BOOST_AUTO_TEST_CASE(CreateSoftmaxFloatWorkloadTest)
{
    ClSoftmaxWorkloadTest<ClSoftmaxFloatWorkload, armnn::DataType::Float32>();
}

BOOST_AUTO_TEST_CASE(CreateSoftmaxFloat16WorkloadTest)
{
    ClSoftmaxWorkloadTest<ClSoftmaxFloatWorkload, armnn::DataType::Float16>();
}

template <armnn::DataType DataType>
static void ClSplitterWorkloadTest()
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateSplitterWorkloadTest<ClSplitterWorkload, DataType>(factory, graph);

    // Checks that outputs are as we expect them (see definition of CreateSplitterWorkloadTest).
    SplitterQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {5, 7, 7}));

    auto outputHandle1 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[1]);
    BOOST_TEST(CompareIClTensorHandleShape(outputHandle1, {2, 7, 7}));

    auto outputHandle2 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[2]);
    BOOST_TEST(CompareIClTensorHandleShape(outputHandle2, {2, 7, 7}));

    auto outputHandle0 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
    BOOST_TEST(CompareIClTensorHandleShape(outputHandle0, {1, 7, 7}));
}

BOOST_AUTO_TEST_CASE(CreateSplitterFloatWorkload)
{
    ClSplitterWorkloadTest<armnn::DataType::Float32>();
}

BOOST_AUTO_TEST_CASE(CreateSplitterFloat16Workload)
{
    ClSplitterWorkloadTest<armnn::DataType::Float16>();
}

template <armnn::DataType DataType>
static void ClSplitterConcatTest()
{
    // Tests that it is possible to decide which output of the splitter layer
    // should be linked to which input of the concat layer.
    // We test that it is possible to specify the 0th output of the splitter to be the 1st input to the concat,
    // and the 1st output of the splitter to be the 0th input of the concat.

    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workloads =
        CreateSplitterConcatWorkloadTest<ClSplitterWorkload, ClConcatWorkload, DataType>(factory, graph);

    auto wlSplitter = std::move(workloads.first);
    auto wlConcat   = std::move(workloads.second);

    // Checks that the index of inputs/outputs matches what we declared on InputDescriptor construction.
    armnn::ClSubTensorHandle* sOut0 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[0]);
    armnn::ClSubTensorHandle* sOut1 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[1]);
    armnn::ClSubTensorHandle* mIn0  = dynamic_cast<armnn::ClSubTensorHandle*>(wlConcat->GetData().m_Inputs[0]);
    armnn::ClSubTensorHandle* mIn1  = dynamic_cast<armnn::ClSubTensorHandle*>(wlConcat->GetData().m_Inputs[1]);

    BOOST_TEST(sOut0);
    BOOST_TEST(sOut1);
    BOOST_TEST(mIn0);
    BOOST_TEST(mIn1);

    // Flipped order of inputs/outputs.
    bool validDataPointers = (sOut0 == mIn1) && (sOut1 == mIn0);
    BOOST_TEST(validDataPointers);

    // Also makes sure that the inputs are sub-tensors of one tensor and the outputs are sub-tensors of another tensor.
    bool validSubTensorParents = (mIn0->GetTensor().parent() == mIn1->GetTensor().parent())
                                 && (sOut0->GetTensor().parent() == sOut1->GetTensor().parent());
    BOOST_TEST(validSubTensorParents);
}

BOOST_AUTO_TEST_CASE(CreateSplitterConcatFloatWorkload)
{
    ClSplitterConcatTest<armnn::DataType::Float32>();
}

BOOST_AUTO_TEST_CASE(CreateSplitterConcatFloat16Workload)
{
    ClSplitterConcatTest<armnn::DataType::Float16>();
}

BOOST_AUTO_TEST_CASE(CreateSingleOutputMultipleInputs)
{
    // Tests that it is possible to assign multiple (two) different layers to each of the outputs of a splitter layer.
    // We create a splitter with two outputs, and each of those outputs is used by two different activation layers.
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    std::unique_ptr<ClSplitterWorkload>   wlSplitter;
    std::unique_ptr<ClActivationWorkload> wlActiv0_0;
    std::unique_ptr<ClActivationWorkload> wlActiv0_1;
    std::unique_ptr<ClActivationWorkload> wlActiv1_0;
    std::unique_ptr<ClActivationWorkload> wlActiv1_1;

    CreateSplitterMultipleInputsOneOutputWorkloadTest<ClSplitterWorkload, ClActivationWorkload,
        armnn::DataType::Float32>(factory, graph, wlSplitter, wlActiv0_0, wlActiv0_1, wlActiv1_0, wlActiv1_1);

    // Checks that the index of inputs/outputs matches what we declared on InputDescriptor construction.
    armnn::ClSubTensorHandle* sOut0 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[0]);
    armnn::ClSubTensorHandle* sOut1 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[1]);
    armnn::ClSubTensorHandle* activ0_0Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv0_0->GetData().m_Inputs[0]);
    armnn::ClSubTensorHandle* activ0_1Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv0_1->GetData().m_Inputs[0]);
    armnn::ClSubTensorHandle* activ1_0Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv1_0->GetData().m_Inputs[0]);
    armnn::ClSubTensorHandle* activ1_1Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv1_1->GetData().m_Inputs[0]);

    BOOST_TEST(sOut0);
    BOOST_TEST(sOut1);
    BOOST_TEST(activ0_0Im);
    BOOST_TEST(activ0_1Im);
    BOOST_TEST(activ1_0Im);
    BOOST_TEST(activ1_1Im);

    bool validDataPointers = (sOut0 == activ0_0Im) && (sOut0 == activ0_1Im) &&
                             (sOut1 == activ1_0Im) && (sOut1 == activ1_1Im);

    BOOST_TEST(validDataPointers);
}

BOOST_AUTO_TEST_CASE(CreateMemCopyWorkloadsCl)
{
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    CreateMemCopyWorkloads<IClTensorHandle>(factory);
}

template <armnn::DataType DataType>
static void ClL2NormalizationWorkloadTest(DataLayout dataLayout)
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload =
        CreateL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, DataType>(factory, graph, dataLayout);

    // Checks that inputs/outputs are as we expect them (see definition of CreateL2NormalizationWorkloadTest).
    L2NormalizationQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    TensorShape inputShape  = (dataLayout == DataLayout::NCHW)
                              ? std::initializer_list<unsigned int>({ 5, 20, 50, 67 })
                              : std::initializer_list<unsigned int>({ 5, 50, 67, 20 });
    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ?
                              std::initializer_list<unsigned int>({ 5, 20, 50, 67 })
                              : std::initializer_list<unsigned int>({ 5, 50, 67, 20 });

    BOOST_TEST((inputHandle->GetShape() == inputShape));
    BOOST_TEST((outputHandle->GetShape() == outputShape));
}

BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloatNchwWorkload)
{
    ClL2NormalizationWorkloadTest<armnn::DataType::Float32>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloatNhwcWorkload)
{
    ClL2NormalizationWorkloadTest<armnn::DataType::Float32>(DataLayout::NHWC);
}

BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloat16NchwWorkload)
{
    ClL2NormalizationWorkloadTest<armnn::DataType::Float16>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloat16NhwcWorkload)
{
    ClL2NormalizationWorkloadTest<armnn::DataType::Float16>(DataLayout::NHWC);
}

template <typename LstmWorkloadType>
static void ClCreateLstmWorkloadTest()
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateLstmWorkloadTest<LstmWorkloadType>(factory, graph);

    LstmQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[1]);

    BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 2, 2 }));
    BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 2, 4 }));
}

BOOST_AUTO_TEST_CASE(CreateLSTMWorkloadFloatWorkload)
{
    ClCreateLstmWorkloadTest<ClLstmFloatWorkload>();
}

template <typename ResizeWorkloadType, armnn::DataType DataType>
static void ClResizeWorkloadTest(DataLayout dataLayout)
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateResizeBilinearWorkloadTest<ResizeWorkloadType, DataType>(factory, graph, dataLayout);

    auto queueDescriptor = workload->GetData();

    auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    switch (dataLayout)
    {
        case DataLayout::NHWC:
            BOOST_TEST(CompareIClTensorHandleShape(inputHandle,  { 2, 4, 4, 3 }));
            BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 2, 2, 2, 3 }));
            break;
        case DataLayout::NCHW:
        default:
            BOOST_TEST(CompareIClTensorHandleShape(inputHandle,  { 2, 3, 4, 4 }));
            BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 2, 3, 2, 2 }));
    }
}

BOOST_AUTO_TEST_CASE(CreateResizeFloat32NchwWorkload)
{
    ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateResizeFloat16NchwWorkload)
{
    ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::Float16>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateResizeUint8NchwWorkload)
{
    ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::QuantisedAsymm8>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateResizeFloat32NhwcWorkload)
{
    ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
}

BOOST_AUTO_TEST_CASE(CreateResizeFloat16NhwcWorkload)
{
    ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
}

BOOST_AUTO_TEST_CASE(CreateResizeUint8NhwcWorkload)
{
    ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::QuantisedAsymm8>(DataLayout::NHWC);
}

template <typename MeanWorkloadType, armnn::DataType DataType>
static void ClMeanWorkloadTest()
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateMeanWorkloadTest<MeanWorkloadType, DataType>(factory, graph);

    // Checks that inputs/outputs are as we expect them (see definition of CreateMeanWorkloadTest).
    MeanQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    // The first dimension (batch size) in both input and output is singular, so it has been reduced by ACL.
    BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 1, 3, 7, 4 }));
    BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 1, 4 }));
}

BOOST_AUTO_TEST_CASE(CreateMeanFloat32Workload)
{
    ClMeanWorkloadTest<ClMeanWorkload, armnn::DataType::Float32>();
}

BOOST_AUTO_TEST_CASE(CreateMeanFloat16Workload)
{
    ClMeanWorkloadTest<ClMeanWorkload, armnn::DataType::Float16>();
}

BOOST_AUTO_TEST_CASE(CreateMeanUint8Workload)
{
    ClMeanWorkloadTest<ClMeanWorkload, armnn::DataType::QuantisedAsymm8>();
}

template <armnn::DataType DataType>
static void ClCreateConcatWorkloadTest(std::initializer_list<unsigned int> outputShape,
                                       unsigned int concatAxis)
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateConcatWorkloadTest<ClConcatWorkload, DataType>(factory, graph, outputShape, concatAxis);

    ConcatQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle0 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto inputHandle1 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]);
    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    BOOST_TEST(CompareIClTensorHandleShape(inputHandle0, { 2, 3, 2, 5 }));
    BOOST_TEST(CompareIClTensorHandleShape(inputHandle1, { 2, 3, 2, 5 }));
    BOOST_TEST(CompareIClTensorHandleShape(outputHandle, outputShape));
}

BOOST_AUTO_TEST_CASE(CreateConcatDim0Float32Workload)
{
    ClCreateConcatWorkloadTest<armnn::DataType::Float32>({ 4, 3, 2, 5 }, 0);
}

BOOST_AUTO_TEST_CASE(CreateConcatDim1Float32Workload)
{
    ClCreateConcatWorkloadTest<armnn::DataType::Float32>({ 2, 6, 2, 5 }, 1);
}

BOOST_AUTO_TEST_CASE(CreateConcatDim3Float32Workload)
{
    ClCreateConcatWorkloadTest<armnn::DataType::Float32>({ 2, 3, 2, 10 }, 3);
}

BOOST_AUTO_TEST_CASE(CreateConcatDim0Uint8Workload)
{
    ClCreateConcatWorkloadTest<armnn::DataType::QuantisedAsymm8>({ 4, 3, 2, 5 }, 0);
}

BOOST_AUTO_TEST_CASE(CreateConcatDim1Uint8Workload)
{
    ClCreateConcatWorkloadTest<armnn::DataType::QuantisedAsymm8>({ 2, 6, 2, 5 }, 1);
}

BOOST_AUTO_TEST_CASE(CreateConcatDim3Uint8Workload)
{
    ClCreateConcatWorkloadTest<armnn::DataType::QuantisedAsymm8>({ 2, 3, 2, 10 }, 3);
}

template <armnn::DataType DataType>
static void ClSpaceToDepthWorkloadTest()
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateSpaceToDepthWorkloadTest<ClSpaceToDepthWorkload, DataType>(factory, graph);

    SpaceToDepthQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 1, 2, 2, 1 }));
    BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 1, 1, 1, 4 }));
}

BOOST_AUTO_TEST_CASE(CreateSpaceToDepthFloat32Workload)
{
    ClSpaceToDepthWorkloadTest<armnn::DataType::Float32>();
}

BOOST_AUTO_TEST_CASE(CreateSpaceToDepthFloat16Workload)
{
    ClSpaceToDepthWorkloadTest<armnn::DataType::Float16>();
}

BOOST_AUTO_TEST_CASE(CreateSpaceToDepthQAsymm8Workload)
{
    ClSpaceToDepthWorkloadTest<armnn::DataType::QuantisedAsymm8>();
}

BOOST_AUTO_TEST_CASE(CreateSpaceToDepthQSymm16Workload)
{
    ClSpaceToDepthWorkloadTest<armnn::DataType::QuantisedSymm16>();
}

template <armnn::DataType DataType>
static void ClCreateStackWorkloadTest(const std::initializer_list<unsigned int>& inputShape,
                                      const std::initializer_list<unsigned int>& outputShape,
                                      unsigned int axis,
                                      unsigned int numInputs)
{
    armnn::Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateStackWorkloadTest<ClStackWorkload, DataType>(factory,
                                                                       graph,
                                                                       TensorShape(inputShape),
                                                                       TensorShape(outputShape),
                                                                       axis,
                                                                       numInputs);

    // Checks that inputs and the output are as expected.
    StackQueueDescriptor queueDescriptor = workload->GetData();
    for (unsigned int i = 0; i < numInputs; ++i)
    {
        auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[i]);
        BOOST_TEST(CompareIClTensorHandleShape(inputHandle, inputShape));
    }
    auto outputHandle =
        boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
    BOOST_TEST(CompareIClTensorHandleShape(outputHandle, outputShape));
}

BOOST_AUTO_TEST_CASE(CreateStackFloat32Workload)
{
    ClCreateStackWorkloadTest<armnn::DataType::Float32>({ 3, 4, 5 }, { 3, 4, 2, 5 }, 2, 2);
}

BOOST_AUTO_TEST_CASE(CreateStackUint8Workload)
{
    ClCreateStackWorkloadTest<armnn::DataType::QuantisedAsymm8>({ 3, 4, 5 }, { 3, 4, 2, 5 }, 2, 2);
}

template <typename QuantizedLstmWorkloadType>
static void ClCreateQuantizedLstmWorkloadTest()
{
    using namespace armnn::armcomputetensorutils;
    using boost::polymorphic_downcast;

    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateQuantizedLstmWorkloadTest<QuantizedLstmWorkloadType>(factory, graph);

    QuantizedLstmQueueDescriptor queueDescriptor = workload->GetData();

    IAclTensorHandle* inputHandle = polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
    BOOST_TEST((inputHandle->GetShape() == TensorShape({2, 2})));
    BOOST_TEST((inputHandle->GetDataType() == arm_compute::DataType::QASYMM8));

    IAclTensorHandle* cellStateInHandle = polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
    BOOST_TEST((cellStateInHandle->GetShape() == TensorShape({2, 4})));
    BOOST_TEST((cellStateInHandle->GetDataType() == arm_compute::DataType::QSYMM16));

    IAclTensorHandle* outputStateInHandle = polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[2]);
    BOOST_TEST((outputStateInHandle->GetShape() == TensorShape({2, 4})));
    BOOST_TEST((outputStateInHandle->GetDataType() == arm_compute::DataType::QASYMM8));

    IAclTensorHandle* cellStateOutHandle = polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
    BOOST_TEST((cellStateOutHandle->GetShape() == TensorShape({2, 4})));
    BOOST_TEST((cellStateOutHandle->GetDataType() == arm_compute::DataType::QSYMM16));

    IAclTensorHandle* outputStateOutHandle = polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
    BOOST_TEST((outputStateOutHandle->GetShape() == TensorShape({2, 4})));
    BOOST_TEST((outputStateOutHandle->GetDataType() == arm_compute::DataType::QASYMM8));
}

BOOST_AUTO_TEST_CASE(CreateQuantizedLstmWorkload)
{
    ClCreateQuantizedLstmWorkloadTest<ClQuantizedLstmWorkload>();
}

BOOST_AUTO_TEST_SUITE_END()