//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

// NOTE(review): in this copy of the file the include directives below carry no header names, every `template`
// keyword lacks its parameter list, and the polymorphic_downcast / Create*WorkloadTest / NeonCreate*WorkloadTest
// calls carry no template arguments. This looks like angle-bracket text lost in extraction rather than authored
// code - TODO restore it from the upstream file before building.
#include
#include
#include
#include
#include
#include

BOOST_AUTO_TEST_SUITE(CreateWorkloadNeon)

namespace
{

// Compares the arm_compute tensor info held by a Neon tensor handle against the arm_compute tensor info that
// BuildArmComputeTensorInfo produces for expectedInfo.
//
// handle       - tensor handle to inspect; dereferenced without a null check.
// expectedInfo - description the handle is expected to match.
//
// Returns true only when the data type, the number of dimensions, the quantization info and the size of every
// dimension are equal; returns false at the first mismatch.
bool TestNeonTensorHandleInfo(armnn::INeonTensorHandle* handle, const armnn::TensorInfo& expectedInfo)
{
    using namespace armnn::armcomputetensorutils;

    const arm_compute::ITensorInfo* handleInfo = handle->GetTensor().info();
    // Convert the expectation so that both sides are compared as arm_compute tensor infos.
    const arm_compute::TensorInfo expectedAclInfo = BuildArmComputeTensorInfo(expectedInfo);

    if (handleInfo->data_type() != expectedAclInfo.data_type())
    {
        return false;
    }

    if (handleInfo->num_dimensions() != expectedAclInfo.num_dimensions())
    {
        return false;
    }

    if (handleInfo->quantization_info() != expectedAclInfo.quantization_info())
    {
        return false;
    }

    // Both infos have the same number of dimensions at this point, so one index range covers both.
    for (std::size_t d = 0; d < expectedAclInfo.num_dimensions(); ++d)
    {
        if (handleInfo->dimension(d) != expectedAclInfo.dimension(d))
        {
            return false;
        }
    }

    return true;
}

} // namespace

// Builds a workload with CreateActivationWorkloadTest using a NeonWorkloadFactory and checks that the first input
// and the first output handle of its queue descriptor both describe a {1, 1} tensor of DataType.
// NOTE(review): DataType is presumably the (missing) template parameter of this function - confirm.
template
static void NeonCreateActivationWorkloadTest()
{
    Graph graph;
    NeonWorkloadFactory factory;
    auto workload = CreateActivationWorkloadTest(factory, graph);

    // Checks that inputs/outputs are as we expect them (see definition of CreateActivationWorkloadTest).
    ActivationQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]);
    auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]);
    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({1, 1}, DataType)));
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({1, 1}, DataType)));
}

// The Float16 test case is compiled only when __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is defined.
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
BOOST_AUTO_TEST_CASE(CreateActivationFloat16Workload)
{
    NeonCreateActivationWorkloadTest();
}
#endif

BOOST_AUTO_TEST_CASE(CreateActivationFloatWorkload)
{
    NeonCreateActivationWorkloadTest();
}

// Common body of the addition, subtraction and multiplication test cases: builds a workload with
// CreateArithmeticWorkloadTest and checks that both input handles and the output handle describe a {2, 3} tensor
// of DataType.
// NOTE(review): DescriptorType and DataType are presumably (missing) template parameters; the test cases that
// call this helper are textually identical in this copy, so what distinguishes them is not visible here.
template
static void NeonCreateArithmethicWorkloadTest()
{
    Graph graph;
    NeonWorkloadFactory factory;
    auto workload = CreateArithmeticWorkloadTest(factory, graph);

    DescriptorType queueDescriptor = workload->GetData();
    auto inputHandle1 = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]);
    auto inputHandle2 = boost::polymorphic_downcast(queueDescriptor.m_Inputs[1]);
    auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]);
    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle1, TensorInfo({2, 3}, DataType)));
    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle2, TensorInfo({2, 3}, DataType)));
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({2, 3}, DataType)));
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
BOOST_AUTO_TEST_CASE(CreateAdditionFloat16Workload)
{
    NeonCreateArithmethicWorkloadTest();
}
#endif

BOOST_AUTO_TEST_CASE(CreateAdditionFloatWorkload)
{
    NeonCreateArithmethicWorkloadTest();
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
BOOST_AUTO_TEST_CASE(CreateSubtractionFloat16Workload)
{
    NeonCreateArithmethicWorkloadTest();
}
#endif

BOOST_AUTO_TEST_CASE(CreateSubtractionFloatWorkload)
{
    NeonCreateArithmethicWorkloadTest();
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
BOOST_AUTO_TEST_CASE(CreateMultiplicationFloat16Workload)
{
    NeonCreateArithmethicWorkloadTest();
}
#endif
// NOTE(review): in this copy of the file every `template` keyword lacks its parameter list, and the
// polymorphic_downcast / dynamic_cast / std::unique_ptr / std::initializer_list uses and the Create*WorkloadTest /
// NeonCreate*WorkloadTest calls carry no template arguments. This looks like angle-bracket text lost in extraction
// rather than authored code - TODO restore it from the upstream file before building. As a consequence the
// Float16 / Float / Uint8 test cases of one family differ only by name here; the data type each one selects is
// presumably carried by the missing arguments.

BOOST_AUTO_TEST_CASE(CreateMultiplicationFloatWorkload)
{
    NeonCreateArithmethicWorkloadTest();
}

// Builds a workload with CreateBatchNormalizationWorkloadTest for the given data layout and checks the first
// input and first output handle. Input and output are expected to have the same shape:
// {2, 3, 4, 4} for NCHW, {2, 4, 4, 3} for any other layout value.
template
static void NeonCreateBatchNormalizationWorkloadTest(DataLayout dataLayout)
{
    Graph graph;
    NeonWorkloadFactory factory;
    auto workload = CreateBatchNormalizationWorkloadTest
                    (factory, graph, dataLayout);

    // Checks that outputs and inputs are as we expect them (see definition of CreateBatchNormalizationWorkloadTest).
    BatchNormalizationQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]);
    auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]);

    TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 3, 4, 4} : TensorShape{2, 4, 4, 3};
    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 3, 4, 4} : TensorShape{2, 4, 4, 3};

    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
}

// The Float16 test cases are compiled only when __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is defined.
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloat16NchwWorkload)
{
    NeonCreateBatchNormalizationWorkloadTest(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloat16NhwcWorkload)
{
    NeonCreateBatchNormalizationWorkloadTest(DataLayout::NHWC);
}
#endif

BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloatNchwWorkload)
{
    NeonCreateBatchNormalizationWorkloadTest(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloatNhwcWorkload)
{
    NeonCreateBatchNormalizationWorkloadTest(DataLayout::NHWC);
}

// Builds a workload with CreateConvolution2dWorkloadTest for the given data layout (NCHW when omitted) and checks
// the first input and first output handle.
// Expected shapes: NCHW input {2, 3, 8, 16} / output {2, 2, 2, 10}; otherwise input {2, 8, 16, 3} /
// output {2, 2, 10, 2}.
template
static void NeonCreateConvolution2dWorkloadTest(DataLayout dataLayout = DataLayout::NCHW)
{
    Graph graph;
    NeonWorkloadFactory factory;
    auto workload = CreateConvolution2dWorkloadTest(factory, graph, dataLayout);

    TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 3, 8, 16} : TensorShape{2, 8, 16, 3};
    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 2, 2, 10} : TensorShape{2, 2, 10, 2};

    // Checks that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest).
    Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]);
    auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]);
    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
BOOST_AUTO_TEST_CASE(CreateConvolution2dFloat16NchwWorkload)
{
    // Relies on the helper's default argument, DataLayout::NCHW.
    NeonCreateConvolution2dWorkloadTest();
}

BOOST_AUTO_TEST_CASE(CreateConvolution2dFloat16NhwcWorkload)
{
    NeonCreateConvolution2dWorkloadTest(DataLayout::NHWC);
}
#endif

BOOST_AUTO_TEST_CASE(CreateConvolution2dFloatNchwWorkload)
{
    // Relies on the helper's default argument, DataLayout::NCHW.
    NeonCreateConvolution2dWorkloadTest();
}

BOOST_AUTO_TEST_CASE(CreateConvolution2dFloatNhwcWorkload)
{
    NeonCreateConvolution2dWorkloadTest(DataLayout::NHWC);
}

// Builds a workload with CreateDepthwiseConvolution2dWorkloadTest for the given data layout and checks the first
// input and first output handle. Input and output are expected to have the same shape:
// { 2, 2, 5, 5 } for NCHW, { 2, 5, 5, 2 } otherwise. Only NHWC test cases call this helper in this file.
template
static void NeonCreateDepthWiseConvolutionWorkloadTest(DataLayout dataLayout)
{
    Graph graph;
    NeonWorkloadFactory factory;

    auto workload = CreateDepthwiseConvolution2dWorkloadTest(factory, graph, dataLayout);

    // Checks that inputs/outputs are as we expect them
    // (see definition of CreateDepthwiseConvolution2dWorkloadTest).
    DepthwiseConvolution2dQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]);
    auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]);

    std::initializer_list inputShape = (dataLayout == DataLayout::NCHW)
            ? std::initializer_list({ 2, 2, 5, 5 })
            : std::initializer_list({ 2, 5, 5, 2 });
    std::initializer_list outputShape = (dataLayout == DataLayout::NCHW)
            ? std::initializer_list({ 2, 2, 5, 5 })
            : std::initializer_list({ 2, 5, 5, 2 });

    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
}

BOOST_AUTO_TEST_CASE(CreateDepthWiseConvolution2dFloat32NhwcWorkload)
{
    NeonCreateDepthWiseConvolutionWorkloadTest(DataLayout::NHWC);
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
BOOST_AUTO_TEST_CASE(CreateDepthWiseConvolution2dFloat16NhwcWorkload)
{
    NeonCreateDepthWiseConvolutionWorkloadTest(DataLayout::NHWC);
}
#endif

// Builds a workload with CreateFullyConnectedWorkloadTest and checks that its first input handle describes a
// {3, 1, 4, 5} tensor and its first output handle a {3, 7} tensor.
template
static void NeonCreateFullyConnectedWorkloadTest()
{
    Graph graph;
    NeonWorkloadFactory factory;
    auto workload = CreateFullyConnectedWorkloadTest(factory, graph);

    // Checks that outputs and inputs are as we expect them (see definition of CreateFullyConnectedWorkloadTest).
    FullyConnectedQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]);
    auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]);
    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({3, 1, 4, 5}, DataType)));
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({3, 7}, DataType)));
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
BOOST_AUTO_TEST_CASE(CreateFullyConnectedFloat16Workload)
{
    NeonCreateFullyConnectedWorkloadTest();
}
#endif

BOOST_AUTO_TEST_CASE(CreateFullyConnectedFloatWorkload)
{
    NeonCreateFullyConnectedWorkloadTest();
}

// Builds a workload with CreateNormalizationWorkloadTest for the given data layout and checks the first input and
// first output handle. Input and output are expected to have the same shape:
// {3, 5, 5, 1} for NCHW, {3, 1, 5, 5} otherwise.
// NOTE(review): unlike the other layout-parameterised helpers in this file, these two shapes are not an
// NCHW -> NHWC permutation of one another; presumably they mirror what CreateNormalizationWorkloadTest sets up -
// confirm against that helper.
template
static void NeonCreateNormalizationWorkloadTest(DataLayout dataLayout)
{
    Graph graph;
    NeonWorkloadFactory factory;
    auto workload = CreateNormalizationWorkloadTest(factory, graph, dataLayout);

    // Checks that outputs and inputs are as we expect them (see definition of CreateNormalizationWorkloadTest).
    NormalizationQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]);
    auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]);

    TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{3, 5, 5, 1} : TensorShape{3, 1, 5, 5};
    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{3, 5, 5, 1} : TensorShape{3, 1, 5, 5};

    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
BOOST_AUTO_TEST_CASE(CreateNormalizationFloat16NchwWorkload)
{
    NeonCreateNormalizationWorkloadTest(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateNormalizationFloat16NhwcWorkload)
{
    NeonCreateNormalizationWorkloadTest(DataLayout::NHWC);
}
#endif

BOOST_AUTO_TEST_CASE(CreateNormalizationFloatNchwWorkload)
{
    NeonCreateNormalizationWorkloadTest(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateNormalizationFloatNhwcWorkload)
{
    NeonCreateNormalizationWorkloadTest(DataLayout::NHWC);
}

// Builds a workload with CreatePooling2dWorkloadTest for the given data layout (NCHW when omitted) and checks the
// first input and first output handle.
// Expected shapes: NCHW input {3, 2, 5, 5} / output {3, 2, 2, 4}; otherwise input {3, 5, 5, 2} /
// output {3, 2, 4, 2}.
template
static void NeonCreatePooling2dWorkloadTest(DataLayout dataLayout = DataLayout::NCHW)
{
    Graph graph;
    NeonWorkloadFactory factory;
    auto workload = CreatePooling2dWorkloadTest
                    (factory, graph, dataLayout);

    TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{3, 2, 5, 5} : TensorShape{3, 5, 5, 2};
    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{3, 2, 2, 4} : TensorShape{3, 2, 4, 2};

    // Checks that outputs and inputs are as we expect them (see definition of CreatePooling2dWorkloadTest).
    Pooling2dQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]);
    auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]);
    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
BOOST_AUTO_TEST_CASE(CreatePooling2dFloat16Workload)
{
    // Relies on the helper's default argument, DataLayout::NCHW.
    NeonCreatePooling2dWorkloadTest();
}
#endif

BOOST_AUTO_TEST_CASE(CreatePooling2dFloatNchwWorkload)
{
    NeonCreatePooling2dWorkloadTest(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreatePooling2dFloatNhwcWorkload)
{
    NeonCreatePooling2dWorkloadTest(DataLayout::NHWC);
}

BOOST_AUTO_TEST_CASE(CreatePooling2dUint8NchwWorkload)
{
    NeonCreatePooling2dWorkloadTest(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreatePooling2dUint8NhwcWorkload)
{
    NeonCreatePooling2dWorkloadTest(DataLayout::NHWC);
}

// Builds a workload with CreateReshapeWorkloadTest and checks that its first input handle describes a {4, 1}
// tensor and its first output handle a {1, 4} tensor.
template
static void NeonCreateReshapeWorkloadTest()
{
    Graph graph;
    NeonWorkloadFactory factory;
    auto workload = CreateReshapeWorkloadTest(factory, graph);

    // Checks that outputs and inputs are as we expect them (see definition of CreateReshapeWorkloadTest).
    ReshapeQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]);
    auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]);
    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({4, 1}, DataType)));
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({1, 4}, DataType)));
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
BOOST_AUTO_TEST_CASE(CreateReshapeFloat16Workload)
{
    NeonCreateReshapeWorkloadTest();
}
#endif

BOOST_AUTO_TEST_CASE(CreateReshapeFloatWorkload)
{
    NeonCreateReshapeWorkloadTest();
}

BOOST_AUTO_TEST_CASE(CreateReshapeUint8Workload)
{
    NeonCreateReshapeWorkloadTest();
}

// Builds a workload with CreateSoftmaxWorkloadTest and checks that its first input and first output handle both
// describe a {4, 1} tensor.
template
static void NeonCreateSoftmaxWorkloadTest()
{
    Graph graph;
    NeonWorkloadFactory factory;
    auto workload = CreateSoftmaxWorkloadTest(factory, graph);

    // Checks that outputs and inputs are as we expect them (see definition of CreateSoftmaxWorkloadTest).
    SoftmaxQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]);
    auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]);
    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({4, 1}, DataType)));
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({4, 1}, DataType)));
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
BOOST_AUTO_TEST_CASE(CreateSoftmaxFloat16Workload)
{
    NeonCreateSoftmaxWorkloadTest();
}
#endif

BOOST_AUTO_TEST_CASE(CreateSoftmaxFloatWorkload)
{
    NeonCreateSoftmaxWorkloadTest();
}

// Builds a splitter workload with CreateSplitterWorkloadTest and checks its single inspected input ({5, 7, 7})
// and its three outputs ({1, 7, 7}, {2, 7, 7}, {2, 7, 7}), all as Float32.
BOOST_AUTO_TEST_CASE(CreateSplitterWorkload)
{
    Graph graph;
    NeonWorkloadFactory factory;
    auto workload = CreateSplitterWorkloadTest(factory, graph);

    // Checks that outputs are as we expect them (see definition of CreateSplitterWorkloadTest).
    SplitterQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]);
    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({5, 7, 7}, DataType::Float32)));

    auto outputHandle0 = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]);
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle0, TensorInfo({1, 7, 7}, DataType::Float32)));

    auto outputHandle1 = boost::polymorphic_downcast(queueDescriptor.m_Outputs[1]);
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle1, TensorInfo({2, 7, 7}, DataType::Float32)));

    auto outputHandle2 = boost::polymorphic_downcast(queueDescriptor.m_Outputs[2]);
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle2, TensorInfo({2, 7, 7}, DataType::Float32)));
}

BOOST_AUTO_TEST_CASE(CreateSplitterMerger)
{
    // Tests that it is possible to decide which output of the splitter layer
    // should be linked to which input of the merger layer.
    // We test that it is possible to specify the 0th output
    // of the splitter to be the 1st input to the merger, and the 1st output of the splitter to be the 0th input
    // of the merger.

    Graph graph;
    NeonWorkloadFactory factory;

    auto workloads =
        CreateSplitterMergerWorkloadTest(factory, graph);

    // The helper returns a pair: splitter workload first, merger workload second.
    auto wlSplitter = std::move(workloads.first);
    auto wlMerger = std::move(workloads.second);

    // Checks that the index of inputs/outputs matches what we declared on InputDescriptor construction.
    armnn::INeonTensorHandle* sOut0 = dynamic_cast(wlSplitter->GetData().m_Outputs[0]);
    armnn::INeonTensorHandle* sOut1 = dynamic_cast(wlSplitter->GetData().m_Outputs[1]);
    armnn::INeonTensorHandle* mIn0 = dynamic_cast(wlMerger->GetData().m_Inputs[0]);
    armnn::INeonTensorHandle* mIn1 = dynamic_cast(wlMerger->GetData().m_Inputs[1]);

    // Every cast must have yielded a non-null handle.
    BOOST_TEST(sOut0);
    BOOST_TEST(sOut1);
    BOOST_TEST(mIn0);
    BOOST_TEST(mIn1);

    // Crossed wiring: splitter output 0 is merger input 1 and splitter output 1 is merger input 0
    // (compared by handle pointer identity).
    bool validDataPointers = (sOut0 == mIn1) && (sOut1 == mIn0);

    BOOST_TEST(validDataPointers);
}

BOOST_AUTO_TEST_CASE(CreateSingleOutputMultipleInputs)
{
    // Tests that it is possible to assign multiple (two) different layers to each of the outputs of a splitter layer.
    // We create a splitter with two outputs; each of those outputs is used by two different activation layers.

    Graph graph;
    NeonWorkloadFactory factory;
    std::unique_ptr wlSplitter;
    std::unique_ptr wlActiv0_0;
    std::unique_ptr wlActiv0_1;
    std::unique_ptr wlActiv1_0;
    std::unique_ptr wlActiv1_1;

    // The five workload pointers above are passed to the helper, which is expected to populate them
    // (they are dereferenced immediately afterwards).
    CreateSplitterMultipleInputsOneOutputWorkloadTest(factory, graph, wlSplitter, wlActiv0_0, wlActiv0_1,
                                                      wlActiv1_0, wlActiv1_1);

    armnn::INeonTensorHandle* sOut0 = dynamic_cast(wlSplitter->GetData().m_Outputs[0]);
    armnn::INeonTensorHandle* sOut1 = dynamic_cast(wlSplitter->GetData().m_Outputs[1]);
    armnn::INeonTensorHandle* activ0_0Im = dynamic_cast(wlActiv0_0->GetData().m_Inputs[0]);
    armnn::INeonTensorHandle* activ0_1Im = dynamic_cast(wlActiv0_1->GetData().m_Inputs[0]);
    armnn::INeonTensorHandle* activ1_0Im = dynamic_cast(wlActiv1_0->GetData().m_Inputs[0]);
    armnn::INeonTensorHandle* activ1_1Im = dynamic_cast(wlActiv1_1->GetData().m_Inputs[0]);

    // Every cast must have yielded a non-null handle.
    BOOST_TEST(sOut0);
    BOOST_TEST(sOut1);
    BOOST_TEST(activ0_0Im);
    BOOST_TEST(activ0_1Im);
    BOOST_TEST(activ1_0Im);
    BOOST_TEST(activ1_1Im);

    // Splitter output 0 must be the input of both activ0_* workloads and splitter output 1 the input of both
    // activ1_* workloads (compared by handle pointer identity).
    bool validDataPointers = (sOut0 == activ0_0Im) && (sOut0 == activ0_1Im) &&
                             (sOut1 == activ1_0Im) && (sOut1 == activ1_1Im);

    BOOST_TEST(validDataPointers);
}

// Runs CreateMemCopyWorkloads against a NeonWorkloadFactory. This test case makes no assertions of its own;
// whatever is verified happens inside that helper, which is not visible in this file.
BOOST_AUTO_TEST_CASE(CreateMemCopyWorkloadsNeon)
{
    NeonWorkloadFactory factory;
    CreateMemCopyWorkloads(factory);
}

// Builds a workload with CreateL2NormalizationWorkloadTest for the given data layout and checks the first input
// and first output handle. Input and output are expected to have the same shape:
// { 5, 20, 50, 67 } for NCHW, { 5, 50, 67, 20 } otherwise.
template
static void NeonCreateL2NormalizationWorkloadTest(DataLayout dataLayout)
{
    Graph graph;
    NeonWorkloadFactory factory;
    auto workload =
            CreateL2NormalizationWorkloadTest(factory, graph, dataLayout);

    // Checks that inputs/outputs are as we expect them (see definition of CreateL2NormalizationWorkloadTest).
    L2NormalizationQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]);
    auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]);

    TensorShape inputShape = (dataLayout == DataLayout::NCHW) ?
                TensorShape{ 5, 20, 50, 67 } : TensorShape{ 5, 50, 67, 20 };
    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ?
                TensorShape{ 5, 20, 50, 67 } : TensorShape{ 5, 50, 67, 20 };

    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloat16NchwWorkload)
{
    NeonCreateL2NormalizationWorkloadTest(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloat16NhwcWorkload)
{
    NeonCreateL2NormalizationWorkloadTest(DataLayout::NHWC);
}
#endif

BOOST_AUTO_TEST_CASE(CreateL2NormalizationNchwWorkload)
{
    NeonCreateL2NormalizationWorkloadTest(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateL2NormalizationNhwcWorkload)
{
    NeonCreateL2NormalizationWorkloadTest(DataLayout::NHWC);
}

BOOST_AUTO_TEST_SUITE_END()