diff options
Diffstat (limited to 'src/armnn/backends/test')
-rw-r--r-- | src/armnn/backends/test/ArmComputeCl.cpp | 3 | ||||
-rw-r--r-- | src/armnn/backends/test/ArmComputeNeon.cpp | 9 | ||||
-rw-r--r-- | src/armnn/backends/test/Conv2dTestImpl.hpp | 129 | ||||
-rw-r--r-- | src/armnn/backends/test/CreateWorkloadCl.cpp | 27 | ||||
-rw-r--r-- | src/armnn/backends/test/CreateWorkloadNeon.cpp | 11 | ||||
-rw-r--r-- | src/armnn/backends/test/CreateWorkloadRef.cpp | 11 | ||||
-rw-r--r-- | src/armnn/backends/test/FullyConnectedTestImpl.hpp | 7 | ||||
-rw-r--r-- | src/armnn/backends/test/IsLayerSupportedTest.cpp | 3 | ||||
-rw-r--r-- | src/armnn/backends/test/LayerTests.cpp | 777 | ||||
-rw-r--r-- | src/armnn/backends/test/LayerTests.hpp | 3 | ||||
-rw-r--r-- | src/armnn/backends/test/MemCopyTests.cpp | 2 | ||||
-rw-r--r-- | src/armnn/backends/test/NormTestImpl.hpp | 3 | ||||
-rw-r--r-- | src/armnn/backends/test/Reference.cpp | 3 | ||||
-rw-r--r-- | src/armnn/backends/test/SoftmaxTestImpl.hpp | 5 | ||||
-rw-r--r-- | src/armnn/backends/test/SplitterTestImpl.hpp | 187 |
15 files changed, 776 insertions, 404 deletions
diff --git a/src/armnn/backends/test/ArmComputeCl.cpp b/src/armnn/backends/test/ArmComputeCl.cpp index c45a82db63..ae42d03ee3 100644 --- a/src/armnn/backends/test/ArmComputeCl.cpp +++ b/src/armnn/backends/test/ArmComputeCl.cpp @@ -62,6 +62,9 @@ ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1, DepthwiseConvoluti ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul1Uint8, DepthwiseConvolution2dDepthMul1Uint8Test, true) ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1Uint8, DepthwiseConvolution2dDepthMul1Uint8Test, false) +ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dAsymmetric, DepthwiseConvolution2dAsymmetricTest, true) +ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dAsymmetric, DepthwiseConvolution2dAsymmetricTest, false) + // Splitter BOOST_AUTO_TEST_CASE(SimpleSplitter) { diff --git a/src/armnn/backends/test/ArmComputeNeon.cpp b/src/armnn/backends/test/ArmComputeNeon.cpp index a81b7cdcd7..0a78b75e2e 100644 --- a/src/armnn/backends/test/ArmComputeNeon.cpp +++ b/src/armnn/backends/test/ArmComputeNeon.cpp @@ -88,6 +88,9 @@ ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1, DepthwiseConvoluti ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul1Uint8, DepthwiseConvolution2dDepthMul1Uint8Test, true) ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1Uint8, DepthwiseConvolution2dDepthMul1Uint8Test, false) +ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dAsymmetric, DepthwiseConvolution2dAsymmetricTest, true) +ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dAsymmetric, DepthwiseConvolution2dAsymmetricTest, false) + namespace { @@ -134,6 +137,10 @@ BOOST_AUTO_TEST_CASE(DepthwiseConv2dUtils) // Supported shape 2x2 armnn::TensorInfo weightsInfo2x2({ 1, 1, 2, 2 }, armnn::DataType::Float32); BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, MakeDepthwiseConv2dDesc(1, 1), weightsInfo2x2)); + + // Asymmetric padding + BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, MakeDepthwiseConv2dDesc(1, 1, 1, 1, 2, 1, 2), + weightsInfo3x3)); } // Pooling @@ -235,6 +242,8 @@ ARMNN_AUTO_TEST_CASE(AddBroadcast1Element, AdditionBroadcast1ElementTest) // Mul ARMNN_AUTO_TEST_CASE(SimpleMultiplication, MultiplicationTest) +ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1Element, MultiplicationBroadcast1ElementTest) +ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1DVector, MultiplicationBroadcast1DVectorTest) // Batch Norm ARMNN_AUTO_TEST_CASE(BatchNorm, BatchNormTest) diff --git a/src/armnn/backends/test/Conv2dTestImpl.hpp b/src/armnn/backends/test/Conv2dTestImpl.hpp index 0c0511b234..0c34beaa33 100644 --- a/src/armnn/backends/test/Conv2dTestImpl.hpp +++ b/src/armnn/backends/test/Conv2dTestImpl.hpp @@ -60,8 +60,6 @@ void ApplyBias(std::vector<T>& v, float vScale, int32_t vOffset, } } - - template<typename T, typename B> LayerTestResult<T, 4> SimpleConvolution2dTestImpl(armnn::IWorkloadFactory& workloadFactory, const boost::multi_array<T, 4>& input, @@ -87,6 +85,8 @@ LayerTestResult<T, 4> SimpleConvolution2dTestImpl(armnn::IWorkloadFactory& workl unsigned int kernelHeight = boost::numeric_cast<unsigned int>(kernel.shape()[2]); unsigned int kernelWidth = boost::numeric_cast<unsigned int>(kernel.shape()[3]); + unsigned int kernelChannels = boost::numeric_cast<unsigned int>(kernel.shape()[1]); + unsigned int kernelDepthMul = boost::numeric_cast<unsigned int>(kernel.shape()[0]); bool biasEnabled = bias.size() > 0; @@ -102,7 +102,7 @@ LayerTestResult<T, 4> SimpleConvolution2dTestImpl(armnn::IWorkloadFactory& workl armnn::TensorInfo inputTensorInfo({2*inputNum, inputChannels, inputHeight, inputWidth}, armnn::GetDataType<T>()); armnn::TensorInfo outputTensorInfo({2*outputNum, outputChannels, outputHeight, outputWidth}, armnn::GetDataType<T>()); - armnn::TensorInfo kernelDesc({outputChannels, inputChannels, kernelHeight, kernelWidth}, armnn::GetDataType<T>()); + armnn::TensorInfo kernelDesc({kernelDepthMul, kernelChannels, kernelHeight, kernelWidth}, armnn::GetDataType<T>()); armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, armnn::GetDataType<B>()); // Set quantization parameters if the requested type is a quantized type. @@ -186,6 +186,120 @@ LayerTestResult<T, 4> SimpleConvolution2dTestImpl(armnn::IWorkloadFactory& workl CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]); + workloadFactory.Finalize(); + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + + return ret; +} + +template<typename T, typename B> +LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestImpl(armnn::IWorkloadFactory& workloadFactory, + const boost::multi_array<T, 4>& input, + const boost::multi_array<T, 4>& kernel, + const boost::multi_array<B, 1>& bias, + const boost::multi_array<T, 4>& outputExpected, + float qScale, + int32_t qOffset, + uint32_t padLeft = 0, + uint32_t padTop = 0, + uint32_t padRight = 0, + uint32_t padBottom = 0, + uint32_t strideX = 1, + uint32_t strideY = 1) +{ + unsigned int inputNum = boost::numeric_cast<unsigned int>(input.shape()[0]); + unsigned int inputChannels = boost::numeric_cast<unsigned int>(input.shape()[1]); + unsigned int inputHeight = boost::numeric_cast<unsigned int>(input.shape()[2]); + unsigned int inputWidth = boost::numeric_cast<unsigned int>(input.shape()[3]); + unsigned int kernelChanMul = boost::numeric_cast<unsigned int>(kernel.shape()[0]); + unsigned int kernelChannels = boost::numeric_cast<unsigned int>(kernel.shape()[1]); + unsigned int kernelHeight = boost::numeric_cast<unsigned int>(kernel.shape()[2]); + unsigned int kernelWidth = boost::numeric_cast<unsigned int>(kernel.shape()[3]); + unsigned int outputNum = boost::numeric_cast<unsigned int>(outputExpected.shape()[0]); + unsigned int outputChannels = boost::numeric_cast<unsigned int>(outputExpected.shape()[1]); + unsigned int outputHeight = boost::numeric_cast<unsigned int>(outputExpected.shape()[2]); + unsigned int outputWidth = boost::numeric_cast<unsigned int>(outputExpected.shape()[3]); + + // If a bias is used, its size must equal the number of output channels + bool biasEnabled = bias.size() > 0; + BOOST_ASSERT(!biasEnabled || bias.size() == outputChannels); + + // create the tensors + armnn::TensorInfo inputTensorInfo({inputNum, inputChannels, inputHeight, inputWidth}, armnn::GetDataType<T>()); + armnn::TensorInfo outputTensorInfo({outputNum, outputChannels, outputHeight, outputWidth}, + armnn::GetDataType<T>()); + armnn::TensorInfo kernelDesc({kernelChanMul, kernelChannels, kernelHeight, kernelWidth}, armnn::GetDataType<T>()); + armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, armnn::GetDataType<B>()); + + // Set quantization parameters if the requested type is a quantized type. + if (armnn::IsQuantizedType<T>()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + kernelDesc.SetQuantizationScale(qScale); + kernelDesc.SetQuantizationOffset(qOffset); + biasDesc.SetQuantizationScale(qScale*qScale); + biasDesc.SetQuantizationOffset(0); + } + + // Construct the input data + std::vector<T> inputData; + inputData.assign(input.data(), input.data() + inputChannels*inputHeight*inputWidth); + auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData); + + // Construct the output data, with bias applied, as appropriate + std::vector<T> outputData; + outputData.assign(outputExpected.data(), outputExpected.data() + outputChannels*outputHeight*outputWidth); + if (biasEnabled) + { + std::vector<T> biasV; + biasV.assign(bias.data(), bias.data() + outputChannels); + ApplyBias(outputData, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(), + biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(), + outputWidth, outputHeight); + } + + LayerTestResult<T, 4> ret(outputTensorInfo); + ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc); + AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]); + + armnn::ScopedCpuTensorHandle biasTensor(biasDesc); + if (biasEnabled) + { + AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]); + } + + armnn::DepthwiseConvolution2dQueueDescriptor data; + data.m_Weight = &weightsTensor; + data.m_Bias = &biasTensor; // still set this whether or not bias is enabled - can be a source of bugs + data.m_Parameters.m_StrideX = strideX; + data.m_Parameters.m_StrideY = strideY; + data.m_Parameters.m_PadLeft = padLeft; + data.m_Parameters.m_PadRight = padRight; + data.m_Parameters.m_PadTop = padTop; + data.m_Parameters.m_PadBottom = padBottom; + data.m_Parameters.m_BiasEnabled = biasEnabled; + + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info); + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]); + + workloadFactory.Finalize(); workload->Execute(); CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); @@ -306,6 +420,7 @@ LayerTestResult<T, 4> DepthwiseConvolution2dDepthMul1TestImpl(armnn::IWorkloadFa CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + workloadFactory.Finalize(); workload->Execute(); CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); @@ -478,6 +593,7 @@ LayerTestResult<T, 4> DepthwiseConvolution2dTestImpl(armnn::IWorkloadFactory& wo CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + workloadFactory.Finalize(); workload->Execute(); CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); @@ -485,8 +601,6 @@ LayerTestResult<T, 4> DepthwiseConvolution2dTestImpl(armnn::IWorkloadFactory& wo return ret; } - - template<typename T> LayerTestResult<T,4> Convolution1dTestImpl(armnn::IWorkloadFactory& workloadFactory, float qScale, @@ -595,6 +709,7 @@ LayerTestResult<T,4> Convolution1dTestImpl(armnn::IWorkloadFactory& workloadFact CopyDataToITensorHandle(inputHandle.get(), inputData.data()); + workloadFactory.Finalize(); workload->Execute(); // output @@ -692,7 +807,9 @@ LayerTestResult<T,4> CompareConvolution2dTestImpl(armnn::IWorkloadFactory& workl CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]); + workloadFactory.Finalize(); workload->Execute(); + refWorkloadFactory.Finalize(); workloadRef->Execute(); CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); @@ -792,7 +909,9 @@ LayerTestResult<T, 4> CompareDepthwiseConvolution2dTestImpl(armnn::IWorkloadFact CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]); + workloadFactory.Finalize(); workload->Execute(); + refWorkloadFactory.Finalize(); workloadRef->Execute(); CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); diff --git a/src/armnn/backends/test/CreateWorkloadCl.cpp b/src/armnn/backends/test/CreateWorkloadCl.cpp index 3f320d80e9..f83bb12bbe 100644 --- a/src/armnn/backends/test/CreateWorkloadCl.cpp +++ b/src/armnn/backends/test/CreateWorkloadCl.cpp @@ -23,7 +23,6 @@ BOOST_AUTO_TEST_CASE(CreateActivationWorkload) { Graph graph; ClWorkloadFactory factory; - factory.LoadOpenClRuntime(); auto workload = CreateActivationWorkloadTest<ClActivationFloat32Workload>(factory, graph); @@ -40,7 +39,6 @@ BOOST_AUTO_TEST_CASE(CreateAdditionWorkload) { Graph graph; ClWorkloadFactory factory; - factory.LoadOpenClRuntime(); auto workload = CreateAdditionWorkloadTest<ClAdditionFloat32Workload>(factory, graph); @@ -58,7 +56,6 @@ BOOST_AUTO_TEST_CASE(CreateBatchNormalizationWorkload) { Graph graph; ClWorkloadFactory factory; - factory.LoadOpenClRuntime(); auto workload = CreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloat32Workload>(factory, graph); @@ -136,7 +133,6 @@ BOOST_AUTO_TEST_CASE(CreateMultiplicationWorkload) { Graph graph; ClWorkloadFactory factory; - factory.LoadOpenClRuntime(); auto workload = CreateMultiplicationWorkloadTest<ClMultiplicationFloat32Workload>(factory, graph); @@ -155,7 +151,6 @@ BOOST_AUTO_TEST_CASE(CreateNormalizationWorkload) { Graph graph; ClWorkloadFactory factory; - factory.LoadOpenClRuntime(); auto workload = CreateNormalizationWorkloadTest<ClNormalizationFloat32Workload>(factory, graph); @@ -172,7 +167,6 @@ BOOST_AUTO_TEST_CASE(CreatePooling2dWorkload) { Graph graph; ClWorkloadFactory factory; - factory.LoadOpenClRuntime(); auto workload = CreatePooling2dWorkloadTest<ClPooling2dFloat32Workload>(factory, graph); @@ -190,7 +184,6 @@ static void ClCreateReshapeWorkloadTest() { Graph graph; ClWorkloadFactory factory; - factory.LoadOpenClRuntime(); auto workload = CreateReshapeWorkloadTest<ReshapeWorkloadType>(factory, graph); @@ -217,7 +210,6 @@ BOOST_AUTO_TEST_CASE(CreateSoftmaxWorkload) { Graph graph; ClWorkloadFactory factory; - factory.LoadOpenClRuntime(); auto workload = CreateSoftmaxWorkloadTest<ClSoftmaxFloat32Workload>(factory, graph); @@ -234,20 +226,24 @@ BOOST_AUTO_TEST_CASE(CreateSplitterWorkload) { Graph graph; ClWorkloadFactory factory; - factory.LoadOpenClRuntime(); auto workload = CreateSplitterWorkloadTest<ClSplitterFloat32Workload>(factory, graph); // check that outputs are as we expect them (see definition of CreateSplitterWorkloadTest) SplitterQueueDescriptor queueDescriptor = workload->GetData(); auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); - BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {7})); - auto outputHandle0 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); - BOOST_TEST(CompareIClTensorHandleShape(outputHandle0, {4})); + BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {5, 7, 7})); + auto outputHandle1 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[1]); - BOOST_TEST(CompareIClTensorHandleShape(outputHandle1, {1})); + BOOST_TEST(CompareIClTensorHandleShape(outputHandle1, {2, 7, 7})); + auto outputHandle2 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[2]); - BOOST_TEST(CompareIClTensorHandleShape(outputHandle2, {2})); + BOOST_TEST(CompareIClTensorHandleShape(outputHandle2, {2, 7, 7})); + + auto outputHandle0 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); + // NOTE: At the moment the CL collapses the tensor to a 2 dim when dimension zero = 1 + // we are raising this difference between the NEON and CL libs as an issue with the compute library team + BOOST_TEST(CompareIClTensorHandleShape(outputHandle0, {7, 7})); } BOOST_AUTO_TEST_CASE(CreateSplitterMerger) @@ -260,7 +256,6 @@ BOOST_AUTO_TEST_CASE(CreateSplitterMerger) Graph graph; ClWorkloadFactory factory; - factory.LoadOpenClRuntime(); auto workloads = CreateSplitterMergerWorkloadTest<ClSplitterFloat32Workload, ClMergerFloat32Workload>(factory, graph); @@ -332,7 +327,6 @@ BOOST_AUTO_TEST_CASE(CreateSingleOutputMultipleInputs) BOOST_AUTO_TEST_CASE(CreateMemCopyWorkloadsCl) { ClWorkloadFactory factory; - factory.LoadOpenClRuntime(); CreateMemCopyWorkloads<CopyFromCpuToClWorkload,CopyFromClToCpuWorkload,IClTensorHandle>(factory); } @@ -340,7 +334,6 @@ BOOST_AUTO_TEST_CASE(CreateL2NormalizationWorkload) { Graph graph; ClWorkloadFactory factory; - factory.LoadOpenClRuntime(); auto workload = CreateL2NormalizationWorkloadTest<ClL2NormalizationFloat32Workload>(factory, graph); diff --git a/src/armnn/backends/test/CreateWorkloadNeon.cpp b/src/armnn/backends/test/CreateWorkloadNeon.cpp index 807937ba2b..4d91fbfd31 100644 --- a/src/armnn/backends/test/CreateWorkloadNeon.cpp +++ b/src/armnn/backends/test/CreateWorkloadNeon.cpp @@ -214,13 +214,16 @@ BOOST_AUTO_TEST_CASE(CreateSplitterWorkload) // check that outputs are as we expect them (see definition of CreateSplitterWorkloadTest) SplitterQueueDescriptor queueDescriptor = workload->GetData(); auto inputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]); - BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({1, 7}, DataType::Float32))); + BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({5, 7, 7}, DataType::Float32))); + auto outputHandle0 = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]); - BOOST_TEST(TestNeonTensorHandleInfo(outputHandle0, TensorInfo({1, 4}, DataType::Float32))); + BOOST_TEST(TestNeonTensorHandleInfo(outputHandle0, TensorInfo({1, 7, 7}, DataType::Float32))); + auto outputHandle1 = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[1]); - BOOST_TEST(TestNeonTensorHandleInfo(outputHandle1, TensorInfo({1, 1}, DataType::Float32))); + BOOST_TEST(TestNeonTensorHandleInfo(outputHandle1, TensorInfo({2, 7, 7}, DataType::Float32))); + auto outputHandle2 = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[2]); - BOOST_TEST(TestNeonTensorHandleInfo(outputHandle2, TensorInfo({1, 2}, DataType::Float32))); + BOOST_TEST(TestNeonTensorHandleInfo(outputHandle2, TensorInfo({2, 7, 7}, DataType::Float32))); } BOOST_AUTO_TEST_CASE(CreateSplitterMerger) diff --git a/src/armnn/backends/test/CreateWorkloadRef.cpp b/src/armnn/backends/test/CreateWorkloadRef.cpp index e0eacebe1a..abc46e4361 100644 --- a/src/armnn/backends/test/CreateWorkloadRef.cpp +++ b/src/armnn/backends/test/CreateWorkloadRef.cpp @@ -241,13 +241,16 @@ static void RefCreateSplitterWorkloadTest() // check that outputs are as we expect them (see definition of CreateSplitterWorkloadTest) SplitterQueueDescriptor queueDescriptor = workload->GetData(); auto inputHandle = boost::polymorphic_downcast<ConstCpuTensorHandle*>(queueDescriptor.m_Inputs[0]); - BOOST_TEST((inputHandle->GetTensorInfo() == TensorInfo({ 1, 7 }, SplitterWorkloadType::ms_DataType))); + BOOST_TEST((inputHandle->GetTensorInfo() == TensorInfo({ 5, 7, 7 }, SplitterWorkloadType::ms_DataType))); + auto outputHandle0 = boost::polymorphic_downcast<CpuTensorHandle*>(queueDescriptor.m_Outputs[0]); - BOOST_TEST((outputHandle0->GetTensorInfo() == TensorInfo({ 1, 4 }, SplitterWorkloadType::ms_DataType))); + BOOST_TEST((outputHandle0->GetTensorInfo() == TensorInfo({ 1, 7, 7 }, SplitterWorkloadType::ms_DataType))); + auto outputHandle1 = boost::polymorphic_downcast<CpuTensorHandle*>(queueDescriptor.m_Outputs[1]); - BOOST_TEST((outputHandle1->GetTensorInfo() == TensorInfo({ 1, 1 }, SplitterWorkloadType::ms_DataType))); + BOOST_TEST((outputHandle1->GetTensorInfo() == TensorInfo({ 2, 7, 7 }, SplitterWorkloadType::ms_DataType))); + auto outputHandle2 = boost::polymorphic_downcast<CpuTensorHandle*>(queueDescriptor.m_Outputs[2]); - BOOST_TEST((outputHandle2->GetTensorInfo() == TensorInfo({ 1, 2 }, SplitterWorkloadType::ms_DataType))); + BOOST_TEST((outputHandle2->GetTensorInfo() == TensorInfo({ 2, 7, 7 }, SplitterWorkloadType::ms_DataType))); } BOOST_AUTO_TEST_CASE(CreateSplitterFloat32Workload) diff --git a/src/armnn/backends/test/FullyConnectedTestImpl.hpp b/src/armnn/backends/test/FullyConnectedTestImpl.hpp index 479da3fabc..d2379ec10e 100644 --- a/src/armnn/backends/test/FullyConnectedTestImpl.hpp +++ b/src/armnn/backends/test/FullyConnectedTestImpl.hpp @@ -10,9 +10,9 @@ LayerTestResult<T, 2> SimpleFullyConnectedTestImpl( armnn::TensorInfo outputTensorInfo, armnn::TensorInfo weightsDesc, armnn::TensorInfo biasesDesc, - boost::multi_array<T, 2> weights, - boost::multi_array<B, 1> bias, - boost::multi_array<T, 4> input, + boost::multi_array<T, 2>& weights, + boost::multi_array<B, 1>& bias, + boost::multi_array<T, 4>& input, bool biasEnabled, bool transposeWeights) { @@ -41,6 +41,7 @@ LayerTestResult<T, 2> SimpleFullyConnectedTestImpl( outputHandle->Allocate(); CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + workloadFactory.Finalize(); workload->Execute(); CopyDataFromITensorHandle(&result.output[0][0], outputHandle.get()); diff --git a/src/armnn/backends/test/IsLayerSupportedTest.cpp b/src/armnn/backends/test/IsLayerSupportedTest.cpp index 4b4c9f6099..af7ba923ec 100644 --- a/src/armnn/backends/test/IsLayerSupportedTest.cpp +++ b/src/armnn/backends/test/IsLayerSupportedTest.cpp @@ -9,7 +9,6 @@ #include "backends/CpuTensorHandle.hpp" #include "backends/RefWorkloadFactory.hpp" -#include <Layers.hpp> #include <string> #include <iostream> @@ -67,4 +66,4 @@ BOOST_AUTO_TEST_CASE(IsLayerSupportedUint8Cl) } #endif //#ifdef ARMCOMPUTECL_ENABLED -BOOST_AUTO_TEST_SUITE_END()
\ No newline at end of file +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/armnn/backends/test/LayerTests.cpp b/src/armnn/backends/test/LayerTests.cpp index 9eed2dbf78..a10e4bd7a0 100644 --- a/src/armnn/backends/test/LayerTests.cpp +++ b/src/armnn/backends/test/LayerTests.cpp @@ -6,8 +6,10 @@ #include "test/TensorHelpers.hpp" #include "TensorCopyUtils.hpp" +#include "Permute.hpp" #include <boost/test/unit_test.hpp> +#include <boost/assert.hpp> #include "armnn/LayerSupport.hpp" @@ -342,11 +344,11 @@ LayerTestResult<T, 4> SimpleConvolution2dAsymmetricPaddingTestCommon(armnn::IWor std::vector<T> myVec(outputDesc.GetNumElements(), 0); boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>( QuantizedVector<T>(qScale, qOffset, { - -4723, -7044, -9324, -6253, -3542, -7140, -10580, -13940, -9300, -5230, -9590, -14120, -18520, -12290, -6860, -9980, -14560, -18960, -12560, -7000, -7518, -10904, -14144, -9318, -5152, + -5032, -7256, -9376, -6142, -3368, }))); return SimpleConvolution2dTestImpl<T>(workloadFactory, @@ -357,9 +359,79 @@ LayerTestResult<T, 4> SimpleConvolution2dAsymmetricPaddingTestCommon(armnn::IWor qScale, qOffset, 1, // padding left - 2, // padding top + 1, // padding top 2, // padding right - 1); // padding bottom + 2); // padding bottom +} + +template<typename T> +LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale, + int32_t qOffset, + bool biasEnabled) +{ + // Use a single-batch 2-channel 5x5 image as input + armnn::TensorInfo inputTensorInfo({ 1, 2, 5, 5 }, armnn::GetDataType<T>()); + auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>( + QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(), inputTensorInfo.GetQuantizationOffset(), { + 0, 1, 2, 3, 4, + 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, + + 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, + 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, + 45, 46, 47, 48, 49 + }))); + + // Use a depth multiplier of 1 on a 2-channel 4x4 kernel + armnn::TensorInfo kernelTensorInfo({ 1, 2, 4, 4 }, armnn::GetDataType<T>()); + auto kernel = MakeTensor<T, 4>(kernelTensorInfo, std::vector<T>( + QuantizedVector<T>(kernelTensorInfo.GetQuantizationScale(), kernelTensorInfo.GetQuantizationOffset(), { + 32, 31, 30, 29, + 28, 27, 26, 25, + 24, 23, 22, 21, + 20, 19, 18, 17, + + 16, 15, 14, 13, + 12, 11, 10, 9, + 8, 7, 6, 5, + 4, 3, 2, 1 + }))); + + // Expected output is 1 batch of a 2-channel 5x5 image + // calculated using the python tensorflow library with strideX=1, strideY=1 + armnn::TensorInfo outputTensorInfo({ 1, 2, 5, 5 }, armnn::GetDataType<T>()); + boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputTensorInfo, std::vector<T>( + QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(), { + 1062, 1580, 1850, 1530, 1117, + 2140, 3108, 3500, 2842, 2042, + 3580, 5068, 5460, 4342, 3062, + 3618, 5072, 5390, 4248, 2971, + 3074, 4282, 4510, 3533, 2457, + 1550, 2284, 2362, 1955, 1428, + 2910, 4206, 4342, 3528, 2536, + 3390, 4886, 5022, 4068, 2916, + 3566, 5056, 5182, 4133, 2922, + 3100, 4352, 4452, 3517, 2465 + }))); + + return DepthwiseConvolution2dAsymmetricTestImpl<T>(workloadFactory, + input, + kernel, + GetBias2<typename FullyConnectedBiasTypeForInputType<T>::Type>(biasEnabled, qScale, qOffset), + expectedOutput, + qScale, + qOffset, + 1, // padding left + 1, // padding top + 2, // padding right + 2, // padding bottom + 1, // strideX + 1); // strideY } LayerTestResult<float, 4> @@ -385,6 +457,12 @@ LayerTestResult<float, 4> DepthwiseConvolution2dDepthMul1Test(armnn::IWorkloadFa return DepthwiseConvolution2dDepthMul1TestImpl<float, float>(workloadFactory, 0.0f, 0, biasEnabled); } +LayerTestResult<float, 4> DepthwiseConvolution2dAsymmetricTest(armnn::IWorkloadFactory& workloadFactory, + bool biasEnabled) +{ + return DepthwiseConvolution2dAsymmetricTestCommon<float>(workloadFactory, 0.0f, 0, biasEnabled); +} + LayerTestResult<uint8_t, 4> DepthwiseConvolution2dUint8Test(armnn::IWorkloadFactory& workloadFactory, bool biasEnabled) { @@ -493,138 +571,85 @@ LayerTestResult<uint8_t, 3> CopyViaSplitterUint8Test(armnn::IWorkloadFactory& wo LayerTestResult<float,3> MergerTest(armnn::IWorkloadFactory& workloadFactory) { - unsigned int outputWidth = 5; + unsigned int outputWidth = 3; unsigned int outputHeight = 6; unsigned int outputChannels = 3; - unsigned int inputWidth1 = 2; - unsigned int inputHeight1 = 2; - unsigned int inputChannels1 = 3; - - unsigned int inputWidth2 = 2; - unsigned int inputHeight2 = 4; - unsigned int inputChannels2 = 3; - - unsigned int inputWidth3 = 3; - unsigned int inputHeight3 = 6; - unsigned int inputChannels3 = 2; + unsigned int inputWidth1 = 3; + unsigned int inputHeight1 = 6; + unsigned int inputChannels1 = 2; - unsigned int inputWidth4 = 3; - unsigned int inputHeight4 = 6; - unsigned int inputChannels4 = 1; + unsigned int inputWidth2 = 3; + unsigned int inputHeight2 = 6; + unsigned int inputChannels2 = 1; // Define the tensor descriptors armnn::TensorInfo outputTensorInfo({ outputChannels, outputHeight, outputWidth }, armnn::DataType::Float32); armnn::TensorInfo inputTensorInfo1({ inputChannels1, inputHeight1, inputWidth1 }, armnn::DataType::Float32); armnn::TensorInfo inputTensorInfo2({ inputChannels2, inputHeight2, inputWidth2 }, armnn::DataType::Float32); - armnn::TensorInfo inputTensorInfo3({ inputChannels3, inputHeight3, inputWidth3 }, armnn::DataType::Float32); - armnn::TensorInfo inputTensorInfo4({ inputChannels4, inputHeight4, inputWidth4 }, armnn::DataType::Float32); LayerTestResult<float,3> ret(outputTensorInfo); - ret.outputExpected = MakeTensor<float, 3>(outputTensorInfo, std::vector<float>( - { - 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, - 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, - 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, - 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, - 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, - 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, - - 31.0f, 32.0f, 33.0f, 34.0f, 35.0f, - 36.0f, 37.0f, 38.0f, 39.0f, 40.0f, - 41.0f, 42.0f, 43.0f, 44.0f, 45.0f, - 46.0f, 47.0f, 48.0f, 49.0f, 50.0f, - 51.0f, 52.0f, 53.0f, 54.0f, 55.0f, - 56.0f, 57.0f, 58.0f, 59.0f, 60.0f, - - 61.0f, 62.0f, 63.0f, 64.0f, 65.0f, - 66.0f, 67.0f, 68.0f, 69.0f, 70.0f, - 71.0f, 72.0f, 73.0f, 74.0f, 75.0f, - 76.0f, 77.0f, 78.0f, 79.0f, 80.0f, - 81.0f, 82.0f, 83.0f, 84.0f, 85.0f, - 86.0f, 87.0f, 88.0f, 89.0f, 90.0f, + { + 1.0f, 2.0f, 3.0f, + 4.0f, 5.0f, 6.0f, + 7.0f, 8.0f, 9.0f, + 10.0f, 11.0f, 12.0f, + 13.0f, 14.0f, 15.0f, + 16.0f, 17.0f, 18.0f, + + 19.0f, 20.0f, 21.0f, + 22.0f, 23.0f, 24.0f, + 25.0f, 26.0f, 27.0f, + 28.0f, 29.0f, 30.0f, + 31.0f, 32.0f, 33.0f, + 34.0f, 35.0f, 36.0f, + 37.0f, 38.0f, 39.0f, + 40.0f, 41.0f, 42.0f, + 43.0f, 44.0f, 45.0f, + 46.0f, 47.0f, 48.0f, + 49.0f, 50.0f, 51.0f, + 52.0f, 53.0f, 54.0f, }) ); - auto input1 = MakeTensor<float, 3>(inputTensorInfo1, std::vector<float>( { - 1.0f, 2.0f, - 6.0f, 7.0f, - - 31.0f, 32.0f, - 36.0f, 37.0f, + 1.0f, 2.0f, 3.0f, + 4.0f, 5.0f, 6.0f, + 7.0f, 8.0f, 9.0f, + 10.0f, 11.0f, 12.0f, + 13.0f, 14.0f, 15.0f, + 16.0f, 17.0f, 18.0f, - 61.0f, 62.0f, - 66.0f, 67.0f, + 19.0f, 20.0f, 21.0f, + 22.0f, 23.0f, 24.0f, + 25.0f, 26.0f, 27.0f, + 28.0f, 29.0f, 30.0f, + 31.0f, 32.0f, 33.0f, + 34.0f, 35.0f, 36.0f, }) ); auto input2 = MakeTensor<float, 3>(inputTensorInfo2, std::vector<float>( { - 11.0f, 12.0f, - 16.0f, 17.0f, - 21.0f, 22.0f, - 26.0f, 27.0f, - - 41.0f, 42.0f, - 46.0f, 47.0f, - 51.0f, 52.0f, - 56.0f, 57.0f, - - 71.0f, 72.0f, - 76.0f, 77.0f, - 81.0f, 82.0f, - 86.0f, 87.0f, - }) - ); - - auto input3 = MakeTensor<float, 3>(inputTensorInfo3, std::vector<float>( - { - 3.0f, 4.0f, 5.0f, - 8.0f, 9.0f, 10.0f, - 13.0f, 14.0f, 15.0f, - 18.0f, 19.0f, 20.0f, - 23.0f, 24.0f, 25.0f, - 28.0f, 29.0f, 30.0f, - - 33.0f, 34.0f, 35.0f, - 38.0f, 39.0f, 40.0f, + 37.0f, 38.0f, 39.0f, + 40.0f, 41.0f, 42.0f, 43.0f, 44.0f, 45.0f, - 48.0f, 49.0f, 50.0f, - 53.0f, 54.0f, 55.0f, - 58.0f, 59.0f, 60.0f, - }) - ); - - - auto input4 = MakeTensor<float, 3>(inputTensorInfo4, std::vector<float>( - { - 63.0f, 64.0f, 65.0f, - 68.0f, 69.0f, 70.0f, - 73.0f, 74.0f, 75.0f, - 78.0f, 79.0f, 80.0f, - 83.0f, 84.0f, 85.0f, - 88.0f, 89.0f, 90.0f, + 46.0f, 47.0f, 48.0f, + 49.0f, 50.0f, 51.0f, + 52.0f, 53.0f, 54.0f, }) ); std::vector<unsigned int> wOrigin1 = {0, 0, 0}; //extent of the window is defined by size of input[0] armnn::MergerQueueDescriptor::ViewOrigin window1(wOrigin1); - std::vector<unsigned int> wOrigin2 = {0, 2, 0}; //extent of the window is defined by size of input[1] + std::vector<unsigned int> wOrigin2 = {2, 0, 0}; //extent of the window is defined by size of input[1] armnn::MergerQueueDescriptor::ViewOrigin window2(wOrigin2); - std::vector<unsigned int> wOrigin3 = {0, 0, 2}; //extent of the window is defined by size of input[2] - armnn::MergerQueueDescriptor::ViewOrigin window3(wOrigin3); - - std::vector<unsigned int> wOrigin4 = {2, 0, 2}; //extent of the window is defined by size of input[3] - armnn::MergerQueueDescriptor::ViewOrigin window4(wOrigin4); - - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); bool subTensorsSupported = workloadFactory.SupportsSubTensors(); @@ -639,43 +664,25 @@ LayerTestResult<float,3> MergerTest(armnn::IWorkloadFactory& workloadFactory) workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo2.GetShape(), wOrigin2.data()) : workloadFactory.CreateTensorHandle(inputTensorInfo2); - std::unique_ptr<armnn::ITensorHandle> inputHandle3 = - subTensorsSupported ? - workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo3.GetShape(), wOrigin3.data()) : - workloadFactory.CreateTensorHandle(inputTensorInfo3); - - std::unique_ptr<armnn::ITensorHandle> inputHandle4 = - subTensorsSupported ? - workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo4.GetShape(), wOrigin4.data()) : - workloadFactory.CreateTensorHandle(inputTensorInfo4); - - armnn::MergerQueueDescriptor data; armnn::WorkloadInfo info; AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get()); AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get()); - AddInputToWorkload(data, info, inputTensorInfo3, inputHandle3.get()); - AddInputToWorkload(data, info, inputTensorInfo4, inputHandle4.get()); AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); data.m_ViewOrigins.push_back(window1); data.m_ViewOrigins.push_back(window2); - data.m_ViewOrigins.push_back(window3); - data.m_ViewOrigins.push_back(window4); std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateMerger(data, info); inputHandle1->Allocate(); inputHandle2->Allocate(); - inputHandle3->Allocate(); - inputHandle4->Allocate(); outputHandle->Allocate(); CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0]); CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0]); - CopyDataToITensorHandle(inputHandle3.get(), &input3[0][0][0]); - CopyDataToITensorHandle(inputHandle4.get(), &input4[0][0][0]); + workloadFactory.Finalize(); workload->Execute(); CopyDataFromITensorHandle(&ret.output[0][0][0], outputHandle.get()); @@ -765,6 +772,7 @@ LayerTestResult<float,4> AdditionTest(armnn::IWorkloadFactory& workloadFactory) CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]); CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]); + workloadFactory.Finalize(); workload->Execute(); CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); @@ -841,6 +849,7 @@ LayerTestResult<T, 4> AdditionBroadcastTestImpl(armnn::IWorkloadFactory& workloa CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]); CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]); + workloadFactory.Finalize(); workload->Execute(); CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); @@ -912,6 +921,7 @@ LayerTestResult<T, 4> AdditionBroadcast1ElementTestImpl(armnn::IWorkloadFactory& CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]); CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]); + workloadFactory.Finalize(); workload->Execute(); CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); @@ -996,7 +1006,9 @@ LayerTestResult<float,4> CompareAdditionTest(armnn::IWorkloadFactory& workloadFa CopyDataToITensorHandle(inputHandle1Ref.get(), &input1[0][0][0][0]); CopyDataToITensorHandle(inputHandle2Ref.get(), &input2[0][0][0][0]); + workloadFactory.Finalize(); workload->Execute(); + refWorkloadFactory.Finalize(); workloadRef->Execute(); CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); @@ -1043,6 +1055,7 @@ LayerTestResult<float,4> MultiplicationTestHelper(armnn::IWorkloadFactory& workl CopyDataToITensorHandle(inputHandle0.get(), &input0[0][0][0][0]); CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]); + workloadFactory.Finalize(); workload->Execute(); CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); @@ -1185,7 +1198,9 @@ LayerTestResult<float,4> CompareMultiplicationTest(armnn::IWorkloadFactory& work CopyDataToITensorHandle(inputHandle0Ref.get(), &input0[0][0][0][0]); CopyDataToITensorHandle(inputHandle1Ref.get(), &input1[0][0][0][0]); + workloadFactory.Finalize(); workload->Execute(); + refWorkloadFactory.Finalize(); workloadRef->Execute(); CopyDataFromITensorHandle(&comparisonResult.output[0][0][0][0], outputHandle.get()); @@ -1264,7 +1279,9 @@ LayerTestResult<float,4> CompareBatchNormTest(armnn::IWorkloadFactory& workloadF CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]); + workloadFactory.Finalize(); workload->Execute(); + refWorkloadFactory.Finalize(); workloadRef->Execute(); CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); @@ -1273,23 +1290,299 @@ LayerTestResult<float,4> CompareBatchNormTest(armnn::IWorkloadFactory& workloadF return ret; } -void Concatenate(armnn::IWorkloadFactory& workloadFactory, - std::initializer_list<const armnn::TensorInfo> inputTensorInfos, - std::initializer_list<void*> inputs, - const armnn::TensorInfo& outputTensorInfo, - void* output, - unsigned int concatDim) -{ - armnn::MergerQueueDescriptor queueDescriptor; +template<typename T> +void PermuteTensorData( + armnn::IWorkloadFactory& workloadFactory, + const armnn::PermutationVector& mappings, + armnn::TensorInfo & inputTensorInfo, + const T * inputData, + std::vector<T>& outputData) +{ + BOOST_ASSERT_MSG(inputData != nullptr, "inputData must not be null"); + if (inputData == nullptr) + { + // Nullptr is an error in the test. By returning without doing the concatenation + // I expect the caller to fail the test. It still makes sense to report this as + // an assert for Debug builds. + return; + } + + armnn::TensorInfo outputTensorInfo = armnnUtils::Permuted(inputTensorInfo, mappings); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::PermuteQueueDescriptor queueDescriptor; + queueDescriptor.m_Parameters = armnn::PermuteDescriptor{mappings}; + armnn::WorkloadInfo workloadInfo; + AddInputToWorkload(queueDescriptor, workloadInfo, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(queueDescriptor, workloadInfo, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreatePermute(queueDescriptor, workloadInfo); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), inputData); + + workload->Execute(); + + outputData.resize(outputTensorInfo.GetNumElements()); + CopyDataFromITensorHandle(&outputData[0], outputHandle.get()); + inputTensorInfo = outputTensorInfo; +} +armnn::OriginsDescriptor CreateMergerDescriptorForConcatenation( + const std::vector<armnn::TensorInfo> & inputTensorInfos, + unsigned int concatDim) +{ std::vector<armnn::TensorShape> shapes; shapes.reserve(inputTensorInfos.size()); for (const armnn::TensorInfo& it: inputTensorInfos) { shapes.push_back(it.GetShape()); } - armnn::OriginsDescriptor viewsDescriptor = armnn::CreateMergerDescriptorForConcatenation(shapes.begin(), - shapes.end(), concatDim); + + return armnn::CreateMergerDescriptorForConcatenation(shapes.begin(), + shapes.end(), + concatDim); +} + +// +// Concatenation is only supported for N and C dimensions for NCHW. In case of +// <4 dimensions we need to make sure that the concat dimensions is at least +// the 3rd slowest iterating one. +// + +bool NeedPermuteForConcat( + const std::vector<armnn::TensorInfo> & inputTensorInfos, + unsigned int concatDim) +{ + // See note above. Additionally we expect the input shapes to have the + // same number of dimensions. + unsigned int nDimensions = 0; + + // determine the number of dimensions as well as sanity check them + // agains test implementation issues + for (auto && tensorInfo : inputTensorInfos) + { + if (!nDimensions) + { + nDimensions = tensorInfo.GetShape().GetNumDimensions(); + } + else + { + BOOST_ASSERT_MSG(nDimensions == tensorInfo.GetShape().GetNumDimensions(), + "Input shapes must have the same number of dimensions"); + } + } + + return (nDimensions-concatDim) < 3; +} + +armnn::TensorShape ExpandTensorShapeTo3dForPermute(const armnn::TensorShape & inputShape) +{ + unsigned int numDims = inputShape.GetNumDimensions(); + if (numDims >= 3) + { + // Nothing to do if the inputShape has at least 3 dimensions. + return inputShape; + } + + std::vector<unsigned int> newDims(size_t(3), 1u); + unsigned int expandedBy = 3 - numDims; + for (unsigned int i=0; i<numDims; ++i) + { + newDims[expandedBy+i] = inputShape[i]; + } + return armnn::TensorShape(3u, &newDims[0]); +} + +void Generate3dPermuteVectorForConcat( + unsigned int numDimensions, + unsigned int & concatDim, + std::pair<armnn::PermutationVector, armnn::PermutationVector> & permutations) +{ + BOOST_ASSERT_MSG(numDimensions <= 3, + "Only dimensions 1,2 and 3 are supported by this helper"); + + unsigned int expandedBy = 3 - numDimensions; + unsigned int expandedConcatAxis = concatDim + expandedBy; + + if (expandedConcatAxis == 2) + { + concatDim = 0; + armnn::PermutationVector forwardPermutation({1, 2, 0}); + armnn::PermutationVector reversePermutation({2, 0, 1}); + permutations = std::make_pair(forwardPermutation, reversePermutation); + } + else if (expandedConcatAxis == 1) + { + concatDim = 0; + armnn::PermutationVector forwardPermutation({2, 0, 1}); + armnn::PermutationVector reversePermutation({1, 2, 0}); + permutations = std::make_pair(forwardPermutation, reversePermutation); + } + else + { + BOOST_ASSERT(expandedConcatAxis == 0); + concatDim = 0; + } +} + +// +// Permute the input tensors so we can do a supported concatenation. +// Also treat lower than 3d tensors as 3d by adding dummy 1 dimensions +// at the front. Finally this function tells what the output shape +// of the permuted concatenated tensor is going to be. +// +template <typename T> +void PermuteInputsForConcat( + armnn::IWorkloadFactory& workloadFactory, + std::vector<armnn::TensorInfo> & inputTensorInfos, + std::vector<T *> & inputData, + std::vector<std::vector<T>> & inputDataStorage, + armnn::PermutationVector & permuteVector, + unsigned int & concatDim, + armnn::TensorInfo & outputTensorInfo) +{ + BOOST_ASSERT_MSG(inputTensorInfos.size() > 1, + "Expecting more than one tensor to be concatenated here"); + + unsigned int numDims = 0; + unsigned int nthInput = 0; + const armnn::PermutationVector identity({0, 1, 2}); + + std::pair<armnn::PermutationVector, armnn::PermutationVector> permutations = + std::make_pair(identity, identity); + + inputDataStorage.resize(inputData.size()); + + for (auto && tensorInfo : inputTensorInfos) + { + if (numDims == 0) + { + numDims = tensorInfo.GetShape().GetNumDimensions(); + Generate3dPermuteVectorForConcat(numDims, concatDim, permutations); + // store the reverese permutation + permuteVector = permutations.second; + BOOST_ASSERT_MSG(!permuteVector.IsEqual(identity), + "Test logic error, we don't need permutation, so we shouldn't arrive here"); + } + else + { + BOOST_ASSERT_MSG(numDims == tensorInfo.GetShape().GetNumDimensions(), + "All inputs must have the same number of dimensions"); + } + + armnn::TensorInfo newTensorInfo = tensorInfo; + newTensorInfo.SetShape(ExpandTensorShapeTo3dForPermute(tensorInfo.GetShape())); + + PermuteTensorData<T>(workloadFactory, + permutations.first, + newTensorInfo, + inputData[nthInput], + inputDataStorage[nthInput]); + + inputData[nthInput] = inputDataStorage[nthInput].data(); + inputTensorInfos[nthInput] = newTensorInfo; + + ++nthInput; + } + + outputTensorInfo.SetShape( + armnnUtils::Permuted( + ExpandTensorShapeTo3dForPermute(outputTensorInfo.GetShape()), + permutations.first)); +} + + +// +// This is the pair of PermuteInputsForConcat(...) which permutes back +// the output of the concatenation so we can check against an expected +// output. +// +template <typename T> +void PermuteOutputForConcat( + armnn::IWorkloadFactory& workloadFactory, + const armnn::TensorInfo & tensorInfo, + const armnn::PermutationVector & permuteVector, + std::unique_ptr<armnn::ITensorHandle> && inputDataHandle, + T * data) +{ + BOOST_ASSERT_MSG(data != nullptr, "data must not be null"); + if (data == nullptr) + { + // Nullptr is an error in the test. By returning without doing the permutation + // I expect the caller to fail the test. It still makes sense to report this as + // an assert for Debug builds. + return; + } + + armnn::TensorInfo resultTensorInfo = tensorInfo; + std::vector<T> inputData(tensorInfo.GetNumElements()); + std::vector<T> outputData; + + CopyDataFromITensorHandle(&inputData[0], inputDataHandle.get()); + + PermuteTensorData<T>(workloadFactory, + permuteVector, + resultTensorInfo, + &inputData[0], + outputData); + + ::memcpy(data, &outputData[0], sizeof(T)*outputData.size()); +} + +template <typename T> +void Concatenate(armnn::IWorkloadFactory& workloadFactory, + std::initializer_list<const armnn::TensorInfo> inputTensorInfosOrig, + std::initializer_list<T *> inputsOrig, + const armnn::TensorInfo& outputTensorInfoOrig, + T * output, + unsigned int concatDim) +{ + BOOST_ASSERT_MSG(output != nullptr, "output must not be null"); + if (output == nullptr) + { + // Nullptr is an error in the test. By returning without doing the permutation + // I expect the caller to fail the test. It still makes sense to report this as + // an assert for Debug builds. + return; + } + + armnn::MergerQueueDescriptor queueDescriptor; + + // save a copy of the parameters which we might need to change + std::vector<armnn::TensorInfo> inputTensorInfos(inputTensorInfosOrig.begin(), inputTensorInfosOrig.end()); + std::vector<T *> inputs = inputsOrig; + armnn::TensorInfo outputTensorInfo = outputTensorInfoOrig; + + armnn::PermutationVector permuteVector{0, 1, 2}; + + // hold and automatically release memory for the reshaped input data + std::vector<std::vector<T>> tmpInputDataStorage; + + const size_t inputCount = inputTensorInfos.size(); + + bool needPermuteForConcat = NeedPermuteForConcat(inputTensorInfos, concatDim); + + if (needPermuteForConcat) + { + // + // We need to permute the inputs, because concatenation along + // the requested axis is not supported + // + PermuteInputsForConcat<T>(workloadFactory, + inputTensorInfos, + inputs, + tmpInputDataStorage, + permuteVector, + concatDim, + outputTensorInfo); + } + + armnn::OriginsDescriptor viewsDescriptor = CreateMergerDescriptorForConcatenation(inputTensorInfos, concatDim); queueDescriptor.m_ViewOrigins.reserve(viewsDescriptor.GetNumViews()); for (unsigned int i = 0; i < viewsDescriptor.GetNumViews(); ++i) @@ -1298,8 +1591,6 @@ void Concatenate(armnn::IWorkloadFactory& workloadFactory, viewsDescriptor.GetViewOrigin(i) + viewsDescriptor.GetNumDimensions())); } - const size_t inputCount = inputTensorInfos.size(); - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); std::vector<std::unique_ptr<armnn::ITensorHandle>> inputHandles; @@ -1308,7 +1599,7 @@ void Concatenate(armnn::IWorkloadFactory& workloadFactory, const bool subTensorsSupported = workloadFactory.SupportsSubTensors(); for (unsigned int i = 0; i < inputCount; ++i) { - const armnn::TensorInfo& inputTensorInfo = inputTensorInfos.begin()[i]; + const armnn::TensorInfo& inputTensorInfo = inputTensorInfos[i]; std::unique_ptr<armnn::ITensorHandle> inputHandle = subTensorsSupported ? workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo.GetShape(), @@ -1322,7 +1613,7 @@ void Concatenate(armnn::IWorkloadFactory& workloadFactory, for (unsigned int i = 0; i < inputCount; ++i) { - AddInputToWorkload(queueDescriptor, workloadInfo, inputTensorInfos.begin()[i], inputHandles[i].get()); + AddInputToWorkload(queueDescriptor, workloadInfo, inputTensorInfos[i], inputHandles[i].get()); } AddOutputToWorkload(queueDescriptor, workloadInfo, outputTensorInfo, outputHandle.get()); @@ -1339,12 +1630,25 @@ void Concatenate(armnn::IWorkloadFactory& workloadFactory, unsigned int nextInputId = 0; for (auto& inputHandle : inputHandles) { - CopyDataToITensorHandle(inputHandle.get(), *(inputs.begin() + nextInputId++)); + CopyDataToITensorHandle(inputHandle.get(), inputs[nextInputId]); + ++nextInputId; } + workloadFactory.Finalize(); workload->Execute(); - CopyDataFromITensorHandle(output, outputHandle.get()); + if (needPermuteForConcat) + { + PermuteOutputForConcat<T>(workloadFactory, + outputTensorInfo, + permuteVector, + std::move(outputHandle), + output); + } + else + { + CopyDataFromITensorHandle(output, outputHandle.get()); + } } template <typename T> @@ -1362,7 +1666,7 @@ LayerTestResult<T, 1> Concatenation1dTestImpl(armnn::IWorkloadFactory& workloadF std::vector<T> output; output.resize(outputTensorInfo.GetNumElements()); - Concatenate(workloadFactory, + Concatenate<T>(workloadFactory, { inputTensorInfo, inputTensorInfo, inputTensorInfo }, { input0.data(), input1.data(), input2.data() }, outputTensorInfo, @@ -1419,7 +1723,7 @@ LayerTestResult<T, 2> Concatenation2dTestImpl(armnn::IWorkloadFactory& workloadF std::vector<T> output; output.resize(outputTensorInfo.GetNumElements()); - Concatenate(workloadFactory, + Concatenate<T>(workloadFactory, { inputTensorInfo, inputTensorInfo, inputTensorInfo }, { input0.data(), input1.data(), input2.data() }, outputTensorInfo, @@ -1524,7 +1828,7 @@ LayerTestResult<T, 2> Concatenation2dDim0DiffInputDimsTestImpl(armnn::IWorkloadF std::vector<T> output; output.resize(outputTensorInfo.GetNumElements()); - Concatenate(workloadFactory, + Concatenate<T>(workloadFactory, { input0TensorInfo, input1TensorInfo, input2TensorInfo }, { input0.data(), input1.data(), input2.data() }, outputTensorInfo, @@ -1596,7 +1900,7 @@ LayerTestResult<T, 2> Concatenation2dDim1DiffInputDimsTestImpl(armnn::IWorkloadF std::vector<T> output; output.resize(outputTensorInfo.GetNumElements()); - Concatenate(workloadFactory, + Concatenate<T>(workloadFactory, { input0TensorInfo, input1TensorInfo, input2TensorInfo }, { input0.data(), input1.data(), input2.data() }, outputTensorInfo, @@ -1693,7 +1997,7 @@ LayerTestResult<T, 3> Concatenation3dTestImpl(armnn::IWorkloadFactory& workloadF std::vector<T> output; output.resize(outputTensorInfo.GetNumElements()); - Concatenate(workloadFactory, + Concatenate<T>(workloadFactory, { inputTensorInfo, inputTensorInfo, inputTensorInfo }, { input0.data(), input1.data(), input2.data() }, outputTensorInfo, @@ -1953,7 +2257,7 @@ LayerTestResult<T, 3> Concatenation3dDim0DiffInputDimsTestImpl(armnn::IWorkloadF std::vector<T> output; output.resize(outputTensorInfo.GetNumElements()); - Concatenate(workloadFactory, + Concatenate<T>(workloadFactory, { input0TensorInfo, input1TensorInfo, input2TensorInfo }, { input0.data(), input1.data(), input2.data() }, outputTensorInfo, @@ -2091,7 +2395,7 @@ LayerTestResult<T, 3> Concatenation3dDim1DiffInputDimsTestImpl(armnn::IWorkloadF std::vector<T> output; output.resize(outputTensorInfo.GetNumElements()); - Concatenate(workloadFactory, + Concatenate<T>(workloadFactory, { input0TensorInfo, input1TensorInfo, input2TensorInfo }, { input0.data(), input1.data(), input2.data() }, outputTensorInfo, @@ -2229,7 +2533,7 @@ LayerTestResult<T, 3> Concatenation3dDim2DiffInputDimsTestImpl(armnn::IWorkloadF std::vector<T> output; output.resize(outputTensorInfo.GetNumElements()); - Concatenate(workloadFactory, + Concatenate<T>(workloadFactory, { input0TensorInfo, input1TensorInfo, input2TensorInfo }, { input0.data(), input1.data(), input2.data() }, outputTensorInfo, @@ -2306,6 +2610,7 @@ LayerTestResult<float, 4> ResizeBilinearNopTest(armnn::IWorkloadFactory& workloa outputHandle->Allocate(); CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + workloadFactory.Finalize(); workload->Execute(); CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); @@ -2358,6 +2663,7 @@ LayerTestResult<float, 4> SimpleResizeBilinearTest(armnn::IWorkloadFactory& work outputHandle->Allocate(); CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + workloadFactory.Finalize(); workload->Execute(); CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); @@ -2408,6 +2714,7 @@ LayerTestResult<float, 4> ResizeBilinearSqMinTest(armnn::IWorkloadFactory& workl outputHandle->Allocate(); CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + workloadFactory.Finalize(); workload->Execute(); CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); @@ -2457,6 +2764,7 @@ LayerTestResult<float, 4> ResizeBilinearMinTest(armnn::IWorkloadFactory& workloa outputHandle->Allocate(); CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + workloadFactory.Finalize(); workload->Execute(); CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); @@ -2507,6 +2815,7 @@ LayerTestResult<float, 4> ResizeBilinearMagTest(armnn::IWorkloadFactory& workloa outputHandle->Allocate(); CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + workloadFactory.Finalize(); workload->Execute(); CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); @@ -2555,6 +2864,7 @@ LayerTestResult<float, 2> FakeQuantizationTest(armnn::IWorkloadFactory& workload CopyDataToITensorHandle(inputHandle.get(), &input[0][0]); + workloadFactory.Finalize(); workload->Execute(); CopyDataFromITensorHandle(&ret.output[0][0], outputHandle.get()); @@ -2617,6 +2927,7 @@ LayerTestResult<float, 4> L2Normalization1dTest(armnn::IWorkloadFactory& workloa outputHandle->Allocate(); CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + workloadFactory.Finalize(); workload->Execute(); CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); @@ -2686,6 +2997,7 @@ LayerTestResult<float, 4> L2Normalization2dTest(armnn::IWorkloadFactory& workloa outputHandle->Allocate(); CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + workloadFactory.Finalize(); workload->Execute(); CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); @@ -2766,6 +3078,7 @@ LayerTestResult<float, 4> L2Normalization3dTest(armnn::IWorkloadFactory& workloa outputHandle->Allocate(); CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + workloadFactory.Finalize(); workload->Execute(); CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); @@ -2929,6 +3242,7 @@ LayerTestResult<float, 4> L2Normalization4dTest(armnn::IWorkloadFactory& workloa outputHandle->Allocate(); CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + workloadFactory.Finalize(); workload->Execute(); CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); @@ -3022,6 +3336,7 @@ LayerTestResult<T, 4> ConstantTestImpl(armnn::IWorkloadFactory& workloadFactory, outputHandle->Allocate(); + workloadFactory.Finalize(); workload->Execute(); CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); @@ -3040,32 +3355,22 @@ LayerTestResult<uint8_t, 4> ConstantTestUint8(armnn::IWorkloadFactory& workloadF LayerTestResult<uint8_t, 3> MergerUint8Test(armnn::IWorkloadFactory& workloadFactory) { - unsigned int outputWidth = 5; + unsigned int outputWidth = 3; unsigned int outputHeight = 6; unsigned int outputChannels = 3; - unsigned int inputWidth1 = 2; - unsigned int inputHeight1 = 2; - unsigned int inputChannels1 = 3; + unsigned int inputWidth1 = 3; + unsigned int inputHeight1 = 6; + unsigned int inputChannels1 = 2; - unsigned int inputWidth2 = 2; - unsigned int inputHeight2 = 4; - unsigned int inputChannels2 = 3; - - unsigned int inputWidth3 = 3; - unsigned int inputHeight3 = 6; - unsigned int inputChannels3 = 2; - - unsigned int inputWidth4 = 3; - unsigned int inputHeight4 = 6; - unsigned int inputChannels4 = 1; + unsigned int inputWidth2 = 3; + unsigned int inputHeight2 = 6; + unsigned int inputChannels2 = 1; // Define the tensor descriptors armnn::TensorInfo outputTensorInfo({ outputChannels, outputHeight, outputWidth }, armnn::DataType::QuantisedAsymm8); armnn::TensorInfo inputTensorInfo1({ inputChannels1, inputHeight1, inputWidth1 }, armnn::DataType::QuantisedAsymm8); armnn::TensorInfo inputTensorInfo2({ inputChannels2, inputHeight2, inputWidth2 }, armnn::DataType::QuantisedAsymm8); - armnn::TensorInfo inputTensorInfo3({ inputChannels3, inputHeight3, inputWidth3 }, armnn::DataType::QuantisedAsymm8); - armnn::TensorInfo inputTensorInfo4({ inputChannels4, inputHeight4, inputWidth4 }, armnn::DataType::QuantisedAsymm8); // Arbitrary scale and offsets. They don't really matter as the merger operator doesn't dequantize/quantize const float scale = 0.13497836f; @@ -3077,113 +3382,69 @@ LayerTestResult<uint8_t, 3> MergerUint8Test(armnn::IWorkloadFactory& workloadFac inputTensorInfo1.SetQuantizationOffset(offset); inputTensorInfo2.SetQuantizationScale(scale); inputTensorInfo2.SetQuantizationOffset(offset); - inputTensorInfo3.SetQuantizationScale(scale); - inputTensorInfo3.SetQuantizationOffset(offset); - inputTensorInfo4.SetQuantizationScale(scale); - inputTensorInfo4.SetQuantizationOffset(offset); LayerTestResult<uint8_t, 3> ret(outputTensorInfo); ret.outputExpected = MakeTensor<uint8_t, 3>(outputTensorInfo, std::vector<uint8_t>( - { - 1, 2, 3, 4, 5, - 6, 7, 8, 9, 10, - 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, - 21, 22, 23, 24, 25, - 26, 27, 28, 29, 30, - - 31, 32, 33, 34, 35, - 36, 37, 38, 39, 40, - 41, 42, 43, 44, 45, - 46, 47, 48, 49, 50, - 51, 52, 53, 54, 55, - 56, 57, 58, 59, 60, - - 61, 62, 63, 64, 65, - 66, 67, 68, 69, 70, - 71, 72, 73, 74, 75, - 76, 77, 78, 79, 80, - 81, 82, 83, 84, 85, - 86, 87, 88, 89, 90, - }) + { + 1, 2, 3, + 4, 5, 6, + 7, 8, 9, + 10, 11, 12, + 13, 14, 15, + 16, 17, 18, + + 19, 20, 21, + 22, 23, 24, + 25, 26, 27, + 28, 29, 30, + 31, 32, 33, + 34, 35, 36, + + 37, 38, 39, + 40, 41, 42, + 43, 44, 45, + 46, 47, 48, + 49, 50, 51, + 52, 53, 54, + }) ); - auto input1 = MakeTensor<uint8_t, 3>(inputTensorInfo1, std::vector<uint8_t>( { - 1, 2, - 6, 7, - - 31, 32, - 36, 37, + 1, 2, 3, + 4, 5, 6, + 7, 8, 9, + 10, 11, 12, + 13, 14, 15, + 16, 17, 18, - 61, 62, - 66, 67, + 19, 20, 21, + 22, 23, 24, + 25, 26, 27, + 28, 29, 30, + 31, 32, 33, + 34, 35, 36, }) ); auto input2 = MakeTensor<uint8_t, 3>(inputTensorInfo2, std::vector<uint8_t>( { - 11, 12, - 16, 17, - 21, 22, - 26, 27, - - 41, 42, - 46, 47, - 51, 52, - 56, 57, - - 71, 72, - 76, 77, - 81, 82, - 86, 87, - }) - ); - - auto input3 = MakeTensor<uint8_t, 3>(inputTensorInfo3, std::vector<uint8_t>( - { - 3, 4, 5, - 8, 9, 10, - 13, 14, 15, - 18, 19, 20, - 23, 24, 25, - 28, 29, 30, - - 33, 34, 35, - 38, 39, 40, + 37, 38, 39, + 40, 41, 42, 43, 44, 45, - 48, 49, 50, - 53, 54, 55, - 58, 59, 60, - }) - ); - - - auto input4 = MakeTensor<uint8_t, 3>(inputTensorInfo4, std::vector<uint8_t>( - { - 63, 64, 65, - 68, 69, 70, - 73, 74, 75, - 78, 79, 80, - 83, 84, 85, - 88, 89, 90, + 46, 47, 48, + 49, 50, 51, + 52, 53, 54, }) ); std::vector<unsigned int> wOrigin1 = { 0, 0, 0 }; //extent of the window is defined by size of input[0] armnn::MergerQueueDescriptor::ViewOrigin window1(wOrigin1); - std::vector<unsigned int> wOrigin2 = { 0, 2, 0 }; //extent of the window is defined by size of input[1] + std::vector<unsigned int> wOrigin2 = { 2, 0, 0 }; //extent of the window is defined by size of input[1] armnn::MergerQueueDescriptor::ViewOrigin window2(wOrigin2); - std::vector<unsigned int> wOrigin3 = { 0, 0, 2 }; //extent of the window is defined by size of input[2] - armnn::MergerQueueDescriptor::ViewOrigin window3(wOrigin3); - - std::vector<unsigned int> wOrigin4 = { 2, 0, 2 }; //extent of the window is defined by size of input[3] - armnn::MergerQueueDescriptor::ViewOrigin window4(wOrigin4); - std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); @@ -3199,43 +3460,26 @@ LayerTestResult<uint8_t, 3> MergerUint8Test(armnn::IWorkloadFactory& workloadFac workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo2.GetShape(), wOrigin2.data()) : workloadFactory.CreateTensorHandle(inputTensorInfo2); - std::unique_ptr<armnn::ITensorHandle> inputHandle3 = - subTensorsSupported ? - workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo3.GetShape(), wOrigin3.data()) : - workloadFactory.CreateTensorHandle(inputTensorInfo3); - - std::unique_ptr<armnn::ITensorHandle> inputHandle4 = - subTensorsSupported ? - workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo4.GetShape(), wOrigin4.data()) : - workloadFactory.CreateTensorHandle(inputTensorInfo4); - armnn::MergerQueueDescriptor data; armnn::WorkloadInfo info; AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get()); AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get()); - AddInputToWorkload(data, info, inputTensorInfo3, inputHandle3.get()); - AddInputToWorkload(data, info, inputTensorInfo4, inputHandle4.get()); AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); data.m_ViewOrigins.push_back(window1); data.m_ViewOrigins.push_back(window2); - data.m_ViewOrigins.push_back(window3); - data.m_ViewOrigins.push_back(window4); std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateMerger(data, info); inputHandle1->Allocate(); inputHandle2->Allocate(); - inputHandle3->Allocate(); - inputHandle4->Allocate(); outputHandle->Allocate(); CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0]); CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0]); - CopyDataToITensorHandle(inputHandle3.get(), &input3[0][0][0]); - CopyDataToITensorHandle(inputHandle4.get(), &input4[0][0][0]); + workloadFactory.Finalize(); workload->Execute(); CopyDataFromITensorHandle(&ret.output[0][0][0], outputHandle.get()); @@ -3310,6 +3554,7 @@ LayerTestResult<uint8_t, 4> AdditionUint8Test(armnn::IWorkloadFactory& workloadF CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]); CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]); + workloadFactory.Finalize(); workload->Execute(); CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); @@ -3371,6 +3616,7 @@ LayerTestResult<uint8_t, 4> MultiplicationUint8TestHelper(armnn::IWorkloadFactor CopyDataToITensorHandle(inputHandle0.get(), &input0[0][0][0][0]); CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]); + workloadFactory.Finalize(); workload->Execute(); CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); @@ -3531,6 +3777,7 @@ LayerTestResult<uint8_t, 4> ResizeBilinearNopUint8Test(armnn::IWorkloadFactory& outputHandle->Allocate(); CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + workloadFactory.Finalize(); workload->Execute(); CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); @@ -3588,6 +3835,7 @@ LayerTestResult<uint8_t, 4> SimpleResizeBilinearUint8Test(armnn::IWorkloadFactor outputHandle->Allocate(); CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + workloadFactory.Finalize(); workload->Execute(); CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); @@ -3643,6 +3891,7 @@ LayerTestResult<uint8_t, 4> ResizeBilinearSqMinUint8Test(armnn::IWorkloadFactory outputHandle->Allocate(); CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + workloadFactory.Finalize(); workload->Execute(); CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); @@ -3696,6 +3945,7 @@ LayerTestResult<uint8_t, 4> ResizeBilinearMinUint8Test(armnn::IWorkloadFactory& CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + workloadFactory.Finalize(); workload->Execute(); CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); @@ -3751,6 +4001,7 @@ LayerTestResult<uint8_t, 4> ResizeBilinearMagUint8Test(armnn::IWorkloadFactory& outputHandle->Allocate(); CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + workloadFactory.Finalize(); workload->Execute(); CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); diff --git a/src/armnn/backends/test/LayerTests.hpp b/src/armnn/backends/test/LayerTests.hpp index 36e73e461c..2d543d61de 100644 --- a/src/armnn/backends/test/LayerTests.hpp +++ b/src/armnn/backends/test/LayerTests.hpp @@ -67,6 +67,9 @@ LayerTestResult<float, 4> DepthwiseConvolution2dTest(armnn::IWorkloadFactory& wo LayerTestResult<float, 4> DepthwiseConvolution2dDepthMul1Test(armnn::IWorkloadFactory& workloadFactory, bool biasEnabled); +LayerTestResult<float, 4> DepthwiseConvolution2dAsymmetricTest(armnn::IWorkloadFactory& workloadFactory, + bool biasEnabled); + LayerTestResult<float, 4> SimpleMaxPooling2dSize2x2Stride2x2Test(armnn::IWorkloadFactory& workloadFactory, bool forceNoPadding); LayerTestResult<uint8_t, 4> SimpleMaxPooling2dSize2x2Stride2x2Uint8Test(armnn::IWorkloadFactory& workloadFactory, diff --git a/src/armnn/backends/test/MemCopyTests.cpp b/src/armnn/backends/test/MemCopyTests.cpp index 8e4dae35f2..32331789e9 100644 --- a/src/armnn/backends/test/MemCopyTests.cpp +++ b/src/armnn/backends/test/MemCopyTests.cpp @@ -24,7 +24,7 @@ BOOST_AUTO_TEST_SUITE(MemCopyTestSuite) void MemCopyTest(armnn::IWorkloadFactory& srcWorkloadFactory, armnn::IWorkloadFactory& dstWorkloadFactory, bool withSubtensors) { - const std::array<unsigned int, 4> shapeData = { 1u, 1u, 6u, 5u }; + const std::array<unsigned int, 4> shapeData = { { 1u, 1u, 6u, 5u } }; const armnn::TensorShape tensorShape(4, shapeData.data()); const armnn::TensorInfo tensorInfo(tensorShape, armnn::DataType::Float32); boost::multi_array<float, 4> inputData = MakeTensor<float, 4>(tensorInfo, std::vector<float>( diff --git a/src/armnn/backends/test/NormTestImpl.hpp b/src/armnn/backends/test/NormTestImpl.hpp index 1f6aadc9df..d9dc01592a 100644 --- a/src/armnn/backends/test/NormTestImpl.hpp +++ b/src/armnn/backends/test/NormTestImpl.hpp @@ -71,6 +71,7 @@ LayerTestResult<float,4> SimpleNormalizationTestImpl(armnn::IWorkloadFactory& wo CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + workloadFactory.Finalize(); workload->Execute(); CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); @@ -227,7 +228,9 @@ LayerTestResult<float,4> CompareNormalizationTestImpl(armnn::IWorkloadFactory& w CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]); + workloadFactory.Finalize(); workload->Execute(); + refWorkloadFactory.Finalize(); workloadRef->Execute(); CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); diff --git a/src/armnn/backends/test/Reference.cpp b/src/armnn/backends/test/Reference.cpp index 89e5db8e43..b60483a4d9 100644 --- a/src/armnn/backends/test/Reference.cpp +++ b/src/armnn/backends/test/Reference.cpp @@ -49,6 +49,9 @@ ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul1Uint8, DepthwiseConvolution2 ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1, DepthwiseConvolution2dDepthMul1Test, false) ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1Uint8, DepthwiseConvolution2dDepthMul1Uint8Test, false) +ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dAsymmetric, DepthwiseConvolution2dAsymmetricTest, true) +ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dAsymmetric, DepthwiseConvolution2dAsymmetricTest, false) + // Pooling ARMNN_AUTO_TEST_CASE(SimpleMaxPooling2dSize2x2Stride2x2, SimpleMaxPooling2dSize2x2Stride2x2Test, false) ARMNN_AUTO_TEST_CASE(SimpleMaxPooling2dSize2x2Stride2x2Uint8, SimpleMaxPooling2dSize2x2Stride2x2Uint8Test, false) diff --git a/src/armnn/backends/test/SoftmaxTestImpl.hpp b/src/armnn/backends/test/SoftmaxTestImpl.hpp index 5aa74f9618..4c3e0b73dd 100644 --- a/src/armnn/backends/test/SoftmaxTestImpl.hpp +++ b/src/armnn/backends/test/SoftmaxTestImpl.hpp @@ -62,6 +62,7 @@ LayerTestResult<T, 2> SimpleSoftmaxTestImpl(armnn::IWorkloadFactory& workloadFac outputHandle->Allocate(); CopyDataToITensorHandle(inputHandle.get(), &input[0][0]); + workloadFactory.Finalize(); workload->Execute(); CopyDataFromITensorHandle(&ret.output[0][0], outputHandle.get()); @@ -140,11 +141,13 @@ LayerTestResult<T, 2> CompareSoftmaxTestImpl(armnn::IWorkloadFactory& workloadFa CopyDataToITensorHandle(inputHandle.get(), &input[0][0]); CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0]); + workloadFactory.Finalize(); workload->Execute(); + refWorkloadFactory.Finalize(); workloadRef->Execute(); CopyDataFromITensorHandle(&ret.output[0][0], outputHandle.get()); CopyDataFromITensorHandle(&ret.outputExpected[0][0], outputHandleRef.get()); return ret; -}
\ No newline at end of file +} diff --git a/src/armnn/backends/test/SplitterTestImpl.hpp b/src/armnn/backends/test/SplitterTestImpl.hpp index b72046e4bc..70b798eafa 100644 --- a/src/armnn/backends/test/SplitterTestImpl.hpp +++ b/src/armnn/backends/test/SplitterTestImpl.hpp @@ -25,31 +25,34 @@ std::vector<LayerTestResult<T,3>> SplitterTestCommon(armnn::IWorkloadFactory& wo unsigned int inputHeight = 6; unsigned int inputChannels = 3; - unsigned int outputWidth1 = 2; - unsigned int outputHeight1 = 2; - unsigned int outputChannels1 = 3; + // NOTE: Compute Library imposes a restriction that the x and y dimension (input height and width) + // cannot be split. + // For the reasons for this see first comment on https://jira.arm.com/browse/IVGCVSW-1239 + // + // this test has therefore been recast to split the channels, then split the resulting subtensor - unsigned int outputWidth2 = 2; - unsigned int outputHeight2 = 4; - unsigned int outputChannels2 = 3; + // to take channel 0 of original output + // and channel 0 and channel 1 of the split subtensor + unsigned int outputWidth1 = inputWidth; + unsigned int outputHeight1 = inputHeight; + unsigned int outputChannels1 = 1; - unsigned int outputWidth3 = 3; - unsigned int outputHeight3 = 6; - unsigned int outputChannels3 = 2; - - unsigned int outputWidth4 = 3; - unsigned int outputHeight4 = 6; - unsigned int outputChannels4 = 1; + // to take channel 1 and 2 of the original output + unsigned int outputWidth2 = inputWidth; + unsigned int outputHeight2 = inputHeight; + unsigned int outputChannels2 = 2; // Define the tensor descriptors armnn::TensorInfo inputTensorInfo({ inputChannels, inputHeight, inputWidth }, armnn::GetDataType<T>()); + + // outputs of the original split armnn::TensorInfo outputTensorInfo1({ outputChannels1, outputHeight1, outputWidth1 }, armnn::GetDataType<T>()); armnn::TensorInfo outputTensorInfo2({ outputChannels2, outputHeight2, outputWidth2 }, armnn::GetDataType<T>()); - armnn::TensorInfo outputTensorInfo3({ outputChannels3, outputHeight3, outputWidth3 }, armnn::GetDataType<T>()); - armnn::TensorInfo outputTensorInfo4({ outputChannels4, outputHeight4, outputWidth4 }, armnn::GetDataType<T>()); - // note that output 5 should match output 2 - armnn::TensorInfo outputTensorInfo5({ outputChannels2, outputHeight2, outputWidth2 }, armnn::GetDataType<T>()); + + // outputs of the subsequent subtensor split + armnn::TensorInfo outputTensorInfo3({ outputChannels1, outputHeight1, outputWidth1 }, armnn::GetDataType<T>()); + armnn::TensorInfo outputTensorInfo4({ outputChannels1, outputHeight1, outputWidth1 }, armnn::GetDataType<T>()); // Set quantization parameters if the requested type is a quantized type. // The quantization doesn't really matter as the splitter operator doesn't dequantize/quantize @@ -65,15 +68,12 @@ std::vector<LayerTestResult<T,3>> SplitterTestCommon(armnn::IWorkloadFactory& wo outputTensorInfo3.SetQuantizationOffset(qOffset); outputTensorInfo4.SetQuantizationScale(qScale); outputTensorInfo4.SetQuantizationOffset(qOffset); - outputTensorInfo5.SetQuantizationScale(qScale); - outputTensorInfo5.SetQuantizationOffset(qOffset); } LayerTestResult<T,3> ret1(outputTensorInfo1); LayerTestResult<T,3> ret2(outputTensorInfo2); LayerTestResult<T,3> ret3(outputTensorInfo3); LayerTestResult<T,3> ret4(outputTensorInfo4); - LayerTestResult<T,3> ret5(outputTensorInfo5); auto input = MakeTensor<T, 3>(inputTensorInfo, std::vector<T>( QuantizedVector<T>(qScale, qOffset, { @@ -100,98 +100,74 @@ std::vector<LayerTestResult<T,3>> SplitterTestCommon(armnn::IWorkloadFactory& wo }) )); - + // channel 0 of the original input ret1.outputExpected = MakeTensor<T, 3>(outputTensorInfo1, std::vector<T>( QuantizedVector<T>(qScale, qOffset, { - 1.0f, 2.0f, - 6.0f, 7.0f, - - 31.0f, 32.0f, - 36.0f, 37.0f, - - 61.0f, 62.0f, - 66.0f, 67.0f, + 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, + 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, + 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, + 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, + 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, + 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, }) )); + // channel 1 & 2 of the original input ret2.outputExpected = MakeTensor<T, 3>(outputTensorInfo2, std::vector<T>( QuantizedVector<T>(qScale, qOffset, { - 11.0f, 12.0f, - 16.0f, 17.0f, - 21.0f, 22.0f, - 26.0f, 27.0f, - - 41.0f, 42.0f, - 46.0f, 47.0f, - 51.0f, 52.0f, - 56.0f, 57.0f, - - 71.0f, 72.0f, - 76.0f, 77.0f, - 81.0f, 82.0f, - 86.0f, 87.0f, + 31.0f, 32.0f, 33.0f, 34.0f, 35.0f, + 36.0f, 37.0f, 38.0f, 39.0f, 40.0f, + 41.0f, 42.0f, 43.0f, 44.0f, 45.0f, + 46.0f, 47.0f, 48.0f, 49.0f, 50.0f, + 51.0f, 52.0f, 53.0f, 54.0f, 55.0f, + 56.0f, 57.0f, 58.0f, 59.0f, 60.0f, + + 61.0f, 62.0f, 63.0f, 64.0f, 65.0f, + 66.0f, 67.0f, 68.0f, 69.0f, 70.0f, + 71.0f, 72.0f, 73.0f, 74.0f, 75.0f, + 76.0f, 77.0f, 78.0f, 79.0f, 80.0f, + 81.0f, 82.0f, 83.0f, 84.0f, 85.0f, + 86.0f, 87.0f, 88.0f, 89.0f, 90.0f, }) )); + // channel 0 of return 2 (i.e. channels 1 and 2 of the original input) ret3.outputExpected = MakeTensor<T, 3>(outputTensorInfo3, std::vector<T>( QuantizedVector<T>(qScale, qOffset, { - 3.0f, 4.0f, 5.0f, - 8.0f, 9.0f, 10.0f, - 13.0f, 14.0f, 15.0f, - 18.0f, 19.0f, 20.0f, - 23.0f, 24.0f, 25.0f, - 28.0f, 29.0f, 30.0f, - - 33.0f, 34.0f, 35.0f, - 38.0f, 39.0f, 40.0f, - 43.0f, 44.0f, 45.0f, - 48.0f, 49.0f, 50.0f, - 53.0f, 54.0f, 55.0f, - 58.0f, 59.0f, 60.0f, + 31.0f, 32.0f, 33.0f, 34.0f, 35.0f, + 36.0f, 37.0f, 38.0f, 39.0f, 40.0f, + 41.0f, 42.0f, 43.0f, 44.0f, 45.0f, + 46.0f, 47.0f, 48.0f, 49.0f, 50.0f, + 51.0f, 52.0f, 53.0f, 54.0f, 55.0f, + 56.0f, 57.0f, 58.0f, 59.0f, 60.0f, }) )); + // channel 1 of return 2 ret4.outputExpected = MakeTensor<T, 3>(outputTensorInfo4, std::vector<T>( QuantizedVector<T>(qScale, qOffset, { - 63.0f, 64.0f, 65.0f, - 68.0f, 69.0f, 70.0f, - 73.0f, 74.0f, 75.0f, - 78.0f, 79.0f, 80.0f, - 83.0f, 84.0f, 85.0f, - 88.0f, 89.0f, 90.0f, - }) - )); - - - ret5.outputExpected = MakeTensor<T, 3>(outputTensorInfo5, std::vector<T>( - QuantizedVector<T>(qScale, qOffset, { - 11.0f, 12.0f, - 16.0f, 17.0f, - 21.0f, 22.0f, - 26.0f, 27.0f, - - 41.0f, 42.0f, - 46.0f, 47.0f, - 51.0f, 52.0f, - 56.0f, 57.0f, - - 71.0f, 72.0f, - 76.0f, 77.0f, - 81.0f, 82.0f, - 86.0f, 87.0f, + 61.0f, 62.0f, 63.0f, 64.0f, 65.0f, + 66.0f, 67.0f, 68.0f, 69.0f, 70.0f, + 71.0f, 72.0f, 73.0f, 74.0f, 75.0f, + 76.0f, 77.0f, 78.0f, 79.0f, 80.0f, + 81.0f, 82.0f, 83.0f, 84.0f, 85.0f, + 86.0f, 87.0f, 88.0f, 89.0f, 90.0f, }) )); + // NOTE: as a corollary of the no splitting of x and y restriction the x and y values of the view origins + // have to be zero, the co-ordinates are as per the tensor info above channels, height/y, width/x + // note that under the hood the compute engine reverses these i.e. its coordinate system is x, y, channels std::vector<unsigned int> wOrigin1 = {0, 0, 0}; //extent of the window is defined by size of output[0] armnn::SplitterQueueDescriptor::ViewOrigin window1(wOrigin1); - std::vector<unsigned int> wOrigin2 = {0, 2, 0}; //extent of the window is defined by size of output[1] + std::vector<unsigned int> wOrigin2 = {1, 0, 0}; //extent of the window is defined by size of output[1] armnn::SplitterQueueDescriptor::ViewOrigin window2(wOrigin2); - std::vector<unsigned int> wOrigin3 = {0, 0, 2}; //extent of the window is defined by size of output[2] + std::vector<unsigned int> wOrigin3 = {0, 0, 0}; //extent of the window is defined by size of output[2] armnn::SplitterQueueDescriptor::ViewOrigin window3(wOrigin3); - std::vector<unsigned int> wOrigin4 = {2, 0, 2}; //extent of the window is defined by size of output[3] + std::vector<unsigned int> wOrigin4 = {1, 0, 0}; //extent of the window is defined by size of output[3] armnn::SplitterQueueDescriptor::ViewOrigin window4(wOrigin4); bool subTensorsSupported = workloadFactory.SupportsSubTensors(); @@ -210,43 +186,29 @@ std::vector<LayerTestResult<T,3>> SplitterTestCommon(armnn::IWorkloadFactory& wo std::unique_ptr<armnn::ITensorHandle> outputHandle3 = subTensorsSupported ? - workloadFactory.CreateSubTensorHandle(*inputHandle, outputTensorInfo3.GetShape(), wOrigin3.data()) : + workloadFactory.CreateSubTensorHandle(*outputHandle2, outputTensorInfo3.GetShape(), wOrigin3.data()) : workloadFactory.CreateTensorHandle(outputTensorInfo3); std::unique_ptr<armnn::ITensorHandle> outputHandle4 = subTensorsSupported ? - workloadFactory.CreateSubTensorHandle(*inputHandle, outputTensorInfo4.GetShape(), wOrigin4.data()) : + workloadFactory.CreateSubTensorHandle(*outputHandle2, outputTensorInfo4.GetShape(), wOrigin4.data()) : workloadFactory.CreateTensorHandle(outputTensorInfo4); - std::unique_ptr<armnn::ITensorHandle> outputHandle5 = - subTensorsSupported ? - workloadFactory.CreateSubTensorHandle(*inputHandle, outputTensorInfo5.GetShape(), wOrigin2.data()) : - workloadFactory.CreateTensorHandle(outputTensorInfo5); - + // Do the first split armnn::SplitterQueueDescriptor data; armnn::WorkloadInfo info; AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); AddOutputToWorkload(data, info, outputTensorInfo1, outputHandle1.get()); AddOutputToWorkload(data, info, outputTensorInfo2, outputHandle2.get()); - AddOutputToWorkload(data, info, outputTensorInfo3, outputHandle3.get()); - AddOutputToWorkload(data, info, outputTensorInfo4, outputHandle4.get()); - AddOutputToWorkload(data, info, outputTensorInfo5, outputHandle5.get()); data.m_ViewOrigins.push_back(window1); data.m_ViewOrigins.push_back(window2); - data.m_ViewOrigins.push_back(window3); - data.m_ViewOrigins.push_back(window4); - //add window2 again (to have an overlapping split) - data.m_ViewOrigins.push_back(window2); std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateSplitter(data, info); inputHandle->Allocate(); outputHandle1->Allocate(); outputHandle2->Allocate(); - outputHandle3->Allocate(); - outputHandle4->Allocate(); - outputHandle5->Allocate(); CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0]); @@ -254,11 +216,28 @@ std::vector<LayerTestResult<T,3>> SplitterTestCommon(armnn::IWorkloadFactory& wo CopyDataFromITensorHandle(&ret1.output[0][0][0], outputHandle1.get()); CopyDataFromITensorHandle(&ret2.output[0][0][0], outputHandle2.get()); + +// // Do the second split + armnn::SplitterQueueDescriptor data2; + armnn::WorkloadInfo info2; + AddInputToWorkload(data2, info2, outputTensorInfo2, outputHandle2.get()); + AddOutputToWorkload(data2, info2, outputTensorInfo3, outputHandle3.get()); + AddOutputToWorkload(data2, info2, outputTensorInfo4, outputHandle4.get()); + + data2.m_ViewOrigins.push_back(window3); + data2.m_ViewOrigins.push_back(window4); + + std::unique_ptr<armnn::IWorkload> workload2 = workloadFactory.CreateSplitter(data2, info2); + + outputHandle3->Allocate(); + outputHandle4->Allocate(); + + workload2->Execute(); + CopyDataFromITensorHandle(&ret3.output[0][0][0], outputHandle3.get()); CopyDataFromITensorHandle(&ret4.output[0][0][0], outputHandle4.get()); - CopyDataFromITensorHandle(&ret5.output[0][0][0], outputHandle5.get()); - std::vector<LayerTestResult<T,3>> ret = {ret1, ret2, ret3, ret4, ret5}; + std::vector<LayerTestResult<T,3>> ret = {ret1, ret2, ret3, ret4,}; return ret; } |