From 4b19d2249e3b8f9216ec5b410fad20c41b4c6053 Mon Sep 17 00:00:00 2001 From: Cathal Corbett Date: Wed, 11 May 2022 20:12:17 +0100 Subject: IVGCVSW-6940 ConstTensorsAsInput: DepthwiseConvolution2d - Complete ACL * Added backend specific optimization & test for CpuAcc and GpuAcc: PermuteDepthwiseConv2dWeights Signed-off-by: Cathal Corbett Change-Id: I600476b2e9c557a39818a574c1091c9d650b21b1 --- .../test/layerTests/Conv2dTestImpl.cpp | 97 ++++++++++++++++++++-- 1 file changed, 88 insertions(+), 9 deletions(-) (limited to 'src/backends/backendsCommon/test/layerTests/Conv2dTestImpl.cpp') diff --git a/src/backends/backendsCommon/test/layerTests/Conv2dTestImpl.cpp b/src/backends/backendsCommon/test/layerTests/Conv2dTestImpl.cpp index 4203fed23a..74c65e271c 100644 --- a/src/backends/backendsCommon/test/layerTests/Conv2dTestImpl.cpp +++ b/src/backends/backendsCommon/test/layerTests/Conv2dTestImpl.cpp @@ -1713,6 +1713,20 @@ LayerTestResult DepthwiseConvolution2dAsymmetricTestImpl( inputData = tmp; } + std::vector kernelData; + kernelData.assign(kernel.data(), kernel.data() + kernelHeight * kernelWidth * outputChannels); + if (workloadFactory.GetBackendId() == armnn::BackendId("GpuAcc") || + workloadFactory.GetBackendId() == armnn::BackendId("CpuAcc")) + { + if (layout == armnn::DataLayout::NCHW) + { + std::vector tmp(kernelData.size()); + kernelDesc.SetShape(armnnUtils::Permuted(kernelDesc.GetShape(), {0, 2, 3, 1})); + armnnUtils::Permute(kernelDesc.GetShape(), {0, 2, 3, 1}, kernelData.data(), tmp.data(), sizeof(T)); + kernelData = tmp; + } + } + // Construct the output data, with bias applied, as appropriate. std::vector outputData; outputData.assign(outputExpected.data(), outputExpected.data() + outputChannels*outputHeight*outputWidth); @@ -1751,8 +1765,8 @@ LayerTestResult DepthwiseConvolution2dAsymmetricTestImpl( // 2) ITensorHandle (converts to Backend TensorHandle) required in RefWorkload for GetTensorInfo() method. // Cannot PolymorphicDowncast from ScopedTensorHandle->RefTensorHandle. // Need to PolymorphicDowncast from ITensorHandle->RefTensorHandle. - AllocateAndCopyDataToITensorHandle(&weightsTensor, kernel.data()); - AllocateAndCopyDataToITensorHandle(weightsHandle.get(), kernel.data()); // required for ConstantTensor + AllocateAndCopyDataToITensorHandle(&weightsTensor, kernelData.data()); + AllocateAndCopyDataToITensorHandle(weightsHandle.get(), kernelData.data()); // required for ConstantTensor AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); AddInputToWorkload(data, info, kernelDesc, weightsHandle.get()); @@ -1881,6 +1895,18 @@ LayerTestResult DepthwiseConvolution2dDepthMul1TestImpl( kernelDesc.GetQuantizationScale(), kernelDesc.GetQuantizationOffset())); + if (workloadFactory.GetBackendId() == armnn::BackendId("GpuAcc") || + workloadFactory.GetBackendId() == armnn::BackendId("CpuAcc")) + { + if (layout == armnn::DataLayout::NCHW) + { + std::vector tmp(kernelData.size()); + kernelDesc.SetShape(armnnUtils::Permuted(kernelDesc.GetShape(), {0, 2, 3, 1})); + armnnUtils::Permute(kernelDesc.GetShape(), {0, 2, 3, 1}, kernelData.data(), tmp.data(), sizeof(T)); + kernelData = tmp; + } + } + // Manually calculated. std::vector outputImage( QuantizedVector({ 0.f, 0.f }, @@ -2077,6 +2103,18 @@ LayerTestResult DepthwiseConvolution2dTestImpl( kernelDesc.GetQuantizationScale(), kernelDesc.GetQuantizationOffset())); + if (workloadFactory.GetBackendId() == armnn::BackendId("GpuAcc") || + workloadFactory.GetBackendId() == armnn::BackendId("CpuAcc")) + { + if (layout == armnn::DataLayout::NCHW) + { + std::vector tmp(kernelData.size()); + kernelDesc.SetShape(armnnUtils::Permuted(kernelDesc.GetShape(), {0, 2, 3, 1})); + armnnUtils::Permute(kernelDesc.GetShape(), {0, 2, 3, 1}, kernelData.data(), tmp.data(), sizeof(T)); + kernelData = tmp; + } + } + // Manually calculated. std::vector originalOutputImage = std::vector( QuantizedVector({ @@ -2251,6 +2289,20 @@ LayerTestResult DepthwiseConvolution2dTestImpl( biasDesc.SetQuantizationOffset(0); } + std::vector kernelData; + kernelData.assign(originalKernel.data(), originalKernel.data() + kernelHeight*kernelWidth*outputChannels); + if (workloadFactory.GetBackendId() == armnn::BackendId("GpuAcc") || + workloadFactory.GetBackendId() == armnn::BackendId("CpuAcc")) + { + if (layout == armnn::DataLayout::NCHW) + { + std::vector tmp(kernelData.size()); + kernelDesc.SetShape(armnnUtils::Permuted(kernelDesc.GetShape(), {0, 2, 3, 1})); + armnnUtils::Permute(kernelDesc.GetShape(), {0, 2, 3, 1}, kernelData.data(), tmp.data(), sizeof(T)); + kernelData = tmp; + } + } + // Construct input data std::vector input; input.assign(originalInput.data(), originalInput.data() + 1*inputChannels*inputHeight*inputWidth); @@ -2309,8 +2361,8 @@ LayerTestResult DepthwiseConvolution2dTestImpl( // See comment in DepthwiseConvolution2dAsymmetricTestImpl() for reasons. // 1) ScopedTensorHandle (weightsTensor) required for QueueDescriptor (data.m_Weight). // 2) ITensorHandle (converts to Backend TensorHandle) required in RefWorkload for GetTensorInfo() method. - AllocateAndCopyDataToITensorHandle(&weightsTensor, originalKernel.data()); // required for QueueDescriptor - AllocateAndCopyDataToITensorHandle(weightsHandle.get(), originalKernel.data()); // required for ConstantTensor + AllocateAndCopyDataToITensorHandle(&weightsTensor, kernelData.data()); // required for QueueDescriptor + AllocateAndCopyDataToITensorHandle(weightsHandle.get(), kernelData.data()); // required for ConstantTensor AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); AddInputToWorkload(data, info, kernelDesc, weightsHandle.get()); @@ -3029,22 +3081,37 @@ LayerTestResult CompareDepthwiseConvolution2dTestImpl( auto kernel = MakeRandomTensor(kernelDesc, 891234, 0.0f, 255.0f); auto bias = MakeRandomTensor::Type>(biasDesc, 1028, 0.0f, 255.0f); + armnn::TensorInfo aclKernelDescriptor = kernelDesc; + std::vector aclKernelData; + aclKernelData.assign(kernel.data(), kernel.data() + kernelHeight * kernelWidth * outputChannels); + if (workloadFactory.GetBackendId() == armnn::BackendId("GpuAcc") || + workloadFactory.GetBackendId() == armnn::BackendId("CpuAcc")) + { + if (layout == armnn::DataLayout::NCHW) + { + std::vector tmp(kernel.size()); + aclKernelDescriptor.SetShape(armnnUtils::Permuted(kernelDesc.GetShape(), {0, 2, 3, 1})); + armnnUtils::Permute(kernelDesc.GetShape(), {0, 2, 3, 1}, kernel.data(), tmp.data(), sizeof(T)); + aclKernelData = tmp; + } + } + std::vector actualOutput(outputTensorInfo.GetNumElements()); std::vector expectedOutput(outputTensorInfo.GetNumElements()); std::unique_ptr inputHandle = tensorHandleFactory.CreateTensorHandle(inputTensorInfo); - std::unique_ptr weightsHandle = tensorHandleFactory.CreateTensorHandle(kernelDesc); + std::unique_ptr weightsHandle = tensorHandleFactory.CreateTensorHandle(aclKernelDescriptor); std::unique_ptr biasHandle = tensorHandleFactory.CreateTensorHandle(biasDesc); std::unique_ptr outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo); armnn::DepthwiseConvolution2dQueueDescriptor data; armnn::WorkloadInfo info; - armnn::ScopedTensorHandle weightsTensor(kernelDesc); + armnn::ScopedTensorHandle weightsTensor(aclKernelDescriptor); armnn::ScopedTensorHandle biasTensor(biasDesc); AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); - AddInputToWorkload(data, info, kernelDesc, weightsHandle.get()); + AddInputToWorkload(data, info, aclKernelDescriptor, weightsHandle.get()); AddInputToWorkload(data, info, biasDesc, biasHandle.get()); AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); @@ -3052,8 +3119,8 @@ LayerTestResult CompareDepthwiseConvolution2dTestImpl( // See comment in DepthwiseConvolution2dAsymmetricTestImpl() for reasons. // 1) ScopedTensorHandle (weightsTensor) required for QueueDescriptor (data.m_Weight). // 2) ITensorHandle (converts to Backend TensorHandle) required in RefWorkload for GetTensorInfo() method. - AllocateAndCopyDataToITensorHandle(weightsHandle.get(), kernel.data()); - AllocateAndCopyDataToITensorHandle(&weightsTensor, kernel.data()); + AllocateAndCopyDataToITensorHandle(weightsHandle.get(), aclKernelData.data()); + AllocateAndCopyDataToITensorHandle(&weightsTensor, aclKernelData.data()); AllocateAndCopyDataToITensorHandle(biasHandle.get(), bias.data()); AllocateAndCopyDataToITensorHandle(&biasTensor, bias.data()); @@ -3788,6 +3855,18 @@ LayerTestResult DepthwiseConvolution2dPerAxisQuantTest( 1, 1, 1, 1 }; + if (workloadFactory.GetBackendId() == armnn::BackendId("GpuAcc") || + workloadFactory.GetBackendId() == armnn::BackendId("CpuAcc")) + { + if (layout == armnn::DataLayout::NCHW) + { + std::vector tmp(kernelData.size()); + kernelInfo.SetShape(armnnUtils::Permuted(kernelInfo.GetShape(), {0, 2, 3, 1})); + armnnUtils::Permute(kernelInfo.GetShape(), {0, 2, 3, 1}, kernelData.data(), tmp.data(), sizeof(int8_t)); + kernelData = tmp; + } + } + std::vector biasData = { 4, 4, 4, 4 -- cgit v1.2.1