From 53ef79504b4c881c572735393c2eede5fa556c46 Mon Sep 17 00:00:00 2001 From: Jan Eilers Date: Wed, 2 Jun 2021 12:01:25 +0100 Subject: IVGCVSW-5826 Change weights layout for depthwise to [1,H,W,I*M] * This change is necessary because tflite uses a [1,H,W,I*M] format and uses the I*M dimension for per axis quantization. Our previous layout [M,I,H,W] can't handle the correlating quantization scales. * Updates Onnx-, TfLiteParser and TfliteDelegate * Updates the CpuRef, CpuAcc and GpuAcc backends * Adjusts unit tests * Adds test to ensure models with old layout can still be read and executed * Adds conversion function to previous layout [1,H,W,I*M] --> [M,I,H,W] which can be used by backend developers !android-nn-driver:5553 Signed-off-by: Jan Eilers Change-Id: Ifef23368b8c3702cf315a5838d214f7dc13c0152 --- src/backends/neon/test/NeonLayerTests.cpp | 16 ++++++---- .../workloads/NeonDepthwiseConvolutionWorkload.cpp | 35 ++++++++++------------ 2 files changed, 25 insertions(+), 26 deletions(-) (limited to 'src/backends/neon') diff --git a/src/backends/neon/test/NeonLayerTests.cpp b/src/backends/neon/test/NeonLayerTests.cpp index edc8cb995c..62864f82dc 100644 --- a/src/backends/neon/test/NeonLayerTests.cpp +++ b/src/backends/neon/test/NeonLayerTests.cpp @@ -216,6 +216,11 @@ ARMNN_AUTO_TEST_CASE(DepthToSpaceNhwcInt16_3, DepthToSpaceTest3, DataLayout::NHWC); // Depthwise Convolution +ARMNN_AUTO_TEST_CASE_WITH_THF(DepthwiseConvolution2d, DepthwiseConvolution2dTest, true, DataLayout::NCHW) +ARMNN_AUTO_TEST_CASE_WITH_THF(DepthwiseConvolution2dUint8, DepthwiseConvolution2dUint8Test, true, DataLayout::NCHW) + +ARMNN_AUTO_TEST_CASE_WITH_THF(UnbiasedDepthwiseConvolution2d, DepthwiseConvolution2dTest, false, DataLayout::NCHW) + ARMNN_AUTO_TEST_CASE_WITH_THF(DepthwiseConvolution2dDepthMul1, DepthwiseConvolution2dDepthMul1Test, true, DataLayout::NCHW) ARMNN_AUTO_TEST_CASE_WITH_THF(UnbiasedDepthwiseConvolution2dDepthMul1, @@ -291,16 +296,15 @@ TensorInfo CreateOutputTensorInfo(const TensorInfo& inputInfo, unsigned int inHeight = inputShape[2]; unsigned int inBatchSize = inputShape[0]; - unsigned int filterWidth = filterShape[3]; + unsigned int filterWidth = filterShape[2]; unsigned int readWidth = (inWidth + descriptor.m_PadLeft + descriptor.m_PadRight) - (filterWidth); unsigned int outWidth = 1u + (readWidth / descriptor.m_StrideX); - unsigned int filterHeight = filterShape[2]; + unsigned int filterHeight = filterShape[1]; unsigned int readHeight = (inHeight + descriptor.m_PadTop + descriptor.m_PadBottom) - (filterHeight); unsigned int outHeight = 1u + (readHeight / descriptor.m_StrideY); - unsigned int depthMultiplier = filterShape[0]; - unsigned int outChannels = filterShape[1] * depthMultiplier; + unsigned int outChannels = filterShape[3]; unsigned int outBatchSize = inBatchSize; TensorShape outputShape({outBatchSize, outChannels, outHeight, outWidth}); @@ -314,7 +318,7 @@ TEST_CASE("DepthwiseConv2dUtils") TensorInfo inputInfo({1, 1, 10, 10 }, dataType); TensorInfo outputInfo; - TensorInfo weightsInfo3x3({ 1, 1, 3, 3 }, dataType); + TensorInfo weightsInfo3x3({ 1, 3, 3, 1 }, dataType); // [1,H,W,I*M] TensorInfo biasesInfo; DepthwiseConvolution2dDescriptor descriptor; @@ -380,7 +384,7 @@ TEST_CASE("DepthwiseConv2dUtils") weightsInfo1x1, biasesInfo)); // Supported shape 2x2 - TensorInfo weightsInfo2x2({ 1, 1, 2, 2 }, DataType::Float32); + TensorInfo weightsInfo2x2({ 1, 2, 2, 1 }, DataType::Float32); descriptor = MakeDepthwiseConv2dDesc(1, 1); outputInfo = CreateOutputTensorInfo(inputInfo, weightsInfo2x2, descriptor, dataType); CHECK(layerSupport.IsDepthwiseConvolutionSupported(inputInfo, outputInfo, descriptor, diff --git a/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp b/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp index ad509076b4..589a951825 100644 --- a/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp +++ b/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp @@ -36,12 +36,11 @@ arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& i const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout); const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout); - // ArmNN's weight format is [ M, I, H, W ] - const unsigned int aclDepthMultiplier = weights.GetShape()[0]; - - // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either - // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library - TensorInfo weightsPermuted = ConvertWeightTensorInfoFromArmnnToAcl(weights, descriptor.m_DataLayout); + // ArmNN's weight format is usually [ M, I, H, W ] but for depthwise its [ 1, H, W, I*M] + // Permute to [ 1, I * M, H, W ] (if NCHW), as required by the compute library + unsigned int aclDepthMultiplier; + TensorInfo weightsPermuted; + std::tie(weightsPermuted, aclDepthMultiplier) = Convert1HWOTensorInfoToAcl(weights, input,descriptor.m_DataLayout); // Convert the weights into the compute library format const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout); @@ -79,21 +78,20 @@ NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload( const WorkloadInfo& info) : BaseWorkload(descriptor, info) { - // ArmNN's weight format is [ M, I, H, W ] + // ArmNN's weight format for depthwise is [ 1, H, W, I*M ] auto& weightInfo = m_Data.m_Weight->GetTensorInfo(); - // Allocate a buffer for the swizzling of the weight tensor - std::unique_ptr permuteBuffer(new unsigned char[m_Data.m_Weight->GetTensorInfo().GetNumBytes()]); - - // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either - // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library - ConstTensor weightPermuted = ConvertWeightTensorFromArmnnToAcl(m_Data.m_Weight, - m_Data.m_Parameters.m_DataLayout, - permuteBuffer.get()); + ConstTensor weightsPermuted; + unsigned int depthMultiplier; + std::unique_ptr permuteBuffer(new unsigned char[weightInfo.GetNumBytes()]); + std::tie(weightsPermuted, depthMultiplier) = Convert1HWOTensorToAcl(m_Data.m_Weight, + info.m_InputTensorInfos[0], + m_Data.m_Parameters.m_DataLayout, + permuteBuffer.get()); // Convert the weights into the compute library format m_KernelTensor = std::make_unique(); - BuildArmComputeTensor(*m_KernelTensor, weightPermuted.GetInfo(), m_Data.m_Parameters.m_DataLayout); + BuildArmComputeTensor(*m_KernelTensor, weightsPermuted.GetInfo(), m_Data.m_Parameters.m_DataLayout); if (m_Data.m_Parameters.m_BiasEnabled) { @@ -116,9 +114,6 @@ NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload( input.info()->set_data_layout(aclDataLayout); output.info()->set_data_layout(aclDataLayout); - // Get the depth multiplier - const unsigned int depthMultiplier = weightInfo.GetShape()[0]; - arm_compute::PadStrideInfo padStrideInfo = BuildArmComputePadStrideInfo(m_Data.m_Parameters); const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor); @@ -136,7 +131,7 @@ NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload( ARMNN_ASSERT(m_pDepthwiseConvolutionLayer); - ScopedTensorHandle weightsPermutedHandle(weightPermuted); + ScopedTensorHandle weightsPermutedHandle(weightsPermuted); InitializeArmComputeTensorData(*m_KernelTensor, &weightsPermutedHandle); if (m_Data.m_Parameters.m_BiasEnabled) -- cgit v1.2.1