From 53ef79504b4c881c572735393c2eede5fa556c46 Mon Sep 17 00:00:00 2001 From: Jan Eilers Date: Wed, 2 Jun 2021 12:01:25 +0100 Subject: IVGCVSW-5826 Change weights layout for depthwise to [1,H,W,I*M] * This change is necessary because tflite uses a [1,H,W,I*M] format and uses the I*M dimension for per axis quantization. Our previous layout [M,I,H,W] can't handle the correlating quantization scales. * Updates Onnx-, TfLiteParser and TfliteDelegate * Updates the CpuRef, CpuAcc and GpuAcc backends * Adjusts unit tests * Adds test to ensure models with old layout can still be read and executed * Adds conversion function to previous layout [1,H,W,I*M] --> [M,I,H,W] which can be used by backend developers !android-nn-driver:5553 Signed-off-by: Jan Eilers Change-Id: Ifef23368b8c3702cf315a5838d214f7dc13c0152 --- .../workloads/NeonDepthwiseConvolutionWorkload.cpp | 35 ++++++++++------------ 1 file changed, 15 insertions(+), 20 deletions(-) (limited to 'src/backends/neon/workloads') diff --git a/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp b/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp index ad509076b4..589a951825 100644 --- a/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp +++ b/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp @@ -36,12 +36,11 @@ arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& i const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout); const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout); - // ArmNN's weight format is [ M, I, H, W ] - const unsigned int aclDepthMultiplier = weights.GetShape()[0]; - - // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either - // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library - TensorInfo weightsPermuted = ConvertWeightTensorInfoFromArmnnToAcl(weights, descriptor.m_DataLayout); + // ArmNN's weight format is usually [ M, I, H, W ] but for depthwise its [ 1, H, W, I*M] + // Permute to [ 1, I * M, H, W ] (if NCHW), as required by the compute library + unsigned int aclDepthMultiplier; + TensorInfo weightsPermuted; + std::tie(weightsPermuted, aclDepthMultiplier) = Convert1HWOTensorInfoToAcl(weights, input,descriptor.m_DataLayout); // Convert the weights into the compute library format const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout); @@ -79,21 +78,20 @@ NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload( const WorkloadInfo& info) : BaseWorkload(descriptor, info) { - // ArmNN's weight format is [ M, I, H, W ] + // ArmNN's weight format for depthwise is [ 1, H, W, I*M ] auto& weightInfo = m_Data.m_Weight->GetTensorInfo(); - // Allocate a buffer for the swizzling of the weight tensor - std::unique_ptr permuteBuffer(new unsigned char[m_Data.m_Weight->GetTensorInfo().GetNumBytes()]); - - // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either - // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library - ConstTensor weightPermuted = ConvertWeightTensorFromArmnnToAcl(m_Data.m_Weight, - m_Data.m_Parameters.m_DataLayout, - permuteBuffer.get()); + ConstTensor weightsPermuted; + unsigned int depthMultiplier; + std::unique_ptr permuteBuffer(new unsigned char[weightInfo.GetNumBytes()]); + std::tie(weightsPermuted, depthMultiplier) = Convert1HWOTensorToAcl(m_Data.m_Weight, + info.m_InputTensorInfos[0], + m_Data.m_Parameters.m_DataLayout, + permuteBuffer.get()); // Convert the weights into the compute library format m_KernelTensor = std::make_unique(); - BuildArmComputeTensor(*m_KernelTensor, weightPermuted.GetInfo(), m_Data.m_Parameters.m_DataLayout); + BuildArmComputeTensor(*m_KernelTensor, weightsPermuted.GetInfo(), m_Data.m_Parameters.m_DataLayout); if (m_Data.m_Parameters.m_BiasEnabled) { @@ -116,9 +114,6 @@ NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload( input.info()->set_data_layout(aclDataLayout); output.info()->set_data_layout(aclDataLayout); - // Get the depth multiplier - const unsigned int depthMultiplier = weightInfo.GetShape()[0]; - arm_compute::PadStrideInfo padStrideInfo = BuildArmComputePadStrideInfo(m_Data.m_Parameters); const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor); @@ -136,7 +131,7 @@ NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload( ARMNN_ASSERT(m_pDepthwiseConvolutionLayer); - ScopedTensorHandle weightsPermutedHandle(weightPermuted); + ScopedTensorHandle weightsPermutedHandle(weightsPermuted); InitializeArmComputeTensorData(*m_KernelTensor, &weightsPermutedHandle); if (m_Data.m_Parameters.m_BiasEnabled) -- cgit v1.2.1