From 747ef82c88f9afe14a8b80b6b3b34118353e97f2 Mon Sep 17 00:00:00 2001 From: Matteo Martincigh Date: Tue, 18 Dec 2018 09:26:39 +0000 Subject: MLCE-77 Depthwise Convolution with depth multiplier > 1 doesn't work * Unified ArmNN's weight format to [ M, I, H, W ] for the depthwise convolution * Added conversion utilities to permute/reshape the weights as appropriate when using CL and Neon backends * Updated the reference implementation of the convolution * Updated the relevant unit tests accordingly !android-nn-driver:459 Change-Id: I07d0818efa9d1ca1e5dad82983aac1fe78eadb18 --- .../workloads/NeonDepthwiseConvolutionWorkload.cpp | 72 +++++++++++++--------- 1 file changed, 44 insertions(+), 28 deletions(-) (limited to 'src/backends/neon') diff --git a/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp b/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp index 6cad12cba8..be26359662 100644 --- a/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp +++ b/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp @@ -8,10 +8,7 @@ #include #include #include - -#include - -using namespace armnnUtils; +#include namespace armnn { @@ -19,17 +16,23 @@ namespace armnn using namespace armcomputetensorutils; arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const DepthwiseConvolution2dDescriptor& descriptor, - const TensorInfo& weights, - const Optional& biases) + const TensorInfo& output, + const DepthwiseConvolution2dDescriptor& descriptor, + const TensorInfo& weights, + const Optional& biases) { - const arm_compute::TensorInfo aclInputInfo = - BuildArmComputeTensorInfo(input, descriptor.m_DataLayout); - const arm_compute::TensorInfo aclOutputInfo = - BuildArmComputeTensorInfo(output, descriptor.m_DataLayout); - const arm_compute::TensorInfo aclWeightsInfo = - BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout); + const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout); + const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout); + + // ArmNN's weight format is [ M, I, H, W ] + const unsigned int aclDepthMultiplier = weights.GetShape()[0]; + + // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either + // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library + TensorInfo weightsPermuted = ConvertWeightTensorInfoFromArmnnToAcl(weights, descriptor.m_DataLayout); + + // Convert the weights into the compute library format + const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout); arm_compute::TensorInfo aclBiasesInfo; arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr; @@ -42,9 +45,7 @@ arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& i optionalAclBiasesInfo = &aclBiasesInfo; } - const arm_compute::PadStrideInfo aclPadStrideInfo = - BuildArmComputePadStrideInfo(descriptor); - const unsigned int aclDepthMultiplier = weights.GetShape()[0]; + const arm_compute::PadStrideInfo aclPadStrideInfo = BuildArmComputePadStrideInfo(descriptor); return arm_compute::NEDepthwiseConvolutionLayer::validate(&aclInputInfo, &aclWeightsInfo, @@ -59,14 +60,21 @@ NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload( const WorkloadInfo& info) : BaseWorkload(descriptor, info) { - const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo(); + // ArmNN's weight format is [ M, I, H, W ] + auto& weightInfo = m_Data.m_Weight->GetTensorInfo(); - m_KernelTensor = std::make_unique(); - BuildArmComputeTensor(*m_KernelTensor, weightInfo, m_Data.m_Parameters.m_DataLayout); + // Allocate a buffer for the swizzling of the weight tensor + std::unique_ptr permuteBuffer(new unsigned char[m_Data.m_Weight->GetTensorInfo().GetNumBytes()]); - INeonTensorHandle* inputTensorHandle = static_cast(m_Data.m_Inputs[0]); - INeonTensorHandle* outputTensorHandle = static_cast(m_Data.m_Outputs[0]); - DataLayoutIndexed dataLayoutIndex(m_Data.m_Parameters.m_DataLayout); + // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either + // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library + ConstTensor weightPermuted = ConvertWeightTensorFromArmnnToAcl(m_Data.m_Weight, + m_Data.m_Parameters.m_DataLayout, + permuteBuffer.get()); + + // Convert the weights into the compute library format + m_KernelTensor = std::make_unique(); + BuildArmComputeTensor(*m_KernelTensor, weightPermuted.GetInfo(), m_Data.m_Parameters.m_DataLayout); if (m_Data.m_Parameters.m_BiasEnabled) { @@ -84,6 +92,9 @@ NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload( m_Data.ValidateInputsOutputs("NeonDepthwiseConvolutionWorkload", 1, 1); + INeonTensorHandle* inputTensorHandle = static_cast(m_Data.m_Inputs[0]); + INeonTensorHandle* outputTensorHandle = static_cast(m_Data.m_Outputs[0]); + arm_compute::ITensor& input = inputTensorHandle->GetTensor(); arm_compute::ITensor& output = outputTensorHandle->GetTensor(); @@ -91,9 +102,11 @@ NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload( input.info()->set_data_layout(aclDataLayout); output.info()->set_data_layout(aclDataLayout); - bool use3x3Optimisation = weightInfo.GetShape()[dataLayoutIndex.GetWidthIndex()] == 3 && - weightInfo.GetShape()[dataLayoutIndex.GetHeightIndex()] == 3; + // Get the depth multiplier + const unsigned int depthMultiplier = weightInfo.GetShape()[0]; + // Check for optimisation opportunities. + bool use3x3Optimisation = (weightInfo.GetShape()[2] == 3) && (weightInfo.GetShape()[3] == 3); if (use3x3Optimisation) { m_pDepthwiseConvolutionLayer = std::make_unique(); @@ -102,7 +115,8 @@ NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload( m_KernelTensor.get(), m_BiasTensor.get(), &output, - padStrideInfo); + padStrideInfo, + depthMultiplier); } else { @@ -112,12 +126,14 @@ NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload( m_KernelTensor.get(), m_BiasTensor.get(), &output, - padStrideInfo); + padStrideInfo, + depthMultiplier); } BOOST_ASSERT(m_pDepthwiseConvolutionLayer); - InitializeArmComputeTensorData(*m_KernelTensor, m_Data.m_Weight); + ScopedCpuTensorHandle weightsPermutedHandle(weightPermuted); + InitializeArmComputeTensorData(*m_KernelTensor, &weightsPermutedHandle); if (m_Data.m_Parameters.m_BiasEnabled) { -- cgit v1.2.1