diff options
author | Matteo Martincigh <matteo.martincigh@arm.com> | 2018-12-18 09:26:39 +0000 |
---|---|---|
committer | Matteo Martincigh <matteo.martincigh@arm.com> | 2019-01-04 17:28:07 +0000 |
commit | 747ef82c88f9afe14a8b80b6b3b34118353e97f2 (patch) | |
tree | a29ac33b84fb96a41103a0a97327189495374cc9 /src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp | |
parent | 760892724d131c7da4b9baad05cddd49276ad6bb (diff) | |
download | armnn-747ef82c88f9afe14a8b80b6b3b34118353e97f2.tar.gz |
MLCE-77 Depthwise Convolution with depth multiplier > 1 doesn't work
* Unified ArmNN's weight format to [ M, I, H, W ] for the depthwise convolution
* Added conversion utilities to permute/reshape the weights as appropriate
when using CL and Neon backends
* Updated the reference implementation of the convolution
* Updated the relevant unit tests accordingly
!android-nn-driver:459
Change-Id: I07d0818efa9d1ca1e5dad82983aac1fe78eadb18
Diffstat (limited to 'src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp')
-rw-r--r-- | src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp | 72 |
1 files changed, 44 insertions, 28 deletions
diff --git a/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp b/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp index 6cad12cba8..be26359662 100644 --- a/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp +++ b/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp @@ -8,10 +8,7 @@ #include <aclCommon/ArmComputeTensorUtils.hpp> #include <neon/NeonLayerSupport.hpp> #include <backendsCommon/CpuTensorHandle.hpp> - -#include <DataLayoutIndexed.hpp> - -using namespace armnnUtils; +#include <backendsCommon/WorkloadUtils.hpp> namespace armnn { @@ -19,17 +16,23 @@ namespace armnn using namespace armcomputetensorutils; arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const DepthwiseConvolution2dDescriptor& descriptor, - const TensorInfo& weights, - const Optional<TensorInfo>& biases) + const TensorInfo& output, + const DepthwiseConvolution2dDescriptor& descriptor, + const TensorInfo& weights, + const Optional<TensorInfo>& biases) { - const arm_compute::TensorInfo aclInputInfo = - BuildArmComputeTensorInfo(input, descriptor.m_DataLayout); - const arm_compute::TensorInfo aclOutputInfo = - BuildArmComputeTensorInfo(output, descriptor.m_DataLayout); - const arm_compute::TensorInfo aclWeightsInfo = - BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout); + const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout); + const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout); + + // ArmNN's weight format is [ M, I, H, W ] + const unsigned int aclDepthMultiplier = weights.GetShape()[0]; + + // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either + // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library + TensorInfo weightsPermuted = ConvertWeightTensorInfoFromArmnnToAcl(weights, descriptor.m_DataLayout); + + // Convert the weights into the compute library format + const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout); arm_compute::TensorInfo aclBiasesInfo; arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr; @@ -42,9 +45,7 @@ arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& i optionalAclBiasesInfo = &aclBiasesInfo; } - const arm_compute::PadStrideInfo aclPadStrideInfo = - BuildArmComputePadStrideInfo(descriptor); - const unsigned int aclDepthMultiplier = weights.GetShape()[0]; + const arm_compute::PadStrideInfo aclPadStrideInfo = BuildArmComputePadStrideInfo(descriptor); return arm_compute::NEDepthwiseConvolutionLayer::validate(&aclInputInfo, &aclWeightsInfo, @@ -59,14 +60,21 @@ NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload( const WorkloadInfo& info) : BaseWorkload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info) { - const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo(); + // ArmNN's weight format is [ M, I, H, W ] + auto& weightInfo = m_Data.m_Weight->GetTensorInfo(); - m_KernelTensor = std::make_unique<arm_compute::Tensor>(); - BuildArmComputeTensor(*m_KernelTensor, weightInfo, m_Data.m_Parameters.m_DataLayout); + // Allocate a buffer for the swizzling of the weight tensor + std::unique_ptr<unsigned char[]> permuteBuffer(new unsigned char[m_Data.m_Weight->GetTensorInfo().GetNumBytes()]); - INeonTensorHandle* inputTensorHandle = static_cast<INeonTensorHandle*>(m_Data.m_Inputs[0]); - INeonTensorHandle* outputTensorHandle = static_cast<INeonTensorHandle*>(m_Data.m_Outputs[0]); - DataLayoutIndexed dataLayoutIndex(m_Data.m_Parameters.m_DataLayout); + // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either + // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library + ConstTensor weightPermuted = ConvertWeightTensorFromArmnnToAcl(m_Data.m_Weight, + m_Data.m_Parameters.m_DataLayout, + permuteBuffer.get()); + + // Convert the weights into the compute library format + m_KernelTensor = std::make_unique<arm_compute::Tensor>(); + BuildArmComputeTensor(*m_KernelTensor, weightPermuted.GetInfo(), m_Data.m_Parameters.m_DataLayout); if (m_Data.m_Parameters.m_BiasEnabled) { @@ -84,6 +92,9 @@ NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload( m_Data.ValidateInputsOutputs("NeonDepthwiseConvolutionWorkload", 1, 1); + INeonTensorHandle* inputTensorHandle = static_cast<INeonTensorHandle*>(m_Data.m_Inputs[0]); + INeonTensorHandle* outputTensorHandle = static_cast<INeonTensorHandle*>(m_Data.m_Outputs[0]); + arm_compute::ITensor& input = inputTensorHandle->GetTensor(); arm_compute::ITensor& output = outputTensorHandle->GetTensor(); @@ -91,9 +102,11 @@ NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload( input.info()->set_data_layout(aclDataLayout); output.info()->set_data_layout(aclDataLayout); - bool use3x3Optimisation = weightInfo.GetShape()[dataLayoutIndex.GetWidthIndex()] == 3 && - weightInfo.GetShape()[dataLayoutIndex.GetHeightIndex()] == 3; + // Get the depth multiplier + const unsigned int depthMultiplier = weightInfo.GetShape()[0]; + // Check for optimisation opportunities. + bool use3x3Optimisation = (weightInfo.GetShape()[2] == 3) && (weightInfo.GetShape()[3] == 3); if (use3x3Optimisation) { m_pDepthwiseConvolutionLayer = std::make_unique<arm_compute::NEDepthwiseConvolutionLayer3x3>(); @@ -102,7 +115,8 @@ NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload( m_KernelTensor.get(), m_BiasTensor.get(), &output, - padStrideInfo); + padStrideInfo, + depthMultiplier); } else { @@ -112,12 +126,14 @@ NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload( m_KernelTensor.get(), m_BiasTensor.get(), &output, - padStrideInfo); + padStrideInfo, + depthMultiplier); } BOOST_ASSERT(m_pDepthwiseConvolutionLayer); - InitializeArmComputeTensorData(*m_KernelTensor, m_Data.m_Weight); + ScopedCpuTensorHandle weightsPermutedHandle(weightPermuted); + InitializeArmComputeTensorData(*m_KernelTensor, &weightsPermutedHandle); if (m_Data.m_Parameters.m_BiasEnabled) { |