diff options
author | Matteo Martincigh <matteo.martincigh@arm.com> | 2018-12-18 09:26:39 +0000 |
---|---|---|
committer | Matteo Martincigh <matteo.martincigh@arm.com> | 2019-01-04 17:28:07 +0000 |
commit | 747ef82c88f9afe14a8b80b6b3b34118353e97f2 (patch) | |
tree | a29ac33b84fb96a41103a0a97327189495374cc9 /src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp | |
parent | 760892724d131c7da4b9baad05cddd49276ad6bb (diff) | |
download | armnn-747ef82c88f9afe14a8b80b6b3b34118353e97f2.tar.gz |
MLCE-77 Depthwise Convolution with depth multiplier > 1 doesn't work
* Unified ArmNN's weight format to [ M, I, H, W ] for the depthwise convolution
* Added conversion utilities to permute/reshape the weights as appropriate
when using CL and Neon backends
* Updated the reference implementation of the convolution
* Updated the relevant unit tests accordingly
!android-nn-driver:459
Change-Id: I07d0818efa9d1ca1e5dad82983aac1fe78eadb18
Diffstat (limited to 'src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp')
-rw-r--r-- | src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp | 49 |
1 file changed, 34 insertions, 15 deletions
diff --git a/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp b/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp index 9cadbf09ac..1745b8297a 100644 --- a/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp +++ b/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp @@ -12,6 +12,7 @@ #include <aclCommon/ArmComputeTensorUtils.hpp> #include <cl/ClTensorHandle.hpp> #include <backendsCommon/CpuTensorHandle.hpp> +#include <backendsCommon/WorkloadUtils.hpp> #include <arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h> @@ -21,14 +22,23 @@ namespace armnn using namespace armcomputetensorutils; arm_compute::Status ClDepthwiseConvolutionWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const DepthwiseConvolution2dDescriptor& descriptor, - const TensorInfo& weights, - const Optional<TensorInfo>& biases) + const TensorInfo& output, + const DepthwiseConvolution2dDescriptor& descriptor, + const TensorInfo& weights, + const Optional<TensorInfo>& biases) { - const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout); + const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout); const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout); - const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout); + + // ArmNN's weight format is [ M, I, H, W ] + const unsigned int aclDepthMultiplier = weights.GetShape()[0]; + + // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either + // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library + TensorInfo weightsPermuted = ConvertWeightTensorInfoFromArmnnToAcl(weights, descriptor.m_DataLayout); + + // Convert the weights into the compute library format + const arm_compute::TensorInfo aclWeightsInfo = 
BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout); arm_compute::TensorInfo aclBiasesInfo; arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr; @@ -42,7 +52,6 @@ arm_compute::Status ClDepthwiseConvolutionWorkloadValidate(const TensorInfo& inp } const arm_compute::PadStrideInfo aclPadStrideInfo = BuildArmComputePadStrideInfo(descriptor); - const unsigned int aclDepthMultiplier = weights.GetShape()[0]; return arm_compute::CLDepthwiseConvolutionLayer::validate(&aclInputInfo, &aclWeightsInfo, @@ -57,10 +66,18 @@ ClDepthwiseConvolutionWorkload::ClDepthwiseConvolutionWorkload( const WorkloadInfo& info) : BaseWorkload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info) { - auto& weightInfo = m_Data.m_Weight->GetTensorInfo(); + // Allocate a buffer for the swizzling of the weight tensor + std::unique_ptr<unsigned char[]> permuteBuffer(new unsigned char[m_Data.m_Weight->GetTensorInfo().GetNumBytes()]); + + // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either + // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library + ConstTensor weightPermuted = ConvertWeightTensorFromArmnnToAcl(m_Data.m_Weight, + m_Data.m_Parameters.m_DataLayout, + permuteBuffer.get()); + // Convert the weights into the compute library format m_KernelTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_KernelTensor, weightInfo, m_Data.m_Parameters.m_DataLayout); + BuildArmComputeTensor(*m_KernelTensor, weightPermuted.GetInfo(), m_Data.m_Parameters.m_DataLayout); if (m_Data.m_Parameters.m_BiasEnabled) { @@ -86,13 +103,14 @@ ClDepthwiseConvolutionWorkload::ClDepthwiseConvolutionWorkload( input.info()->set_data_layout(aclDataLayout); output.info()->set_data_layout(aclDataLayout); - const unsigned int depthMultiplier = weightInfo.GetShape()[0]; + // ArmNN's weight format is [ M, I, H, W ] + auto& weightInfo = m_Data.m_Weight->GetTensorInfo(); - const unsigned int 
widthIndex = (m_Data.m_Parameters.m_DataLayout == DataLayout::NCHW) ? 3 : 2; - const unsigned int heightIndex = (m_Data.m_Parameters.m_DataLayout == DataLayout::NCHW) ? 2 : 1; + // Get the depth multiplier + const unsigned int depthMultiplier = weightInfo.GetShape()[0]; - //Check for optimisation opportunities. - bool use3x3Optimisation = (weightInfo.GetShape()[widthIndex] == 3) && (weightInfo.GetShape()[heightIndex] == 3); + // Check for optimisation opportunities. + bool use3x3Optimisation = (weightInfo.GetShape()[2] == 3) && (weightInfo.GetShape()[3] == 3); if (use3x3Optimisation) { m_DepthwiseConvolutionLayer = std::make_unique<arm_compute::CLDepthwiseConvolutionLayer3x3>(); @@ -118,7 +136,8 @@ ClDepthwiseConvolutionWorkload::ClDepthwiseConvolutionWorkload( BOOST_ASSERT(m_DepthwiseConvolutionLayer); - InitializeArmComputeClTensorData(*m_KernelTensor, m_Data.m_Weight); + ScopedCpuTensorHandle weightsPermutedHandle(weightPermuted); + InitializeArmComputeClTensorData(*m_KernelTensor, &weightsPermutedHandle); if (m_BiasTensor) { |