aboutsummaryrefslogtreecommitdiff
path: root/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp
diff options
context:
space:
mode:
authorMatteo Martincigh <matteo.martincigh@arm.com>2018-12-18 09:26:39 +0000
committerMatteo Martincigh <matteo.martincigh@arm.com>2019-01-04 17:28:07 +0000
commit747ef82c88f9afe14a8b80b6b3b34118353e97f2 (patch)
treea29ac33b84fb96a41103a0a97327189495374cc9 /src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp
parent760892724d131c7da4b9baad05cddd49276ad6bb (diff)
downloadarmnn-747ef82c88f9afe14a8b80b6b3b34118353e97f2.tar.gz
MLCE-77 Depthwise Convolution with depth multiplier > 1 doesn't work
* Unified ArmNN's weight format to [ M, I, H, W ] for the depthwise convolution * Added conversion utilities to permute/reshape the weights as appropriate when using CL and Neon backends * Updated the reference implementation of the convolution * Updated the relevant unit tests accordingly !android-nn-driver:459 Change-Id: I07d0818efa9d1ca1e5dad82983aac1fe78eadb18
Diffstat (limited to 'src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp')
-rw-r--r--src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp72
1 files changed, 44 insertions, 28 deletions
diff --git a/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp b/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp
index 6cad12cba8..be26359662 100644
--- a/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp
+++ b/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp
@@ -8,10 +8,7 @@
#include <aclCommon/ArmComputeTensorUtils.hpp>
#include <neon/NeonLayerSupport.hpp>
#include <backendsCommon/CpuTensorHandle.hpp>
-
-#include <DataLayoutIndexed.hpp>
-
-using namespace armnnUtils;
+#include <backendsCommon/WorkloadUtils.hpp>
namespace armnn
{
@@ -19,17 +16,23 @@ namespace armnn
using namespace armcomputetensorutils;
arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& input,
- const TensorInfo& output,
- const DepthwiseConvolution2dDescriptor& descriptor,
- const TensorInfo& weights,
- const Optional<TensorInfo>& biases)
+ const TensorInfo& output,
+ const DepthwiseConvolution2dDescriptor& descriptor,
+ const TensorInfo& weights,
+ const Optional<TensorInfo>& biases)
{
- const arm_compute::TensorInfo aclInputInfo =
- BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
- const arm_compute::TensorInfo aclOutputInfo =
- BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
- const arm_compute::TensorInfo aclWeightsInfo =
- BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
+ const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
+ const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
+
+ // ArmNN's weight format is [ M, I, H, W ]
+ const unsigned int aclDepthMultiplier = weights.GetShape()[0];
+
+ // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either
+ // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library
+ TensorInfo weightsPermuted = ConvertWeightTensorInfoFromArmnnToAcl(weights, descriptor.m_DataLayout);
+
+ // Convert the weights into the compute library format
+ const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout);
arm_compute::TensorInfo aclBiasesInfo;
arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr;
@@ -42,9 +45,7 @@ arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& i
optionalAclBiasesInfo = &aclBiasesInfo;
}
- const arm_compute::PadStrideInfo aclPadStrideInfo =
- BuildArmComputePadStrideInfo(descriptor);
- const unsigned int aclDepthMultiplier = weights.GetShape()[0];
+ const arm_compute::PadStrideInfo aclPadStrideInfo = BuildArmComputePadStrideInfo(descriptor);
return arm_compute::NEDepthwiseConvolutionLayer::validate(&aclInputInfo,
&aclWeightsInfo,
@@ -59,14 +60,21 @@ NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload(
const WorkloadInfo& info)
: BaseWorkload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info)
{
- const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo();
+ // ArmNN's weight format is [ M, I, H, W ]
+ auto& weightInfo = m_Data.m_Weight->GetTensorInfo();
- m_KernelTensor = std::make_unique<arm_compute::Tensor>();
- BuildArmComputeTensor(*m_KernelTensor, weightInfo, m_Data.m_Parameters.m_DataLayout);
+ // Allocate a buffer for the swizzling of the weight tensor
+ std::unique_ptr<unsigned char[]> permuteBuffer(new unsigned char[m_Data.m_Weight->GetTensorInfo().GetNumBytes()]);
- INeonTensorHandle* inputTensorHandle = static_cast<INeonTensorHandle*>(m_Data.m_Inputs[0]);
- INeonTensorHandle* outputTensorHandle = static_cast<INeonTensorHandle*>(m_Data.m_Outputs[0]);
- DataLayoutIndexed dataLayoutIndex(m_Data.m_Parameters.m_DataLayout);
+ // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either
+ // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library
+ ConstTensor weightPermuted = ConvertWeightTensorFromArmnnToAcl(m_Data.m_Weight,
+ m_Data.m_Parameters.m_DataLayout,
+ permuteBuffer.get());
+
+ // Convert the weights into the compute library format
+ m_KernelTensor = std::make_unique<arm_compute::Tensor>();
+ BuildArmComputeTensor(*m_KernelTensor, weightPermuted.GetInfo(), m_Data.m_Parameters.m_DataLayout);
if (m_Data.m_Parameters.m_BiasEnabled)
{
@@ -84,6 +92,9 @@ NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload(
m_Data.ValidateInputsOutputs("NeonDepthwiseConvolutionWorkload", 1, 1);
+ INeonTensorHandle* inputTensorHandle = static_cast<INeonTensorHandle*>(m_Data.m_Inputs[0]);
+ INeonTensorHandle* outputTensorHandle = static_cast<INeonTensorHandle*>(m_Data.m_Outputs[0]);
+
arm_compute::ITensor& input = inputTensorHandle->GetTensor();
arm_compute::ITensor& output = outputTensorHandle->GetTensor();
@@ -91,9 +102,11 @@ NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload(
input.info()->set_data_layout(aclDataLayout);
output.info()->set_data_layout(aclDataLayout);
- bool use3x3Optimisation = weightInfo.GetShape()[dataLayoutIndex.GetWidthIndex()] == 3 &&
- weightInfo.GetShape()[dataLayoutIndex.GetHeightIndex()] == 3;
+ // Get the depth multiplier
+ const unsigned int depthMultiplier = weightInfo.GetShape()[0];
+ // Check for optimisation opportunities.
+ bool use3x3Optimisation = (weightInfo.GetShape()[2] == 3) && (weightInfo.GetShape()[3] == 3);
if (use3x3Optimisation)
{
m_pDepthwiseConvolutionLayer = std::make_unique<arm_compute::NEDepthwiseConvolutionLayer3x3>();
@@ -102,7 +115,8 @@ NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload(
m_KernelTensor.get(),
m_BiasTensor.get(),
&output,
- padStrideInfo);
+ padStrideInfo,
+ depthMultiplier);
}
else
{
@@ -112,12 +126,14 @@ NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload(
m_KernelTensor.get(),
m_BiasTensor.get(),
&output,
- padStrideInfo);
+ padStrideInfo,
+ depthMultiplier);
}
BOOST_ASSERT(m_pDepthwiseConvolutionLayer);
- InitializeArmComputeTensorData(*m_KernelTensor, m_Data.m_Weight);
+ ScopedCpuTensorHandle weightsPermutedHandle(weightPermuted);
+ InitializeArmComputeTensorData(*m_KernelTensor, &weightsPermutedHandle);
if (m_Data.m_Parameters.m_BiasEnabled)
{