aboutsummaryrefslogtreecommitdiff
path: root/src/backends/neon
diff options
context:
space:
mode:
Diffstat (limited to 'src/backends/neon')
-rw-r--r-- src/backends/neon/test/NeonLayerTests.cpp 16
-rw-r--r-- src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp 35
2 files changed, 25 insertions, 26 deletions
diff --git a/src/backends/neon/test/NeonLayerTests.cpp b/src/backends/neon/test/NeonLayerTests.cpp
index edc8cb995c..62864f82dc 100644
--- a/src/backends/neon/test/NeonLayerTests.cpp
+++ b/src/backends/neon/test/NeonLayerTests.cpp
@@ -216,6 +216,11 @@ ARMNN_AUTO_TEST_CASE(DepthToSpaceNhwcInt16_3, DepthToSpaceTest3<DataType::QSymmS
ARMNN_AUTO_TEST_CASE(DepthToSpaceNhwcInt16_4, DepthToSpaceTest4<DataType::QSymmS16>, DataLayout::NHWC);
// Depthwise Convolution
+ARMNN_AUTO_TEST_CASE_WITH_THF(DepthwiseConvolution2d, DepthwiseConvolution2dTest, true, DataLayout::NCHW)
+ARMNN_AUTO_TEST_CASE_WITH_THF(DepthwiseConvolution2dUint8, DepthwiseConvolution2dUint8Test, true, DataLayout::NCHW)
+
+ARMNN_AUTO_TEST_CASE_WITH_THF(UnbiasedDepthwiseConvolution2d, DepthwiseConvolution2dTest, false, DataLayout::NCHW)
+
ARMNN_AUTO_TEST_CASE_WITH_THF(DepthwiseConvolution2dDepthMul1,
DepthwiseConvolution2dDepthMul1Test, true, DataLayout::NCHW)
ARMNN_AUTO_TEST_CASE_WITH_THF(UnbiasedDepthwiseConvolution2dDepthMul1,
@@ -291,16 +296,15 @@ TensorInfo CreateOutputTensorInfo(const TensorInfo& inputInfo,
unsigned int inHeight = inputShape[2];
unsigned int inBatchSize = inputShape[0];
- unsigned int filterWidth = filterShape[3];
+ unsigned int filterWidth = filterShape[2];
unsigned int readWidth = (inWidth + descriptor.m_PadLeft + descriptor.m_PadRight) - (filterWidth);
unsigned int outWidth = 1u + (readWidth / descriptor.m_StrideX);
- unsigned int filterHeight = filterShape[2];
+ unsigned int filterHeight = filterShape[1];
unsigned int readHeight = (inHeight + descriptor.m_PadTop + descriptor.m_PadBottom) - (filterHeight);
unsigned int outHeight = 1u + (readHeight / descriptor.m_StrideY);
- unsigned int depthMultiplier = filterShape[0];
- unsigned int outChannels = filterShape[1] * depthMultiplier;
+ unsigned int outChannels = filterShape[3];
unsigned int outBatchSize = inBatchSize;
TensorShape outputShape({outBatchSize, outChannels, outHeight, outWidth});
@@ -314,7 +318,7 @@ TEST_CASE("DepthwiseConv2dUtils")
TensorInfo inputInfo({1, 1, 10, 10 }, dataType);
TensorInfo outputInfo;
- TensorInfo weightsInfo3x3({ 1, 1, 3, 3 }, dataType);
+ TensorInfo weightsInfo3x3({ 1, 3, 3, 1 }, dataType); // [1,H,W,I*M]
TensorInfo biasesInfo;
DepthwiseConvolution2dDescriptor descriptor;
@@ -380,7 +384,7 @@ TEST_CASE("DepthwiseConv2dUtils")
weightsInfo1x1, biasesInfo));
// Supported shape 2x2
- TensorInfo weightsInfo2x2({ 1, 1, 2, 2 }, DataType::Float32);
+ TensorInfo weightsInfo2x2({ 1, 2, 2, 1 }, DataType::Float32);
descriptor = MakeDepthwiseConv2dDesc(1, 1);
outputInfo = CreateOutputTensorInfo(inputInfo, weightsInfo2x2, descriptor, dataType);
CHECK(layerSupport.IsDepthwiseConvolutionSupported(inputInfo, outputInfo, descriptor,
diff --git a/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp b/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp
index ad509076b4..589a951825 100644
--- a/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp
+++ b/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp
@@ -36,12 +36,11 @@ arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& i
const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
- // ArmNN's weight format is [ M, I, H, W ]
- const unsigned int aclDepthMultiplier = weights.GetShape()[0];
-
- // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either
- // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library
- TensorInfo weightsPermuted = ConvertWeightTensorInfoFromArmnnToAcl(weights, descriptor.m_DataLayout);
+ // ArmNN's weight format is usually [ M, I, H, W ] but for depthwise it's [ 1, H, W, I*M ]
+ // Permute to [ 1, I * M, H, W ] (if NCHW), as required by the compute library
+ unsigned int aclDepthMultiplier;
+ TensorInfo weightsPermuted;
+ std::tie(weightsPermuted, aclDepthMultiplier) = Convert1HWOTensorInfoToAcl(weights, input,descriptor.m_DataLayout);
// Convert the weights into the compute library format
const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout);
@@ -79,21 +78,20 @@ NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload(
const WorkloadInfo& info)
: BaseWorkload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info)
{
- // ArmNN's weight format is [ M, I, H, W ]
+ // ArmNN's weight format for depthwise is [ 1, H, W, I*M ]
auto& weightInfo = m_Data.m_Weight->GetTensorInfo();
- // Allocate a buffer for the swizzling of the weight tensor
- std::unique_ptr<unsigned char[]> permuteBuffer(new unsigned char[m_Data.m_Weight->GetTensorInfo().GetNumBytes()]);
-
- // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either
- // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library
- ConstTensor weightPermuted = ConvertWeightTensorFromArmnnToAcl(m_Data.m_Weight,
- m_Data.m_Parameters.m_DataLayout,
- permuteBuffer.get());
+ ConstTensor weightsPermuted;
+ unsigned int depthMultiplier;
+ std::unique_ptr<unsigned char[]> permuteBuffer(new unsigned char[weightInfo.GetNumBytes()]);
+ std::tie(weightsPermuted, depthMultiplier) = Convert1HWOTensorToAcl(m_Data.m_Weight,
+ info.m_InputTensorInfos[0],
+ m_Data.m_Parameters.m_DataLayout,
+ permuteBuffer.get());
// Convert the weights into the compute library format
m_KernelTensor = std::make_unique<arm_compute::Tensor>();
- BuildArmComputeTensor(*m_KernelTensor, weightPermuted.GetInfo(), m_Data.m_Parameters.m_DataLayout);
+ BuildArmComputeTensor(*m_KernelTensor, weightsPermuted.GetInfo(), m_Data.m_Parameters.m_DataLayout);
if (m_Data.m_Parameters.m_BiasEnabled)
{
@@ -116,9 +114,6 @@ NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload(
input.info()->set_data_layout(aclDataLayout);
output.info()->set_data_layout(aclDataLayout);
- // Get the depth multiplier
- const unsigned int depthMultiplier = weightInfo.GetShape()[0];
-
arm_compute::PadStrideInfo padStrideInfo = BuildArmComputePadStrideInfo(m_Data.m_Parameters);
const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
@@ -136,7 +131,7 @@ NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload(
ARMNN_ASSERT(m_pDepthwiseConvolutionLayer);
- ScopedTensorHandle weightsPermutedHandle(weightPermuted);
+ ScopedTensorHandle weightsPermutedHandle(weightsPermuted);
InitializeArmComputeTensorData(*m_KernelTensor, &weightsPermutedHandle);
if (m_Data.m_Parameters.m_BiasEnabled)