aboutsummaryrefslogtreecommitdiff
path: root/src/backends
diff options
context:
space:
mode:
author: Matteo Martincigh <matteo.martincigh@arm.com> 2018-12-18 09:26:39 +0000
committer: Matteo Martincigh <matteo.martincigh@arm.com> 2019-01-04 17:28:07 +0000
commit: 747ef82c88f9afe14a8b80b6b3b34118353e97f2 (patch)
tree: a29ac33b84fb96a41103a0a97327189495374cc9 /src/backends
parent: 760892724d131c7da4b9baad05cddd49276ad6bb (diff)
download: armnn-747ef82c88f9afe14a8b80b6b3b34118353e97f2.tar.gz
MLCE-77 Depthwise Convolution with depth multiplier > 1 doesn't work
* Unified ArmNN's weight format to [ M, I, H, W ] for the depthwise convolution
* Added conversion utilities to permute/reshape the weights as appropriate when using CL and Neon backends
* Updated the reference implementation of the convolution
* Updated the relevant unit tests accordingly

!android-nn-driver:459

Change-Id: I07d0818efa9d1ca1e5dad82983aac1fe78eadb18
Diffstat (limited to 'src/backends')
-rw-r--r-- src/backends/aclCommon/ArmComputeTensorUtils.cpp 26
-rw-r--r-- src/backends/aclCommon/ArmComputeTensorUtils.hpp 8
-rw-r--r-- src/backends/backendsCommon/CMakeLists.txt 1
-rw-r--r-- src/backends/backendsCommon/CpuTensorHandle.cpp 4
-rw-r--r-- src/backends/backendsCommon/CpuTensorHandle.hpp 6
-rw-r--r-- src/backends/backendsCommon/WorkloadData.cpp 5
-rw-r--r-- src/backends/backendsCommon/WorkloadUtils.cpp 111
-rw-r--r-- src/backends/backendsCommon/WorkloadUtils.hpp 41
-rw-r--r-- src/backends/backendsCommon/common.mk 3
-rwxr-xr-x src/backends/backendsCommon/test/Conv2dTestImpl.hpp 64
-rwxr-xr-x src/backends/backendsCommon/test/LayerTests.cpp 30
-rw-r--r-- src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp 49
-rw-r--r-- src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp 72
-rw-r--r-- src/backends/reference/workloads/ConvImpl.hpp 93
-rw-r--r-- src/backends/reference/workloads/RefConvolution2dFloat32Workload.cpp 8
-rw-r--r-- src/backends/reference/workloads/RefConvolution2dUint8Workload.cpp 7
-rw-r--r-- src/backends/reference/workloads/RefDepthwiseConvolution2dFloat32Workload.cpp 6
-rw-r--r-- src/backends/reference/workloads/RefDepthwiseConvolution2dUint8Workload.cpp 7
18 files changed, 332 insertions, 209 deletions
diff --git a/src/backends/aclCommon/ArmComputeTensorUtils.cpp b/src/backends/aclCommon/ArmComputeTensorUtils.cpp
index a2d7d8c797..32af42f7e1 100644
--- a/src/backends/aclCommon/ArmComputeTensorUtils.cpp
+++ b/src/backends/aclCommon/ArmComputeTensorUtils.cpp
@@ -109,19 +109,6 @@ arm_compute::TensorInfo BuildArmComputeTensorInfo(const armnn::TensorInfo& tenso
return arm_compute::TensorInfo(aclTensorShape, 1, aclDataType, aclQuantizationInfo);
}
-arm_compute::DataLayout ConvertDataLayout(armnn::DataLayout dataLayout)
-{
- switch(dataLayout)
- {
- case armnn::DataLayout::NHWC : return arm_compute::DataLayout::NHWC;
-
- case armnn::DataLayout::NCHW : return arm_compute::DataLayout::NCHW;
-
- default: throw InvalidArgumentException("Unknown armnn::DataLayout: [" +
- std::to_string(static_cast<int>(dataLayout)) + "]");
- }
-}
-
arm_compute::TensorInfo BuildArmComputeTensorInfo(const armnn::TensorInfo& tensorInfo,
armnn::DataLayout dataLayout)
{
@@ -136,6 +123,19 @@ arm_compute::TensorInfo BuildArmComputeTensorInfo(const armnn::TensorInfo& tenso
return clTensorInfo;
}
+arm_compute::DataLayout ConvertDataLayout(armnn::DataLayout dataLayout)
+{
+ switch(dataLayout)
+ {
+ case armnn::DataLayout::NHWC : return arm_compute::DataLayout::NHWC;
+
+ case armnn::DataLayout::NCHW : return arm_compute::DataLayout::NCHW;
+
+ default: throw InvalidArgumentException("Unknown armnn::DataLayout: [" +
+ std::to_string(static_cast<int>(dataLayout)) + "]");
+ }
+}
+
arm_compute::PoolingLayerInfo BuildArmComputePoolingLayerInfo(const Pooling2dDescriptor& descriptor)
{
using arm_compute::PoolingType;
diff --git a/src/backends/aclCommon/ArmComputeTensorUtils.hpp b/src/backends/aclCommon/ArmComputeTensorUtils.hpp
index fbd850c687..fa455b746b 100644
--- a/src/backends/aclCommon/ArmComputeTensorUtils.hpp
+++ b/src/backends/aclCommon/ArmComputeTensorUtils.hpp
@@ -36,16 +36,16 @@ arm_compute::TensorShape BuildArmComputeTensorShape(const armnn::TensorShape& te
/// armnn::ITensorInfo.
arm_compute::TensorInfo BuildArmComputeTensorInfo(const armnn::TensorInfo& tensorInfo);
-/// Utility function used to convert armnn::DataLayout to arm_compute::DataLayout
-/// armnn::DataLayout.
-arm_compute::DataLayout ConvertDataLayout(armnn::DataLayout dataLayout);
-
/// Utility function used to setup an arm_compute::ITensorInfo object whose dimensions are based on the given
/// armnn::ITensorInfo.
/// armnn::DataLayout.
arm_compute::TensorInfo BuildArmComputeTensorInfo(const armnn::TensorInfo& tensorInfo,
armnn::DataLayout dataLayout);
+/// Utility function used to convert an armnn::DataLayout value to the corresponding
+/// arm_compute::DataLayout value.
+arm_compute::DataLayout ConvertDataLayout(armnn::DataLayout dataLayout);
+
/// Utility function used to setup an arm_compute::PoolingLayerInfo object from an armnn::Pooling2dDescriptor.
arm_compute::PoolingLayerInfo BuildArmComputePoolingLayerInfo(const Pooling2dDescriptor& descriptor);
diff --git a/src/backends/backendsCommon/CMakeLists.txt b/src/backends/backendsCommon/CMakeLists.txt
index f29563093c..b120f51184 100644
--- a/src/backends/backendsCommon/CMakeLists.txt
+++ b/src/backends/backendsCommon/CMakeLists.txt
@@ -27,6 +27,7 @@ list(APPEND armnnBackendsCommon_sources
WorkloadFactory.hpp
Workload.hpp
WorkloadInfo.hpp
+ WorkloadUtils.cpp
WorkloadUtils.hpp
)
diff --git a/src/backends/backendsCommon/CpuTensorHandle.cpp b/src/backends/backendsCommon/CpuTensorHandle.cpp
index fe0c634e7c..9dcd3f38df 100644
--- a/src/backends/backendsCommon/CpuTensorHandle.cpp
+++ b/src/backends/backendsCommon/CpuTensorHandle.cpp
@@ -18,7 +18,7 @@ ConstCpuTensorHandle::ConstCpuTensorHandle(const TensorInfo& tensorInfo)
}
template <>
-const void* ConstCpuTensorHandle::GetConstTensor() const
+const void* ConstCpuTensorHandle::GetConstTensor<void>() const
{
return m_Memory;
}
@@ -30,7 +30,7 @@ CpuTensorHandle::CpuTensorHandle(const TensorInfo& tensorInfo)
}
template <>
-void* CpuTensorHandle::GetTensor() const
+void* CpuTensorHandle::GetTensor<void>() const
{
return m_MutableMemory;
}
diff --git a/src/backends/backendsCommon/CpuTensorHandle.hpp b/src/backends/backendsCommon/CpuTensorHandle.hpp
index ae13d6c439..b88a0d385b 100644
--- a/src/backends/backendsCommon/CpuTensorHandle.hpp
+++ b/src/backends/backendsCommon/CpuTensorHandle.hpp
@@ -72,6 +72,9 @@ private:
const void* m_Memory;
};
+template<>
+const void* ConstCpuTensorHandle::GetConstTensor<void>() const;
+
// Abstract specialization of ConstCpuTensorHandle that allows write access to the same data.
class CpuTensorHandle : public ConstCpuTensorHandle
{
@@ -99,6 +102,9 @@ private:
void* m_MutableMemory;
};
+template <>
+void* CpuTensorHandle::GetTensor<void>() const;
+
// A CpuTensorHandle that owns the wrapped memory region.
class ScopedCpuTensorHandle : public CpuTensorHandle
{
diff --git a/src/backends/backendsCommon/WorkloadData.cpp b/src/backends/backendsCommon/WorkloadData.cpp
index 8847b4efbf..1dac498c11 100644
--- a/src/backends/backendsCommon/WorkloadData.cpp
+++ b/src/backends/backendsCommon/WorkloadData.cpp
@@ -593,9 +593,10 @@ void DepthwiseConvolution2dQueueDescriptor::Validate(const WorkloadInfo& workloa
const unsigned int channelIndex = (m_Parameters.m_DataLayout == DataLayout::NCHW) ? 1 : 3;
- //inputChannels * channelMultiplier should be equal to outputChannels.
+ // Expected weight shape: [ M, I, H, W ] - This shape does NOT depend on the data layout
+ // inputChannels * channelMultiplier should be equal to outputChannels.
const unsigned int numWeightChannelMultiplier = m_Weight->GetTensorInfo().GetShape()[0];
- const unsigned int numWeightInputChannels = m_Weight->GetTensorInfo().GetShape()[channelIndex];
+ const unsigned int numWeightInputChannels = m_Weight->GetTensorInfo().GetShape()[1];
const unsigned int numWeightOutputChannels = workloadInfo.m_OutputTensorInfos[0].GetShape()[channelIndex];
if (numWeightChannelMultiplier * numWeightInputChannels != numWeightOutputChannels)
{
diff --git a/src/backends/backendsCommon/WorkloadUtils.cpp b/src/backends/backendsCommon/WorkloadUtils.cpp
new file mode 100644
index 0000000000..fa387a7a0b
--- /dev/null
+++ b/src/backends/backendsCommon/WorkloadUtils.cpp
@@ -0,0 +1,111 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "WorkloadUtils.hpp"
+
+namespace armnn
+{
+
+armnn::ConstTensor PermuteTensor(const ConstCpuTensorHandle* tensor,
+ const PermutationVector& permutationVector,
+ void* permuteBuffer)
+{
+ BOOST_ASSERT_MSG(tensor, "Invalid input tensor");
+ BOOST_ASSERT_MSG(permuteBuffer, "Invalid permute buffer");
+
+ TensorInfo tensorInfo = tensor->GetTensorInfo();
+
+ if (permutationVector.GetSize() > 0)
+ {
+ tensorInfo = armnnUtils::Permuted(tensorInfo, permutationVector);
+ armnnUtils::Permute(tensorInfo.GetShape(), permutationVector,
+ tensor->GetConstTensor<void>(), permuteBuffer,
+ GetDataTypeSize(tensorInfo.GetDataType()));
+ }
+ else
+ {
+ ::memcpy(permuteBuffer, tensor->GetConstTensor<void>(), tensorInfo.GetNumBytes());
+ }
+
+ return ConstTensor(tensorInfo, permuteBuffer);
+}
+
+void ReshapeWeightsForAcl(TensorInfo& weightInfo, DataLayout dataLayout)
+{
+ // Reshape the weights in-place
+ const TensorShape& weightShape = weightInfo.GetShape();
+ switch (dataLayout)
+ {
+ case DataLayout::NHWC:
+ // The data layout is NHWC, reshape from [ H, W, I, M ] to [ 1, H, W, I * M ]
+ weightInfo.SetShape({ 1,
+ weightShape[0],
+ weightShape[1],
+ weightShape[2] * weightShape[3] });
+ break;
+ case DataLayout::NCHW:
+ default:
+ // The data layout is NCHW, reshape from [ M, I, H, W ] to [ 1, I * M, H, W ]
+ weightInfo.SetShape({ 1,
+ weightShape[0] * weightShape[1],
+ weightShape[2],
+ weightShape[3] });
+ break;
+ }
+}
+
+TensorInfo ConvertWeightTensorInfoFromArmnnToAcl(const TensorInfo& weightInfo, DataLayout dataLayout)
+{
+ // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either
+ // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library
+
+ // 1. Permute the weights if necessary
+ // If the data layout is NCHW no permutation is necessary, as a reshape to [ 1, I * M, H, W ] can be better done
+ // starting from the current shape of [ M, I, H, W ]
+ TensorInfo weightPermutedInfo(weightInfo);
+ if (dataLayout == DataLayout::NHWC)
+ {
+ // The data layout is NHWC, then permute the weights from [ M, I, H, W ] to [ H, W, I, M ]
+ PermutationVector permutationVector{ 3, 2, 0, 1 };
+ weightPermutedInfo = armnnUtils::Permuted(weightInfo, permutationVector);
+ }
+
+ // 2. Reshape the weights
+ ReshapeWeightsForAcl(weightPermutedInfo, dataLayout);
+
+ // 3. Return the permuted weight info
+ return weightPermutedInfo;
+}
+
+armnn::ConstTensor ConvertWeightTensorFromArmnnToAcl(const ConstCpuTensorHandle* weightTensor,
+ DataLayout dataLayout,
+ void* permuteBuffer)
+{
+ BOOST_ASSERT_MSG(weightTensor, "Invalid input tensor");
+ BOOST_ASSERT_MSG(permuteBuffer, "Invalid permute buffer");
+
+ // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either
+ // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library
+
+ // 1. Permute the weights if necessary
+ // If the data layout is NCHW no permutation is necessary, as a reshape to [ 1, I * M, H, W ] can be better done
+ // starting from the current shape of [ M, I, H, W ]
+ // If no permutation is necessary, leave the permutation vector empty
+ PermutationVector permutationVector{};
+ if (dataLayout == DataLayout::NHWC)
+ {
+ // The data layout is NHWC, then permute the weights from [ M, I, H, W ] to [ H, W, I, M ]
+ permutationVector = { 3, 2, 0, 1 };
+ }
+ ConstTensor weightPermuted = PermuteTensor(weightTensor, permutationVector, permuteBuffer);
+
+ // 2. Reshape the weights
+ ReshapeWeightsForAcl(weightPermuted.GetInfo(), dataLayout);
+
+ // 3. Return the permuted tensor; its data lives in the caller-supplied permuteBuffer, which the caller must keep alive
+ return weightPermuted;
+}
+
+} // namespace armnn
diff --git a/src/backends/backendsCommon/WorkloadUtils.hpp b/src/backends/backendsCommon/WorkloadUtils.hpp
index 2b07b2b0d2..a1a8d2a475 100644
--- a/src/backends/backendsCommon/WorkloadUtils.hpp
+++ b/src/backends/backendsCommon/WorkloadUtils.hpp
@@ -6,35 +6,42 @@
#pragma once
#include "ITensorHandle.hpp"
+#include "CpuTensorHandle.hpp"
#include <armnn/Tensor.hpp>
+#include <Permute.hpp>
+#include <Profiling.hpp>
+#include <Half.hpp>
+
#include <boost/cast.hpp>
namespace armnn
{
namespace
{
+
template<typename ArrayType, typename Arg>
void AssignValues(unsigned int num, unsigned int& idx, const ArrayType& array, Arg& arg)
{
- if (idx >= num)
- {
- return;
- }
+ if (idx >= num)
+ {
+ return;
+ }
- arg = array[(num - 1) - idx];
- idx++;
-};
+ arg = array[(num - 1) - idx];
+ idx++;
+}
template<typename T, typename ArrayType, typename ...Args>
void AssignValues(unsigned int num, unsigned int idx, const ArrayType& array, T& assignee, Args& ... args)
{
- AssignValues(num, idx, array, assignee);
+ AssignValues(num, idx, array, assignee);
- AssignValues(num, idx, array, args...);
+ AssignValues(num, idx, array, args...);
}
-} // namespace
+
+} // anonymous namespace
template<typename CopyFunc>
void CopyTensorContentsGeneric(const ITensorHandle* srcTensor, ITensorHandle* dstTensor, CopyFunc copy)
@@ -142,4 +149,16 @@ void GatherTensorHandlePairs(const DescriptorType& descriptor,
}
}
-} //namespace armnn \ No newline at end of file
+armnn::ConstTensor PermuteTensor(const ConstCpuTensorHandle* tensor,
+ const PermutationVector& permutationVector,
+ void* permuteBuffer);
+
+void ReshapeWeightsForAcl(TensorInfo& weightInfo, DataLayout dataLayout);
+
+TensorInfo ConvertWeightTensorInfoFromArmnnToAcl(const TensorInfo& weightInfo, DataLayout dataLayout);
+
+armnn::ConstTensor ConvertWeightTensorFromArmnnToAcl(const ConstCpuTensorHandle* weightTensor,
+ DataLayout dataLayout,
+ void* permuteBuffer);
+
+} //namespace armnn
diff --git a/src/backends/backendsCommon/common.mk b/src/backends/backendsCommon/common.mk
index a66b5c4581..4e79bfcd7e 100644
--- a/src/backends/backendsCommon/common.mk
+++ b/src/backends/backendsCommon/common.mk
@@ -14,7 +14,8 @@ COMMON_SOURCES := \
MemCopyWorkload.cpp \
OutputHandler.cpp \
WorkloadData.cpp \
- WorkloadFactory.cpp
+ WorkloadFactory.cpp \
+ WorkloadUtils.cpp
# COMMON_TEST_SOURCES contains the list of files to be included
# in the Android unit test build (armnn-tests) and it is picked
diff --git a/src/backends/backendsCommon/test/Conv2dTestImpl.hpp b/src/backends/backendsCommon/test/Conv2dTestImpl.hpp
index 37fa0f63d6..2ff66b08d5 100755
--- a/src/backends/backendsCommon/test/Conv2dTestImpl.hpp
+++ b/src/backends/backendsCommon/test/Conv2dTestImpl.hpp
@@ -327,7 +327,7 @@ LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestImpl(
armnn::IWorkloadFactory& workloadFactory,
const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
const boost::multi_array<T, 4>& input,
- const boost::multi_array<T, 4>& originalKernel,
+ const boost::multi_array<T, 4>& kernel,
const boost::multi_array<B, 1>& bias,
const boost::multi_array<T, 4>& outputExpected,
float qScale,
@@ -344,10 +344,10 @@ LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestImpl(
unsigned int inputChannels = boost::numeric_cast<unsigned int>(input.shape()[1]);
unsigned int inputHeight = boost::numeric_cast<unsigned int>(input.shape()[2]);
unsigned int inputWidth = boost::numeric_cast<unsigned int>(input.shape()[3]);
- unsigned int kernelChanMul = boost::numeric_cast<unsigned int>(originalKernel.shape()[0]);
- unsigned int kernelChannels = boost::numeric_cast<unsigned int>(originalKernel.shape()[1]);
- unsigned int kernelHeight = boost::numeric_cast<unsigned int>(originalKernel.shape()[2]);
- unsigned int kernelWidth = boost::numeric_cast<unsigned int>(originalKernel.shape()[3]);
+ unsigned int kernelChanMul = boost::numeric_cast<unsigned int>(kernel.shape()[0]);
+ unsigned int kernelChannels = boost::numeric_cast<unsigned int>(kernel.shape()[1]);
+ unsigned int kernelHeight = boost::numeric_cast<unsigned int>(kernel.shape()[2]);
+ unsigned int kernelWidth = boost::numeric_cast<unsigned int>(kernel.shape()[3]);
unsigned int outputNum = boost::numeric_cast<unsigned int>(outputExpected.shape()[0]);
unsigned int outputChannels = boost::numeric_cast<unsigned int>(outputExpected.shape()[1]);
unsigned int outputHeight = boost::numeric_cast<unsigned int>(outputExpected.shape()[2]);
@@ -362,8 +362,7 @@ LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestImpl(
armnnUtils::GetTensorInfo<T>(inputNum, inputChannels, inputHeight, inputWidth, layout);
armnn::TensorInfo outputTensorInfo =
armnnUtils::GetTensorInfo<T>(outputNum, outputChannels, outputHeight, outputWidth, layout);
- armnn::TensorInfo kernelDesc =
- armnnUtils::GetTensorInfo<T>(kernelChanMul, kernelChannels, kernelHeight, kernelWidth, layout);
+ armnn::TensorInfo kernelDesc({kernelChanMul, kernelChannels, kernelHeight, kernelWidth}, armnn::GetDataType<T>());
armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, armnn::GetDataType<B>());
// Set quantization parameters if the requested type is a quantized type.
@@ -423,13 +422,6 @@ LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestImpl(
armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
- // Permute the kernel if necessary
- boost::multi_array<T, 4> kernel = boost::multi_array<T, 4>(originalKernel);
- if (layout == armnn::DataLayout::NHWC)
- {
- armnnUtils::Permute(kernelDesc.GetShape(), NCHWToNHWC, originalKernel.data(), kernel.data());
- }
-
AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
@@ -484,6 +476,7 @@ LayerTestResult<T, 4> DepthwiseConvolution2dDepthMul1TestImpl(
unsigned int kernelHeight = 3;
unsigned int kernelWidth = 3;
unsigned int kernelChannels = inputChannels;
+ unsigned int kernelDepthMultiplier = 1;
unsigned int outputHeight = 1;
unsigned int outputWidth = 1;
@@ -494,7 +487,8 @@ LayerTestResult<T, 4> DepthwiseConvolution2dDepthMul1TestImpl(
armnnUtils::GetTensorInfo<T>(inputNum, inputChannels, inputHeight, inputWidth, layout);
armnn::TensorInfo outputTensorInfo =
armnnUtils::GetTensorInfo<T>(outputNum, outputChannels, outputHeight, outputWidth, layout);
- armnn::TensorInfo kernelDesc = armnnUtils::GetTensorInfo<T>(1, outputChannels, kernelHeight, kernelWidth, layout);
+ armnn::TensorInfo kernelDesc({kernelDepthMultiplier, kernelChannels, kernelHeight, kernelWidth},
+ armnn::GetDataType<T>());
armnn::TensorInfo biasDesc({ outputChannels }, armnn::GetDataType<B>());
// Set quantization parameters if the requested type is a quantized type.
@@ -543,12 +537,6 @@ LayerTestResult<T, 4> DepthwiseConvolution2dDepthMul1TestImpl(
0.f, 0.f, 0.f,
-1.f, 0.f, -1.f,
}));
- if (layout == armnn::DataLayout::NHWC)
- {
- std::vector<T> tmp(kernelData.size());
- armnnUtils::Permute(kernelDesc.GetShape(), NCHWToNHWC, kernelData.data(), tmp.data());
- kernelData = tmp;
- }
auto kernel = MakeTensor<T, 4>(kernelDesc, kernelData);
// Manually calculated.
@@ -642,8 +630,8 @@ LayerTestResult<T, 4> DepthwiseConvolution2dTestImpl(
inputBatchSize, inputChannels, inputHeight, inputWidth, layout);
armnn::TensorInfo outputTensorInfo = armnnUtils::GetTensorInfo<T>(
outputBatchSize, outputChannels, outputHeight, outputWidth, layout);
- armnn::TensorInfo kernelDesc = armnnUtils::GetTensorInfo<T>(
- depthMultiplier, inputChannels, kernelHeight, kernelWidth, layout);
+ armnn::TensorInfo kernelDesc({depthMultiplier, inputChannels, kernelHeight, kernelWidth},
+ armnn::GetDataType<T>());
armnn::TensorInfo biasDesc({outputChannels}, armnn::GetDataType<B>());
// Set quantization parameters if the requested type is a quantized type.
@@ -692,7 +680,7 @@ LayerTestResult<T, 4> DepthwiseConvolution2dTestImpl(
{0, 2, 1, -1}));
auto bias = MakeTensor<B, 1>(biasDesc, biasV);
- std::vector<T> originalKernelData = std::vector<T>(
+ std::vector<T> kernelData = std::vector<T>(
QuantizedVector<T>(kernelDesc.GetQuantizationScale(), kernelDesc.GetQuantizationOffset(), {
1, 1, 1,
1, -1, 1,
@@ -717,12 +705,8 @@ LayerTestResult<T, 4> DepthwiseConvolution2dTestImpl(
0, 1, 0,
0, 0, 0,
0, 0, 0
+
}));
- std::vector<T> kernelData = originalKernelData;
- if (layout == armnn::DataLayout::NHWC)
- {
- armnnUtils::Permute(kernelDesc.GetShape(), NCHWToNHWC, originalKernelData.data(), kernelData.data());
- }
auto kernel = MakeTensor<T, 4>(kernelDesc, kernelData);
// Manually calculated.
@@ -840,9 +824,9 @@ LayerTestResult<T, 4> DepthwiseConvolution2dNhwcTestImpl(
unsigned int inputWidth = boost::numeric_cast<unsigned int>(input.shape()[2]);
unsigned int kernelChanMul = boost::numeric_cast<unsigned int>(kernel.shape()[0]);
- unsigned int kernelChannels = boost::numeric_cast<unsigned int>(kernel.shape()[3]);
- unsigned int kernelHeight = boost::numeric_cast<unsigned int>(kernel.shape()[1]);
- unsigned int kernelWidth = boost::numeric_cast<unsigned int>(kernel.shape()[2]);
+ unsigned int kernelChannels = boost::numeric_cast<unsigned int>(kernel.shape()[1]);
+ unsigned int kernelHeight = boost::numeric_cast<unsigned int>(kernel.shape()[2]);
+ unsigned int kernelWidth = boost::numeric_cast<unsigned int>(kernel.shape()[3]);
unsigned int outputNum = boost::numeric_cast<unsigned int>(outputExpected.shape()[0]);
unsigned int outputChannels = boost::numeric_cast<unsigned int>(outputExpected.shape()[3]);
@@ -853,7 +837,7 @@ LayerTestResult<T, 4> DepthwiseConvolution2dNhwcTestImpl(
armnn::TensorInfo inputTensorInfo({inputNum, inputHeight, inputWidth, inputChannels}, armnn::GetDataType<T>());
armnn::TensorInfo outputTensorInfo({outputNum, outputHeight, outputWidth, outputChannels},
armnn::GetDataType<T>());
- armnn::TensorInfo kernelDesc({kernelChanMul, kernelHeight, kernelWidth, kernelChannels}, armnn::GetDataType<T>());
+ armnn::TensorInfo kernelDesc({kernelChanMul, kernelChannels, kernelHeight, kernelWidth}, armnn::GetDataType<T>());
armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, armnn::GetDataType<B>());
// Set quantization parameters if the requested type is a quantized type.
@@ -1068,10 +1052,10 @@ LayerTestResult<T,4> CompareConvolution2dTestImpl(
armnn::TensorInfo kernelDesc;
armnn::TensorInfo biasDesc;
- unsigned int inputShape[] = {inputNum, inputChannels, inputHeight, inputWidth};
- unsigned int outputShape[] = {outputNum, outputChannels, outputHeight, outputWidth};
- unsigned int kernelShape[] = {outputChannels, inputChannels, kernelHeight, kernelWidth};
- unsigned int biasShape[] = {outputChannels};
+ unsigned int inputShape[] = {inputNum, inputChannels, inputHeight, inputWidth};
+ unsigned int outputShape[] = {outputNum, outputChannels, outputHeight, outputWidth};
+ unsigned int kernelShape[] = {outputChannels, inputChannels, kernelHeight, kernelWidth};
+ unsigned int biasShape[] = {outputChannels};
inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::GetDataType<T>());
outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::GetDataType<T>());
@@ -1171,19 +1155,17 @@ LayerTestResult<T, 4> CompareDepthwiseConvolution2dTestImpl(
std::vector<unsigned int> inputShape;
std::vector<unsigned int> outputShape;
- std::vector<unsigned int> kernelShape;
- std::vector<unsigned int> biasShape= { outputChannels };
+ std::vector<unsigned int> kernelShape{ channelMultiplier, inputChannels, kernelHeight, kernelWidth };
+ std::vector<unsigned int> biasShape{ outputChannels };
switch (layout.GetDataLayout())
{
case armnn::DataLayout::NCHW:
inputShape = { inputNum, inputChannels, inputHeight, inputWidth };
outputShape = { outputNum, outputChannels, outputHeight, outputWidth };
- kernelShape = { channelMultiplier, inputChannels, kernelHeight, kernelWidth };
break;
case armnn::DataLayout ::NHWC:
inputShape = { inputNum, inputHeight, inputWidth, inputChannels };
outputShape = { outputNum, outputHeight, outputWidth, outputChannels };
- kernelShape = { channelMultiplier, kernelHeight, kernelWidth, inputChannels };
break;
default:
throw armnn::InvalidArgumentException("unknown data layout ["
diff --git a/src/backends/backendsCommon/test/LayerTests.cpp b/src/backends/backendsCommon/test/LayerTests.cpp
index ddf0d0b587..819b9d6e37 100755
--- a/src/backends/backendsCommon/test/LayerTests.cpp
+++ b/src/backends/backendsCommon/test/LayerTests.cpp
@@ -661,28 +661,18 @@ LayerTestResult<T, 4> DepthwiseConvolution2dNhwcTestCommon(
24, 49
})));
- armnn::TensorInfo kernelTensorInfo({ 1, 4, 4, 2}, armnn::GetDataType<T>());
+ armnn::TensorInfo kernelTensorInfo({ 1, 2, 4, 4 }, armnn::GetDataType<T>());
auto kernel = MakeTensor<T, 4>(kernelTensorInfo, std::vector<T>(
QuantizedVector<T>(kernelTensorInfo.GetQuantizationScale(), kernelTensorInfo.GetQuantizationOffset(), {
- 32, 16,
- 31, 15,
- 30, 14,
- 29, 13,
-
- 28, 12,
- 27, 11,
- 26, 10,
- 25, 9,
-
- 24, 8,
- 23, 7,
- 22, 6,
- 21, 5,
-
- 20, 4,
- 19, 3,
- 18, 2,
- 17, 1
+ 32, 31, 30, 29,
+ 28, 27, 26, 25,
+ 24, 23, 22, 21,
+ 20, 19, 18, 17,
+
+ 16, 15, 14, 13,
+ 12, 11, 10, 9,
+ 8, 7, 6, 5,
+ 4, 3, 2, 1
})));
armnn::TensorInfo outputTensorInfo({ 1, 5, 5, 2}, armnn::GetDataType<T>());
diff --git a/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp b/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp
index 9cadbf09ac..1745b8297a 100644
--- a/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp
+++ b/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp
@@ -12,6 +12,7 @@
#include <aclCommon/ArmComputeTensorUtils.hpp>
#include <cl/ClTensorHandle.hpp>
#include <backendsCommon/CpuTensorHandle.hpp>
+#include <backendsCommon/WorkloadUtils.hpp>
#include <arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h>
@@ -21,14 +22,23 @@ namespace armnn
using namespace armcomputetensorutils;
arm_compute::Status ClDepthwiseConvolutionWorkloadValidate(const TensorInfo& input,
- const TensorInfo& output,
- const DepthwiseConvolution2dDescriptor& descriptor,
- const TensorInfo& weights,
- const Optional<TensorInfo>& biases)
+ const TensorInfo& output,
+ const DepthwiseConvolution2dDescriptor& descriptor,
+ const TensorInfo& weights,
+ const Optional<TensorInfo>& biases)
{
- const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
+ const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
- const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
+
+ // ArmNN's weight format is [ M, I, H, W ]
+ const unsigned int aclDepthMultiplier = weights.GetShape()[0];
+
+ // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either
+ // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library
+ TensorInfo weightsPermuted = ConvertWeightTensorInfoFromArmnnToAcl(weights, descriptor.m_DataLayout);
+
+ // Convert the weights into the compute library format
+ const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout);
arm_compute::TensorInfo aclBiasesInfo;
arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr;
@@ -42,7 +52,6 @@ arm_compute::Status ClDepthwiseConvolutionWorkloadValidate(const TensorInfo& inp
}
const arm_compute::PadStrideInfo aclPadStrideInfo = BuildArmComputePadStrideInfo(descriptor);
- const unsigned int aclDepthMultiplier = weights.GetShape()[0];
return arm_compute::CLDepthwiseConvolutionLayer::validate(&aclInputInfo,
&aclWeightsInfo,
@@ -57,10 +66,18 @@ ClDepthwiseConvolutionWorkload::ClDepthwiseConvolutionWorkload(
const WorkloadInfo& info)
: BaseWorkload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info)
{
- auto& weightInfo = m_Data.m_Weight->GetTensorInfo();
+ // Allocate a buffer for the swizzling of the weight tensor
+ std::unique_ptr<unsigned char[]> permuteBuffer(new unsigned char[m_Data.m_Weight->GetTensorInfo().GetNumBytes()]);
+
+ // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either
+ // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library
+ ConstTensor weightPermuted = ConvertWeightTensorFromArmnnToAcl(m_Data.m_Weight,
+ m_Data.m_Parameters.m_DataLayout,
+ permuteBuffer.get());
+ // Convert the weights into the compute library format
m_KernelTensor = std::make_unique<arm_compute::CLTensor>();
- BuildArmComputeTensor(*m_KernelTensor, weightInfo, m_Data.m_Parameters.m_DataLayout);
+ BuildArmComputeTensor(*m_KernelTensor, weightPermuted.GetInfo(), m_Data.m_Parameters.m_DataLayout);
if (m_Data.m_Parameters.m_BiasEnabled)
{
@@ -86,13 +103,14 @@ ClDepthwiseConvolutionWorkload::ClDepthwiseConvolutionWorkload(
input.info()->set_data_layout(aclDataLayout);
output.info()->set_data_layout(aclDataLayout);
- const unsigned int depthMultiplier = weightInfo.GetShape()[0];
+ // ArmNN's weight format is [ M, I, H, W ]
+ auto& weightInfo = m_Data.m_Weight->GetTensorInfo();
- const unsigned int widthIndex = (m_Data.m_Parameters.m_DataLayout == DataLayout::NCHW) ? 3 : 2;
- const unsigned int heightIndex = (m_Data.m_Parameters.m_DataLayout == DataLayout::NCHW) ? 2 : 1;
+ // Get the depth multiplier
+ const unsigned int depthMultiplier = weightInfo.GetShape()[0];
- //Check for optimisation opportunities.
- bool use3x3Optimisation = (weightInfo.GetShape()[widthIndex] == 3) && (weightInfo.GetShape()[heightIndex] == 3);
+ // Check for optimisation opportunities.
+ bool use3x3Optimisation = (weightInfo.GetShape()[2] == 3) && (weightInfo.GetShape()[3] == 3);
if (use3x3Optimisation)
{
m_DepthwiseConvolutionLayer = std::make_unique<arm_compute::CLDepthwiseConvolutionLayer3x3>();
@@ -118,7 +136,8 @@ ClDepthwiseConvolutionWorkload::ClDepthwiseConvolutionWorkload(
BOOST_ASSERT(m_DepthwiseConvolutionLayer);
- InitializeArmComputeClTensorData(*m_KernelTensor, m_Data.m_Weight);
+ ScopedCpuTensorHandle weightsPermutedHandle(weightPermuted);
+ InitializeArmComputeClTensorData(*m_KernelTensor, &weightsPermutedHandle);
if (m_BiasTensor)
{
diff --git a/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp b/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp
index 6cad12cba8..be26359662 100644
--- a/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp
+++ b/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp
@@ -8,10 +8,7 @@
#include <aclCommon/ArmComputeTensorUtils.hpp>
#include <neon/NeonLayerSupport.hpp>
#include <backendsCommon/CpuTensorHandle.hpp>
-
-#include <DataLayoutIndexed.hpp>
-
-using namespace armnnUtils;
+#include <backendsCommon/WorkloadUtils.hpp>
namespace armnn
{
@@ -19,17 +16,23 @@ namespace armnn
using namespace armcomputetensorutils;
arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& input,
- const TensorInfo& output,
- const DepthwiseConvolution2dDescriptor& descriptor,
- const TensorInfo& weights,
- const Optional<TensorInfo>& biases)
+ const TensorInfo& output,
+ const DepthwiseConvolution2dDescriptor& descriptor,
+ const TensorInfo& weights,
+ const Optional<TensorInfo>& biases)
{
- const arm_compute::TensorInfo aclInputInfo =
- BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
- const arm_compute::TensorInfo aclOutputInfo =
- BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
- const arm_compute::TensorInfo aclWeightsInfo =
- BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
+ const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
+ const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
+
+ // ArmNN's weight format is [ M, I, H, W ]
+ const unsigned int aclDepthMultiplier = weights.GetShape()[0];
+
+ // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either
+ // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library
+ TensorInfo weightsPermuted = ConvertWeightTensorInfoFromArmnnToAcl(weights, descriptor.m_DataLayout);
+
+ // Convert the weights into the compute library format
+ const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout);
arm_compute::TensorInfo aclBiasesInfo;
arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr;
@@ -42,9 +45,7 @@ arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& i
optionalAclBiasesInfo = &aclBiasesInfo;
}
- const arm_compute::PadStrideInfo aclPadStrideInfo =
- BuildArmComputePadStrideInfo(descriptor);
- const unsigned int aclDepthMultiplier = weights.GetShape()[0];
+ const arm_compute::PadStrideInfo aclPadStrideInfo = BuildArmComputePadStrideInfo(descriptor);
return arm_compute::NEDepthwiseConvolutionLayer::validate(&aclInputInfo,
&aclWeightsInfo,
@@ -59,14 +60,21 @@ NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload(
const WorkloadInfo& info)
: BaseWorkload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info)
{
- const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo();
+ // ArmNN's weight format is [ M, I, H, W ]
+ auto& weightInfo = m_Data.m_Weight->GetTensorInfo();
- m_KernelTensor = std::make_unique<arm_compute::Tensor>();
- BuildArmComputeTensor(*m_KernelTensor, weightInfo, m_Data.m_Parameters.m_DataLayout);
+ // Allocate a buffer for the swizzling of the weight tensor
+ std::unique_ptr<unsigned char[]> permuteBuffer(new unsigned char[m_Data.m_Weight->GetTensorInfo().GetNumBytes()]);
- INeonTensorHandle* inputTensorHandle = static_cast<INeonTensorHandle*>(m_Data.m_Inputs[0]);
- INeonTensorHandle* outputTensorHandle = static_cast<INeonTensorHandle*>(m_Data.m_Outputs[0]);
- DataLayoutIndexed dataLayoutIndex(m_Data.m_Parameters.m_DataLayout);
+ // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either
+ // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library
+ ConstTensor weightPermuted = ConvertWeightTensorFromArmnnToAcl(m_Data.m_Weight,
+ m_Data.m_Parameters.m_DataLayout,
+ permuteBuffer.get());
+
+ // Convert the weights into the compute library format
+ m_KernelTensor = std::make_unique<arm_compute::Tensor>();
+ BuildArmComputeTensor(*m_KernelTensor, weightPermuted.GetInfo(), m_Data.m_Parameters.m_DataLayout);
if (m_Data.m_Parameters.m_BiasEnabled)
{
@@ -84,6 +92,9 @@ NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload(
m_Data.ValidateInputsOutputs("NeonDepthwiseConvolutionWorkload", 1, 1);
+ INeonTensorHandle* inputTensorHandle = static_cast<INeonTensorHandle*>(m_Data.m_Inputs[0]);
+ INeonTensorHandle* outputTensorHandle = static_cast<INeonTensorHandle*>(m_Data.m_Outputs[0]);
+
arm_compute::ITensor& input = inputTensorHandle->GetTensor();
arm_compute::ITensor& output = outputTensorHandle->GetTensor();
@@ -91,9 +102,11 @@ NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload(
input.info()->set_data_layout(aclDataLayout);
output.info()->set_data_layout(aclDataLayout);
- bool use3x3Optimisation = weightInfo.GetShape()[dataLayoutIndex.GetWidthIndex()] == 3 &&
- weightInfo.GetShape()[dataLayoutIndex.GetHeightIndex()] == 3;
+ // Get the depth multiplier
+ const unsigned int depthMultiplier = weightInfo.GetShape()[0];
+ // Check for optimisation opportunities.
+ bool use3x3Optimisation = (weightInfo.GetShape()[2] == 3) && (weightInfo.GetShape()[3] == 3);
if (use3x3Optimisation)
{
m_pDepthwiseConvolutionLayer = std::make_unique<arm_compute::NEDepthwiseConvolutionLayer3x3>();
@@ -102,7 +115,8 @@ NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload(
m_KernelTensor.get(),
m_BiasTensor.get(),
&output,
- padStrideInfo);
+ padStrideInfo,
+ depthMultiplier);
}
else
{
@@ -112,12 +126,14 @@ NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload(
m_KernelTensor.get(),
m_BiasTensor.get(),
&output,
- padStrideInfo);
+ padStrideInfo,
+ depthMultiplier);
}
BOOST_ASSERT(m_pDepthwiseConvolutionLayer);
- InitializeArmComputeTensorData(*m_KernelTensor, m_Data.m_Weight);
+ ScopedCpuTensorHandle weightsPermutedHandle(weightPermuted);
+ InitializeArmComputeTensorData(*m_KernelTensor, &weightsPermutedHandle);
if (m_Data.m_Parameters.m_BiasEnabled)
{
diff --git a/src/backends/reference/workloads/ConvImpl.hpp b/src/backends/reference/workloads/ConvImpl.hpp
index 704bc368d2..5c07f57ec0 100644
--- a/src/backends/reference/workloads/ConvImpl.hpp
+++ b/src/backends/reference/workloads/ConvImpl.hpp
@@ -57,7 +57,6 @@ static void ConvImpl(ConvData data,
float filterScale,
int32_t filterOffset,
const BiasType* biasData,
- InputType* outputData,
float outputScale,
int32_t outputOffset,
const TensorInfo& filterInfo,
@@ -68,10 +67,10 @@ static void ConvImpl(ConvData data,
throw InvalidArgumentException("Bias is enabled but the bias data is invalid");
}
- const TensorInfo& inputInfo0 = GetTensorInfo(data.m_Inputs[0]);
- const TensorInfo& outputInfo0 = GetTensorInfo(data.m_Outputs[0]);
+ const TensorInfo& inputInfo = GetTensorInfo(data.m_Inputs[0]);
+ const TensorInfo& outputInfo = GetTensorInfo(data.m_Outputs[0]);
- TensorBufferArrayView<InputType> output(outputInfo0.GetShape(),
+ TensorBufferArrayView<InputType> output(outputInfo.GetShape(),
GetOutputTensorData<InputType>(0, data),
data.m_Parameters.m_DataLayout);
@@ -81,18 +80,18 @@ static void ConvImpl(ConvData data,
const unsigned int heightIndex = dataLayoutIndexed.GetHeightIndex();
const unsigned int widthIndex = dataLayoutIndexed.GetWidthIndex();
- unsigned int depthMult = depthwise ? filterInfo.GetShape()[0] : 1;
- unsigned int channelsInput = filterInfo.GetShape()[channelsIndex];
- unsigned int channelsOutput = depthwise ? channelsInput * depthMult : filterInfo.GetShape()[0];
+ unsigned int depthMultiplier = depthwise ? filterInfo.GetShape()[0] : 1;
+ unsigned int inputChannels = depthwise ? filterInfo.GetShape()[1] : filterInfo.GetShape()[channelsIndex];
+ unsigned int outputChannels = depthwise ? inputChannels * depthMultiplier : filterInfo.GetShape()[0];
- unsigned int batchSize = outputInfo0.GetShape()[0];
- unsigned int heightOutput = outputInfo0.GetShape()[heightIndex];
- unsigned int widthOutput = outputInfo0.GetShape()[widthIndex];
- unsigned int heightInput = inputInfo0.GetShape()[heightIndex];
- unsigned int widthInput = inputInfo0.GetShape()[widthIndex];
+ unsigned int batchSize = outputInfo.GetShape()[0];
+ unsigned int outputHeight = outputInfo.GetShape()[heightIndex];
+ unsigned int outputWidth = outputInfo.GetShape()[widthIndex];
+ unsigned int inputHeight = inputInfo.GetShape()[heightIndex];
+ unsigned int inputWidth = inputInfo.GetShape()[widthIndex];
- unsigned int heightFilter = filterInfo.GetShape()[heightIndex];
- unsigned int widthFilter = filterInfo.GetShape()[widthIndex];
+ unsigned int filterHeight = depthwise ? filterInfo.GetShape()[2] : filterInfo.GetShape()[heightIndex];
+ unsigned int filterWidth = depthwise ? filterInfo.GetShape()[3] : filterInfo.GetShape()[widthIndex];
unsigned int paddingTop = data.m_Parameters.m_PadTop;
unsigned int paddingLeft = data.m_Parameters.m_PadLeft;
@@ -102,68 +101,56 @@ static void ConvImpl(ConvData data,
// The world's least efficient convolution.
for (unsigned int batchIdx = 0; batchIdx < batchSize; batchIdx++)
{
- for (unsigned int cOutput = 0; cOutput < channelsOutput; cOutput++)
+ for (unsigned int cOutput = 0; cOutput < outputChannels; cOutput++)
{
- for (unsigned int yOutput = 0; yOutput < heightOutput; yOutput++)
+ for (unsigned int yOutput = 0; yOutput < outputHeight; yOutput++)
{
- for (unsigned int xOutput = 0; xOutput < widthOutput; xOutput++)
+ for (unsigned int xOutput = 0; xOutput < outputWidth; xOutput++)
{
// This loop goes over each output element.
AccumulatorType sum = AccumulatorType();
// For depthwise, each output channel corresponds to exactly one input channel.
// For normal, must loop over each input channel.
- for (unsigned int cInput = 0; cInput < (depthwise ? 1 : channelsInput); cInput++)
+ for (unsigned int cInput = 0; cInput < (depthwise ? 1 : inputChannels); cInput++)
{
unsigned int depthwiseMultiplierIdx = 0;
if (depthwise)
{
- cInput = cOutput / depthMult;
- depthwiseMultiplierIdx = cOutput % depthMult;
+ cInput = cOutput / depthMultiplier;
+ depthwiseMultiplierIdx = cOutput % depthMultiplier;
}
- for (unsigned int yFilter = 0; yFilter < heightFilter; yFilter++)
+ for (unsigned int yFilter = 0; yFilter < filterHeight; yFilter++)
{
- for (unsigned int xFilter = 0; xFilter < widthFilter; xFilter++)
+ for (unsigned int xFilter = 0; xFilter < filterWidth; xFilter++)
{
// This loop goes over each input element for each output element.
- unsigned int filterIndex;
+ unsigned int filterIndex = 0;
// Since dimensionality of kernel depends on depthwiseness, so does index.
if (depthwise)
{
- if (data.m_Parameters.m_DataLayout == DataLayout::NHWC)
- {
- filterIndex = depthwiseMultiplierIdx * heightFilter * widthFilter
- * channelsInput +
- yFilter * widthFilter * channelsInput +
- xFilter * channelsInput +
- cInput;
- }
- else
- {
- filterIndex = depthwiseMultiplierIdx * widthFilter * heightFilter
- * channelsInput +
- cInput * widthFilter * heightFilter +
- yFilter * widthFilter +
- xFilter;
- }
+ filterIndex = depthwiseMultiplierIdx * filterWidth * filterHeight * inputChannels +
+ cInput * filterWidth * filterHeight +
+ yFilter * filterWidth +
+ xFilter;
}
else
{
if (data.m_Parameters.m_DataLayout == DataLayout::NHWC)
{
- filterIndex = cOutput * heightFilter * widthFilter * channelsInput +
- yFilter * widthFilter * channelsInput +
- xFilter * channelsInput +
+ filterIndex = cOutput * filterHeight * filterWidth * inputChannels +
+ yFilter * filterWidth * inputChannels +
+ xFilter * inputChannels +
cInput;
}
else
{
- filterIndex = cOutput * widthFilter * heightFilter * channelsInput +
- cInput * widthFilter * heightFilter +
- yFilter * widthFilter +
+ filterIndex = cOutput * filterWidth * filterHeight * inputChannels +
+ cInput * filterWidth * filterHeight +
+ yFilter * filterWidth +
xFilter;
}
}
@@ -177,8 +164,8 @@ static void ConvImpl(ConvData data,
AccumulatorType inputValue;
// Check if we're in the padding.
- if (yInput < paddingTop || yInput >= heightInput + paddingTop ||
- xInput < paddingLeft || xInput >= widthInput + paddingLeft )
+ if (yInput < paddingTop || yInput >= inputHeight + paddingTop ||
+ xInput < paddingLeft || xInput >= inputWidth + paddingLeft )
{
inputValue = AccumulatorType();
}
@@ -188,17 +175,17 @@ static void ConvImpl(ConvData data,
if (data.m_Parameters.m_DataLayout == DataLayout::NHWC)
{
- inputIndex = batchIdx * heightInput * widthInput * channelsInput +
- (yInput - paddingTop) * widthInput * channelsInput +
- (xInput - paddingLeft) * channelsInput +
+ inputIndex = batchIdx * inputHeight * inputWidth * inputChannels +
+ (yInput - paddingTop) * inputWidth * inputChannels +
+ (xInput - paddingLeft) * inputChannels +
cInput;
}
else
{
- inputIndex = batchIdx * widthInput * heightInput * channelsInput +
- widthInput * heightInput * cInput +
- widthInput * (yInput - paddingTop) +
+ inputIndex = batchIdx * inputWidth * inputHeight * inputChannels +
+ inputWidth * inputHeight * cInput +
+ inputWidth * (yInput - paddingTop) +
xInput - paddingLeft;
}
diff --git a/src/backends/reference/workloads/RefConvolution2dFloat32Workload.cpp b/src/backends/reference/workloads/RefConvolution2dFloat32Workload.cpp
index 20905646d7..7b298df967 100644
--- a/src/backends/reference/workloads/RefConvolution2dFloat32Workload.cpp
+++ b/src/backends/reference/workloads/RefConvolution2dFloat32Workload.cpp
@@ -23,15 +23,13 @@ void RefConvolution2dFloat32Workload::Execute() const
{
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConvolution2dFloat32Workload_Execute");
- float* outputData = GetOutputTensorDataFloat(0, m_Data);
const float* inputData = GetInputTensorDataFloat(0, m_Data);
- const float* weightData = m_Weight->template GetConstTensor<float>();
- const float* biasData = m_Data.m_Parameters.m_BiasEnabled ?
- m_Bias->template GetConstTensor<float>() : nullptr;
+ const float* filterData = m_Weight->template GetConstTensor<float>();
+ const float* biasData = m_Data.m_Parameters.m_BiasEnabled ? m_Bias->template GetConstTensor<float>() : nullptr;
const TensorInfo& filterInfo = m_Weight->GetTensorInfo();
ConvImpl<armnn::Convolution2dQueueDescriptor, float, float, float>(
- m_Data, inputData, 0.0f, 0, weightData, 0.0f, 0, biasData, outputData, 0.0f, 0, filterInfo);
+ m_Data, inputData, 0.0f, 0, filterData, 0.0f, 0, biasData, 0.0f, 0, filterInfo);
}
} //namespace armnn
diff --git a/src/backends/reference/workloads/RefConvolution2dUint8Workload.cpp b/src/backends/reference/workloads/RefConvolution2dUint8Workload.cpp
index 881e9bf6b0..af2c7ad0d6 100644
--- a/src/backends/reference/workloads/RefConvolution2dUint8Workload.cpp
+++ b/src/backends/reference/workloads/RefConvolution2dUint8Workload.cpp
@@ -27,10 +27,7 @@ void RefConvolution2dUint8Workload::Execute() const
const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
const uint8_t* weightsData = m_Weight->template GetConstTensor<uint8_t>();
const TensorInfo& weightsInfo = GetTensorInfo(m_Weight.get());
- const int32_t* biasData = m_Data.m_Parameters.m_BiasEnabled ?
- m_Bias->template GetConstTensor<int32_t>() :
- nullptr;
- uint8_t* outputData = GetOutputTensorDataU8(0, m_Data);
+ const int32_t* biasData = m_Data.m_Parameters.m_BiasEnabled ? m_Bias->template GetConstTensor<int32_t>() : nullptr;
const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
const TensorInfo& filterInfo = m_Weight->GetTensorInfo();
@@ -39,7 +36,7 @@ void RefConvolution2dUint8Workload::Execute() const
inputData, inputInfo.GetQuantizationScale(), inputInfo.GetQuantizationOffset(),
weightsData, weightsInfo.GetQuantizationScale(), weightsInfo.GetQuantizationOffset(),
biasData,
- outputData, outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(), filterInfo);
+ outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(), filterInfo);
}
} //namespace armnn
diff --git a/src/backends/reference/workloads/RefDepthwiseConvolution2dFloat32Workload.cpp b/src/backends/reference/workloads/RefDepthwiseConvolution2dFloat32Workload.cpp
index e89013b9bd..756e958753 100644
--- a/src/backends/reference/workloads/RefDepthwiseConvolution2dFloat32Workload.cpp
+++ b/src/backends/reference/workloads/RefDepthwiseConvolution2dFloat32Workload.cpp
@@ -23,15 +23,13 @@ void RefDepthwiseConvolution2dFloat32Workload::Execute() const
{
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefDepthwiseConvolution2dFloat32Workload_Execute");
- float* outputData = GetOutputTensorDataFloat(0, m_Data);
const float* inputData = GetInputTensorDataFloat(0, m_Data);
const float* weightData = m_Weight->template GetConstTensor<float>();
- const float* biasData = m_Data.m_Parameters.m_BiasEnabled ?
- m_Bias->template GetConstTensor<float>() : nullptr;
+ const float* biasData = m_Data.m_Parameters.m_BiasEnabled ? m_Bias->template GetConstTensor<float>() : nullptr;
const TensorInfo& filterInfo = m_Weight->GetTensorInfo();
ConvImpl<armnn::DepthwiseConvolution2dQueueDescriptor, float, float, float>
- (m_Data, inputData, 0.0f, 0, weightData, 0.0f, 0, biasData, outputData, 0.0f, 0, filterInfo, true);
+ (m_Data, inputData, 0.0f, 0, weightData, 0.0f, 0, biasData, 0.0f, 0, filterInfo, true);
}
} //namespace armnn
diff --git a/src/backends/reference/workloads/RefDepthwiseConvolution2dUint8Workload.cpp b/src/backends/reference/workloads/RefDepthwiseConvolution2dUint8Workload.cpp
index e8e501d6ae..629b729ea6 100644
--- a/src/backends/reference/workloads/RefDepthwiseConvolution2dUint8Workload.cpp
+++ b/src/backends/reference/workloads/RefDepthwiseConvolution2dUint8Workload.cpp
@@ -28,10 +28,7 @@ void RefDepthwiseConvolution2dUint8Workload::Execute() const
const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
const uint8_t* weightsData = m_Weight->template GetConstTensor<uint8_t>();
const TensorInfo& weightsInfo = GetTensorInfo(m_Weight.get());
- const int32_t* biasData = m_Data.m_Parameters.m_BiasEnabled ?
- m_Bias->template GetConstTensor<int32_t>() :
- nullptr;
- uint8_t* outputData = GetOutputTensorDataU8(0, m_Data);
+ const int32_t* biasData = m_Data.m_Parameters.m_BiasEnabled ? m_Bias->template GetConstTensor<int32_t>() : nullptr;
const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
const TensorInfo& filterInfo = m_Weight->GetTensorInfo();
@@ -40,7 +37,7 @@ void RefDepthwiseConvolution2dUint8Workload::Execute() const
inputData, inputInfo.GetQuantizationScale(), inputInfo.GetQuantizationOffset(),
weightsData, weightsInfo.GetQuantizationScale(), weightsInfo.GetQuantizationOffset(),
biasData,
- outputData, outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(), filterInfo, true);
+ outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(), filterInfo, true);
}
} //namespace armnn