about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
authorMatteo Martincigh <matteo.martincigh@arm.com>2018-12-18 09:26:39 +0000
committerMatteo Martincigh <matteo.martincigh@arm.com>2019-01-04 17:28:07 +0000
commit747ef82c88f9afe14a8b80b6b3b34118353e97f2 (patch)
treea29ac33b84fb96a41103a0a97327189495374cc9 /src
parent760892724d131c7da4b9baad05cddd49276ad6bb (diff)
downloadarmnn-747ef82c88f9afe14a8b80b6b3b34118353e97f2.tar.gz
MLCE-77 Depthwise Convolution with depth multiplier > 1 doesn't work
* Unified ArmNN's weight format to [ M, I, H, W ] for the depthwise convolution
* Added conversion utilities to permute/reshape the weights as appropriate when using CL and Neon backends
* Updated the reference implementation of the convolution
* Updated the relevant unit tests accordingly

!android-nn-driver:459

Change-Id: I07d0818efa9d1ca1e5dad82983aac1fe78eadb18
Diffstat (limited to 'src')
-rw-r--r--src/armnn/layers/DepthwiseConvolution2dLayer.cpp40
-rw-r--r--src/armnn/test/CreateWorkload.hpp20
-rw-r--r--src/armnn/test/OptimizerTests.cpp2
-rw-r--r--src/armnnTfLiteParser/TfLiteParser.cpp105
-rw-r--r--src/armnnTfLiteParser/TfLiteParser.hpp32
-rw-r--r--src/armnnTfParser/TfParser.cpp16
-rw-r--r--src/armnnUtils/ParserPrototxtFixture.hpp2
-rw-r--r--src/armnnUtils/Permute.cpp57
-rw-r--r--src/armnnUtils/Permute.hpp5
-rw-r--r--src/backends/aclCommon/ArmComputeTensorUtils.cpp26
-rw-r--r--src/backends/aclCommon/ArmComputeTensorUtils.hpp8
-rw-r--r--src/backends/backendsCommon/CMakeLists.txt1
-rw-r--r--src/backends/backendsCommon/CpuTensorHandle.cpp4
-rw-r--r--src/backends/backendsCommon/CpuTensorHandle.hpp6
-rw-r--r--src/backends/backendsCommon/WorkloadData.cpp5
-rw-r--r--src/backends/backendsCommon/WorkloadUtils.cpp111
-rw-r--r--src/backends/backendsCommon/WorkloadUtils.hpp41
-rw-r--r--src/backends/backendsCommon/common.mk3
-rwxr-xr-xsrc/backends/backendsCommon/test/Conv2dTestImpl.hpp64
-rwxr-xr-xsrc/backends/backendsCommon/test/LayerTests.cpp30
-rw-r--r--src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp49
-rw-r--r--src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp72
-rw-r--r--src/backends/reference/workloads/ConvImpl.hpp93
-rw-r--r--src/backends/reference/workloads/RefConvolution2dFloat32Workload.cpp8
-rw-r--r--src/backends/reference/workloads/RefConvolution2dUint8Workload.cpp7
-rw-r--r--src/backends/reference/workloads/RefDepthwiseConvolution2dFloat32Workload.cpp6
-rw-r--r--src/backends/reference/workloads/RefDepthwiseConvolution2dUint8Workload.cpp7
27 files changed, 529 insertions, 291 deletions
diff --git a/src/armnn/layers/DepthwiseConvolution2dLayer.cpp b/src/armnn/layers/DepthwiseConvolution2dLayer.cpp
index 95d4690d4f..c4edc2022f 100644
--- a/src/armnn/layers/DepthwiseConvolution2dLayer.cpp
+++ b/src/armnn/layers/DepthwiseConvolution2dLayer.cpp
@@ -24,7 +24,7 @@ DepthwiseConvolution2dLayer::DepthwiseConvolution2dLayer(const DepthwiseConvolut
{
}
-std::unique_ptr<IWorkload> DepthwiseConvolution2dLayer::CreateWorkload(const Graph& graph,
+std::unique_ptr<IWorkload> DepthwiseConvolution2dLayer::CreateWorkload(const Graph& graph,
const IWorkloadFactory& factory) const
{
// on this level constant data should not be released..
@@ -59,34 +59,40 @@ std::vector<TensorShape>
DepthwiseConvolution2dLayer::InferOutputShapes(const std::vector<TensorShape>& inputShapes) const
{
BOOST_ASSERT(inputShapes.size() == 2);
- const TensorShape& inputShape = inputShapes[0];
- const TensorShape filterShape = inputShapes[1];
+ const TensorShape& inputShape = inputShapes[0];
+ const TensorShape& filterShape = inputShapes[1];
BOOST_ASSERT_MSG(inputShape.GetNumDimensions() == 4, "Convolutions will always have 4D input.");
DataLayoutIndexed dataLayoutIndex(m_Param.m_DataLayout);
- unsigned int inWidth = inputShape[dataLayoutIndex.GetWidthIndex()];
- unsigned int inHeight = inputShape[dataLayoutIndex.GetHeightIndex()];
- unsigned int inBatchSize = inputShape[0];
+ unsigned int inputBatchSize = inputShape[0];
+ unsigned int inputHeight = inputShape[dataLayoutIndex.GetHeightIndex()];
+ unsigned int inputWidth = inputShape[dataLayoutIndex.GetWidthIndex()];
+ unsigned int inputChannels = inputShape[dataLayoutIndex.GetChannelsIndex()];
- unsigned int filterWidth = filterShape[dataLayoutIndex.GetWidthIndex()];
- unsigned int readWidth = (inWidth + m_Param.m_PadLeft + m_Param.m_PadRight) - (filterWidth);
- unsigned int outWidth = 1 + (readWidth / m_Param.m_StrideX);
+ // Expected filter shape: [ M, I, H, W ] - This shape does NOT depend on the data layout
+ // Namely: [ depth multiplier, input channels, filter height, filter width ]
+ // Output channels = input channels * depthMultiplier
- unsigned int filterHeight = filterShape[dataLayoutIndex.GetHeightIndex()];
- unsigned int readHeight = (inHeight + m_Param.m_PadTop + m_Param.m_PadBottom) - (filterHeight);
- unsigned int outHeight = 1 + (readHeight / m_Param.m_StrideY);
unsigned int depthMultiplier = filterShape[0];
- unsigned int outChannels = filterShape[dataLayoutIndex.GetChannelsIndex()] * depthMultiplier;
- unsigned int outBatchSize = inBatchSize;
+ unsigned int filterHeight = filterShape[2];
+ unsigned int readHeight = (inputHeight + m_Param.m_PadTop + m_Param.m_PadBottom) - filterHeight;
+ unsigned int outputHeight = 1 + (readHeight / m_Param.m_StrideY);
+
+ unsigned int filterWidth = filterShape[3];
+ unsigned int readWidth = (inputWidth + m_Param.m_PadLeft + m_Param.m_PadRight) - filterWidth;
+ unsigned int outputWidth = 1 + (readWidth / m_Param.m_StrideX);
+
+ unsigned int outputChannels = inputChannels * depthMultiplier;
+ unsigned int outputBatchSize = inputBatchSize;
TensorShape tensorShape = m_Param.m_DataLayout == armnn::DataLayout::NHWC ?
- TensorShape( { outBatchSize, outHeight, outWidth, outChannels } ) :
- TensorShape( { outBatchSize, outChannels, outHeight, outWidth });
+ TensorShape{ outputBatchSize, outputHeight, outputWidth, outputChannels } :
+ TensorShape{ outputBatchSize, outputChannels, outputHeight, outputWidth };
- return std::vector<TensorShape>({ tensorShape });
+ return std::vector<TensorShape>{ tensorShape };
}
void DepthwiseConvolution2dLayer::ValidateTensorShapesFromInputs()
diff --git a/src/armnn/test/CreateWorkload.hpp b/src/armnn/test/CreateWorkload.hpp
index 3dc18b9e18..f52f6055ca 100644
--- a/src/armnn/test/CreateWorkload.hpp
+++ b/src/armnn/test/CreateWorkload.hpp
@@ -414,18 +414,18 @@ std::unique_ptr<DepthwiseConvolution2dFloat32Workload> CreateDepthwiseConvolutio
{
// Creates the layer we're testing.
DepthwiseConvolution2dDescriptor layerDesc;
- layerDesc.m_PadLeft = 1;
- layerDesc.m_PadRight = 2;
- layerDesc.m_PadTop = 1;
- layerDesc.m_PadBottom = 2;
- layerDesc.m_StrideX = 1;
- layerDesc.m_StrideY = 1;
- layerDesc.m_BiasEnabled = false;
- layerDesc.m_DataLayout = dataLayout;
+ layerDesc.m_PadLeft = 1;
+ layerDesc.m_PadRight = 2;
+ layerDesc.m_PadTop = 1;
+ layerDesc.m_PadBottom = 2;
+ layerDesc.m_StrideX = 1;
+ layerDesc.m_StrideY = 1;
+ layerDesc.m_BiasEnabled = false;
+ layerDesc.m_DataLayout = dataLayout;
DepthwiseConvolution2dLayer* const layer = graph.AddLayer<DepthwiseConvolution2dLayer>(layerDesc, "layer");
- layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(TensorInfo({1, 4, 4, 2}, DataType));
+ layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(TensorInfo({1, 2, 4, 4}, DataType)); // [ M, I, H, W ]
layer->m_Weight->Allocate();
// Creates extra layers.
@@ -457,7 +457,7 @@ std::unique_ptr<DepthwiseConvolution2dFloat32Workload> CreateDepthwiseConvolutio
BOOST_TEST(queueDescriptor.m_Inputs.size() == 1);
BOOST_TEST(queueDescriptor.m_Outputs.size() == 1);
- BOOST_TEST((queueDescriptor.m_Weight->GetTensorInfo() == TensorInfo({1, 4, 4, 2}, DataType)));
+ BOOST_TEST((queueDescriptor.m_Weight->GetTensorInfo() == TensorInfo({1, 2, 4, 4}, DataType)));
// Returns so we can do extra, backend-specific tests.
return workload;
diff --git a/src/armnn/test/OptimizerTests.cpp b/src/armnn/test/OptimizerTests.cpp
index 29d1702c64..80addb4bfd 100644
--- a/src/armnn/test/OptimizerTests.cpp
+++ b/src/armnn/test/OptimizerTests.cpp
@@ -898,7 +898,7 @@ BOOST_AUTO_TEST_CASE(DepthwiseConv2dValidateTensorShapesFromInputsNhwc)
{
Graph graph;
const unsigned int inputShape[] = { 1, 3, 3, 2 };
- const unsigned int weightsShape[] = { 1, 3, 3, 2 };
+ const unsigned int weightsShape[] = { 1, 2, 3, 3 };
const unsigned int outputShape[] = { 1, 1, 1, 2 };
CreateDepthwiseConvolution2dGraph(graph, inputShape, weightsShape, outputShape, DataLayout::NHWC);
diff --git a/src/armnnTfLiteParser/TfLiteParser.cpp b/src/armnnTfLiteParser/TfLiteParser.cpp
index 49bc73708f..3b50476ca2 100644
--- a/src/armnnTfLiteParser/TfLiteParser.cpp
+++ b/src/armnnTfLiteParser/TfLiteParser.cpp
@@ -401,7 +401,8 @@ template<typename T>
std::pair<armnn::ConstTensor, std::unique_ptr<T[]>>
CreateConstTensorImpl(TfLiteParser::BufferRawPtr bufferPtr,
TfLiteParser::TensorRawPtr tensorPtr,
- armnn::TensorInfo & tensorInfo)
+ armnn::TensorInfo& tensorInfo,
+ armnn::Optional<armnn::PermutationVector&> permutationVector)
{
BOOST_ASSERT_MSG(tensorPtr != nullptr, "tensorPtr is null");
BOOST_ASSERT_MSG(bufferPtr != nullptr,
@@ -409,7 +410,20 @@ CreateConstTensorImpl(TfLiteParser::BufferRawPtr bufferPtr,
boost::format("Buffer for buffer:%1% is null") % tensorPtr->buffer).c_str());
std::unique_ptr<T[]> data(new T[tensorInfo.GetNumElements()]);
- ::memcpy(data.get(), bufferPtr->data.data(), tensorInfo.GetNumBytes());
+
+ if (permutationVector.has_value() && permutationVector.value().GetSize() > 0)
+ {
+ tensorInfo = armnnUtils::Permuted(tensorInfo, permutationVector.value());
+ armnnUtils::Permute(tensorInfo.GetShape(),
+ permutationVector.value(),
+ reinterpret_cast<const T *>(bufferPtr->data.data()),
+ data.get());
+ }
+ else
+ {
+ ::memcpy(data.get(), bufferPtr->data.data(), tensorInfo.GetNumBytes());
+ }
+
return std::make_pair(ConstTensor(tensorInfo, data.get()), std::move(data));
}
@@ -660,7 +674,9 @@ void TfLiteParser::ParseConv2D(size_t subgraphIndex, size_t operatorIndex)
CalcPadding(inputHeight, filterHeight, desc.m_StrideY, desc.m_PadTop, desc.m_PadBottom, options->padding);
CalcPadding(inputWidth, filterWidth, desc.m_StrideX, desc.m_PadLeft, desc.m_PadRight, options->padding);
- auto filterTensorAndData = CreateConstTensor(inputs[1], filterTensorInfo);
+ auto filterTensorAndData = CreateConstTensor(inputs[1],
+ filterTensorInfo,
+ armnn::Optional<armnn::PermutationVector&>());
armnn::IConnectableLayer* layer;
auto layerName = boost::str(boost::format("Conv2D:%1%:%2%") % subgraphIndex % operatorIndex);
@@ -669,7 +685,9 @@ void TfLiteParser::ParseConv2D(size_t subgraphIndex, size_t operatorIndex)
{
desc.m_BiasEnabled = true;
armnn::TensorInfo biasTensorInfo = ToTensorInfo(inputs[2]);
- auto biasTensorAndData = CreateConstTensor(inputs[2], biasTensorInfo);
+ auto biasTensorAndData = CreateConstTensor(inputs[2],
+ biasTensorInfo,
+ armnn::Optional<armnn::PermutationVector&>());
layer = m_Network->AddConvolution2dLayer(desc,
filterTensorAndData.first,
biasTensorAndData.first,
@@ -723,17 +741,27 @@ void TfLiteParser::ParseDepthwiseConv2D(size_t subgraphIndex, size_t operatorInd
armnn::TensorInfo inputTensorInfo = ToTensorInfo(inputs[0]);
armnn::TensorInfo filterTensorInfo = ToTensorInfo(inputs[1]);
- // assuming input is NHWC
+ // Assuming input is NHWC
unsigned int inputHeight = inputTensorInfo.GetShape()[1];
unsigned int inputWidth = inputTensorInfo.GetShape()[2];
- // assuming the filter is OHWI : Output, H, W, Input
+
+ // TensorflowLite weights come in the format [1, H, W, I * M]
unsigned int filterHeight = filterTensorInfo.GetShape()[1];
unsigned int filterWidth = filterTensorInfo.GetShape()[2];
+ // Reshape weights as [ H, W, I, M ]
+ filterTensorInfo.SetShape({ filterHeight,
+ filterWidth,
+ inputTensorInfo.GetShape()[3],
+ filterTensorInfo.GetShape()[3] / inputTensorInfo.GetShape()[3] });
+
+ // Mappings from TensorflowLite filter tensors to the ArmNN filter tensors (ArmNN weights have to be [M, I, H, W])
+ PermutationVector permutationVector{ 2, 3, 1, 0 }; // [H, W, I, M] -> [M, I, H, W]
+
CalcPadding(inputHeight, filterHeight, desc.m_StrideY, desc.m_PadTop, desc.m_PadBottom, options->padding);
CalcPadding(inputWidth, filterWidth, desc.m_StrideX, desc.m_PadLeft, desc.m_PadRight, options->padding);
- auto filterTensorAndData = CreateConstTensor(inputs[1], filterTensorInfo);
+ auto filterTensorAndData = CreateConstTensor(inputs[1], filterTensorInfo, permutationVector);
armnn::IConnectableLayer* layer;
auto layerName = boost::str(boost::format("DepthwiseConv2D:%1%:%2%") % subgraphIndex % operatorIndex);
@@ -741,7 +769,9 @@ void TfLiteParser::ParseDepthwiseConv2D(size_t subgraphIndex, size_t operatorInd
{
desc.m_BiasEnabled = true;
TensorInfo biasTensorInfo = ToTensorInfo(inputs[2]);
- auto biasTensorAndData = CreateConstTensor(inputs[2], biasTensorInfo);
+ auto biasTensorAndData = CreateConstTensor(inputs[2],
+ biasTensorInfo,
+ armnn::Optional<armnn::PermutationVector&>());
layer = m_Network->AddDepthwiseConvolution2dLayer(desc,
filterTensorAndData.first,
biasTensorAndData.first,
@@ -1228,7 +1258,9 @@ void TfLiteParser::ParseFullyConnected(size_t subgraphIndex, size_t operatorInde
% CHECK_LOCATION().AsString()));
}
- auto filterTensorAndData = CreateConstTensor(inputs[1], filterTensorInfo);
+ auto filterTensorAndData = CreateConstTensor(inputs[1],
+ filterTensorInfo,
+ armnn::Optional<armnn::PermutationVector&>());
armnn::IConnectableLayer* layer;
auto layerName = boost::str(boost::format("FullyConnected:%1%:%2%") % subgraphIndex % operatorIndex);
@@ -1236,7 +1268,9 @@ void TfLiteParser::ParseFullyConnected(size_t subgraphIndex, size_t operatorInde
{
desc.m_BiasEnabled = true;
TensorInfo biasTensorInfo = ToTensorInfo(inputs[2]);
- auto biasTensorAndData = CreateConstTensor(inputs[2], biasTensorInfo);
+ auto biasTensorAndData = CreateConstTensor(inputs[2],
+ biasTensorInfo,
+ armnn::Optional<armnn::PermutationVector&>());
layer = m_Network->AddFullyConnectedLayer(desc,
filterTensorAndData.first,
biasTensorAndData.first,
@@ -1561,9 +1595,25 @@ TfLiteParser::BufferRawPtr TfLiteParser::GetBuffer(const ModelPtr& model, size_t
return model->buffers[bufferIndex].get();
}
+template<typename T>
+std::pair<armnn::ConstTensor, TfLiteParser::SupportedDataStorage>
+TfLiteParser::CreateConstTensorAndStoreData(TfLiteParser::BufferRawPtr bufferPtr,
+ TfLiteParser::TensorRawPtr tensorPtr,
+ armnn::TensorInfo& tensorInfo,
+ armnn::Optional<armnn::PermutationVector&> permutationVector)
+{
+ auto constData = CreateConstTensorImpl<T>(bufferPtr,
+ tensorPtr,
+ tensorInfo,
+ permutationVector);
+ TfLiteParser::SupportedDataStorage storage(std::move(constData.second));
+ return std::make_pair(constData.first, std::move(storage));
+}
+
std::pair<armnn::ConstTensor, TfLiteParser::SupportedDataStorage>
TfLiteParser::CreateConstTensor(TensorRawPtr tensorPtr,
- armnn::TensorInfo & tensorInfo)
+ armnn::TensorInfo& tensorInfo,
+ armnn::Optional<armnn::PermutationVector&> permutationVector)
{
CHECK_TENSOR_PTR(tensorPtr);
auto bufferPtr = GetBuffer(m_Model, tensorPtr->buffer);
@@ -1572,29 +1622,20 @@ TfLiteParser::CreateConstTensor(TensorRawPtr tensorPtr,
switch (tensorInfo.GetDataType())
{
case armnn::DataType::Float32:
- {
- auto constData = CreateConstTensorImpl<float>(bufferPtr,
- tensorPtr,
- tensorInfo);
- SupportedDataStorage storage(std::move(constData.second));
- return std::make_pair(constData.first, std::move(storage));
- }
+ return CreateConstTensorAndStoreData<float>(bufferPtr,
+ tensorPtr,
+ tensorInfo,
+ permutationVector);
case armnn::DataType::QuantisedAsymm8:
- {
- auto constData = CreateConstTensorImpl<uint8_t>(bufferPtr,
- tensorPtr,
- tensorInfo);
- SupportedDataStorage storage(std::move(constData.second));
- return std::make_pair(constData.first, std::move(storage));
- }
+ return CreateConstTensorAndStoreData<uint8_t>(bufferPtr,
+ tensorPtr,
+ tensorInfo,
+ permutationVector);
case armnn::DataType::Signed32:
- {
- auto constData = CreateConstTensorImpl<int32_t>(bufferPtr,
- tensorPtr,
- tensorInfo);
- SupportedDataStorage storage(std::move(constData.second));
- return std::make_pair(constData.first, std::move(storage));
- }
+ return CreateConstTensorAndStoreData<int32_t>(bufferPtr,
+ tensorPtr,
+ tensorInfo,
+ permutationVector);
default:
{
std::stringstream errString;
diff --git a/src/armnnTfLiteParser/TfLiteParser.hpp b/src/armnnTfLiteParser/TfLiteParser.hpp
index e7a7469f1f..9195728ad9 100644
--- a/src/armnnTfLiteParser/TfLiteParser.hpp
+++ b/src/armnnTfLiteParser/TfLiteParser.hpp
@@ -129,17 +129,31 @@ private:
// We don't care about the content, and we want a single datatype to simplify the code.
struct SupportedDataStorage
{
- std::unique_ptr<float[]> m_FloatData;
- std::unique_ptr<uint8_t[]> m_Uint8Data;
- std::unique_ptr<int32_t[]> m_Int32Data;
-
- SupportedDataStorage(std::unique_ptr<float[]> && data);
- SupportedDataStorage(std::unique_ptr<uint8_t[]> && data);
- SupportedDataStorage(std::unique_ptr<int32_t[]> && data);
+ public:
+ // Convenience constructors
+ SupportedDataStorage(std::unique_ptr<float[]>&& data);
+ SupportedDataStorage(std::unique_ptr<uint8_t[]>&& data);
+ SupportedDataStorage(std::unique_ptr<int32_t[]>&& data);
+
+ private:
+ // Pointers to the data buffers
+ std::unique_ptr<float[]> m_FloatData;
+ std::unique_ptr<uint8_t[]> m_Uint8Data;
+ std::unique_ptr<int32_t[]> m_Int32Data;
};
- std::pair<armnn::ConstTensor, SupportedDataStorage> CreateConstTensor(TensorRawPtr tensorPtr,
- armnn::TensorInfo & tensorInfo);
+
+ template<typename T>
+ std::pair<armnn::ConstTensor, TfLiteParser::SupportedDataStorage>
+ CreateConstTensorAndStoreData(TfLiteParser::BufferRawPtr bufferPtr,
+ TfLiteParser::TensorRawPtr tensorPtr,
+ armnn::TensorInfo& tensorInfo,
+ armnn::Optional<armnn::PermutationVector&> permutationVector);
+
+ std::pair<armnn::ConstTensor, SupportedDataStorage>
+ CreateConstTensor(TensorRawPtr tensorPtr,
+ armnn::TensorInfo& tensorInfo,
+ armnn::Optional<armnn::PermutationVector&> permutationVector);
/// The network we're building. Gets cleared after it is passed to the user
armnn::INetworkPtr m_Network;
diff --git a/src/armnnTfParser/TfParser.cpp b/src/armnnTfParser/TfParser.cpp
index 7f04757b75..7a213c0909 100644
--- a/src/armnnTfParser/TfParser.cpp
+++ b/src/armnnTfParser/TfParser.cpp
@@ -1338,13 +1338,9 @@ ParsedTfOperationPtr TfParser::ParseDepthwiseConv2D(const tensorflow::NodeDef& n
uint32_t inputWidth = inputTensorInfo.GetShape()[dataLayoutIndexed.GetWidthIndex()];
// Mappings from TensorFlow filter tensors to the ArmNN filter tensors.
- // Tensorflow weights are [H, W, In, Out].
- // ArmNN weights have to be [Out, H, W, In] when the data layout is NHWC,
- // and [Out, In, H, W] when the data layout is NCHW.
- PermutationVector permutationVector =
- dataLayout == DataLayout::NHWC ?
- std::initializer_list<unsigned int>{ 1, 2, 3, 0 } : // NHWC: [H, W, In, Out] -> [Out, H, W, In]
- std::initializer_list<unsigned int>{ 2, 3, 1, 0 }; // NCHW: [H, W, In, Out] -> [Out, In, H, W]
+ // Tensorflow weights come in the format [H, W, I, M].
+ // ArmNN weights have to be [M, I, H, W].
+ PermutationVector permutationVector{ 2, 3, 1, 0 }; // [H, W, I, M] -> [M, I, H, W]
// Swizzle the tensor using the given permutation vector.
const TensorInfo& weightTensorInfo = weightNode->GetTensorInfo();
@@ -1358,8 +1354,8 @@ ParsedTfOperationPtr TfParser::ParseDepthwiseConv2D(const tensorflow::NodeDef& n
// Create a weight tensor with the newly swizzled data.
ConstTensor weightTensor(weightTensorSwizzledInfo, weightTensorSwizzledData);
- uint32_t weightHeight = weightTensor.GetShape()[dataLayoutIndexed.GetHeightIndex()];
- uint32_t weightWidth = weightTensor.GetShape()[dataLayoutIndexed.GetWidthIndex()];
+ uint32_t weightHeight = weightTensor.GetShape()[2];
+ uint32_t weightWidth = weightTensor.GetShape()[3];
bool padding = false;
TensorInfo outputInfo;
@@ -1393,7 +1389,7 @@ ParsedTfOperationPtr TfParser::ParseDepthwiseConv2D(const tensorflow::NodeDef& n
outputInfo = TensorInfo({ inputTensorInfo.GetShape()[0],
outputHeight,
outputWidth,
- weightTensor.GetShape()[0] * weightTensor.GetShape()[3]},
+ weightTensor.GetShape()[0] * weightTensor.GetShape()[1]},
DataType::Float32);
break;
case DataLayout::NCHW:
diff --git a/src/armnnUtils/ParserPrototxtFixture.hpp b/src/armnnUtils/ParserPrototxtFixture.hpp
index fa21aba479..acb8f82c4d 100644
--- a/src/armnnUtils/ParserPrototxtFixture.hpp
+++ b/src/armnnUtils/ParserPrototxtFixture.hpp
@@ -14,8 +14,6 @@
#include <Network.hpp>
#include <VerificationHelpers.hpp>
-#include <backendsCommon/BackendRegistry.hpp>
-
#include <boost/format.hpp>
#include <string>
diff --git a/src/armnnUtils/Permute.cpp b/src/armnnUtils/Permute.cpp
index 61f4e0e644..6deff90168 100644
--- a/src/armnnUtils/Permute.cpp
+++ b/src/armnnUtils/Permute.cpp
@@ -9,6 +9,7 @@
#include <armnn/Tensor.hpp>
#include <cassert>
+#include <cstring>
namespace
{
@@ -46,10 +47,29 @@ public:
Unroll(0, srcData, dstData, srcEnd, dstEnd);
}
+ void Unroll(const void* srcData, void* dstData, size_t dataTypeSize)
+ {
+ assert(srcData);
+ assert(dstData);
+ assert(dataTypeSize > 0);
+
+ const unsigned char* srcDataPtr = reinterpret_cast<const unsigned char*>(srcData);
+ unsigned char* dstDataPtr = reinterpret_cast<unsigned char*>(dstData);
+
+ const unsigned char* const srcEndPtr = srcDataPtr + m_DstShape.GetNumElements() * dataTypeSize;
+ unsigned char* const dstEndPtr = dstDataPtr + m_DstShape.GetNumElements() * dataTypeSize;
+
+ Unroll(0, srcDataPtr, dstDataPtr, srcEndPtr, dstEndPtr, dataTypeSize);
+ }
+
private:
template <typename T>
void Unroll(size_type dimension, const T* srcData, T* dstData, const T* srcEnd, T* dstEnd)
{
+ assert(srcData);
+ assert(dstData);
+ assert(srcEnd);
+ assert(dstEnd);
assert(srcData < srcEnd);
assert(dstData < dstEnd);
@@ -69,6 +89,35 @@ private:
}
}
+ void Unroll(size_type dimension,
+ const unsigned char* srcData, unsigned char* dstData,
+ const unsigned char* srcEnd, unsigned char* dstEnd,
+ size_t dataTypeSize)
+ {
+ assert(srcData);
+ assert(dstData);
+ assert(srcEnd);
+ assert(dstEnd);
+ assert(srcData < srcEnd);
+ assert(dstData < dstEnd);
+ assert(dataTypeSize > 0);
+
+ if (dimension >= m_DstShape.GetNumDimensions())
+ {
+ ::memcpy(dstData, srcData, dataTypeSize);
+ }
+ else
+ {
+ for (size_type i = 0; i < m_DstShape[dimension]; i++)
+ {
+ Unroll(dimension + 1, srcData, dstData, srcEnd, dstEnd, dataTypeSize);
+
+ srcData += m_SrcStrides[dimension] * dataTypeSize;
+ dstData += m_DstStrides[dimension] * dataTypeSize;
+ }
+ }
+ }
+
armnn::TensorShape m_DstShape;
std::array<size_type, armnn::MaxNumOfTensorDimensions> m_SrcStrides;
std::array<size_type, armnn::MaxNumOfTensorDimensions> m_DstStrides;
@@ -102,6 +151,12 @@ armnn::TensorInfo Permuted(const armnn::TensorInfo& info, const armnn::Permutati
return outInfo;
}
+void Permute(const armnn::TensorShape& dstShape, const armnn::PermutationVector& mappings,
+ const void* src, void* dst, size_t dataTypeSize)
+{
+ PermuteLoop(dstShape, mappings).Unroll(src, dst, dataTypeSize);
+}
+
template <typename T>
void Permute(const armnn::TensorShape& dstShape, const armnn::PermutationVector& mappings, const T* src, T* dst)
{
@@ -117,5 +172,7 @@ template void Permute(const armnn::TensorShape& dstShape, const armnn::Permutati
const uint8_t* src, uint8_t* dst);
template void Permute(const armnn::TensorShape& dstShape, const armnn::PermutationVector& mappings,
const int32_t* src, int32_t* dst);
+template void Permute(const armnn::TensorShape& dstShape, const armnn::PermutationVector& mappings,
+ const bool* src, bool* dst);
} // namespace armnnUtils
diff --git a/src/armnnUtils/Permute.hpp b/src/armnnUtils/Permute.hpp
index 700ddc72ce..4e4319822b 100644
--- a/src/armnnUtils/Permute.hpp
+++ b/src/armnnUtils/Permute.hpp
@@ -14,7 +14,10 @@ armnn::TensorShape Permuted(const armnn::TensorShape& srcShape, const armnn::Per
armnn::TensorInfo Permuted(const armnn::TensorInfo& info, const armnn::PermutationVector& mappings);
+void Permute(const armnn::TensorShape& dstShape, const armnn::PermutationVector& mappings,
+ const void* src, void* dst, size_t dataTypeSize);
+
template <typename T>
void Permute(const armnn::TensorShape& dstShape, const armnn::PermutationVector& mappings, const T* src, T* dst);
-} // namespace armnnUtils \ No newline at end of file
+} // namespace armnnUtils
diff --git a/src/backends/aclCommon/ArmComputeTensorUtils.cpp b/src/backends/aclCommon/ArmComputeTensorUtils.cpp
index a2d7d8c797..32af42f7e1 100644
--- a/src/backends/aclCommon/ArmComputeTensorUtils.cpp
+++ b/src/backends/aclCommon/ArmComputeTensorUtils.cpp
@@ -109,19 +109,6 @@ arm_compute::TensorInfo BuildArmComputeTensorInfo(const armnn::TensorInfo& tenso
return arm_compute::TensorInfo(aclTensorShape, 1, aclDataType, aclQuantizationInfo);
}
-arm_compute::DataLayout ConvertDataLayout(armnn::DataLayout dataLayout)
-{
- switch(dataLayout)
- {
- case armnn::DataLayout::NHWC : return arm_compute::DataLayout::NHWC;
-
- case armnn::DataLayout::NCHW : return arm_compute::DataLayout::NCHW;
-
- default: throw InvalidArgumentException("Unknown armnn::DataLayout: [" +
- std::to_string(static_cast<int>(dataLayout)) + "]");
- }
-}
-
arm_compute::TensorInfo BuildArmComputeTensorInfo(const armnn::TensorInfo& tensorInfo,
armnn::DataLayout dataLayout)
{
@@ -136,6 +123,19 @@ arm_compute::TensorInfo BuildArmComputeTensorInfo(const armnn::TensorInfo& tenso
return clTensorInfo;
}
+arm_compute::DataLayout ConvertDataLayout(armnn::DataLayout dataLayout)
+{
+ switch(dataLayout)
+ {
+ case armnn::DataLayout::NHWC : return arm_compute::DataLayout::NHWC;
+
+ case armnn::DataLayout::NCHW : return arm_compute::DataLayout::NCHW;
+
+ default: throw InvalidArgumentException("Unknown armnn::DataLayout: [" +
+ std::to_string(static_cast<int>(dataLayout)) + "]");
+ }
+}
+
arm_compute::PoolingLayerInfo BuildArmComputePoolingLayerInfo(const Pooling2dDescriptor& descriptor)
{
using arm_compute::PoolingType;
diff --git a/src/backends/aclCommon/ArmComputeTensorUtils.hpp b/src/backends/aclCommon/ArmComputeTensorUtils.hpp
index fbd850c687..fa455b746b 100644
--- a/src/backends/aclCommon/ArmComputeTensorUtils.hpp
+++ b/src/backends/aclCommon/ArmComputeTensorUtils.hpp
@@ -36,16 +36,16 @@ arm_compute::TensorShape BuildArmComputeTensorShape(const armnn::TensorShape& te
/// armnn::ITensorInfo.
arm_compute::TensorInfo BuildArmComputeTensorInfo(const armnn::TensorInfo& tensorInfo);
-/// Utility function used to convert armnn::DataLayout to arm_compute::DataLayout
-/// armnn::DataLayout.
-arm_compute::DataLayout ConvertDataLayout(armnn::DataLayout dataLayout);
-
/// Utility function used to setup an arm_compute::ITensorInfo object whose dimensions are based on the given
/// armnn::ITensorInfo.
/// armnn::DataLayout.
arm_compute::TensorInfo BuildArmComputeTensorInfo(const armnn::TensorInfo& tensorInfo,
armnn::DataLayout dataLayout);
+/// Utility function used to convert armnn::DataLayout to arm_compute::DataLayout
+/// armnn::DataLayout.
+arm_compute::DataLayout ConvertDataLayout(armnn::DataLayout dataLayout);
+
/// Utility function used to setup an arm_compute::PoolingLayerInfo object from an armnn::Pooling2dDescriptor.
arm_compute::PoolingLayerInfo BuildArmComputePoolingLayerInfo(const Pooling2dDescriptor& descriptor);
diff --git a/src/backends/backendsCommon/CMakeLists.txt b/src/backends/backendsCommon/CMakeLists.txt
index f29563093c..b120f51184 100644
--- a/src/backends/backendsCommon/CMakeLists.txt
+++ b/src/backends/backendsCommon/CMakeLists.txt
@@ -27,6 +27,7 @@ list(APPEND armnnBackendsCommon_sources
WorkloadFactory.hpp
Workload.hpp
WorkloadInfo.hpp
+ WorkloadUtils.cpp
WorkloadUtils.hpp
)
diff --git a/src/backends/backendsCommon/CpuTensorHandle.cpp b/src/backends/backendsCommon/CpuTensorHandle.cpp
index fe0c634e7c..9dcd3f38df 100644
--- a/src/backends/backendsCommon/CpuTensorHandle.cpp
+++ b/src/backends/backendsCommon/CpuTensorHandle.cpp
@@ -18,7 +18,7 @@ ConstCpuTensorHandle::ConstCpuTensorHandle(const TensorInfo& tensorInfo)
}
template <>
-const void* ConstCpuTensorHandle::GetConstTensor() const
+const void* ConstCpuTensorHandle::GetConstTensor<void>() const
{
return m_Memory;
}
@@ -30,7 +30,7 @@ CpuTensorHandle::CpuTensorHandle(const TensorInfo& tensorInfo)
}
template <>
-void* CpuTensorHandle::GetTensor() const
+void* CpuTensorHandle::GetTensor<void>() const
{
return m_MutableMemory;
}
diff --git a/src/backends/backendsCommon/CpuTensorHandle.hpp b/src/backends/backendsCommon/CpuTensorHandle.hpp
index ae13d6c439..b88a0d385b 100644
--- a/src/backends/backendsCommon/CpuTensorHandle.hpp
+++ b/src/backends/backendsCommon/CpuTensorHandle.hpp
@@ -72,6 +72,9 @@ private:
const void* m_Memory;
};
+template<>
+const void* ConstCpuTensorHandle::GetConstTensor<void>() const;
+
// Abstract specialization of ConstCpuTensorHandle that allows write access to the same data.
class CpuTensorHandle : public ConstCpuTensorHandle
{
@@ -99,6 +102,9 @@ private:
void* m_MutableMemory;
};
+template <>
+void* CpuTensorHandle::GetTensor<void>() const;
+
// A CpuTensorHandle that owns the wrapped memory region.
class ScopedCpuTensorHandle : public CpuTensorHandle
{
diff --git a/src/backends/backendsCommon/WorkloadData.cpp b/src/backends/backendsCommon/WorkloadData.cpp
index 8847b4efbf..1dac498c11 100644
--- a/src/backends/backendsCommon/WorkloadData.cpp
+++ b/src/backends/backendsCommon/WorkloadData.cpp
@@ -593,9 +593,10 @@ void DepthwiseConvolution2dQueueDescriptor::Validate(const WorkloadInfo& workloa
const unsigned int channelIndex = (m_Parameters.m_DataLayout == DataLayout::NCHW) ? 1 : 3;
- //inputChannels * channelMultiplier should be equal to outputChannels.
+ // Expected weight shape: [ M, I, H, W ] - This shape does NOT depend on the data layout
+ // inputChannels * channelMultiplier should be equal to outputChannels.
const unsigned int numWeightChannelMultiplier = m_Weight->GetTensorInfo().GetShape()[0];
- const unsigned int numWeightInputChannels = m_Weight->GetTensorInfo().GetShape()[channelIndex];
+ const unsigned int numWeightInputChannels = m_Weight->GetTensorInfo().GetShape()[1];
const unsigned int numWeightOutputChannels = workloadInfo.m_OutputTensorInfos[0].GetShape()[channelIndex];
if (numWeightChannelMultiplier * numWeightInputChannels != numWeightOutputChannels)
{
diff --git a/src/backends/backendsCommon/WorkloadUtils.cpp b/src/backends/backendsCommon/WorkloadUtils.cpp
new file mode 100644
index 0000000000..fa387a7a0b
--- /dev/null
+++ b/src/backends/backendsCommon/WorkloadUtils.cpp
@@ -0,0 +1,111 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "WorkloadUtils.hpp"
+
+namespace armnn
+{
+
+armnn::ConstTensor PermuteTensor(const ConstCpuTensorHandle* tensor,
+ const PermutationVector& permutationVector,
+ void* permuteBuffer)
+{
+ BOOST_ASSERT_MSG(tensor, "Invalid input tensor");
+ BOOST_ASSERT_MSG(permuteBuffer, "Invalid permute buffer");
+
+ TensorInfo tensorInfo = tensor->GetTensorInfo();
+
+ if (permutationVector.GetSize() > 0)
+ {
+ tensorInfo = armnnUtils::Permuted(tensorInfo, permutationVector);
+ armnnUtils::Permute(tensorInfo.GetShape(), permutationVector,
+ tensor->GetConstTensor<void>(), permuteBuffer,
+ GetDataTypeSize(tensorInfo.GetDataType()));
+ }
+ else
+ {
+ ::memcpy(permuteBuffer, tensor->GetConstTensor<void>(), tensorInfo.GetNumBytes());
+ }
+
+ return ConstTensor(tensorInfo, permuteBuffer);
+}
+
+void ReshapeWeightsForAcl(TensorInfo& weightInfo, DataLayout dataLayout)
+{
+ // Reshape the weights in-place
+ const TensorShape& weightShape = weightInfo.GetShape();
+ switch (dataLayout)
+ {
+ case DataLayout::NHWC:
+ // The data layout is NHWC, reshape from [ H, W, I, M ] to [ 1, H, W, I * M ]
+ weightInfo.SetShape({ 1,
+ weightShape[0],
+ weightShape[1],
+ weightShape[2] * weightShape[3] });
+ break;
+ case DataLayout::NCHW:
+ default:
+ // The data layout is NCHW, reshape from [ M, I, H, W ] to [ 1, I * M, H, W ]
+ weightInfo.SetShape({ 1,
+ weightShape[0] * weightShape[1],
+ weightShape[2],
+ weightShape[3] });
+ break;
+ }
+}
+
+TensorInfo ConvertWeightTensorInfoFromArmnnToAcl(const TensorInfo& weightInfo, DataLayout dataLayout)
+{
+ // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either
+ // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library
+
+ // 1. Permute the weights if necessary
+ // If the data layout is NCHW no permutation is necessary, as a reshape to [ 1, I * M, H, W ] can be better done
+ // starting from the current shape of [ M, I, H, W ]
+ TensorInfo weightPermutedInfo(weightInfo);
+ if (dataLayout == DataLayout::NHWC)
+ {
+ // The data layout is NHWC, then permute the weights from [ M, I, H, W ] to [ H, W, I, M ]
+ PermutationVector permutationVector{ 3, 2, 0, 1 };
+ weightPermutedInfo = armnnUtils::Permuted(weightInfo, permutationVector);
+ }
+
+ // 2. Reshape the weights
+ ReshapeWeightsForAcl(weightPermutedInfo, dataLayout);
+
+ // 3. Return the permuted weight info
+ return weightPermutedInfo;
+}
+
+armnn::ConstTensor ConvertWeightTensorFromArmnnToAcl(const ConstCpuTensorHandle* weightTensor,
+ DataLayout dataLayout,
+ void* permuteBuffer)
+{
+ BOOST_ASSERT_MSG(weightTensor, "Invalid input tensor");
+ BOOST_ASSERT_MSG(permuteBuffer, "Invalid permute buffer");
+
+ // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either
+ // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library
+
+ // 1. Permute the weights if necessary
+ // If the data layout is NCHW no permutation is necessary, as a reshape to [ 1, I * M, H, W ] can be better done
+ // starting from the current shape of [ M, I, H, W ]
+ // If no permutation is necessary, leave the permutation vector empty
+ PermutationVector permutationVector{};
+ if (dataLayout == DataLayout::NHWC)
+ {
+ // The data layout is NHWC, then permute the weights from [ M, I, H, W ] to [ H, W, I, M ]
+ permutationVector = { 3, 2, 0, 1 };
+ }
+ ConstTensor weightPermuted = PermuteTensor(weightTensor, permutationVector, permuteBuffer);
+
+ // 2. Reshape the weights
+ ReshapeWeightsForAcl(weightPermuted.GetInfo(), dataLayout);
+
+ // 3. Return the permuted tensor; it references the caller-provided permute buffer, which must outlive it
+ return weightPermuted;
+}
+
+} // namespace armnn
diff --git a/src/backends/backendsCommon/WorkloadUtils.hpp b/src/backends/backendsCommon/WorkloadUtils.hpp
index 2b07b2b0d2..a1a8d2a475 100644
--- a/src/backends/backendsCommon/WorkloadUtils.hpp
+++ b/src/backends/backendsCommon/WorkloadUtils.hpp
@@ -6,35 +6,42 @@
#pragma once
#include "ITensorHandle.hpp"
+#include "CpuTensorHandle.hpp"
#include <armnn/Tensor.hpp>
+#include <Permute.hpp>
+#include <Profiling.hpp>
+#include <Half.hpp>
+
#include <boost/cast.hpp>
namespace armnn
{
namespace
{
+
template<typename ArrayType, typename Arg>
void AssignValues(unsigned int num, unsigned int& idx, const ArrayType& array, Arg& arg)
{
- if (idx >= num)
- {
- return;
- }
+ if (idx >= num)
+ {
+ return;
+ }
- arg = array[(num - 1) - idx];
- idx++;
-};
+ arg = array[(num - 1) - idx];
+ idx++;
+}
template<typename T, typename ArrayType, typename ...Args>
void AssignValues(unsigned int num, unsigned int idx, const ArrayType& array, T& assignee, Args& ... args)
{
- AssignValues(num, idx, array, assignee);
+ AssignValues(num, idx, array, assignee);
- AssignValues(num, idx, array, args...);
+ AssignValues(num, idx, array, args...);
}
-} // namespace
+
+} // anonymous namespace
template<typename CopyFunc>
void CopyTensorContentsGeneric(const ITensorHandle* srcTensor, ITensorHandle* dstTensor, CopyFunc copy)
@@ -142,4 +149,16 @@ void GatherTensorHandlePairs(const DescriptorType& descriptor,
}
}
-} //namespace armnn \ No newline at end of file
+armnn::ConstTensor PermuteTensor(const ConstCpuTensorHandle* tensor,
+ const PermutationVector& permutationVector,
+ void* permuteBuffer);
+
+void ReshapeWeightsForAcl(TensorInfo& weightInfo, DataLayout dataLayout);
+
+TensorInfo ConvertWeightTensorInfoFromArmnnToAcl(const TensorInfo& weightInfo, DataLayout dataLayout);
+
+armnn::ConstTensor ConvertWeightTensorFromArmnnToAcl(const ConstCpuTensorHandle* weightTensor,
+ DataLayout dataLayout,
+ void* permuteBuffer);
+
+} //namespace armnn
diff --git a/src/backends/backendsCommon/common.mk b/src/backends/backendsCommon/common.mk
index a66b5c4581..4e79bfcd7e 100644
--- a/src/backends/backendsCommon/common.mk
+++ b/src/backends/backendsCommon/common.mk
@@ -14,7 +14,8 @@ COMMON_SOURCES := \
MemCopyWorkload.cpp \
OutputHandler.cpp \
WorkloadData.cpp \
- WorkloadFactory.cpp
+ WorkloadFactory.cpp \
+ WorkloadUtils.cpp
# COMMON_TEST_SOURCES contains the list of files to be included
# in the Android unit test build (armnn-tests) and it is picked
diff --git a/src/backends/backendsCommon/test/Conv2dTestImpl.hpp b/src/backends/backendsCommon/test/Conv2dTestImpl.hpp
index 37fa0f63d6..2ff66b08d5 100755
--- a/src/backends/backendsCommon/test/Conv2dTestImpl.hpp
+++ b/src/backends/backendsCommon/test/Conv2dTestImpl.hpp
@@ -327,7 +327,7 @@ LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestImpl(
armnn::IWorkloadFactory& workloadFactory,
const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
const boost::multi_array<T, 4>& input,
- const boost::multi_array<T, 4>& originalKernel,
+ const boost::multi_array<T, 4>& kernel,
const boost::multi_array<B, 1>& bias,
const boost::multi_array<T, 4>& outputExpected,
float qScale,
@@ -344,10 +344,10 @@ LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestImpl(
unsigned int inputChannels = boost::numeric_cast<unsigned int>(input.shape()[1]);
unsigned int inputHeight = boost::numeric_cast<unsigned int>(input.shape()[2]);
unsigned int inputWidth = boost::numeric_cast<unsigned int>(input.shape()[3]);
- unsigned int kernelChanMul = boost::numeric_cast<unsigned int>(originalKernel.shape()[0]);
- unsigned int kernelChannels = boost::numeric_cast<unsigned int>(originalKernel.shape()[1]);
- unsigned int kernelHeight = boost::numeric_cast<unsigned int>(originalKernel.shape()[2]);
- unsigned int kernelWidth = boost::numeric_cast<unsigned int>(originalKernel.shape()[3]);
+ unsigned int kernelChanMul = boost::numeric_cast<unsigned int>(kernel.shape()[0]);
+ unsigned int kernelChannels = boost::numeric_cast<unsigned int>(kernel.shape()[1]);
+ unsigned int kernelHeight = boost::numeric_cast<unsigned int>(kernel.shape()[2]);
+ unsigned int kernelWidth = boost::numeric_cast<unsigned int>(kernel.shape()[3]);
unsigned int outputNum = boost::numeric_cast<unsigned int>(outputExpected.shape()[0]);
unsigned int outputChannels = boost::numeric_cast<unsigned int>(outputExpected.shape()[1]);
unsigned int outputHeight = boost::numeric_cast<unsigned int>(outputExpected.shape()[2]);
@@ -362,8 +362,7 @@ LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestImpl(
armnnUtils::GetTensorInfo<T>(inputNum, inputChannels, inputHeight, inputWidth, layout);
armnn::TensorInfo outputTensorInfo =
armnnUtils::GetTensorInfo<T>(outputNum, outputChannels, outputHeight, outputWidth, layout);
- armnn::TensorInfo kernelDesc =
- armnnUtils::GetTensorInfo<T>(kernelChanMul, kernelChannels, kernelHeight, kernelWidth, layout);
+ armnn::TensorInfo kernelDesc({kernelChanMul, kernelChannels, kernelHeight, kernelWidth}, armnn::GetDataType<T>());
armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, armnn::GetDataType<B>());
// Set quantization parameters if the requested type is a quantized type.
@@ -423,13 +422,6 @@ LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestImpl(
armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
- // Permute the kernel if necessary
- boost::multi_array<T, 4> kernel = boost::multi_array<T, 4>(originalKernel);
- if (layout == armnn::DataLayout::NHWC)
- {
- armnnUtils::Permute(kernelDesc.GetShape(), NCHWToNHWC, originalKernel.data(), kernel.data());
- }
-
AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
@@ -484,6 +476,7 @@ LayerTestResult<T, 4> DepthwiseConvolution2dDepthMul1TestImpl(
unsigned int kernelHeight = 3;
unsigned int kernelWidth = 3;
unsigned int kernelChannels = inputChannels;
+ unsigned int kernelDepthMultiplier = 1;
unsigned int outputHeight = 1;
unsigned int outputWidth = 1;
@@ -494,7 +487,8 @@ LayerTestResult<T, 4> DepthwiseConvolution2dDepthMul1TestImpl(
armnnUtils::GetTensorInfo<T>(inputNum, inputChannels, inputHeight, inputWidth, layout);
armnn::TensorInfo outputTensorInfo =
armnnUtils::GetTensorInfo<T>(outputNum, outputChannels, outputHeight, outputWidth, layout);
- armnn::TensorInfo kernelDesc = armnnUtils::GetTensorInfo<T>(1, outputChannels, kernelHeight, kernelWidth, layout);
+ armnn::TensorInfo kernelDesc({kernelDepthMultiplier, kernelChannels, kernelHeight, kernelWidth},
+ armnn::GetDataType<T>());
armnn::TensorInfo biasDesc({ outputChannels }, armnn::GetDataType<B>());
// Set quantization parameters if the requested type is a quantized type.
@@ -543,12 +537,6 @@ LayerTestResult<T, 4> DepthwiseConvolution2dDepthMul1TestImpl(
0.f, 0.f, 0.f,
-1.f, 0.f, -1.f,
}));
- if (layout == armnn::DataLayout::NHWC)
- {
- std::vector<T> tmp(kernelData.size());
- armnnUtils::Permute(kernelDesc.GetShape(), NCHWToNHWC, kernelData.data(), tmp.data());
- kernelData = tmp;
- }
auto kernel = MakeTensor<T, 4>(kernelDesc, kernelData);
// Manually calculated.
@@ -642,8 +630,8 @@ LayerTestResult<T, 4> DepthwiseConvolution2dTestImpl(
inputBatchSize, inputChannels, inputHeight, inputWidth, layout);
armnn::TensorInfo outputTensorInfo = armnnUtils::GetTensorInfo<T>(
outputBatchSize, outputChannels, outputHeight, outputWidth, layout);
- armnn::TensorInfo kernelDesc = armnnUtils::GetTensorInfo<T>(
- depthMultiplier, inputChannels, kernelHeight, kernelWidth, layout);
+ armnn::TensorInfo kernelDesc({depthMultiplier, inputChannels, kernelHeight, kernelWidth},
+ armnn::GetDataType<T>());
armnn::TensorInfo biasDesc({outputChannels}, armnn::GetDataType<B>());
// Set quantization parameters if the requested type is a quantized type.
@@ -692,7 +680,7 @@ LayerTestResult<T, 4> DepthwiseConvolution2dTestImpl(
{0, 2, 1, -1}));
auto bias = MakeTensor<B, 1>(biasDesc, biasV);
- std::vector<T> originalKernelData = std::vector<T>(
+ std::vector<T> kernelData = std::vector<T>(
QuantizedVector<T>(kernelDesc.GetQuantizationScale(), kernelDesc.GetQuantizationOffset(), {
1, 1, 1,
1, -1, 1,
@@ -717,12 +705,8 @@ LayerTestResult<T, 4> DepthwiseConvolution2dTestImpl(
0, 1, 0,
0, 0, 0,
0, 0, 0
+
}));
- std::vector<T> kernelData = originalKernelData;
- if (layout == armnn::DataLayout::NHWC)
- {
- armnnUtils::Permute(kernelDesc.GetShape(), NCHWToNHWC, originalKernelData.data(), kernelData.data());
- }
auto kernel = MakeTensor<T, 4>(kernelDesc, kernelData);
// Manually calculated.
@@ -840,9 +824,9 @@ LayerTestResult<T, 4> DepthwiseConvolution2dNhwcTestImpl(
unsigned int inputWidth = boost::numeric_cast<unsigned int>(input.shape()[2]);
unsigned int kernelChanMul = boost::numeric_cast<unsigned int>(kernel.shape()[0]);
- unsigned int kernelChannels = boost::numeric_cast<unsigned int>(kernel.shape()[3]);
- unsigned int kernelHeight = boost::numeric_cast<unsigned int>(kernel.shape()[1]);
- unsigned int kernelWidth = boost::numeric_cast<unsigned int>(kernel.shape()[2]);
+ unsigned int kernelChannels = boost::numeric_cast<unsigned int>(kernel.shape()[1]);
+ unsigned int kernelHeight = boost::numeric_cast<unsigned int>(kernel.shape()[2]);
+ unsigned int kernelWidth = boost::numeric_cast<unsigned int>(kernel.shape()[3]);
unsigned int outputNum = boost::numeric_cast<unsigned int>(outputExpected.shape()[0]);
unsigned int outputChannels = boost::numeric_cast<unsigned int>(outputExpected.shape()[3]);
@@ -853,7 +837,7 @@ LayerTestResult<T, 4> DepthwiseConvolution2dNhwcTestImpl(
armnn::TensorInfo inputTensorInfo({inputNum, inputHeight, inputWidth, inputChannels}, armnn::GetDataType<T>());
armnn::TensorInfo outputTensorInfo({outputNum, outputHeight, outputWidth, outputChannels},
armnn::GetDataType<T>());
- armnn::TensorInfo kernelDesc({kernelChanMul, kernelHeight, kernelWidth, kernelChannels}, armnn::GetDataType<T>());
+ armnn::TensorInfo kernelDesc({kernelChanMul, kernelChannels, kernelHeight, kernelWidth}, armnn::GetDataType<T>());
armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, armnn::GetDataType<B>());
// Set quantization parameters if the requested type is a quantized type.
@@ -1068,10 +1052,10 @@ LayerTestResult<T,4> CompareConvolution2dTestImpl(
armnn::TensorInfo kernelDesc;
armnn::TensorInfo biasDesc;
- unsigned int inputShape[] = {inputNum, inputChannels, inputHeight, inputWidth};
- unsigned int outputShape[] = {outputNum, outputChannels, outputHeight, outputWidth};
- unsigned int kernelShape[] = {outputChannels, inputChannels, kernelHeight, kernelWidth};
- unsigned int biasShape[] = {outputChannels};
+ unsigned int inputShape[] = {inputNum, inputChannels, inputHeight, inputWidth};
+ unsigned int outputShape[] = {outputNum, outputChannels, outputHeight, outputWidth};
+ unsigned int kernelShape[] = {outputChannels, inputChannels, kernelHeight, kernelWidth};
+ unsigned int biasShape[] = {outputChannels};
inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::GetDataType<T>());
outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::GetDataType<T>());
@@ -1171,19 +1155,17 @@ LayerTestResult<T, 4> CompareDepthwiseConvolution2dTestImpl(
std::vector<unsigned int> inputShape;
std::vector<unsigned int> outputShape;
- std::vector<unsigned int> kernelShape;
- std::vector<unsigned int> biasShape= { outputChannels };
+ std::vector<unsigned int> kernelShape{ channelMultiplier, inputChannels, kernelHeight, kernelWidth };
+ std::vector<unsigned int> biasShape{ outputChannels };
switch (layout.GetDataLayout())
{
case armnn::DataLayout::NCHW:
inputShape = { inputNum, inputChannels, inputHeight, inputWidth };
outputShape = { outputNum, outputChannels, outputHeight, outputWidth };
- kernelShape = { channelMultiplier, inputChannels, kernelHeight, kernelWidth };
break;
case armnn::DataLayout ::NHWC:
inputShape = { inputNum, inputHeight, inputWidth, inputChannels };
outputShape = { outputNum, outputHeight, outputWidth, outputChannels };
- kernelShape = { channelMultiplier, kernelHeight, kernelWidth, inputChannels };
break;
default:
throw armnn::InvalidArgumentException("unknown data layout ["
diff --git a/src/backends/backendsCommon/test/LayerTests.cpp b/src/backends/backendsCommon/test/LayerTests.cpp
index ddf0d0b587..819b9d6e37 100755
--- a/src/backends/backendsCommon/test/LayerTests.cpp
+++ b/src/backends/backendsCommon/test/LayerTests.cpp
@@ -661,28 +661,18 @@ LayerTestResult<T, 4> DepthwiseConvolution2dNhwcTestCommon(
24, 49
})));
- armnn::TensorInfo kernelTensorInfo({ 1, 4, 4, 2}, armnn::GetDataType<T>());
+ armnn::TensorInfo kernelTensorInfo({ 1, 2, 4, 4 }, armnn::GetDataType<T>());
auto kernel = MakeTensor<T, 4>(kernelTensorInfo, std::vector<T>(
QuantizedVector<T>(kernelTensorInfo.GetQuantizationScale(), kernelTensorInfo.GetQuantizationOffset(), {
- 32, 16,
- 31, 15,
- 30, 14,
- 29, 13,
-
- 28, 12,
- 27, 11,
- 26, 10,
- 25, 9,
-
- 24, 8,
- 23, 7,
- 22, 6,
- 21, 5,
-
- 20, 4,
- 19, 3,
- 18, 2,
- 17, 1
+ 32, 31, 30, 29,
+ 28, 27, 26, 25,
+ 24, 23, 22, 21,
+ 20, 19, 18, 17,
+
+ 16, 15, 14, 13,
+ 12, 11, 10, 9,
+ 8, 7, 6, 5,
+ 4, 3, 2, 1
})));
armnn::TensorInfo outputTensorInfo({ 1, 5, 5, 2}, armnn::GetDataType<T>());
diff --git a/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp b/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp
index 9cadbf09ac..1745b8297a 100644
--- a/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp
+++ b/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp
@@ -12,6 +12,7 @@
#include <aclCommon/ArmComputeTensorUtils.hpp>
#include <cl/ClTensorHandle.hpp>
#include <backendsCommon/CpuTensorHandle.hpp>
+#include <backendsCommon/WorkloadUtils.hpp>
#include <arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h>
@@ -21,14 +22,23 @@ namespace armnn
using namespace armcomputetensorutils;
arm_compute::Status ClDepthwiseConvolutionWorkloadValidate(const TensorInfo& input,
- const TensorInfo& output,
- const DepthwiseConvolution2dDescriptor& descriptor,
- const TensorInfo& weights,
- const Optional<TensorInfo>& biases)
+ const TensorInfo& output,
+ const DepthwiseConvolution2dDescriptor& descriptor,
+ const TensorInfo& weights,
+ const Optional<TensorInfo>& biases)
{
- const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
+ const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
- const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
+
+ // ArmNN's weight format is [ M, I, H, W ]
+ const unsigned int aclDepthMultiplier = weights.GetShape()[0];
+
+ // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either
+ // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library
+ TensorInfo weightsPermuted = ConvertWeightTensorInfoFromArmnnToAcl(weights, descriptor.m_DataLayout);
+
+ // Convert the weights into the compute library format
+ const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout);
arm_compute::TensorInfo aclBiasesInfo;
arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr;
@@ -42,7 +52,6 @@ arm_compute::Status ClDepthwiseConvolutionWorkloadValidate(const TensorInfo& inp
}
const arm_compute::PadStrideInfo aclPadStrideInfo = BuildArmComputePadStrideInfo(descriptor);
- const unsigned int aclDepthMultiplier = weights.GetShape()[0];
return arm_compute::CLDepthwiseConvolutionLayer::validate(&aclInputInfo,
&aclWeightsInfo,
@@ -57,10 +66,18 @@ ClDepthwiseConvolutionWorkload::ClDepthwiseConvolutionWorkload(
const WorkloadInfo& info)
: BaseWorkload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info)
{
- auto& weightInfo = m_Data.m_Weight->GetTensorInfo();
+ // Allocate a buffer for the swizzling of the weight tensor
+ std::unique_ptr<unsigned char[]> permuteBuffer(new unsigned char[m_Data.m_Weight->GetTensorInfo().GetNumBytes()]);
+
+ // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either
+ // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library
+ ConstTensor weightPermuted = ConvertWeightTensorFromArmnnToAcl(m_Data.m_Weight,
+ m_Data.m_Parameters.m_DataLayout,
+ permuteBuffer.get());
+ // Convert the weights into the compute library format
m_KernelTensor = std::make_unique<arm_compute::CLTensor>();
- BuildArmComputeTensor(*m_KernelTensor, weightInfo, m_Data.m_Parameters.m_DataLayout);
+ BuildArmComputeTensor(*m_KernelTensor, weightPermuted.GetInfo(), m_Data.m_Parameters.m_DataLayout);
if (m_Data.m_Parameters.m_BiasEnabled)
{
@@ -86,13 +103,14 @@ ClDepthwiseConvolutionWorkload::ClDepthwiseConvolutionWorkload(
input.info()->set_data_layout(aclDataLayout);
output.info()->set_data_layout(aclDataLayout);
- const unsigned int depthMultiplier = weightInfo.GetShape()[0];
+ // ArmNN's weight format is [ M, I, H, W ]
+ auto& weightInfo = m_Data.m_Weight->GetTensorInfo();
- const unsigned int widthIndex = (m_Data.m_Parameters.m_DataLayout == DataLayout::NCHW) ? 3 : 2;
- const unsigned int heightIndex = (m_Data.m_Parameters.m_DataLayout == DataLayout::NCHW) ? 2 : 1;
+ // Get the depth multiplier
+ const unsigned int depthMultiplier = weightInfo.GetShape()[0];
- //Check for optimisation opportunities.
- bool use3x3Optimisation = (weightInfo.GetShape()[widthIndex] == 3) && (weightInfo.GetShape()[heightIndex] == 3);
+ // Check for optimisation opportunities.
+ bool use3x3Optimisation = (weightInfo.GetShape()[2] == 3) && (weightInfo.GetShape()[3] == 3);
if (use3x3Optimisation)
{
m_DepthwiseConvolutionLayer = std::make_unique<arm_compute::CLDepthwiseConvolutionLayer3x3>();
@@ -118,7 +136,8 @@ ClDepthwiseConvolutionWorkload::ClDepthwiseConvolutionWorkload(
BOOST_ASSERT(m_DepthwiseConvolutionLayer);
- InitializeArmComputeClTensorData(*m_KernelTensor, m_Data.m_Weight);
+ ScopedCpuTensorHandle weightsPermutedHandle(weightPermuted);
+ InitializeArmComputeClTensorData(*m_KernelTensor, &weightsPermutedHandle);
if (m_BiasTensor)
{
diff --git a/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp b/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp
index 6cad12cba8..be26359662 100644
--- a/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp
+++ b/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp
@@ -8,10 +8,7 @@
#include <aclCommon/ArmComputeTensorUtils.hpp>
#include <neon/NeonLayerSupport.hpp>
#include <backendsCommon/CpuTensorHandle.hpp>
-
-#include <DataLayoutIndexed.hpp>
-
-using namespace armnnUtils;
+#include <backendsCommon/WorkloadUtils.hpp>
namespace armnn
{
@@ -19,17 +16,23 @@ namespace armnn
using namespace armcomputetensorutils;
arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& input,
- const TensorInfo& output,
- const DepthwiseConvolution2dDescriptor& descriptor,
- const TensorInfo& weights,
- const Optional<TensorInfo>& biases)
+ const TensorInfo& output,
+ const DepthwiseConvolution2dDescriptor& descriptor,
+ const TensorInfo& weights,
+ const Optional<TensorInfo>& biases)
{
- const arm_compute::TensorInfo aclInputInfo =
- BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
- const arm_compute::TensorInfo aclOutputInfo =
- BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
- const arm_compute::TensorInfo aclWeightsInfo =
- BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
+ const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
+ const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
+
+ // ArmNN's weight format is [ M, I, H, W ]
+ const unsigned int aclDepthMultiplier = weights.GetShape()[0];
+
+ // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either
+ // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library
+ TensorInfo weightsPermuted = ConvertWeightTensorInfoFromArmnnToAcl(weights, descriptor.m_DataLayout);
+
+ // Convert the weights into the compute library format
+ const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout);
arm_compute::TensorInfo aclBiasesInfo;
arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr;
@@ -42,9 +45,7 @@ arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& i
optionalAclBiasesInfo = &aclBiasesInfo;
}
- const arm_compute::PadStrideInfo aclPadStrideInfo =
- BuildArmComputePadStrideInfo(descriptor);
- const unsigned int aclDepthMultiplier = weights.GetShape()[0];
+ const arm_compute::PadStrideInfo aclPadStrideInfo = BuildArmComputePadStrideInfo(descriptor);
return arm_compute::NEDepthwiseConvolutionLayer::validate(&aclInputInfo,
&aclWeightsInfo,
@@ -59,14 +60,21 @@ NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload(
const WorkloadInfo& info)
: BaseWorkload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info)
{
- const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo();
+ // ArmNN's weight format is [ M, I, H, W ]
+ auto& weightInfo = m_Data.m_Weight->GetTensorInfo();
- m_KernelTensor = std::make_unique<arm_compute::Tensor>();
- BuildArmComputeTensor(*m_KernelTensor, weightInfo, m_Data.m_Parameters.m_DataLayout);
+ // Allocate a buffer for the swizzling of the weight tensor
+ std::unique_ptr<unsigned char[]> permuteBuffer(new unsigned char[m_Data.m_Weight->GetTensorInfo().GetNumBytes()]);
- INeonTensorHandle* inputTensorHandle = static_cast<INeonTensorHandle*>(m_Data.m_Inputs[0]);
- INeonTensorHandle* outputTensorHandle = static_cast<INeonTensorHandle*>(m_Data.m_Outputs[0]);
- DataLayoutIndexed dataLayoutIndex(m_Data.m_Parameters.m_DataLayout);
+ // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either
+ // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library
+ ConstTensor weightPermuted = ConvertWeightTensorFromArmnnToAcl(m_Data.m_Weight,
+ m_Data.m_Parameters.m_DataLayout,
+ permuteBuffer.get());
+
+ // Convert the weights into the compute library format
+ m_KernelTensor = std::make_unique<arm_compute::Tensor>();
+ BuildArmComputeTensor(*m_KernelTensor, weightPermuted.GetInfo(), m_Data.m_Parameters.m_DataLayout);
if (m_Data.m_Parameters.m_BiasEnabled)
{
@@ -84,6 +92,9 @@ NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload(
m_Data.ValidateInputsOutputs("NeonDepthwiseConvolutionWorkload", 1, 1);
+ INeonTensorHandle* inputTensorHandle = static_cast<INeonTensorHandle*>(m_Data.m_Inputs[0]);
+ INeonTensorHandle* outputTensorHandle = static_cast<INeonTensorHandle*>(m_Data.m_Outputs[0]);
+
arm_compute::ITensor& input = inputTensorHandle->GetTensor();
arm_compute::ITensor& output = outputTensorHandle->GetTensor();
@@ -91,9 +102,11 @@ NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload(
input.info()->set_data_layout(aclDataLayout);
output.info()->set_data_layout(aclDataLayout);
- bool use3x3Optimisation = weightInfo.GetShape()[dataLayoutIndex.GetWidthIndex()] == 3 &&
- weightInfo.GetShape()[dataLayoutIndex.GetHeightIndex()] == 3;
+ // Get the depth multiplier
+ const unsigned int depthMultiplier = weightInfo.GetShape()[0];
+ // Check for optimisation opportunities.
+ bool use3x3Optimisation = (weightInfo.GetShape()[2] == 3) && (weightInfo.GetShape()[3] == 3);
if (use3x3Optimisation)
{
m_pDepthwiseConvolutionLayer = std::make_unique<arm_compute::NEDepthwiseConvolutionLayer3x3>();
@@ -102,7 +115,8 @@ NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload(
m_KernelTensor.get(),
m_BiasTensor.get(),
&output,
- padStrideInfo);
+ padStrideInfo,
+ depthMultiplier);
}
else
{
@@ -112,12 +126,14 @@ NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload(
m_KernelTensor.get(),
m_BiasTensor.get(),
&output,
- padStrideInfo);
+ padStrideInfo,
+ depthMultiplier);
}
BOOST_ASSERT(m_pDepthwiseConvolutionLayer);
- InitializeArmComputeTensorData(*m_KernelTensor, m_Data.m_Weight);
+ ScopedCpuTensorHandle weightsPermutedHandle(weightPermuted);
+ InitializeArmComputeTensorData(*m_KernelTensor, &weightsPermutedHandle);
if (m_Data.m_Parameters.m_BiasEnabled)
{
diff --git a/src/backends/reference/workloads/ConvImpl.hpp b/src/backends/reference/workloads/ConvImpl.hpp
index 704bc368d2..5c07f57ec0 100644
--- a/src/backends/reference/workloads/ConvImpl.hpp
+++ b/src/backends/reference/workloads/ConvImpl.hpp
@@ -57,7 +57,6 @@ static void ConvImpl(ConvData data,
float filterScale,
int32_t filterOffset,
const BiasType* biasData,
- InputType* outputData,
float outputScale,
int32_t outputOffset,
const TensorInfo& filterInfo,
@@ -68,10 +67,10 @@ static void ConvImpl(ConvData data,
throw InvalidArgumentException("Bias is enabled but the bias data is invalid");
}
- const TensorInfo& inputInfo0 = GetTensorInfo(data.m_Inputs[0]);
- const TensorInfo& outputInfo0 = GetTensorInfo(data.m_Outputs[0]);
+ const TensorInfo& inputInfo = GetTensorInfo(data.m_Inputs[0]);
+ const TensorInfo& outputInfo = GetTensorInfo(data.m_Outputs[0]);
- TensorBufferArrayView<InputType> output(outputInfo0.GetShape(),
+ TensorBufferArrayView<InputType> output(outputInfo.GetShape(),
GetOutputTensorData<InputType>(0, data),
data.m_Parameters.m_DataLayout);
@@ -81,18 +80,18 @@ static void ConvImpl(ConvData data,
const unsigned int heightIndex = dataLayoutIndexed.GetHeightIndex();
const unsigned int widthIndex = dataLayoutIndexed.GetWidthIndex();
- unsigned int depthMult = depthwise ? filterInfo.GetShape()[0] : 1;
- unsigned int channelsInput = filterInfo.GetShape()[channelsIndex];
- unsigned int channelsOutput = depthwise ? channelsInput * depthMult : filterInfo.GetShape()[0];
+ unsigned int depthMultiplier = depthwise ? filterInfo.GetShape()[0] : 1;
+ unsigned int inputChannels = depthwise ? filterInfo.GetShape()[1] : filterInfo.GetShape()[channelsIndex];
+ unsigned int outputChannels = depthwise ? inputChannels * depthMultiplier : filterInfo.GetShape()[0];
- unsigned int batchSize = outputInfo0.GetShape()[0];
- unsigned int heightOutput = outputInfo0.GetShape()[heightIndex];
- unsigned int widthOutput = outputInfo0.GetShape()[widthIndex];
- unsigned int heightInput = inputInfo0.GetShape()[heightIndex];
- unsigned int widthInput = inputInfo0.GetShape()[widthIndex];
+ unsigned int batchSize = outputInfo.GetShape()[0];
+ unsigned int outputHeight = outputInfo.GetShape()[heightIndex];
+ unsigned int outputWidth = outputInfo.GetShape()[widthIndex];
+ unsigned int inputHeight = inputInfo.GetShape()[heightIndex];
+ unsigned int inputWidth = inputInfo.GetShape()[widthIndex];
- unsigned int heightFilter = filterInfo.GetShape()[heightIndex];
- unsigned int widthFilter = filterInfo.GetShape()[widthIndex];
+ unsigned int filterHeight = depthwise ? filterInfo.GetShape()[2] : filterInfo.GetShape()[heightIndex];
+ unsigned int filterWidth = depthwise ? filterInfo.GetShape()[3] : filterInfo.GetShape()[widthIndex];
unsigned int paddingTop = data.m_Parameters.m_PadTop;
unsigned int paddingLeft = data.m_Parameters.m_PadLeft;
@@ -102,68 +101,56 @@ static void ConvImpl(ConvData data,
// The world's least efficient convolution.
for (unsigned int batchIdx = 0; batchIdx < batchSize; batchIdx++)
{
- for (unsigned int cOutput = 0; cOutput < channelsOutput; cOutput++)
+ for (unsigned int cOutput = 0; cOutput < outputChannels; cOutput++)
{
- for (unsigned int yOutput = 0; yOutput < heightOutput; yOutput++)
+ for (unsigned int yOutput = 0; yOutput < outputHeight; yOutput++)
{
- for (unsigned int xOutput = 0; xOutput < widthOutput; xOutput++)
+ for (unsigned int xOutput = 0; xOutput < outputWidth; xOutput++)
{
// This loop goes over each output element.
AccumulatorType sum = AccumulatorType();
// For depthwise, each output channel corresponds to exactly one input channel.
// For normal, must loop over each input channel.
- for (unsigned int cInput = 0; cInput < (depthwise ? 1 : channelsInput); cInput++)
+ for (unsigned int cInput = 0; cInput < (depthwise ? 1 : inputChannels); cInput++)
{
unsigned int depthwiseMultiplierIdx = 0;
if (depthwise)
{
- cInput = cOutput / depthMult;
- depthwiseMultiplierIdx = cOutput % depthMult;
+ cInput = cOutput / depthMultiplier;
+ depthwiseMultiplierIdx = cOutput % depthMultiplier;
}
- for (unsigned int yFilter = 0; yFilter < heightFilter; yFilter++)
+ for (unsigned int yFilter = 0; yFilter < filterHeight; yFilter++)
{
- for (unsigned int xFilter = 0; xFilter < widthFilter; xFilter++)
+ for (unsigned int xFilter = 0; xFilter < filterWidth; xFilter++)
{
// This loop goes over each input element for each output element.
- unsigned int filterIndex;
+ unsigned int filterIndex = 0;
// Since dimensionality of kernel depends on depthwiseness, so does index.
if (depthwise)
{
- if (data.m_Parameters.m_DataLayout == DataLayout::NHWC)
- {
- filterIndex = depthwiseMultiplierIdx * heightFilter * widthFilter
- * channelsInput +
- yFilter * widthFilter * channelsInput +
- xFilter * channelsInput +
- cInput;
- }
- else
- {
- filterIndex = depthwiseMultiplierIdx * widthFilter * heightFilter
- * channelsInput +
- cInput * widthFilter * heightFilter +
- yFilter * widthFilter +
- xFilter;
- }
+ filterIndex = depthwiseMultiplierIdx * filterWidth * filterHeight * inputChannels +
+ cInput * filterWidth * filterHeight +
+ yFilter * filterWidth +
+ xFilter;
}
else
{
if (data.m_Parameters.m_DataLayout == DataLayout::NHWC)
{
- filterIndex = cOutput * heightFilter * widthFilter * channelsInput +
- yFilter * widthFilter * channelsInput +
- xFilter * channelsInput +
+ filterIndex = cOutput * filterHeight * filterWidth * inputChannels +
+ yFilter * filterWidth * inputChannels +
+ xFilter * inputChannels +
cInput;
}
else
{
- filterIndex = cOutput * widthFilter * heightFilter * channelsInput +
- cInput * widthFilter * heightFilter +
- yFilter * widthFilter +
+ filterIndex = cOutput * filterWidth * filterHeight * inputChannels +
+ cInput * filterWidth * filterHeight +
+ yFilter * filterWidth +
xFilter;
}
}
@@ -177,8 +164,8 @@ static void ConvImpl(ConvData data,
AccumulatorType inputValue;
// Check if we're in the padding.
- if (yInput < paddingTop || yInput >= heightInput + paddingTop ||
- xInput < paddingLeft || xInput >= widthInput + paddingLeft )
+ if (yInput < paddingTop || yInput >= inputHeight + paddingTop ||
+ xInput < paddingLeft || xInput >= inputWidth + paddingLeft )
{
inputValue = AccumulatorType();
}
@@ -188,17 +175,17 @@ static void ConvImpl(ConvData data,
if (data.m_Parameters.m_DataLayout == DataLayout::NHWC)
{
- inputIndex = batchIdx * heightInput * widthInput * channelsInput +
- (yInput - paddingTop) * widthInput * channelsInput +
- (xInput - paddingLeft) * channelsInput +
+ inputIndex = batchIdx * inputHeight * inputWidth * inputChannels +
+ (yInput - paddingTop) * inputWidth * inputChannels +
+ (xInput - paddingLeft) * inputChannels +
cInput;
}
else
{
- inputIndex = batchIdx * widthInput * heightInput * channelsInput +
- widthInput * heightInput * cInput +
- widthInput * (yInput - paddingTop) +
+ inputIndex = batchIdx * inputWidth * inputHeight * inputChannels +
+ inputWidth * inputHeight * cInput +
+ inputWidth * (yInput - paddingTop) +
xInput - paddingLeft;
}
diff --git a/src/backends/reference/workloads/RefConvolution2dFloat32Workload.cpp b/src/backends/reference/workloads/RefConvolution2dFloat32Workload.cpp
index 20905646d7..7b298df967 100644
--- a/src/backends/reference/workloads/RefConvolution2dFloat32Workload.cpp
+++ b/src/backends/reference/workloads/RefConvolution2dFloat32Workload.cpp
@@ -23,15 +23,13 @@ void RefConvolution2dFloat32Workload::Execute() const
{
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConvolution2dFloat32Workload_Execute");
- float* outputData = GetOutputTensorDataFloat(0, m_Data);
const float* inputData = GetInputTensorDataFloat(0, m_Data);
- const float* weightData = m_Weight->template GetConstTensor<float>();
- const float* biasData = m_Data.m_Parameters.m_BiasEnabled ?
- m_Bias->template GetConstTensor<float>() : nullptr;
+ const float* filterData = m_Weight->template GetConstTensor<float>();
+ const float* biasData = m_Data.m_Parameters.m_BiasEnabled ? m_Bias->template GetConstTensor<float>() : nullptr;
const TensorInfo& filterInfo = m_Weight->GetTensorInfo();
ConvImpl<armnn::Convolution2dQueueDescriptor, float, float, float>(
- m_Data, inputData, 0.0f, 0, weightData, 0.0f, 0, biasData, outputData, 0.0f, 0, filterInfo);
+ m_Data, inputData, 0.0f, 0, filterData, 0.0f, 0, biasData, 0.0f, 0, filterInfo);
}
} //namespace armnn
diff --git a/src/backends/reference/workloads/RefConvolution2dUint8Workload.cpp b/src/backends/reference/workloads/RefConvolution2dUint8Workload.cpp
index 881e9bf6b0..af2c7ad0d6 100644
--- a/src/backends/reference/workloads/RefConvolution2dUint8Workload.cpp
+++ b/src/backends/reference/workloads/RefConvolution2dUint8Workload.cpp
@@ -27,10 +27,7 @@ void RefConvolution2dUint8Workload::Execute() const
const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
const uint8_t* weightsData = m_Weight->template GetConstTensor<uint8_t>();
const TensorInfo& weightsInfo = GetTensorInfo(m_Weight.get());
- const int32_t* biasData = m_Data.m_Parameters.m_BiasEnabled ?
- m_Bias->template GetConstTensor<int32_t>() :
- nullptr;
- uint8_t* outputData = GetOutputTensorDataU8(0, m_Data);
+ const int32_t* biasData = m_Data.m_Parameters.m_BiasEnabled ? m_Bias->template GetConstTensor<int32_t>() : nullptr;
const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
const TensorInfo& filterInfo = m_Weight->GetTensorInfo();
@@ -39,7 +36,7 @@ void RefConvolution2dUint8Workload::Execute() const
inputData, inputInfo.GetQuantizationScale(), inputInfo.GetQuantizationOffset(),
weightsData, weightsInfo.GetQuantizationScale(), weightsInfo.GetQuantizationOffset(),
biasData,
- outputData, outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(), filterInfo);
+ outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(), filterInfo);
}
} //namespace armnn
diff --git a/src/backends/reference/workloads/RefDepthwiseConvolution2dFloat32Workload.cpp b/src/backends/reference/workloads/RefDepthwiseConvolution2dFloat32Workload.cpp
index e89013b9bd..756e958753 100644
--- a/src/backends/reference/workloads/RefDepthwiseConvolution2dFloat32Workload.cpp
+++ b/src/backends/reference/workloads/RefDepthwiseConvolution2dFloat32Workload.cpp
@@ -23,15 +23,13 @@ void RefDepthwiseConvolution2dFloat32Workload::Execute() const
{
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefDepthwiseConvolution2dFloat32Workload_Execute");
- float* outputData = GetOutputTensorDataFloat(0, m_Data);
const float* inputData = GetInputTensorDataFloat(0, m_Data);
const float* weightData = m_Weight->template GetConstTensor<float>();
- const float* biasData = m_Data.m_Parameters.m_BiasEnabled ?
- m_Bias->template GetConstTensor<float>() : nullptr;
+ const float* biasData = m_Data.m_Parameters.m_BiasEnabled ? m_Bias->template GetConstTensor<float>() : nullptr;
const TensorInfo& filterInfo = m_Weight->GetTensorInfo();
ConvImpl<armnn::DepthwiseConvolution2dQueueDescriptor, float, float, float>
- (m_Data, inputData, 0.0f, 0, weightData, 0.0f, 0, biasData, outputData, 0.0f, 0, filterInfo, true);
+ (m_Data, inputData, 0.0f, 0, weightData, 0.0f, 0, biasData, 0.0f, 0, filterInfo, true);
}
} //namespace armnn
diff --git a/src/backends/reference/workloads/RefDepthwiseConvolution2dUint8Workload.cpp b/src/backends/reference/workloads/RefDepthwiseConvolution2dUint8Workload.cpp
index e8e501d6ae..629b729ea6 100644
--- a/src/backends/reference/workloads/RefDepthwiseConvolution2dUint8Workload.cpp
+++ b/src/backends/reference/workloads/RefDepthwiseConvolution2dUint8Workload.cpp
@@ -28,10 +28,7 @@ void RefDepthwiseConvolution2dUint8Workload::Execute() const
const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
const uint8_t* weightsData = m_Weight->template GetConstTensor<uint8_t>();
const TensorInfo& weightsInfo = GetTensorInfo(m_Weight.get());
- const int32_t* biasData = m_Data.m_Parameters.m_BiasEnabled ?
- m_Bias->template GetConstTensor<int32_t>() :
- nullptr;
- uint8_t* outputData = GetOutputTensorDataU8(0, m_Data);
+ const int32_t* biasData = m_Data.m_Parameters.m_BiasEnabled ? m_Bias->template GetConstTensor<int32_t>() : nullptr;
const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
const TensorInfo& filterInfo = m_Weight->GetTensorInfo();
@@ -40,7 +37,7 @@ void RefDepthwiseConvolution2dUint8Workload::Execute() const
inputData, inputInfo.GetQuantizationScale(), inputInfo.GetQuantizationOffset(),
weightsData, weightsInfo.GetQuantizationScale(), weightsInfo.GetQuantizationOffset(),
biasData,
- outputData, outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(), filterInfo, true);
+ outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(), filterInfo, true);
}
} //namespace armnn