diff options
Diffstat (limited to 'src')
27 files changed, 529 insertions, 291 deletions
diff --git a/src/armnn/layers/DepthwiseConvolution2dLayer.cpp b/src/armnn/layers/DepthwiseConvolution2dLayer.cpp index 95d4690d4f..c4edc2022f 100644 --- a/src/armnn/layers/DepthwiseConvolution2dLayer.cpp +++ b/src/armnn/layers/DepthwiseConvolution2dLayer.cpp @@ -24,7 +24,7 @@ DepthwiseConvolution2dLayer::DepthwiseConvolution2dLayer(const DepthwiseConvolut { } -std::unique_ptr<IWorkload> DepthwiseConvolution2dLayer::CreateWorkload(const Graph& graph, +std::unique_ptr<IWorkload> DepthwiseConvolution2dLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const { // on this level constant data should not be released.. @@ -59,34 +59,40 @@ std::vector<TensorShape> DepthwiseConvolution2dLayer::InferOutputShapes(const std::vector<TensorShape>& inputShapes) const { BOOST_ASSERT(inputShapes.size() == 2); - const TensorShape& inputShape = inputShapes[0]; - const TensorShape filterShape = inputShapes[1]; + const TensorShape& inputShape = inputShapes[0]; + const TensorShape& filterShape = inputShapes[1]; BOOST_ASSERT_MSG(inputShape.GetNumDimensions() == 4, "Convolutions will always have 4D input."); DataLayoutIndexed dataLayoutIndex(m_Param.m_DataLayout); - unsigned int inWidth = inputShape[dataLayoutIndex.GetWidthIndex()]; - unsigned int inHeight = inputShape[dataLayoutIndex.GetHeightIndex()]; - unsigned int inBatchSize = inputShape[0]; + unsigned int inputBatchSize = inputShape[0]; + unsigned int inputHeight = inputShape[dataLayoutIndex.GetHeightIndex()]; + unsigned int inputWidth = inputShape[dataLayoutIndex.GetWidthIndex()]; + unsigned int inputChannels = inputShape[dataLayoutIndex.GetChannelsIndex()]; - unsigned int filterWidth = filterShape[dataLayoutIndex.GetWidthIndex()]; - unsigned int readWidth = (inWidth + m_Param.m_PadLeft + m_Param.m_PadRight) - (filterWidth); - unsigned int outWidth = 1 + (readWidth / m_Param.m_StrideX); + // Expected filter shape: [ M, I, H, W ] - This shape does NOT depend on the data layout + // Namely: [ depth multiplier, 
input channels, filter height, filter width ] + // Output channels = input channels * depthMultiplier - unsigned int filterHeight = filterShape[dataLayoutIndex.GetHeightIndex()]; - unsigned int readHeight = (inHeight + m_Param.m_PadTop + m_Param.m_PadBottom) - (filterHeight); - unsigned int outHeight = 1 + (readHeight / m_Param.m_StrideY); unsigned int depthMultiplier = filterShape[0]; - unsigned int outChannels = filterShape[dataLayoutIndex.GetChannelsIndex()] * depthMultiplier; - unsigned int outBatchSize = inBatchSize; + unsigned int filterHeight = filterShape[2]; + unsigned int readHeight = (inputHeight + m_Param.m_PadTop + m_Param.m_PadBottom) - filterHeight; + unsigned int outputHeight = 1 + (readHeight / m_Param.m_StrideY); + + unsigned int filterWidth = filterShape[3]; + unsigned int readWidth = (inputWidth + m_Param.m_PadLeft + m_Param.m_PadRight) - filterWidth; + unsigned int outputWidth = 1 + (readWidth / m_Param.m_StrideX); + + unsigned int outputChannels = inputChannels * depthMultiplier; + unsigned int outputBatchSize = inputBatchSize; TensorShape tensorShape = m_Param.m_DataLayout == armnn::DataLayout::NHWC ? - TensorShape( { outBatchSize, outHeight, outWidth, outChannels } ) : - TensorShape( { outBatchSize, outChannels, outHeight, outWidth }); + TensorShape{ outputBatchSize, outputHeight, outputWidth, outputChannels } : + TensorShape{ outputBatchSize, outputChannels, outputHeight, outputWidth }; - return std::vector<TensorShape>({ tensorShape }); + return std::vector<TensorShape>{ tensorShape }; } void DepthwiseConvolution2dLayer::ValidateTensorShapesFromInputs() diff --git a/src/armnn/test/CreateWorkload.hpp b/src/armnn/test/CreateWorkload.hpp index 3dc18b9e18..f52f6055ca 100644 --- a/src/armnn/test/CreateWorkload.hpp +++ b/src/armnn/test/CreateWorkload.hpp @@ -414,18 +414,18 @@ std::unique_ptr<DepthwiseConvolution2dFloat32Workload> CreateDepthwiseConvolutio { // Creates the layer we're testing. 
DepthwiseConvolution2dDescriptor layerDesc; - layerDesc.m_PadLeft = 1; - layerDesc.m_PadRight = 2; - layerDesc.m_PadTop = 1; - layerDesc.m_PadBottom = 2; - layerDesc.m_StrideX = 1; - layerDesc.m_StrideY = 1; - layerDesc.m_BiasEnabled = false; - layerDesc.m_DataLayout = dataLayout; + layerDesc.m_PadLeft = 1; + layerDesc.m_PadRight = 2; + layerDesc.m_PadTop = 1; + layerDesc.m_PadBottom = 2; + layerDesc.m_StrideX = 1; + layerDesc.m_StrideY = 1; + layerDesc.m_BiasEnabled = false; + layerDesc.m_DataLayout = dataLayout; DepthwiseConvolution2dLayer* const layer = graph.AddLayer<DepthwiseConvolution2dLayer>(layerDesc, "layer"); - layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(TensorInfo({1, 4, 4, 2}, DataType)); + layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(TensorInfo({1, 2, 4, 4}, DataType)); // [ M, I, H, W ] layer->m_Weight->Allocate(); // Creates extra layers. @@ -457,7 +457,7 @@ std::unique_ptr<DepthwiseConvolution2dFloat32Workload> CreateDepthwiseConvolutio BOOST_TEST(queueDescriptor.m_Inputs.size() == 1); BOOST_TEST(queueDescriptor.m_Outputs.size() == 1); - BOOST_TEST((queueDescriptor.m_Weight->GetTensorInfo() == TensorInfo({1, 4, 4, 2}, DataType))); + BOOST_TEST((queueDescriptor.m_Weight->GetTensorInfo() == TensorInfo({1, 2, 4, 4}, DataType))); // Returns so we can do extra, backend-specific tests. 
return workload; diff --git a/src/armnn/test/OptimizerTests.cpp b/src/armnn/test/OptimizerTests.cpp index 29d1702c64..80addb4bfd 100644 --- a/src/armnn/test/OptimizerTests.cpp +++ b/src/armnn/test/OptimizerTests.cpp @@ -898,7 +898,7 @@ BOOST_AUTO_TEST_CASE(DepthwiseConv2dValidateTensorShapesFromInputsNhwc) { Graph graph; const unsigned int inputShape[] = { 1, 3, 3, 2 }; - const unsigned int weightsShape[] = { 1, 3, 3, 2 }; + const unsigned int weightsShape[] = { 1, 2, 3, 3 }; const unsigned int outputShape[] = { 1, 1, 1, 2 }; CreateDepthwiseConvolution2dGraph(graph, inputShape, weightsShape, outputShape, DataLayout::NHWC); diff --git a/src/armnnTfLiteParser/TfLiteParser.cpp b/src/armnnTfLiteParser/TfLiteParser.cpp index 49bc73708f..3b50476ca2 100644 --- a/src/armnnTfLiteParser/TfLiteParser.cpp +++ b/src/armnnTfLiteParser/TfLiteParser.cpp @@ -401,7 +401,8 @@ template<typename T> std::pair<armnn::ConstTensor, std::unique_ptr<T[]>> CreateConstTensorImpl(TfLiteParser::BufferRawPtr bufferPtr, TfLiteParser::TensorRawPtr tensorPtr, - armnn::TensorInfo & tensorInfo) + armnn::TensorInfo& tensorInfo, + armnn::Optional<armnn::PermutationVector&> permutationVector) { BOOST_ASSERT_MSG(tensorPtr != nullptr, "tensorPtr is null"); BOOST_ASSERT_MSG(bufferPtr != nullptr, @@ -409,7 +410,20 @@ CreateConstTensorImpl(TfLiteParser::BufferRawPtr bufferPtr, boost::format("Buffer for buffer:%1% is null") % tensorPtr->buffer).c_str()); std::unique_ptr<T[]> data(new T[tensorInfo.GetNumElements()]); - ::memcpy(data.get(), bufferPtr->data.data(), tensorInfo.GetNumBytes()); + + if (permutationVector.has_value() && permutationVector.value().GetSize() > 0) + { + tensorInfo = armnnUtils::Permuted(tensorInfo, permutationVector.value()); + armnnUtils::Permute(tensorInfo.GetShape(), + permutationVector.value(), + reinterpret_cast<const T *>(bufferPtr->data.data()), + data.get()); + } + else + { + ::memcpy(data.get(), bufferPtr->data.data(), tensorInfo.GetNumBytes()); + } + return 
std::make_pair(ConstTensor(tensorInfo, data.get()), std::move(data)); } @@ -660,7 +674,9 @@ void TfLiteParser::ParseConv2D(size_t subgraphIndex, size_t operatorIndex) CalcPadding(inputHeight, filterHeight, desc.m_StrideY, desc.m_PadTop, desc.m_PadBottom, options->padding); CalcPadding(inputWidth, filterWidth, desc.m_StrideX, desc.m_PadLeft, desc.m_PadRight, options->padding); - auto filterTensorAndData = CreateConstTensor(inputs[1], filterTensorInfo); + auto filterTensorAndData = CreateConstTensor(inputs[1], + filterTensorInfo, + armnn::Optional<armnn::PermutationVector&>()); armnn::IConnectableLayer* layer; auto layerName = boost::str(boost::format("Conv2D:%1%:%2%") % subgraphIndex % operatorIndex); @@ -669,7 +685,9 @@ void TfLiteParser::ParseConv2D(size_t subgraphIndex, size_t operatorIndex) { desc.m_BiasEnabled = true; armnn::TensorInfo biasTensorInfo = ToTensorInfo(inputs[2]); - auto biasTensorAndData = CreateConstTensor(inputs[2], biasTensorInfo); + auto biasTensorAndData = CreateConstTensor(inputs[2], + biasTensorInfo, + armnn::Optional<armnn::PermutationVector&>()); layer = m_Network->AddConvolution2dLayer(desc, filterTensorAndData.first, biasTensorAndData.first, @@ -723,17 +741,27 @@ void TfLiteParser::ParseDepthwiseConv2D(size_t subgraphIndex, size_t operatorInd armnn::TensorInfo inputTensorInfo = ToTensorInfo(inputs[0]); armnn::TensorInfo filterTensorInfo = ToTensorInfo(inputs[1]); - // assuming input is NHWC + // Assuming input is NHWC unsigned int inputHeight = inputTensorInfo.GetShape()[1]; unsigned int inputWidth = inputTensorInfo.GetShape()[2]; - // assuming the filter is OHWI : Output, H, W, Input + + // TensorflowLite weights come in the format [1, H, W, I * M] unsigned int filterHeight = filterTensorInfo.GetShape()[1]; unsigned int filterWidth = filterTensorInfo.GetShape()[2]; + // Reshape weights as [ H, W, I, M ] + filterTensorInfo.SetShape({ filterHeight, + filterWidth, + inputTensorInfo.GetShape()[3], + filterTensorInfo.GetShape()[3] / 
inputTensorInfo.GetShape()[3] }); + + // Mappings from TensorflowLite filter tensors to the ArmNN filter tensors (ArmNN weights have to be [M, I, H, W]) + PermutationVector permutationVector{ 2, 3, 1, 0 }; // [H, W, I, M] -> [M, I, H, W] + CalcPadding(inputHeight, filterHeight, desc.m_StrideY, desc.m_PadTop, desc.m_PadBottom, options->padding); CalcPadding(inputWidth, filterWidth, desc.m_StrideX, desc.m_PadLeft, desc.m_PadRight, options->padding); - auto filterTensorAndData = CreateConstTensor(inputs[1], filterTensorInfo); + auto filterTensorAndData = CreateConstTensor(inputs[1], filterTensorInfo, permutationVector); armnn::IConnectableLayer* layer; auto layerName = boost::str(boost::format("DepthwiseConv2D:%1%:%2%") % subgraphIndex % operatorIndex); @@ -741,7 +769,9 @@ void TfLiteParser::ParseDepthwiseConv2D(size_t subgraphIndex, size_t operatorInd { desc.m_BiasEnabled = true; TensorInfo biasTensorInfo = ToTensorInfo(inputs[2]); - auto biasTensorAndData = CreateConstTensor(inputs[2], biasTensorInfo); + auto biasTensorAndData = CreateConstTensor(inputs[2], + biasTensorInfo, + armnn::Optional<armnn::PermutationVector&>()); layer = m_Network->AddDepthwiseConvolution2dLayer(desc, filterTensorAndData.first, biasTensorAndData.first, @@ -1228,7 +1258,9 @@ void TfLiteParser::ParseFullyConnected(size_t subgraphIndex, size_t operatorInde % CHECK_LOCATION().AsString())); } - auto filterTensorAndData = CreateConstTensor(inputs[1], filterTensorInfo); + auto filterTensorAndData = CreateConstTensor(inputs[1], + filterTensorInfo, + armnn::Optional<armnn::PermutationVector&>()); armnn::IConnectableLayer* layer; auto layerName = boost::str(boost::format("FullyConnected:%1%:%2%") % subgraphIndex % operatorIndex); @@ -1236,7 +1268,9 @@ void TfLiteParser::ParseFullyConnected(size_t subgraphIndex, size_t operatorInde { desc.m_BiasEnabled = true; TensorInfo biasTensorInfo = ToTensorInfo(inputs[2]); - auto biasTensorAndData = CreateConstTensor(inputs[2], biasTensorInfo); + auto 
biasTensorAndData = CreateConstTensor(inputs[2], + biasTensorInfo, + armnn::Optional<armnn::PermutationVector&>()); layer = m_Network->AddFullyConnectedLayer(desc, filterTensorAndData.first, biasTensorAndData.first, @@ -1561,9 +1595,25 @@ TfLiteParser::BufferRawPtr TfLiteParser::GetBuffer(const ModelPtr& model, size_t return model->buffers[bufferIndex].get(); } +template<typename T> +std::pair<armnn::ConstTensor, TfLiteParser::SupportedDataStorage> +TfLiteParser::CreateConstTensorAndStoreData(TfLiteParser::BufferRawPtr bufferPtr, + TfLiteParser::TensorRawPtr tensorPtr, + armnn::TensorInfo& tensorInfo, + armnn::Optional<armnn::PermutationVector&> permutationVector) +{ + auto constData = CreateConstTensorImpl<T>(bufferPtr, + tensorPtr, + tensorInfo, + permutationVector); + TfLiteParser::SupportedDataStorage storage(std::move(constData.second)); + return std::make_pair(constData.first, std::move(storage)); +} + std::pair<armnn::ConstTensor, TfLiteParser::SupportedDataStorage> TfLiteParser::CreateConstTensor(TensorRawPtr tensorPtr, - armnn::TensorInfo & tensorInfo) + armnn::TensorInfo& tensorInfo, + armnn::Optional<armnn::PermutationVector&> permutationVector) { CHECK_TENSOR_PTR(tensorPtr); auto bufferPtr = GetBuffer(m_Model, tensorPtr->buffer); @@ -1572,29 +1622,20 @@ TfLiteParser::CreateConstTensor(TensorRawPtr tensorPtr, switch (tensorInfo.GetDataType()) { case armnn::DataType::Float32: - { - auto constData = CreateConstTensorImpl<float>(bufferPtr, - tensorPtr, - tensorInfo); - SupportedDataStorage storage(std::move(constData.second)); - return std::make_pair(constData.first, std::move(storage)); - } + return CreateConstTensorAndStoreData<float>(bufferPtr, + tensorPtr, + tensorInfo, + permutationVector); case armnn::DataType::QuantisedAsymm8: - { - auto constData = CreateConstTensorImpl<uint8_t>(bufferPtr, - tensorPtr, - tensorInfo); - SupportedDataStorage storage(std::move(constData.second)); - return std::make_pair(constData.first, std::move(storage)); - } + 
return CreateConstTensorAndStoreData<uint8_t>(bufferPtr, + tensorPtr, + tensorInfo, + permutationVector); case armnn::DataType::Signed32: - { - auto constData = CreateConstTensorImpl<int32_t>(bufferPtr, - tensorPtr, - tensorInfo); - SupportedDataStorage storage(std::move(constData.second)); - return std::make_pair(constData.first, std::move(storage)); - } + return CreateConstTensorAndStoreData<int32_t>(bufferPtr, + tensorPtr, + tensorInfo, + permutationVector); default: { std::stringstream errString; diff --git a/src/armnnTfLiteParser/TfLiteParser.hpp b/src/armnnTfLiteParser/TfLiteParser.hpp index e7a7469f1f..9195728ad9 100644 --- a/src/armnnTfLiteParser/TfLiteParser.hpp +++ b/src/armnnTfLiteParser/TfLiteParser.hpp @@ -129,17 +129,31 @@ private: // We don't care about the content, and we want a single datatype to simplify the code. struct SupportedDataStorage { - std::unique_ptr<float[]> m_FloatData; - std::unique_ptr<uint8_t[]> m_Uint8Data; - std::unique_ptr<int32_t[]> m_Int32Data; - - SupportedDataStorage(std::unique_ptr<float[]> && data); - SupportedDataStorage(std::unique_ptr<uint8_t[]> && data); - SupportedDataStorage(std::unique_ptr<int32_t[]> && data); + public: + // Convenience constructors + SupportedDataStorage(std::unique_ptr<float[]>&& data); + SupportedDataStorage(std::unique_ptr<uint8_t[]>&& data); + SupportedDataStorage(std::unique_ptr<int32_t[]>&& data); + + private: + // Pointers to the data buffers + std::unique_ptr<float[]> m_FloatData; + std::unique_ptr<uint8_t[]> m_Uint8Data; + std::unique_ptr<int32_t[]> m_Int32Data; }; - std::pair<armnn::ConstTensor, SupportedDataStorage> CreateConstTensor(TensorRawPtr tensorPtr, - armnn::TensorInfo & tensorInfo); + + template<typename T> + std::pair<armnn::ConstTensor, TfLiteParser::SupportedDataStorage> + CreateConstTensorAndStoreData(TfLiteParser::BufferRawPtr bufferPtr, + TfLiteParser::TensorRawPtr tensorPtr, + armnn::TensorInfo& tensorInfo, + armnn::Optional<armnn::PermutationVector&> permutationVector); 
+ + std::pair<armnn::ConstTensor, SupportedDataStorage> + CreateConstTensor(TensorRawPtr tensorPtr, + armnn::TensorInfo& tensorInfo, + armnn::Optional<armnn::PermutationVector&> permutationVector); /// The network we're building. Gets cleared after it is passed to the user armnn::INetworkPtr m_Network; diff --git a/src/armnnTfParser/TfParser.cpp b/src/armnnTfParser/TfParser.cpp index 7f04757b75..7a213c0909 100644 --- a/src/armnnTfParser/TfParser.cpp +++ b/src/armnnTfParser/TfParser.cpp @@ -1338,13 +1338,9 @@ ParsedTfOperationPtr TfParser::ParseDepthwiseConv2D(const tensorflow::NodeDef& n uint32_t inputWidth = inputTensorInfo.GetShape()[dataLayoutIndexed.GetWidthIndex()]; // Mappings from TensorFlow filter tensors to the ArmNN filter tensors. - // Tensorflow weights are [H, W, In, Out]. - // ArmNN weights have to be [Out, H, W, In] when the data layout is NHWC, - // and [Out, In, H, W] when the data layout is NCHW. - PermutationVector permutationVector = - dataLayout == DataLayout::NHWC ? - std::initializer_list<unsigned int>{ 1, 2, 3, 0 } : // NHWC: [H, W, In, Out] -> [Out, H, W, In] - std::initializer_list<unsigned int>{ 2, 3, 1, 0 }; // NCHW: [H, W, In, Out] -> [Out, In, H, W] + // Tensorflow weights come in the format [H, W, I, M]. + // ArmNN weights have to be [M, I, H, W]. + PermutationVector permutationVector{ 2, 3, 1, 0 }; // [H, W, I, M] -> [M, I, H, W] // Swizzle the tensor using the given permutation vector. const TensorInfo& weightTensorInfo = weightNode->GetTensorInfo(); @@ -1358,8 +1354,8 @@ ParsedTfOperationPtr TfParser::ParseDepthwiseConv2D(const tensorflow::NodeDef& n // Create a weight tensor with the newly swizzled data. 
ConstTensor weightTensor(weightTensorSwizzledInfo, weightTensorSwizzledData); - uint32_t weightHeight = weightTensor.GetShape()[dataLayoutIndexed.GetHeightIndex()]; - uint32_t weightWidth = weightTensor.GetShape()[dataLayoutIndexed.GetWidthIndex()]; + uint32_t weightHeight = weightTensor.GetShape()[2]; + uint32_t weightWidth = weightTensor.GetShape()[3]; bool padding = false; TensorInfo outputInfo; @@ -1393,7 +1389,7 @@ ParsedTfOperationPtr TfParser::ParseDepthwiseConv2D(const tensorflow::NodeDef& n outputInfo = TensorInfo({ inputTensorInfo.GetShape()[0], outputHeight, outputWidth, - weightTensor.GetShape()[0] * weightTensor.GetShape()[3]}, + weightTensor.GetShape()[0] * weightTensor.GetShape()[1]}, DataType::Float32); break; case DataLayout::NCHW: diff --git a/src/armnnUtils/ParserPrototxtFixture.hpp b/src/armnnUtils/ParserPrototxtFixture.hpp index fa21aba479..acb8f82c4d 100644 --- a/src/armnnUtils/ParserPrototxtFixture.hpp +++ b/src/armnnUtils/ParserPrototxtFixture.hpp @@ -14,8 +14,6 @@ #include <Network.hpp> #include <VerificationHelpers.hpp> -#include <backendsCommon/BackendRegistry.hpp> - #include <boost/format.hpp> #include <string> diff --git a/src/armnnUtils/Permute.cpp b/src/armnnUtils/Permute.cpp index 61f4e0e644..6deff90168 100644 --- a/src/armnnUtils/Permute.cpp +++ b/src/armnnUtils/Permute.cpp @@ -9,6 +9,7 @@ #include <armnn/Tensor.hpp> #include <cassert> +#include <cstring> namespace { @@ -46,10 +47,29 @@ public: Unroll(0, srcData, dstData, srcEnd, dstEnd); } + void Unroll(const void* srcData, void* dstData, size_t dataTypeSize) + { + assert(srcData); + assert(dstData); + assert(dataTypeSize > 0); + + const unsigned char* srcDataPtr = reinterpret_cast<const unsigned char*>(srcData); + unsigned char* dstDataPtr = reinterpret_cast<unsigned char*>(dstData); + + const unsigned char* const srcEndPtr = srcDataPtr + m_DstShape.GetNumElements() * dataTypeSize; + unsigned char* const dstEndPtr = dstDataPtr + m_DstShape.GetNumElements() * dataTypeSize; + + 
Unroll(0, srcDataPtr, dstDataPtr, srcEndPtr, dstEndPtr, dataTypeSize); + } + private: template <typename T> void Unroll(size_type dimension, const T* srcData, T* dstData, const T* srcEnd, T* dstEnd) { + assert(srcData); + assert(dstData); + assert(srcEnd); + assert(dstEnd); assert(srcData < srcEnd); assert(dstData < dstEnd); @@ -69,6 +89,35 @@ private: } } + void Unroll(size_type dimension, + const unsigned char* srcData, unsigned char* dstData, + const unsigned char* srcEnd, unsigned char* dstEnd, + size_t dataTypeSize) + { + assert(srcData); + assert(dstData); + assert(srcEnd); + assert(dstEnd); + assert(srcData < srcEnd); + assert(dstData < dstEnd); + assert(dataTypeSize > 0); + + if (dimension >= m_DstShape.GetNumDimensions()) + { + ::memcpy(dstData, srcData, dataTypeSize); + } + else + { + for (size_type i = 0; i < m_DstShape[dimension]; i++) + { + Unroll(dimension + 1, srcData, dstData, srcEnd, dstEnd, dataTypeSize); + + srcData += m_SrcStrides[dimension] * dataTypeSize; + dstData += m_DstStrides[dimension] * dataTypeSize; + } + } + } + armnn::TensorShape m_DstShape; std::array<size_type, armnn::MaxNumOfTensorDimensions> m_SrcStrides; std::array<size_type, armnn::MaxNumOfTensorDimensions> m_DstStrides; @@ -102,6 +151,12 @@ armnn::TensorInfo Permuted(const armnn::TensorInfo& info, const armnn::Permutati return outInfo; } +void Permute(const armnn::TensorShape& dstShape, const armnn::PermutationVector& mappings, + const void* src, void* dst, size_t dataTypeSize) +{ + PermuteLoop(dstShape, mappings).Unroll(src, dst, dataTypeSize); +} + template <typename T> void Permute(const armnn::TensorShape& dstShape, const armnn::PermutationVector& mappings, const T* src, T* dst) { @@ -117,5 +172,7 @@ template void Permute(const armnn::TensorShape& dstShape, const armnn::Permutati const uint8_t* src, uint8_t* dst); template void Permute(const armnn::TensorShape& dstShape, const armnn::PermutationVector& mappings, const int32_t* src, int32_t* dst); +template void 
Permute(const armnn::TensorShape& dstShape, const armnn::PermutationVector& mappings, + const bool* src, bool* dst); } // namespace armnnUtils diff --git a/src/armnnUtils/Permute.hpp b/src/armnnUtils/Permute.hpp index 700ddc72ce..4e4319822b 100644 --- a/src/armnnUtils/Permute.hpp +++ b/src/armnnUtils/Permute.hpp @@ -14,7 +14,10 @@ armnn::TensorShape Permuted(const armnn::TensorShape& srcShape, const armnn::Per armnn::TensorInfo Permuted(const armnn::TensorInfo& info, const armnn::PermutationVector& mappings); +void Permute(const armnn::TensorShape& dstShape, const armnn::PermutationVector& mappings, + const void* src, void* dst, size_t dataTypeSize); + template <typename T> void Permute(const armnn::TensorShape& dstShape, const armnn::PermutationVector& mappings, const T* src, T* dst); -} // namespace armnnUtils
\ No newline at end of file +} // namespace armnnUtils diff --git a/src/backends/aclCommon/ArmComputeTensorUtils.cpp b/src/backends/aclCommon/ArmComputeTensorUtils.cpp index a2d7d8c797..32af42f7e1 100644 --- a/src/backends/aclCommon/ArmComputeTensorUtils.cpp +++ b/src/backends/aclCommon/ArmComputeTensorUtils.cpp @@ -109,19 +109,6 @@ arm_compute::TensorInfo BuildArmComputeTensorInfo(const armnn::TensorInfo& tenso return arm_compute::TensorInfo(aclTensorShape, 1, aclDataType, aclQuantizationInfo); } -arm_compute::DataLayout ConvertDataLayout(armnn::DataLayout dataLayout) -{ - switch(dataLayout) - { - case armnn::DataLayout::NHWC : return arm_compute::DataLayout::NHWC; - - case armnn::DataLayout::NCHW : return arm_compute::DataLayout::NCHW; - - default: throw InvalidArgumentException("Unknown armnn::DataLayout: [" + - std::to_string(static_cast<int>(dataLayout)) + "]"); - } -} - arm_compute::TensorInfo BuildArmComputeTensorInfo(const armnn::TensorInfo& tensorInfo, armnn::DataLayout dataLayout) { @@ -136,6 +123,19 @@ arm_compute::TensorInfo BuildArmComputeTensorInfo(const armnn::TensorInfo& tenso return clTensorInfo; } +arm_compute::DataLayout ConvertDataLayout(armnn::DataLayout dataLayout) +{ + switch(dataLayout) + { + case armnn::DataLayout::NHWC : return arm_compute::DataLayout::NHWC; + + case armnn::DataLayout::NCHW : return arm_compute::DataLayout::NCHW; + + default: throw InvalidArgumentException("Unknown armnn::DataLayout: [" + + std::to_string(static_cast<int>(dataLayout)) + "]"); + } +} + arm_compute::PoolingLayerInfo BuildArmComputePoolingLayerInfo(const Pooling2dDescriptor& descriptor) { using arm_compute::PoolingType; diff --git a/src/backends/aclCommon/ArmComputeTensorUtils.hpp b/src/backends/aclCommon/ArmComputeTensorUtils.hpp index fbd850c687..fa455b746b 100644 --- a/src/backends/aclCommon/ArmComputeTensorUtils.hpp +++ b/src/backends/aclCommon/ArmComputeTensorUtils.hpp @@ -36,16 +36,16 @@ arm_compute::TensorShape BuildArmComputeTensorShape(const 
armnn::TensorShape& te /// armnn::ITensorInfo. arm_compute::TensorInfo BuildArmComputeTensorInfo(const armnn::TensorInfo& tensorInfo); -/// Utility function used to convert armnn::DataLayout to arm_compute::DataLayout -/// armnn::DataLayout. -arm_compute::DataLayout ConvertDataLayout(armnn::DataLayout dataLayout); - /// Utility function used to setup an arm_compute::ITensorInfo object whose dimensions are based on the given /// armnn::ITensorInfo. /// armnn::DataLayout. arm_compute::TensorInfo BuildArmComputeTensorInfo(const armnn::TensorInfo& tensorInfo, armnn::DataLayout dataLayout); +/// Utility function used to convert armnn::DataLayout to arm_compute::DataLayout +/// armnn::DataLayout. +arm_compute::DataLayout ConvertDataLayout(armnn::DataLayout dataLayout); + /// Utility function used to setup an arm_compute::PoolingLayerInfo object from an armnn::Pooling2dDescriptor. arm_compute::PoolingLayerInfo BuildArmComputePoolingLayerInfo(const Pooling2dDescriptor& descriptor); diff --git a/src/backends/backendsCommon/CMakeLists.txt b/src/backends/backendsCommon/CMakeLists.txt index f29563093c..b120f51184 100644 --- a/src/backends/backendsCommon/CMakeLists.txt +++ b/src/backends/backendsCommon/CMakeLists.txt @@ -27,6 +27,7 @@ list(APPEND armnnBackendsCommon_sources WorkloadFactory.hpp Workload.hpp WorkloadInfo.hpp + WorkloadUtils.cpp WorkloadUtils.hpp ) diff --git a/src/backends/backendsCommon/CpuTensorHandle.cpp b/src/backends/backendsCommon/CpuTensorHandle.cpp index fe0c634e7c..9dcd3f38df 100644 --- a/src/backends/backendsCommon/CpuTensorHandle.cpp +++ b/src/backends/backendsCommon/CpuTensorHandle.cpp @@ -18,7 +18,7 @@ ConstCpuTensorHandle::ConstCpuTensorHandle(const TensorInfo& tensorInfo) } template <> -const void* ConstCpuTensorHandle::GetConstTensor() const +const void* ConstCpuTensorHandle::GetConstTensor<void>() const { return m_Memory; } @@ -30,7 +30,7 @@ CpuTensorHandle::CpuTensorHandle(const TensorInfo& tensorInfo) } template <> -void* 
CpuTensorHandle::GetTensor() const +void* CpuTensorHandle::GetTensor<void>() const { return m_MutableMemory; } diff --git a/src/backends/backendsCommon/CpuTensorHandle.hpp b/src/backends/backendsCommon/CpuTensorHandle.hpp index ae13d6c439..b88a0d385b 100644 --- a/src/backends/backendsCommon/CpuTensorHandle.hpp +++ b/src/backends/backendsCommon/CpuTensorHandle.hpp @@ -72,6 +72,9 @@ private: const void* m_Memory; }; +template<> +const void* ConstCpuTensorHandle::GetConstTensor<void>() const; + // Abstract specialization of ConstCpuTensorHandle that allows write access to the same data. class CpuTensorHandle : public ConstCpuTensorHandle { @@ -99,6 +102,9 @@ private: void* m_MutableMemory; }; +template <> +void* CpuTensorHandle::GetTensor<void>() const; + // A CpuTensorHandle that owns the wrapped memory region. class ScopedCpuTensorHandle : public CpuTensorHandle { diff --git a/src/backends/backendsCommon/WorkloadData.cpp b/src/backends/backendsCommon/WorkloadData.cpp index 8847b4efbf..1dac498c11 100644 --- a/src/backends/backendsCommon/WorkloadData.cpp +++ b/src/backends/backendsCommon/WorkloadData.cpp @@ -593,9 +593,10 @@ void DepthwiseConvolution2dQueueDescriptor::Validate(const WorkloadInfo& workloa const unsigned int channelIndex = (m_Parameters.m_DataLayout == DataLayout::NCHW) ? 1 : 3; - //inputChannels * channelMultiplier should be equal to outputChannels. + // Expected weight shape: [ M, I, H, W ] - This shape does NOT depend on the data layout + // inputChannels * channelMultiplier should be equal to outputChannels. 
const unsigned int numWeightChannelMultiplier = m_Weight->GetTensorInfo().GetShape()[0]; - const unsigned int numWeightInputChannels = m_Weight->GetTensorInfo().GetShape()[channelIndex]; + const unsigned int numWeightInputChannels = m_Weight->GetTensorInfo().GetShape()[1]; const unsigned int numWeightOutputChannels = workloadInfo.m_OutputTensorInfos[0].GetShape()[channelIndex]; if (numWeightChannelMultiplier * numWeightInputChannels != numWeightOutputChannels) { diff --git a/src/backends/backendsCommon/WorkloadUtils.cpp b/src/backends/backendsCommon/WorkloadUtils.cpp new file mode 100644 index 0000000000..fa387a7a0b --- /dev/null +++ b/src/backends/backendsCommon/WorkloadUtils.cpp @@ -0,0 +1,111 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "WorkloadUtils.hpp" + +namespace armnn +{ + +armnn::ConstTensor PermuteTensor(const ConstCpuTensorHandle* tensor, + const PermutationVector& permutationVector, + void* permuteBuffer) +{ + BOOST_ASSERT_MSG(tensor, "Invalid input tensor"); + BOOST_ASSERT_MSG(permuteBuffer, "Invalid permute buffer"); + + TensorInfo tensorInfo = tensor->GetTensorInfo(); + + if (permutationVector.GetSize() > 0) + { + tensorInfo = armnnUtils::Permuted(tensorInfo, permutationVector); + armnnUtils::Permute(tensorInfo.GetShape(), permutationVector, + tensor->GetConstTensor<void>(), permuteBuffer, + GetDataTypeSize(tensorInfo.GetDataType())); + } + else + { + ::memcpy(permuteBuffer, tensor->GetConstTensor<void>(), tensorInfo.GetNumBytes()); + } + + return ConstTensor(tensorInfo, permuteBuffer); +} + +void ReshapeWeightsForAcl(TensorInfo& weightInfo, DataLayout dataLayout) +{ + // Reshape the weights in-place + const TensorShape& weightShape = weightInfo.GetShape(); + switch (dataLayout) + { + case DataLayout::NHWC: + // The data layout is NHWC, reshape from [ H, W, I, M ] to [ 1, H, W, I * M ] + weightInfo.SetShape({ 1, + weightShape[0], + weightShape[1], + weightShape[2] * weightShape[3] }); + 
break; + case DataLayout::NCHW: + default: + // The data layout is NCHW, reshape from [ M, I, H, W ] to [ 1, I * M, H, W ] + weightInfo.SetShape({ 1, + weightShape[0] * weightShape[1], + weightShape[2], + weightShape[3] }); + break; + } +} + +TensorInfo ConvertWeightTensorInfoFromArmnnToAcl(const TensorInfo& weightInfo, DataLayout dataLayout) +{ + // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either + // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library + + // 1. Permute the weights if necessary + // If the data layout is NCHW no permutation is necessary, as a reshape to [ 1, I * M, H, W ] can be better done + // starting from the current shape of [ M, I, H, W ] + TensorInfo weightPermutedInfo(weightInfo); + if (dataLayout == DataLayout::NHWC) + { + // The data layout is NHWC, then permute the weights from [ M, I, H, W ] to [ H, W, I, M ] + PermutationVector permutationVector{ 3, 2, 0, 1 }; + weightPermutedInfo = armnnUtils::Permuted(weightInfo, permutationVector); + } + + // 2. Reshape the weights + ReshapeWeightsForAcl(weightPermutedInfo, dataLayout); + + // 3. Return the permuted and reshaped weight info + return weightPermutedInfo; +} + +armnn::ConstTensor ConvertWeightTensorFromArmnnToAcl(const ConstCpuTensorHandle* weightTensor, + DataLayout dataLayout, + void* permuteBuffer) +{ + BOOST_ASSERT_MSG(weightTensor, "Invalid input tensor"); + BOOST_ASSERT_MSG(permuteBuffer, "Invalid permute buffer"); + + // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either + // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library + + // 1. 
Permute the weights if necessary + // If the data layout is NCHW no permutation is necessary, as a reshape to [ 1, I * M, H, W ] can be better done + // starting from the current shape of [ M, I, H, W ] + // If no permutation is necessary, leave the permutation vector empty + PermutationVector permutationVector{}; + if (dataLayout == DataLayout::NHWC) + { + // The data layout is NHWC, then permute the weights from [ M, I, H, W ] to [ H, W, I, M ] + permutationVector = { 3, 2, 0, 1 }; + } + ConstTensor weightPermuted = PermuteTensor(weightTensor, permutationVector, permuteBuffer); + + // 2. Reshape the weights + ReshapeWeightsForAcl(weightPermuted.GetInfo(), dataLayout); + + // 3. Return the permuted tensor; it refers to permuteBuffer, so the caller must keep that buffer alive + return weightPermuted; +} + +} // namespace armnn diff --git a/src/backends/backendsCommon/WorkloadUtils.hpp b/src/backends/backendsCommon/WorkloadUtils.hpp index 2b07b2b0d2..a1a8d2a475 100644 --- a/src/backends/backendsCommon/WorkloadUtils.hpp +++ b/src/backends/backendsCommon/WorkloadUtils.hpp @@ -6,35 +6,42 @@ #pragma once #include "ITensorHandle.hpp" +#include "CpuTensorHandle.hpp" #include <armnn/Tensor.hpp> +#include <Permute.hpp> +#include <Profiling.hpp> +#include <Half.hpp> + #include <boost/cast.hpp> namespace armnn { namespace { + template<typename ArrayType, typename Arg> void AssignValues(unsigned int num, unsigned int& idx, const ArrayType& array, Arg& arg) { - if (idx >= num) - { - return; - } + if (idx >= num) + { + return; + } - arg = array[(num - 1) - idx]; - idx++; -}; + arg = array[(num - 1) - idx]; + idx++; +} template<typename T, typename ArrayType, typename ...Args> void AssignValues(unsigned int num, unsigned int idx, const ArrayType& array, T& assignee, Args& ... 
args) { - AssignValues(num, idx, array, assignee); + AssignValues(num, idx, array, assignee); - AssignValues(num, idx, array, args...); + AssignValues(num, idx, array, args...); } -} // namespace + +} // anonymous namespace template<typename CopyFunc> void CopyTensorContentsGeneric(const ITensorHandle* srcTensor, ITensorHandle* dstTensor, CopyFunc copy) @@ -142,4 +149,16 @@ void GatherTensorHandlePairs(const DescriptorType& descriptor, } } -} //namespace armnn
\ No newline at end of file +armnn::ConstTensor PermuteTensor(const ConstCpuTensorHandle* tensor, + const PermutationVector& permutationVector, + void* permuteBuffer); + +void ReshapeWeightsForAcl(TensorInfo& weightInfo, DataLayout dataLayout); + +TensorInfo ConvertWeightTensorInfoFromArmnnToAcl(const TensorInfo& weightInfo, DataLayout dataLayout); + +armnn::ConstTensor ConvertWeightTensorFromArmnnToAcl(const ConstCpuTensorHandle* weightTensor, + DataLayout dataLayout, + void* permuteBuffer); + +} //namespace armnn diff --git a/src/backends/backendsCommon/common.mk b/src/backends/backendsCommon/common.mk index a66b5c4581..4e79bfcd7e 100644 --- a/src/backends/backendsCommon/common.mk +++ b/src/backends/backendsCommon/common.mk @@ -14,7 +14,8 @@ COMMON_SOURCES := \ MemCopyWorkload.cpp \ OutputHandler.cpp \ WorkloadData.cpp \ - WorkloadFactory.cpp + WorkloadFactory.cpp \ + WorkloadUtils.cpp # COMMON_TEST_SOURCES contains the list of files to be included # in the Android unit test build (armnn-tests) and it is picked diff --git a/src/backends/backendsCommon/test/Conv2dTestImpl.hpp b/src/backends/backendsCommon/test/Conv2dTestImpl.hpp index 37fa0f63d6..2ff66b08d5 100755 --- a/src/backends/backendsCommon/test/Conv2dTestImpl.hpp +++ b/src/backends/backendsCommon/test/Conv2dTestImpl.hpp @@ -327,7 +327,7 @@ LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestImpl( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const boost::multi_array<T, 4>& input, - const boost::multi_array<T, 4>& originalKernel, + const boost::multi_array<T, 4>& kernel, const boost::multi_array<B, 1>& bias, const boost::multi_array<T, 4>& outputExpected, float qScale, @@ -344,10 +344,10 @@ LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestImpl( unsigned int inputChannels = boost::numeric_cast<unsigned int>(input.shape()[1]); unsigned int inputHeight = boost::numeric_cast<unsigned int>(input.shape()[2]); unsigned int inputWidth 
= boost::numeric_cast<unsigned int>(input.shape()[3]); - unsigned int kernelChanMul = boost::numeric_cast<unsigned int>(originalKernel.shape()[0]); - unsigned int kernelChannels = boost::numeric_cast<unsigned int>(originalKernel.shape()[1]); - unsigned int kernelHeight = boost::numeric_cast<unsigned int>(originalKernel.shape()[2]); - unsigned int kernelWidth = boost::numeric_cast<unsigned int>(originalKernel.shape()[3]); + unsigned int kernelChanMul = boost::numeric_cast<unsigned int>(kernel.shape()[0]); + unsigned int kernelChannels = boost::numeric_cast<unsigned int>(kernel.shape()[1]); + unsigned int kernelHeight = boost::numeric_cast<unsigned int>(kernel.shape()[2]); + unsigned int kernelWidth = boost::numeric_cast<unsigned int>(kernel.shape()[3]); unsigned int outputNum = boost::numeric_cast<unsigned int>(outputExpected.shape()[0]); unsigned int outputChannels = boost::numeric_cast<unsigned int>(outputExpected.shape()[1]); unsigned int outputHeight = boost::numeric_cast<unsigned int>(outputExpected.shape()[2]); @@ -362,8 +362,7 @@ LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestImpl( armnnUtils::GetTensorInfo<T>(inputNum, inputChannels, inputHeight, inputWidth, layout); armnn::TensorInfo outputTensorInfo = armnnUtils::GetTensorInfo<T>(outputNum, outputChannels, outputHeight, outputWidth, layout); - armnn::TensorInfo kernelDesc = - armnnUtils::GetTensorInfo<T>(kernelChanMul, kernelChannels, kernelHeight, kernelWidth, layout); + armnn::TensorInfo kernelDesc({kernelChanMul, kernelChannels, kernelHeight, kernelWidth}, armnn::GetDataType<T>()); armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, armnn::GetDataType<B>()); // Set quantization parameters if the requested type is a quantized type. 
@@ -423,13 +422,6 @@ LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestImpl( armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc); - // Permute the kernel if necessary - boost::multi_array<T, 4> kernel = boost::multi_array<T, 4>(originalKernel); - if (layout == armnn::DataLayout::NHWC) - { - armnnUtils::Permute(kernelDesc.GetShape(), NCHWToNHWC, originalKernel.data(), kernel.data()); - } - AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]); armnn::ScopedCpuTensorHandle biasTensor(biasDesc); @@ -484,6 +476,7 @@ LayerTestResult<T, 4> DepthwiseConvolution2dDepthMul1TestImpl( unsigned int kernelHeight = 3; unsigned int kernelWidth = 3; unsigned int kernelChannels = inputChannels; + unsigned int kernelDepthMultiplier = 1; unsigned int outputHeight = 1; unsigned int outputWidth = 1; @@ -494,7 +487,8 @@ LayerTestResult<T, 4> DepthwiseConvolution2dDepthMul1TestImpl( armnnUtils::GetTensorInfo<T>(inputNum, inputChannels, inputHeight, inputWidth, layout); armnn::TensorInfo outputTensorInfo = armnnUtils::GetTensorInfo<T>(outputNum, outputChannels, outputHeight, outputWidth, layout); - armnn::TensorInfo kernelDesc = armnnUtils::GetTensorInfo<T>(1, outputChannels, kernelHeight, kernelWidth, layout); + armnn::TensorInfo kernelDesc({kernelDepthMultiplier, kernelChannels, kernelHeight, kernelWidth}, + armnn::GetDataType<T>()); armnn::TensorInfo biasDesc({ outputChannels }, armnn::GetDataType<B>()); // Set quantization parameters if the requested type is a quantized type. @@ -543,12 +537,6 @@ LayerTestResult<T, 4> DepthwiseConvolution2dDepthMul1TestImpl( 0.f, 0.f, 0.f, -1.f, 0.f, -1.f, })); - if (layout == armnn::DataLayout::NHWC) - { - std::vector<T> tmp(kernelData.size()); - armnnUtils::Permute(kernelDesc.GetShape(), NCHWToNHWC, kernelData.data(), tmp.data()); - kernelData = tmp; - } auto kernel = MakeTensor<T, 4>(kernelDesc, kernelData); // Manually calculated. 
@@ -642,8 +630,8 @@ LayerTestResult<T, 4> DepthwiseConvolution2dTestImpl( inputBatchSize, inputChannels, inputHeight, inputWidth, layout); armnn::TensorInfo outputTensorInfo = armnnUtils::GetTensorInfo<T>( outputBatchSize, outputChannels, outputHeight, outputWidth, layout); - armnn::TensorInfo kernelDesc = armnnUtils::GetTensorInfo<T>( - depthMultiplier, inputChannels, kernelHeight, kernelWidth, layout); + armnn::TensorInfo kernelDesc({depthMultiplier, inputChannels, kernelHeight, kernelWidth}, + armnn::GetDataType<T>()); armnn::TensorInfo biasDesc({outputChannels}, armnn::GetDataType<B>()); // Set quantization parameters if the requested type is a quantized type. @@ -692,7 +680,7 @@ LayerTestResult<T, 4> DepthwiseConvolution2dTestImpl( {0, 2, 1, -1})); auto bias = MakeTensor<B, 1>(biasDesc, biasV); - std::vector<T> originalKernelData = std::vector<T>( + std::vector<T> kernelData = std::vector<T>( QuantizedVector<T>(kernelDesc.GetQuantizationScale(), kernelDesc.GetQuantizationOffset(), { 1, 1, 1, 1, -1, 1, @@ -717,12 +705,8 @@ LayerTestResult<T, 4> DepthwiseConvolution2dTestImpl( 0, 1, 0, 0, 0, 0, 0, 0, 0 + })); - std::vector<T> kernelData = originalKernelData; - if (layout == armnn::DataLayout::NHWC) - { - armnnUtils::Permute(kernelDesc.GetShape(), NCHWToNHWC, originalKernelData.data(), kernelData.data()); - } auto kernel = MakeTensor<T, 4>(kernelDesc, kernelData); // Manually calculated. 
@@ -840,9 +824,9 @@ LayerTestResult<T, 4> DepthwiseConvolution2dNhwcTestImpl( unsigned int inputWidth = boost::numeric_cast<unsigned int>(input.shape()[2]); unsigned int kernelChanMul = boost::numeric_cast<unsigned int>(kernel.shape()[0]); - unsigned int kernelChannels = boost::numeric_cast<unsigned int>(kernel.shape()[3]); - unsigned int kernelHeight = boost::numeric_cast<unsigned int>(kernel.shape()[1]); - unsigned int kernelWidth = boost::numeric_cast<unsigned int>(kernel.shape()[2]); + unsigned int kernelChannels = boost::numeric_cast<unsigned int>(kernel.shape()[1]); + unsigned int kernelHeight = boost::numeric_cast<unsigned int>(kernel.shape()[2]); + unsigned int kernelWidth = boost::numeric_cast<unsigned int>(kernel.shape()[3]); unsigned int outputNum = boost::numeric_cast<unsigned int>(outputExpected.shape()[0]); unsigned int outputChannels = boost::numeric_cast<unsigned int>(outputExpected.shape()[3]); @@ -853,7 +837,7 @@ LayerTestResult<T, 4> DepthwiseConvolution2dNhwcTestImpl( armnn::TensorInfo inputTensorInfo({inputNum, inputHeight, inputWidth, inputChannels}, armnn::GetDataType<T>()); armnn::TensorInfo outputTensorInfo({outputNum, outputHeight, outputWidth, outputChannels}, armnn::GetDataType<T>()); - armnn::TensorInfo kernelDesc({kernelChanMul, kernelHeight, kernelWidth, kernelChannels}, armnn::GetDataType<T>()); + armnn::TensorInfo kernelDesc({kernelChanMul, kernelChannels, kernelHeight, kernelWidth}, armnn::GetDataType<T>()); armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, armnn::GetDataType<B>()); // Set quantization parameters if the requested type is a quantized type. 
@@ -1068,10 +1052,10 @@ LayerTestResult<T,4> CompareConvolution2dTestImpl( armnn::TensorInfo kernelDesc; armnn::TensorInfo biasDesc; - unsigned int inputShape[] = {inputNum, inputChannels, inputHeight, inputWidth}; - unsigned int outputShape[] = {outputNum, outputChannels, outputHeight, outputWidth}; - unsigned int kernelShape[] = {outputChannels, inputChannels, kernelHeight, kernelWidth}; - unsigned int biasShape[] = {outputChannels}; + unsigned int inputShape[] = {inputNum, inputChannels, inputHeight, inputWidth}; + unsigned int outputShape[] = {outputNum, outputChannels, outputHeight, outputWidth}; + unsigned int kernelShape[] = {outputChannels, inputChannels, kernelHeight, kernelWidth}; + unsigned int biasShape[] = {outputChannels}; inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::GetDataType<T>()); outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::GetDataType<T>()); @@ -1171,19 +1155,17 @@ LayerTestResult<T, 4> CompareDepthwiseConvolution2dTestImpl( std::vector<unsigned int> inputShape; std::vector<unsigned int> outputShape; - std::vector<unsigned int> kernelShape; - std::vector<unsigned int> biasShape= { outputChannels }; + std::vector<unsigned int> kernelShape{ channelMultiplier, inputChannels, kernelHeight, kernelWidth }; + std::vector<unsigned int> biasShape{ outputChannels }; switch (layout.GetDataLayout()) { case armnn::DataLayout::NCHW: inputShape = { inputNum, inputChannels, inputHeight, inputWidth }; outputShape = { outputNum, outputChannels, outputHeight, outputWidth }; - kernelShape = { channelMultiplier, inputChannels, kernelHeight, kernelWidth }; break; case armnn::DataLayout ::NHWC: inputShape = { inputNum, inputHeight, inputWidth, inputChannels }; outputShape = { outputNum, outputHeight, outputWidth, outputChannels }; - kernelShape = { channelMultiplier, kernelHeight, kernelWidth, inputChannels }; break; default: throw armnn::InvalidArgumentException("unknown data layout [" diff --git 
a/src/backends/backendsCommon/test/LayerTests.cpp b/src/backends/backendsCommon/test/LayerTests.cpp index ddf0d0b587..819b9d6e37 100755 --- a/src/backends/backendsCommon/test/LayerTests.cpp +++ b/src/backends/backendsCommon/test/LayerTests.cpp @@ -661,28 +661,18 @@ LayerTestResult<T, 4> DepthwiseConvolution2dNhwcTestCommon( 24, 49 }))); - armnn::TensorInfo kernelTensorInfo({ 1, 4, 4, 2}, armnn::GetDataType<T>()); + armnn::TensorInfo kernelTensorInfo({ 1, 2, 4, 4 }, armnn::GetDataType<T>()); auto kernel = MakeTensor<T, 4>(kernelTensorInfo, std::vector<T>( QuantizedVector<T>(kernelTensorInfo.GetQuantizationScale(), kernelTensorInfo.GetQuantizationOffset(), { - 32, 16, - 31, 15, - 30, 14, - 29, 13, - - 28, 12, - 27, 11, - 26, 10, - 25, 9, - - 24, 8, - 23, 7, - 22, 6, - 21, 5, - - 20, 4, - 19, 3, - 18, 2, - 17, 1 + 32, 31, 30, 29, + 28, 27, 26, 25, + 24, 23, 22, 21, + 20, 19, 18, 17, + + 16, 15, 14, 13, + 12, 11, 10, 9, + 8, 7, 6, 5, + 4, 3, 2, 1 }))); armnn::TensorInfo outputTensorInfo({ 1, 5, 5, 2}, armnn::GetDataType<T>()); diff --git a/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp b/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp index 9cadbf09ac..1745b8297a 100644 --- a/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp +++ b/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp @@ -12,6 +12,7 @@ #include <aclCommon/ArmComputeTensorUtils.hpp> #include <cl/ClTensorHandle.hpp> #include <backendsCommon/CpuTensorHandle.hpp> +#include <backendsCommon/WorkloadUtils.hpp> #include <arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h> @@ -21,14 +22,23 @@ namespace armnn using namespace armcomputetensorutils; arm_compute::Status ClDepthwiseConvolutionWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const DepthwiseConvolution2dDescriptor& descriptor, - const TensorInfo& weights, - const Optional<TensorInfo>& biases) + const TensorInfo& output, + const DepthwiseConvolution2dDescriptor& descriptor, 
+ const TensorInfo& weights, + const Optional<TensorInfo>& biases) { - const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout); + const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout); const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout); - const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout); + + // ArmNN's weight format is [ M, I, H, W ] + const unsigned int aclDepthMultiplier = weights.GetShape()[0]; + + // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either + // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library + TensorInfo weightsPermuted = ConvertWeightTensorInfoFromArmnnToAcl(weights, descriptor.m_DataLayout); + + // Convert the weights into the compute library format + const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout); arm_compute::TensorInfo aclBiasesInfo; arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr; @@ -42,7 +52,6 @@ arm_compute::Status ClDepthwiseConvolutionWorkloadValidate(const TensorInfo& inp } const arm_compute::PadStrideInfo aclPadStrideInfo = BuildArmComputePadStrideInfo(descriptor); - const unsigned int aclDepthMultiplier = weights.GetShape()[0]; return arm_compute::CLDepthwiseConvolutionLayer::validate(&aclInputInfo, &aclWeightsInfo, @@ -57,10 +66,18 @@ ClDepthwiseConvolutionWorkload::ClDepthwiseConvolutionWorkload( const WorkloadInfo& info) : BaseWorkload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info) { - auto& weightInfo = m_Data.m_Weight->GetTensorInfo(); + // Allocate a buffer for the swizzling of the weight tensor + std::unique_ptr<unsigned char[]> permuteBuffer(new unsigned char[m_Data.m_Weight->GetTensorInfo().GetNumBytes()]); + + // Convert the weight format 
from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either + // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library + ConstTensor weightPermuted = ConvertWeightTensorFromArmnnToAcl(m_Data.m_Weight, + m_Data.m_Parameters.m_DataLayout, + permuteBuffer.get()); + // Convert the weights into the compute library format m_KernelTensor = std::make_unique<arm_compute::CLTensor>(); - BuildArmComputeTensor(*m_KernelTensor, weightInfo, m_Data.m_Parameters.m_DataLayout); + BuildArmComputeTensor(*m_KernelTensor, weightPermuted.GetInfo(), m_Data.m_Parameters.m_DataLayout); if (m_Data.m_Parameters.m_BiasEnabled) { @@ -86,13 +103,14 @@ ClDepthwiseConvolutionWorkload::ClDepthwiseConvolutionWorkload( input.info()->set_data_layout(aclDataLayout); output.info()->set_data_layout(aclDataLayout); - const unsigned int depthMultiplier = weightInfo.GetShape()[0]; + // ArmNN's weight format is [ M, I, H, W ] + auto& weightInfo = m_Data.m_Weight->GetTensorInfo(); - const unsigned int widthIndex = (m_Data.m_Parameters.m_DataLayout == DataLayout::NCHW) ? 3 : 2; - const unsigned int heightIndex = (m_Data.m_Parameters.m_DataLayout == DataLayout::NCHW) ? 2 : 1; + // Get the depth multiplier + const unsigned int depthMultiplier = weightInfo.GetShape()[0]; - //Check for optimisation opportunities. - bool use3x3Optimisation = (weightInfo.GetShape()[widthIndex] == 3) && (weightInfo.GetShape()[heightIndex] == 3); + // Check for optimisation opportunities. 
+ bool use3x3Optimisation = (weightInfo.GetShape()[2] == 3) && (weightInfo.GetShape()[3] == 3); if (use3x3Optimisation) { m_DepthwiseConvolutionLayer = std::make_unique<arm_compute::CLDepthwiseConvolutionLayer3x3>(); @@ -118,7 +136,8 @@ ClDepthwiseConvolutionWorkload::ClDepthwiseConvolutionWorkload( BOOST_ASSERT(m_DepthwiseConvolutionLayer); - InitializeArmComputeClTensorData(*m_KernelTensor, m_Data.m_Weight); + ScopedCpuTensorHandle weightsPermutedHandle(weightPermuted); + InitializeArmComputeClTensorData(*m_KernelTensor, &weightsPermutedHandle); if (m_BiasTensor) { diff --git a/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp b/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp index 6cad12cba8..be26359662 100644 --- a/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp +++ b/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp @@ -8,10 +8,7 @@ #include <aclCommon/ArmComputeTensorUtils.hpp> #include <neon/NeonLayerSupport.hpp> #include <backendsCommon/CpuTensorHandle.hpp> - -#include <DataLayoutIndexed.hpp> - -using namespace armnnUtils; +#include <backendsCommon/WorkloadUtils.hpp> namespace armnn { @@ -19,17 +16,23 @@ namespace armnn using namespace armcomputetensorutils; arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const DepthwiseConvolution2dDescriptor& descriptor, - const TensorInfo& weights, - const Optional<TensorInfo>& biases) + const TensorInfo& output, + const DepthwiseConvolution2dDescriptor& descriptor, + const TensorInfo& weights, + const Optional<TensorInfo>& biases) { - const arm_compute::TensorInfo aclInputInfo = - BuildArmComputeTensorInfo(input, descriptor.m_DataLayout); - const arm_compute::TensorInfo aclOutputInfo = - BuildArmComputeTensorInfo(output, descriptor.m_DataLayout); - const arm_compute::TensorInfo aclWeightsInfo = - BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout); + const arm_compute::TensorInfo 
aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout); + const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout); + + // ArmNN's weight format is [ M, I, H, W ] + const unsigned int aclDepthMultiplier = weights.GetShape()[0]; + + // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either + // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library + TensorInfo weightsPermuted = ConvertWeightTensorInfoFromArmnnToAcl(weights, descriptor.m_DataLayout); + + // Convert the weights into the compute library format + const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout); arm_compute::TensorInfo aclBiasesInfo; arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr; @@ -42,9 +45,7 @@ arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& i optionalAclBiasesInfo = &aclBiasesInfo; } - const arm_compute::PadStrideInfo aclPadStrideInfo = - BuildArmComputePadStrideInfo(descriptor); - const unsigned int aclDepthMultiplier = weights.GetShape()[0]; + const arm_compute::PadStrideInfo aclPadStrideInfo = BuildArmComputePadStrideInfo(descriptor); return arm_compute::NEDepthwiseConvolutionLayer::validate(&aclInputInfo, &aclWeightsInfo, @@ -59,14 +60,21 @@ NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload( const WorkloadInfo& info) : BaseWorkload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info) { - const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo(); + // ArmNN's weight format is [ M, I, H, W ] + auto& weightInfo = m_Data.m_Weight->GetTensorInfo(); - m_KernelTensor = std::make_unique<arm_compute::Tensor>(); - BuildArmComputeTensor(*m_KernelTensor, weightInfo, m_Data.m_Parameters.m_DataLayout); + // Allocate a buffer for the swizzling of the weight tensor + std::unique_ptr<unsigned char[]> permuteBuffer(new unsigned 
char[m_Data.m_Weight->GetTensorInfo().GetNumBytes()]); - INeonTensorHandle* inputTensorHandle = static_cast<INeonTensorHandle*>(m_Data.m_Inputs[0]); - INeonTensorHandle* outputTensorHandle = static_cast<INeonTensorHandle*>(m_Data.m_Outputs[0]); - DataLayoutIndexed dataLayoutIndex(m_Data.m_Parameters.m_DataLayout); + // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either + // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library + ConstTensor weightPermuted = ConvertWeightTensorFromArmnnToAcl(m_Data.m_Weight, + m_Data.m_Parameters.m_DataLayout, + permuteBuffer.get()); + + // Convert the weights into the compute library format + m_KernelTensor = std::make_unique<arm_compute::Tensor>(); + BuildArmComputeTensor(*m_KernelTensor, weightPermuted.GetInfo(), m_Data.m_Parameters.m_DataLayout); if (m_Data.m_Parameters.m_BiasEnabled) { @@ -84,6 +92,9 @@ NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload( m_Data.ValidateInputsOutputs("NeonDepthwiseConvolutionWorkload", 1, 1); + INeonTensorHandle* inputTensorHandle = static_cast<INeonTensorHandle*>(m_Data.m_Inputs[0]); + INeonTensorHandle* outputTensorHandle = static_cast<INeonTensorHandle*>(m_Data.m_Outputs[0]); + arm_compute::ITensor& input = inputTensorHandle->GetTensor(); arm_compute::ITensor& output = outputTensorHandle->GetTensor(); @@ -91,9 +102,11 @@ NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload( input.info()->set_data_layout(aclDataLayout); output.info()->set_data_layout(aclDataLayout); - bool use3x3Optimisation = weightInfo.GetShape()[dataLayoutIndex.GetWidthIndex()] == 3 && - weightInfo.GetShape()[dataLayoutIndex.GetHeightIndex()] == 3; + // Get the depth multiplier + const unsigned int depthMultiplier = weightInfo.GetShape()[0]; + // Check for optimisation opportunities. 
+ bool use3x3Optimisation = (weightInfo.GetShape()[2] == 3) && (weightInfo.GetShape()[3] == 3); if (use3x3Optimisation) { m_pDepthwiseConvolutionLayer = std::make_unique<arm_compute::NEDepthwiseConvolutionLayer3x3>(); @@ -102,7 +115,8 @@ NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload( m_KernelTensor.get(), m_BiasTensor.get(), &output, - padStrideInfo); + padStrideInfo, + depthMultiplier); } else { @@ -112,12 +126,14 @@ NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload( m_KernelTensor.get(), m_BiasTensor.get(), &output, - padStrideInfo); + padStrideInfo, + depthMultiplier); } BOOST_ASSERT(m_pDepthwiseConvolutionLayer); - InitializeArmComputeTensorData(*m_KernelTensor, m_Data.m_Weight); + ScopedCpuTensorHandle weightsPermutedHandle(weightPermuted); + InitializeArmComputeTensorData(*m_KernelTensor, &weightsPermutedHandle); if (m_Data.m_Parameters.m_BiasEnabled) { diff --git a/src/backends/reference/workloads/ConvImpl.hpp b/src/backends/reference/workloads/ConvImpl.hpp index 704bc368d2..5c07f57ec0 100644 --- a/src/backends/reference/workloads/ConvImpl.hpp +++ b/src/backends/reference/workloads/ConvImpl.hpp @@ -57,7 +57,6 @@ static void ConvImpl(ConvData data, float filterScale, int32_t filterOffset, const BiasType* biasData, - InputType* outputData, float outputScale, int32_t outputOffset, const TensorInfo& filterInfo, @@ -68,10 +67,10 @@ static void ConvImpl(ConvData data, throw InvalidArgumentException("Bias is enabled but the bias data is invalid"); } - const TensorInfo& inputInfo0 = GetTensorInfo(data.m_Inputs[0]); - const TensorInfo& outputInfo0 = GetTensorInfo(data.m_Outputs[0]); + const TensorInfo& inputInfo = GetTensorInfo(data.m_Inputs[0]); + const TensorInfo& outputInfo = GetTensorInfo(data.m_Outputs[0]); - TensorBufferArrayView<InputType> output(outputInfo0.GetShape(), + TensorBufferArrayView<InputType> output(outputInfo.GetShape(), GetOutputTensorData<InputType>(0, data), data.m_Parameters.m_DataLayout); @@ -81,18 +80,18 
@@ static void ConvImpl(ConvData data, const unsigned int heightIndex = dataLayoutIndexed.GetHeightIndex(); const unsigned int widthIndex = dataLayoutIndexed.GetWidthIndex(); - unsigned int depthMult = depthwise ? filterInfo.GetShape()[0] : 1; - unsigned int channelsInput = filterInfo.GetShape()[channelsIndex]; - unsigned int channelsOutput = depthwise ? channelsInput * depthMult : filterInfo.GetShape()[0]; + unsigned int depthMultiplier = depthwise ? filterInfo.GetShape()[0] : 1; + unsigned int inputChannels = depthwise ? filterInfo.GetShape()[1] : filterInfo.GetShape()[channelsIndex]; + unsigned int outputChannels = depthwise ? inputChannels * depthMultiplier : filterInfo.GetShape()[0]; - unsigned int batchSize = outputInfo0.GetShape()[0]; - unsigned int heightOutput = outputInfo0.GetShape()[heightIndex]; - unsigned int widthOutput = outputInfo0.GetShape()[widthIndex]; - unsigned int heightInput = inputInfo0.GetShape()[heightIndex]; - unsigned int widthInput = inputInfo0.GetShape()[widthIndex]; + unsigned int batchSize = outputInfo.GetShape()[0]; + unsigned int outputHeight = outputInfo.GetShape()[heightIndex]; + unsigned int outputWidth = outputInfo.GetShape()[widthIndex]; + unsigned int inputHeight = inputInfo.GetShape()[heightIndex]; + unsigned int inputWidth = inputInfo.GetShape()[widthIndex]; - unsigned int heightFilter = filterInfo.GetShape()[heightIndex]; - unsigned int widthFilter = filterInfo.GetShape()[widthIndex]; + unsigned int filterHeight = depthwise ? filterInfo.GetShape()[2] : filterInfo.GetShape()[heightIndex]; + unsigned int filterWidth = depthwise ? filterInfo.GetShape()[3] : filterInfo.GetShape()[widthIndex]; unsigned int paddingTop = data.m_Parameters.m_PadTop; unsigned int paddingLeft = data.m_Parameters.m_PadLeft; @@ -102,68 +101,56 @@ static void ConvImpl(ConvData data, // The world's least efficient convolution. 
for (unsigned int batchIdx = 0; batchIdx < batchSize; batchIdx++) { - for (unsigned int cOutput = 0; cOutput < channelsOutput; cOutput++) + for (unsigned int cOutput = 0; cOutput < outputChannels; cOutput++) { - for (unsigned int yOutput = 0; yOutput < heightOutput; yOutput++) + for (unsigned int yOutput = 0; yOutput < outputHeight; yOutput++) { - for (unsigned int xOutput = 0; xOutput < widthOutput; xOutput++) + for (unsigned int xOutput = 0; xOutput < outputWidth; xOutput++) { // This loop goes over each output element. AccumulatorType sum = AccumulatorType(); // For depthwise, each output channel corresponds to exactly one input channel. // For normal, must loop over each input channel. - for (unsigned int cInput = 0; cInput < (depthwise ? 1 : channelsInput); cInput++) + for (unsigned int cInput = 0; cInput < (depthwise ? 1 : inputChannels); cInput++) { unsigned int depthwiseMultiplierIdx = 0; if (depthwise) { - cInput = cOutput / depthMult; - depthwiseMultiplierIdx = cOutput % depthMult; + cInput = cOutput / depthMultiplier; + depthwiseMultiplierIdx = cOutput % depthMultiplier; } - for (unsigned int yFilter = 0; yFilter < heightFilter; yFilter++) + for (unsigned int yFilter = 0; yFilter < filterHeight; yFilter++) { - for (unsigned int xFilter = 0; xFilter < widthFilter; xFilter++) + for (unsigned int xFilter = 0; xFilter < filterWidth; xFilter++) { // This loop goes over each input element for each output element. - unsigned int filterIndex; + unsigned int filterIndex = 0; // Since dimensionality of kernel depends on depthwiseness, so does index. 
if (depthwise) { - if (data.m_Parameters.m_DataLayout == DataLayout::NHWC) - { - filterIndex = depthwiseMultiplierIdx * heightFilter * widthFilter - * channelsInput + - yFilter * widthFilter * channelsInput + - xFilter * channelsInput + - cInput; - } - else - { - filterIndex = depthwiseMultiplierIdx * widthFilter * heightFilter - * channelsInput + - cInput * widthFilter * heightFilter + - yFilter * widthFilter + - xFilter; - } + filterIndex = depthwiseMultiplierIdx * filterWidth * filterHeight * inputChannels + + cInput * filterWidth * filterHeight + + yFilter * filterWidth + + xFilter; } else { if (data.m_Parameters.m_DataLayout == DataLayout::NHWC) { - filterIndex = cOutput * heightFilter * widthFilter * channelsInput + - yFilter * widthFilter * channelsInput + - xFilter * channelsInput + + filterIndex = cOutput * filterHeight * filterWidth * inputChannels + + yFilter * filterWidth * inputChannels + + xFilter * inputChannels + cInput; } else { - filterIndex = cOutput * widthFilter * heightFilter * channelsInput + - cInput * widthFilter * heightFilter + - yFilter * widthFilter + + filterIndex = cOutput * filterWidth * filterHeight * inputChannels + + cInput * filterWidth * filterHeight + + yFilter * filterWidth + xFilter; } } @@ -177,8 +164,8 @@ static void ConvImpl(ConvData data, AccumulatorType inputValue; // Check if we're in the padding. 
- if (yInput < paddingTop || yInput >= heightInput + paddingTop || - xInput < paddingLeft || xInput >= widthInput + paddingLeft ) + if (yInput < paddingTop || yInput >= inputHeight + paddingTop || + xInput < paddingLeft || xInput >= inputWidth + paddingLeft ) { inputValue = AccumulatorType(); } @@ -188,17 +175,17 @@ static void ConvImpl(ConvData data, if (data.m_Parameters.m_DataLayout == DataLayout::NHWC) { - inputIndex = batchIdx * heightInput * widthInput * channelsInput + - (yInput - paddingTop) * widthInput * channelsInput + - (xInput - paddingLeft) * channelsInput + + inputIndex = batchIdx * inputHeight * inputWidth * inputChannels + + (yInput - paddingTop) * inputWidth * inputChannels + + (xInput - paddingLeft) * inputChannels + cInput; } else { - inputIndex = batchIdx * widthInput * heightInput * channelsInput + - widthInput * heightInput * cInput + - widthInput * (yInput - paddingTop) + + inputIndex = batchIdx * inputWidth * inputHeight * inputChannels + + inputWidth * inputHeight * cInput + + inputWidth * (yInput - paddingTop) + xInput - paddingLeft; } diff --git a/src/backends/reference/workloads/RefConvolution2dFloat32Workload.cpp b/src/backends/reference/workloads/RefConvolution2dFloat32Workload.cpp index 20905646d7..7b298df967 100644 --- a/src/backends/reference/workloads/RefConvolution2dFloat32Workload.cpp +++ b/src/backends/reference/workloads/RefConvolution2dFloat32Workload.cpp @@ -23,15 +23,13 @@ void RefConvolution2dFloat32Workload::Execute() const { ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConvolution2dFloat32Workload_Execute"); - float* outputData = GetOutputTensorDataFloat(0, m_Data); const float* inputData = GetInputTensorDataFloat(0, m_Data); - const float* weightData = m_Weight->template GetConstTensor<float>(); - const float* biasData = m_Data.m_Parameters.m_BiasEnabled ? 
- m_Bias->template GetConstTensor<float>() : nullptr; + const float* filterData = m_Weight->template GetConstTensor<float>(); + const float* biasData = m_Data.m_Parameters.m_BiasEnabled ? m_Bias->template GetConstTensor<float>() : nullptr; const TensorInfo& filterInfo = m_Weight->GetTensorInfo(); ConvImpl<armnn::Convolution2dQueueDescriptor, float, float, float>( - m_Data, inputData, 0.0f, 0, weightData, 0.0f, 0, biasData, outputData, 0.0f, 0, filterInfo); + m_Data, inputData, 0.0f, 0, filterData, 0.0f, 0, biasData, 0.0f, 0, filterInfo); } } //namespace armnn diff --git a/src/backends/reference/workloads/RefConvolution2dUint8Workload.cpp b/src/backends/reference/workloads/RefConvolution2dUint8Workload.cpp index 881e9bf6b0..af2c7ad0d6 100644 --- a/src/backends/reference/workloads/RefConvolution2dUint8Workload.cpp +++ b/src/backends/reference/workloads/RefConvolution2dUint8Workload.cpp @@ -27,10 +27,7 @@ void RefConvolution2dUint8Workload::Execute() const const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); const uint8_t* weightsData = m_Weight->template GetConstTensor<uint8_t>(); const TensorInfo& weightsInfo = GetTensorInfo(m_Weight.get()); - const int32_t* biasData = m_Data.m_Parameters.m_BiasEnabled ? - m_Bias->template GetConstTensor<int32_t>() : - nullptr; - uint8_t* outputData = GetOutputTensorDataU8(0, m_Data); + const int32_t* biasData = m_Data.m_Parameters.m_BiasEnabled ? 
m_Bias->template GetConstTensor<int32_t>() : nullptr; const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); const TensorInfo& filterInfo = m_Weight->GetTensorInfo(); @@ -39,7 +36,7 @@ void RefConvolution2dUint8Workload::Execute() const inputData, inputInfo.GetQuantizationScale(), inputInfo.GetQuantizationOffset(), weightsData, weightsInfo.GetQuantizationScale(), weightsInfo.GetQuantizationOffset(), biasData, - outputData, outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(), filterInfo); + outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(), filterInfo); } } //namespace armnn diff --git a/src/backends/reference/workloads/RefDepthwiseConvolution2dFloat32Workload.cpp b/src/backends/reference/workloads/RefDepthwiseConvolution2dFloat32Workload.cpp index e89013b9bd..756e958753 100644 --- a/src/backends/reference/workloads/RefDepthwiseConvolution2dFloat32Workload.cpp +++ b/src/backends/reference/workloads/RefDepthwiseConvolution2dFloat32Workload.cpp @@ -23,15 +23,13 @@ void RefDepthwiseConvolution2dFloat32Workload::Execute() const { ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefDepthwiseConvolution2dFloat32Workload_Execute"); - float* outputData = GetOutputTensorDataFloat(0, m_Data); const float* inputData = GetInputTensorDataFloat(0, m_Data); const float* weightData = m_Weight->template GetConstTensor<float>(); - const float* biasData = m_Data.m_Parameters.m_BiasEnabled ? - m_Bias->template GetConstTensor<float>() : nullptr; + const float* biasData = m_Data.m_Parameters.m_BiasEnabled ? 
m_Bias->template GetConstTensor<float>() : nullptr; const TensorInfo& filterInfo = m_Weight->GetTensorInfo(); ConvImpl<armnn::DepthwiseConvolution2dQueueDescriptor, float, float, float> - (m_Data, inputData, 0.0f, 0, weightData, 0.0f, 0, biasData, outputData, 0.0f, 0, filterInfo, true); + (m_Data, inputData, 0.0f, 0, weightData, 0.0f, 0, biasData, 0.0f, 0, filterInfo, true); } } //namespace armnn diff --git a/src/backends/reference/workloads/RefDepthwiseConvolution2dUint8Workload.cpp b/src/backends/reference/workloads/RefDepthwiseConvolution2dUint8Workload.cpp index e8e501d6ae..629b729ea6 100644 --- a/src/backends/reference/workloads/RefDepthwiseConvolution2dUint8Workload.cpp +++ b/src/backends/reference/workloads/RefDepthwiseConvolution2dUint8Workload.cpp @@ -28,10 +28,7 @@ void RefDepthwiseConvolution2dUint8Workload::Execute() const const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); const uint8_t* weightsData = m_Weight->template GetConstTensor<uint8_t>(); const TensorInfo& weightsInfo = GetTensorInfo(m_Weight.get()); - const int32_t* biasData = m_Data.m_Parameters.m_BiasEnabled ? - m_Bias->template GetConstTensor<int32_t>() : - nullptr; - uint8_t* outputData = GetOutputTensorDataU8(0, m_Data); + const int32_t* biasData = m_Data.m_Parameters.m_BiasEnabled ? m_Bias->template GetConstTensor<int32_t>() : nullptr; const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); const TensorInfo& filterInfo = m_Weight->GetTensorInfo(); @@ -40,7 +37,7 @@ void RefDepthwiseConvolution2dUint8Workload::Execute() const inputData, inputInfo.GetQuantizationScale(), inputInfo.GetQuantizationOffset(), weightsData, weightsInfo.GetQuantizationScale(), weightsInfo.GetQuantizationOffset(), biasData, - outputData, outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(), filterInfo, true); + outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(), filterInfo, true); } } //namespace armnn |