aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatteo Martincigh <matteo.martincigh@arm.com>2019-06-06 15:46:22 +0100
committerTeresa Charlin <teresa.charlinreyes@arm.com>2019-06-07 07:54:51 +0000
commitf2aaab3a06024b5d5c538cc42799fb2c91b4ca2b (patch)
treedd83c6927fbc53ab2b235ea85de6ddb3a893f92d
parent99a663140294afd2a4ea91ccc61b7266f735b46a (diff)
downloadarmnn-f2aaab3a06024b5d5c538cc42799fb2c91b4ca2b.tar.gz
IVGCVSW-3223 Fix ref convolution performance regression
* Do not use DataLayoutIndexed::GetIndex for weights and inputs, as it causes a large regression in performance
* It turned out that the calculation of the indexes for the weights and inputs was the way it was because of an optimization done many months ago
* Reverted the relevant hunks and added some comments so we won't make the same mistake again in the future
* Made the GetIndex function inline to speed up other usages

Change-Id: I343b2ef0446993086f58b9dea1f0de0ba2d92216
Signed-off-by: Matteo Martincigh <matteo.martincigh@arm.com>
-rw-r--r--src/armnnUtils/DataLayoutIndexed.cpp37
-rw-r--r--src/armnnUtils/DataLayoutIndexed.hpp39
-rw-r--r--src/backends/reference/workloads/ConvImpl.cpp44
3 files changed, 70 insertions, 50 deletions
diff --git a/src/armnnUtils/DataLayoutIndexed.cpp b/src/armnnUtils/DataLayoutIndexed.cpp
index b02f07ec85..02f1e816ac 100644
--- a/src/armnnUtils/DataLayoutIndexed.cpp
+++ b/src/armnnUtils/DataLayoutIndexed.cpp
@@ -4,9 +4,6 @@
//
#include "DataLayoutIndexed.hpp"
-
-#include <boost/assert.hpp>
-
using namespace armnn;
namespace armnnUtils
@@ -33,40 +30,6 @@ DataLayoutIndexed::DataLayoutIndexed(armnn::DataLayout dataLayout)
}
}
-unsigned int DataLayoutIndexed::GetIndex(const TensorShape& shape,
- unsigned int batchIndex, unsigned int channelIndex,
- unsigned int heightIndex, unsigned int widthIndex) const
-{
- BOOST_ASSERT( batchIndex < shape[0] || ( shape[0] == 0 && batchIndex == 0 ) );
- BOOST_ASSERT( channelIndex < shape[m_ChannelsIndex] ||
- ( shape[m_ChannelsIndex] == 0 && channelIndex == 0) );
- BOOST_ASSERT( heightIndex < shape[m_HeightIndex] ||
- ( shape[m_HeightIndex] == 0 && heightIndex == 0) );
- BOOST_ASSERT( widthIndex < shape[m_WidthIndex] ||
- ( shape[m_WidthIndex] == 0 && widthIndex == 0) );
-
- // Offset the given indices appropriately depending on the data layout
- switch (m_DataLayout)
- {
- case DataLayout::NHWC:
- batchIndex *= shape[1] * shape[2] * shape[3]; // batchIndex *= heightIndex * widthIndex * channelIndex
- heightIndex *= shape[m_WidthIndex] * shape[m_ChannelsIndex];
- widthIndex *= shape[m_ChannelsIndex];
- // channelIndex stays unchanged
- break;
- case DataLayout::NCHW:
- default:
- batchIndex *= shape[1] * shape[2] * shape[3]; // batchIndex *= heightIndex * widthIndex * channelIndex
- channelIndex *= shape[m_HeightIndex] * shape[m_WidthIndex];
- heightIndex *= shape[m_WidthIndex];
- // widthIndex stays unchanged
- break;
- }
-
- // Get the value using the correct offset
- return batchIndex + channelIndex + heightIndex + widthIndex;
-}
-
bool operator==(const DataLayout& dataLayout, const DataLayoutIndexed& indexed)
{
return dataLayout == indexed.GetDataLayout();
diff --git a/src/armnnUtils/DataLayoutIndexed.hpp b/src/armnnUtils/DataLayoutIndexed.hpp
index 5bb8e0d93f..8bd9701a5e 100644
--- a/src/armnnUtils/DataLayoutIndexed.hpp
+++ b/src/armnnUtils/DataLayoutIndexed.hpp
@@ -8,6 +8,8 @@
#include <armnn/Types.hpp>
#include <armnn/Tensor.hpp>
+#include <boost/assert.hpp>
+
namespace armnnUtils
{
@@ -21,9 +23,40 @@ public:
unsigned int GetChannelsIndex() const { return m_ChannelsIndex; }
unsigned int GetHeightIndex() const { return m_HeightIndex; }
unsigned int GetWidthIndex() const { return m_WidthIndex; }
- unsigned int GetIndex(const armnn::TensorShape& shape,
- unsigned int batchIndex, unsigned int channelIndex,
- unsigned int heightIndex, unsigned int widthIndex) const;
+
+ inline unsigned int GetIndex(const armnn::TensorShape& shape,
+ unsigned int batchIndex, unsigned int channelIndex,
+ unsigned int heightIndex, unsigned int widthIndex) const
+ {
+ BOOST_ASSERT( batchIndex < shape[0] || ( shape[0] == 0 && batchIndex == 0 ) );
+ BOOST_ASSERT( channelIndex < shape[m_ChannelsIndex] ||
+ ( shape[m_ChannelsIndex] == 0 && channelIndex == 0) );
+ BOOST_ASSERT( heightIndex < shape[m_HeightIndex] ||
+ ( shape[m_HeightIndex] == 0 && heightIndex == 0) );
+ BOOST_ASSERT( widthIndex < shape[m_WidthIndex] ||
+ ( shape[m_WidthIndex] == 0 && widthIndex == 0) );
+
+ // Offset the given indices appropriately depending on the data layout
+ switch (m_DataLayout)
+ {
+ case armnn::DataLayout::NHWC:
+ batchIndex *= shape[1] * shape[2] * shape[3]; // batchIndex *= heightIndex * widthIndex * channelIndex
+ heightIndex *= shape[m_WidthIndex] * shape[m_ChannelsIndex];
+ widthIndex *= shape[m_ChannelsIndex];
+ // channelIndex stays unchanged
+ break;
+ case armnn::DataLayout::NCHW:
+ default:
+ batchIndex *= shape[1] * shape[2] * shape[3]; // batchIndex *= heightIndex * widthIndex * channelIndex
+ channelIndex *= shape[m_HeightIndex] * shape[m_WidthIndex];
+ heightIndex *= shape[m_WidthIndex];
+ // widthIndex stays unchanged
+ break;
+ }
+
+ // Get the value using the correct offset
+ return batchIndex + channelIndex + heightIndex + widthIndex;
+ }
private:
armnn::DataLayout m_DataLayout;
diff --git a/src/backends/reference/workloads/ConvImpl.cpp b/src/backends/reference/workloads/ConvImpl.cpp
index 801a29af1a..92e3b2d7dd 100644
--- a/src/backends/reference/workloads/ConvImpl.cpp
+++ b/src/backends/reference/workloads/ConvImpl.cpp
@@ -147,11 +147,22 @@ void Convolve(const TensorShape& rInputShape,
}
else
{
- filterIndex = dataLayoutIndexed.GetIndex(rFilterShape,
- cOutput,
- cInput,
- yFilter,
- xFilter);
+ // Keep this implementation, as using DataLayoutIndexed::GetIndex causes great
+ // performance regression.
+ if (dataLayout == DataLayout::NHWC)
+ {
+ filterIndex = cOutput * filterHeight * filterWidth * inputChannels +
+ yFilter * filterWidth * inputChannels +
+ xFilter * inputChannels +
+ cInput;
+ }
+ else
+ {
+ filterIndex = cOutput * filterWidth * filterHeight * inputChannels +
+ cInput * filterWidth * filterHeight +
+ yFilter * filterWidth +
+ xFilter;
+ }
}
rFilterDecoder[filterIndex];
@@ -170,11 +181,24 @@ void Convolve(const TensorShape& rInputShape,
}
else
{
- unsigned int inputIndex = dataLayoutIndexed.GetIndex(rInputShape,
- batchIdx,
- cInput,
- yInput - paddingTop,
- xInput - paddingLeft);
+ unsigned int inputIndex = 0;
+
+ // Keep this implementation, as using DataLayoutIndexed::GetIndex causes great
+ // performance regression.
+ if (dataLayout == DataLayout::NHWC)
+ {
+ inputIndex = batchIdx * inputHeight * inputWidth * inputChannels +
+ (yInput - paddingTop) * inputWidth * inputChannels +
+ (xInput - paddingLeft) * inputChannels +
+ cInput;
+ }
+ else
+ {
+ inputIndex = batchIdx * inputWidth * inputHeight * inputChannels +
+ inputWidth * inputHeight * cInput +
+ inputWidth * (yInput - paddingTop) +
+ xInput - paddingLeft;
+ }
rInputDecoder[inputIndex];
inputValue = rInputDecoder.Get();