aboutsummaryrefslogtreecommitdiff
path: root/src/backends/reference/workloads/ConvImpl.cpp
diff options
context:
space:
mode:
author: Matteo Martincigh <matteo.martincigh@arm.com> 2019-06-06 15:46:22 +0100
committer: Teresa Charlin <teresa.charlinreyes@arm.com> 2019-06-07 07:54:51 +0000
commit: f2aaab3a06024b5d5c538cc42799fb2c91b4ca2b (patch)
tree: dd83c6927fbc53ab2b235ea85de6ddb3a893f92d /src/backends/reference/workloads/ConvImpl.cpp
parent: 99a663140294afd2a4ea91ccc61b7266f735b46a (diff)
download: armnn-f2aaab3a06024b5d5c538cc42799fb2c91b4ca2b.tar.gz
IVGCVSW-3223 Fix ref convolution performance regression
* Do not use DataLayoutIndexed::GetIndex for weights and inputs, as it causes a large regression in performance
* It turned out that the calculation of the indexes for the weights and inputs was the way it was because of an optimization done many months ago
* Reverted the relevant hunks and added some comments so we won't make the same mistake again in the future
* Made the GetIndex function inline to speed up other usages

Change-Id: I343b2ef0446993086f58b9dea1f0de0ba2d92216
Signed-off-by: Matteo Martincigh <matteo.martincigh@arm.com>
Diffstat (limited to 'src/backends/reference/workloads/ConvImpl.cpp')
-rw-r--r-- src/backends/reference/workloads/ConvImpl.cpp | 44
1 files changed, 34 insertions, 10 deletions
diff --git a/src/backends/reference/workloads/ConvImpl.cpp b/src/backends/reference/workloads/ConvImpl.cpp
index 801a29af1a..92e3b2d7dd 100644
--- a/src/backends/reference/workloads/ConvImpl.cpp
+++ b/src/backends/reference/workloads/ConvImpl.cpp
@@ -147,11 +147,22 @@ void Convolve(const TensorShape& rInputShape,
}
else
{
- filterIndex = dataLayoutIndexed.GetIndex(rFilterShape,
- cOutput,
- cInput,
- yFilter,
- xFilter);
+ // Keep this implementation, as using DataLayoutIndexed::GetIndex causes great
+ // performance regression.
+ if (dataLayout == DataLayout::NHWC)
+ {
+ filterIndex = cOutput * filterHeight * filterWidth * inputChannels +
+ yFilter * filterWidth * inputChannels +
+ xFilter * inputChannels +
+ cInput;
+ }
+ else
+ {
+ filterIndex = cOutput * filterWidth * filterHeight * inputChannels +
+ cInput * filterWidth * filterHeight +
+ yFilter * filterWidth +
+ xFilter;
+ }
}
rFilterDecoder[filterIndex];
@@ -170,11 +181,24 @@ void Convolve(const TensorShape& rInputShape,
}
else
{
- unsigned int inputIndex = dataLayoutIndexed.GetIndex(rInputShape,
- batchIdx,
- cInput,
- yInput - paddingTop,
- xInput - paddingLeft);
+ unsigned int inputIndex = 0;
+
+ // Keep this implementation, as using DataLayoutIndexed::GetIndex causes great
+ // performance regression.
+ if (dataLayout == DataLayout::NHWC)
+ {
+ inputIndex = batchIdx * inputHeight * inputWidth * inputChannels +
+ (yInput - paddingTop) * inputWidth * inputChannels +
+ (xInput - paddingLeft) * inputChannels +
+ cInput;
+ }
+ else
+ {
+ inputIndex = batchIdx * inputWidth * inputHeight * inputChannels +
+ inputWidth * inputHeight * cInput +
+ inputWidth * (yInput - paddingTop) +
+ xInput - paddingLeft;
+ }
rInputDecoder[inputIndex];
inputValue = rInputDecoder.Get();