diff options
author | Matteo Martincigh <matteo.martincigh@arm.com> | 2019-06-06 15:46:22 +0100 |
---|---|---|
committer | Teresa Charlin <teresa.charlinreyes@arm.com> | 2019-06-07 07:54:51 +0000 |
commit | f2aaab3a06024b5d5c538cc42799fb2c91b4ca2b (patch) | |
tree | dd83c6927fbc53ab2b235ea85de6ddb3a893f92d /src/backends | |
parent | 99a663140294afd2a4ea91ccc61b7266f735b46a (diff) | |
download | armnn-f2aaab3a06024b5d5c538cc42799fb2c91b4ca2b.tar.gz |
IVGCVSW-3223 Fix ref convolution performance regression
* Do not use DataLayoutIndexed::GetIndex for weights and inputs,
as it causes a large regression in performance
* It turned out that the hand-written index calculations for the
weights and inputs were a deliberate optimization introduced
many months ago
* Reverted the relevant hunks and added some comments so
we won't make the same mistake again in the future
* Made the GetIndex function inline to speed up other usages
Change-Id: I343b2ef0446993086f58b9dea1f0de0ba2d92216
Signed-off-by: Matteo Martincigh <matteo.martincigh@arm.com>
Diffstat (limited to 'src/backends')
-rw-r--r-- | src/backends/reference/workloads/ConvImpl.cpp | 44 |
1 files changed, 34 insertions, 10 deletions
diff --git a/src/backends/reference/workloads/ConvImpl.cpp b/src/backends/reference/workloads/ConvImpl.cpp index 801a29af1a..92e3b2d7dd 100644 --- a/src/backends/reference/workloads/ConvImpl.cpp +++ b/src/backends/reference/workloads/ConvImpl.cpp @@ -147,11 +147,22 @@ void Convolve(const TensorShape& rInputShape, } else { - filterIndex = dataLayoutIndexed.GetIndex(rFilterShape, - cOutput, - cInput, - yFilter, - xFilter); + // Keep this implementation, as using DataLayoutIndexed::GetIndex causes great + // performance regression. + if (dataLayout == DataLayout::NHWC) + { + filterIndex = cOutput * filterHeight * filterWidth * inputChannels + + yFilter * filterWidth * inputChannels + + xFilter * inputChannels + + cInput; + } + else + { + filterIndex = cOutput * filterWidth * filterHeight * inputChannels + + cInput * filterWidth * filterHeight + + yFilter * filterWidth + + xFilter; + } } rFilterDecoder[filterIndex]; @@ -170,11 +181,24 @@ void Convolve(const TensorShape& rInputShape, } else { - unsigned int inputIndex = dataLayoutIndexed.GetIndex(rInputShape, - batchIdx, - cInput, - yInput - paddingTop, - xInput - paddingLeft); + unsigned int inputIndex = 0; + + // Keep this implementation, as using DataLayoutIndexed::GetIndex causes great + // performance regression. + if (dataLayout == DataLayout::NHWC) + { + inputIndex = batchIdx * inputHeight * inputWidth * inputChannels + + (yInput - paddingTop) * inputWidth * inputChannels + + (xInput - paddingLeft) * inputChannels + + cInput; + } + else + { + inputIndex = batchIdx * inputWidth * inputHeight * inputChannels + + inputWidth * inputHeight * cInput + + inputWidth * (yInput - paddingTop) + + xInput - paddingLeft; + } rInputDecoder[inputIndex]; inputValue = rInputDecoder.Get(); |