diff options
author | Matthew Bentham <matthew.bentham@arm.com> | 2019-09-15 00:06:05 +0100 |
---|---|---|
committer | Matthew Bentham <matthew.bentham@arm.com> | 2019-09-16 09:07:13 +0000 |
commit | 019c4b1cd87849194cd30cb5bfd0e5d68089df08 (patch) | |
tree | 68d7907a31a23a4e6fc47827ba49538e7b0688c2 | |
parent | efdbca6b1a25dbd491be7f308dd729ff2255bb28 (diff) | |
download | armnn-019c4b1cd87849194cd30cb5bfd0e5d68089df08.tar.gz |
In CopyTensorContentsGeneric coalesce inner dimensions where possible
This reduces the number of function calls in the inner loop, and
allows for optimised implementations of memcpy to improve bandwidth.
Signed-off-by: Matthew Bentham <matthew.bentham@arm.com>
Change-Id: I7458b45c075c87805242e92e54448b9dd762227f
-rw-r--r-- | src/backends/backendsCommon/WorkloadUtils.hpp | 21 |
1 file changed, 21 insertions, 0 deletions
diff --git a/src/backends/backendsCommon/WorkloadUtils.hpp b/src/backends/backendsCommon/WorkloadUtils.hpp index 3e0c40d890..cb614ea5b9 100644 --- a/src/backends/backendsCommon/WorkloadUtils.hpp +++ b/src/backends/backendsCommon/WorkloadUtils.hpp @@ -125,6 +125,27 @@ void CopyTensorContentsGeneric(const ITensorHandle* srcTensor, ITensorHandle* ds size_t copyBatches = std::min(srcBatches, dstBatches); size_t copyDepth = std::min(srcDepth, dstDepth); + // Coalesce inner dimensions where possible + // to reduce overhead calling copy() and to + // allow for memory bandwidth optimisations + if (copyLength == srcWidthStride && + copyLength == dstWidthStride) + { + // There is no special padding between rows, + // and sizes are compatible, so copy whole rows + copyLength *= copyWidth; + copyWidth = 1; + + if (copyLength == srcHeightStride && + copyLength == dstHeightStride) + { + // There is no special padding between batches + // and sizes are compatible so copy whole batches + copyLength *= copyHeight; + copyHeight = 1; + } + } + for (unsigned int d = 0; d < copyDepth; ++d) { auto srcPtrDepth = srcData; |