aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorMatthew Bentham <matthew.bentham@arm.com>2019-09-15 00:06:05 +0100
committerMatthew Bentham <matthew.bentham@arm.com>2019-09-16 09:07:13 +0000
commit019c4b1cd87849194cd30cb5bfd0e5d68089df08 (patch)
tree68d7907a31a23a4e6fc47827ba49538e7b0688c2 /src
parentefdbca6b1a25dbd491be7f308dd729ff2255bb28 (diff)
downloadarmnn-019c4b1cd87849194cd30cb5bfd0e5d68089df08.tar.gz
In CopyTensorContentsGeneric coalesce inner dimensions where possible
This reduces the number of function calls in the inner loop, and allows for optimised implementations of memcpy to improve bandwidth. Signed-off-by: Matthew Bentham <matthew.bentham@arm.com> Change-Id: I7458b45c075c87805242e92e54448b9dd762227f
Diffstat (limited to 'src')
-rw-r--r--src/backends/backendsCommon/WorkloadUtils.hpp21
1 files changed, 21 insertions, 0 deletions
diff --git a/src/backends/backendsCommon/WorkloadUtils.hpp b/src/backends/backendsCommon/WorkloadUtils.hpp
index 3e0c40d890..cb614ea5b9 100644
--- a/src/backends/backendsCommon/WorkloadUtils.hpp
+++ b/src/backends/backendsCommon/WorkloadUtils.hpp
@@ -125,6 +125,27 @@ void CopyTensorContentsGeneric(const ITensorHandle* srcTensor, ITensorHandle* ds
size_t copyBatches = std::min(srcBatches, dstBatches);
size_t copyDepth = std::min(srcDepth, dstDepth);
+ // Coalesce inner dimensions where possible
+ // to reduce the overhead of calling copy() and to
+ // allow for memory bandwidth optimisations
+ if (copyLength == srcWidthStride &&
+ copyLength == dstWidthStride)
+ {
+ // There is no special padding between rows,
+ // and sizes are compatible, so copy whole rows
+ copyLength *= copyWidth;
+ copyWidth = 1;
+
+ if (copyLength == srcHeightStride &&
+ copyLength == dstHeightStride)
+ {
+ // There is no special padding between batches
+ // and sizes are compatible so copy whole batches
+ copyLength *= copyHeight;
+ copyHeight = 1;
+ }
+ }
+
for (unsigned int d = 0; d < copyDepth; ++d)
{
auto srcPtrDepth = srcData;