diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/backends/backendsCommon/WorkloadUtils.hpp | 21 |
1 files changed, 21 insertions, 0 deletions
diff --git a/src/backends/backendsCommon/WorkloadUtils.hpp b/src/backends/backendsCommon/WorkloadUtils.hpp index 3e0c40d890..cb614ea5b9 100644 --- a/src/backends/backendsCommon/WorkloadUtils.hpp +++ b/src/backends/backendsCommon/WorkloadUtils.hpp @@ -125,6 +125,27 @@ void CopyTensorContentsGeneric(const ITensorHandle* srcTensor, ITensorHandle* ds size_t copyBatches = std::min(srcBatches, dstBatches); size_t copyDepth = std::min(srcDepth, dstDepth); + // Coalesce inner dimensions where possible + // to reduce the overhead of calling copy() and to + // allow for memory bandwidth optimisations + if (copyLength == srcWidthStride && + copyLength == dstWidthStride) + { + // There is no special padding between rows, + // and sizes are compatible, so copy whole rows + copyLength *= copyWidth; + copyWidth = 1; + + if (copyLength == srcHeightStride && + copyLength == dstHeightStride) + { + // There is no special padding between batches + // and sizes are compatible so copy whole batches + copyLength *= copyHeight; + copyHeight = 1; + } + } + for (unsigned int d = 0; d < copyDepth; ++d) { auto srcPtrDepth = srcData; |