about summary refs log tree commit diff
path: root/src/backends/cl/ClImportTensorHandle.hpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/backends/cl/ClImportTensorHandle.hpp')
-rw-r--r-- src/backends/cl/ClImportTensorHandle.hpp 12
1 files changed, 10 insertions, 2 deletions
diff --git a/src/backends/cl/ClImportTensorHandle.hpp b/src/backends/cl/ClImportTensorHandle.hpp
index a236a70d7c..54710d8135 100644
--- a/src/backends/cl/ClImportTensorHandle.hpp
+++ b/src/backends/cl/ClImportTensorHandle.hpp
@@ -205,7 +205,11 @@ public:
// We do this to match the behaviour of the Import function later on.
auto cachelineAlignment =
arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
- auto roundedSize = cachelineAlignment + totalBytes - (totalBytes % cachelineAlignment);
+ auto roundedSize = totalBytes;
+ if (totalBytes % cachelineAlignment != 0)
+ {
+ roundedSize = cachelineAlignment + totalBytes - (totalBytes % cachelineAlignment);
+ }
cl_int error = CL_SUCCESS;
cl_mem buffer;
@@ -252,7 +256,11 @@ private:
// This does not change the size of the buffer, only the size of the mapping the buffer is mapped to
auto cachelineAlignment =
arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
- auto roundedSize = cachelineAlignment + totalBytes - (totalBytes % cachelineAlignment);
+ auto roundedSize = totalBytes;
+ if (totalBytes % cachelineAlignment != 0)
+ {
+ roundedSize = cachelineAlignment + totalBytes - (totalBytes % cachelineAlignment);
+ }
cl_int error = CL_SUCCESS;
cl_mem buffer;