aboutsummaryrefslogtreecommitdiff
path: root/src/backends/cl/ClImportTensorHandle.hpp
diff options
context:
space:
mode:
authorJan Eilers <jan.eilers@arm.com>2021-07-22 13:17:04 +0100
committerDavid Monahan <david.monahan@arm.com>2021-08-10 16:35:33 +0100
commitc1c872f12797ef6fe52c4589113e7efc353e56eb (patch)
tree911320c5306f9d2273ee76201806bfb12cbe4cd3 /src/backends/cl/ClImportTensorHandle.hpp
parentf487486c843a38fced90229923433d09f99fc2e5 (diff)
downloadarmnn-c1c872f12797ef6fe52c4589113e7efc353e56eb.tar.gz
Adds CustomAllocator interface and Sample App
* Updates the runtime options with a CustomAllocatorMap which allows to define a CustomAllocator for specific backends * Change IBackendInternal interface to use a shared pointer to a custom allocator * Update ClBackend.hpp/cpp to use the CustomAllocator * Adds an example application and unit test which uses a CustomAllocator for GpuAcc * Refactor of the interface to use MemorySource instead of the user Mapping cl_mem directly * Modify the BackendRegistry to also hold a registry of CustomAllocators * BackendRegistry Deregister will also deregister any allocators associated with that backend id * set_global_allocator within the BaseMemoryManager so that it always matches the currently used allocator Signed-off-by: Jan Eilers <jan.eilers@arm.com> Change-Id: I156d819686021865f4375e6cb7a5c5dec8fee9e8 Signed-off-by: David Monahan <david.monahan@arm.com>
Diffstat (limited to 'src/backends/cl/ClImportTensorHandle.hpp')
-rw-r--r--src/backends/cl/ClImportTensorHandle.hpp10
1 files changed, 8 insertions, 2 deletions
diff --git a/src/backends/cl/ClImportTensorHandle.hpp b/src/backends/cl/ClImportTensorHandle.hpp
index 3fca7cb127..69cd4a6d81 100644
--- a/src/backends/cl/ClImportTensorHandle.hpp
+++ b/src/backends/cl/ClImportTensorHandle.hpp
@@ -140,10 +140,16 @@ public:
private:
bool ClImport(const cl_import_properties_arm* importProperties, void* memory)
{
- const size_t totalBytes = m_Tensor.info()->total_size();
+ size_t totalBytes = m_Tensor.info()->total_size();
+
+ // Round the size of the buffer to a multiple of the CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE
+ auto cachelineAlignment =
+ arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
+ auto roundedSize = cachelineAlignment + totalBytes - (totalBytes % cachelineAlignment);
+
cl_int error = CL_SUCCESS;
cl_mem buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
- CL_MEM_READ_WRITE, importProperties, memory, totalBytes, &error);
+ CL_MEM_READ_WRITE, importProperties, memory, roundedSize, &error);
if (error != CL_SUCCESS)
{
throw MemoryImportException("ClImportTensorHandle::Invalid imported memory" + std::to_string(error));