Diffstat (limited to 'src/runtime/CL/CLTensorAllocator.cpp')
-rw-r--r--  src/runtime/CL/CLTensorAllocator.cpp  75
1 file changed, 38 insertions(+), 37 deletions(-)
diff --git a/src/runtime/CL/CLTensorAllocator.cpp b/src/runtime/CL/CLTensorAllocator.cpp
index eaf46d42ca..e6457218c7 100644
--- a/src/runtime/CL/CLTensorAllocator.cpp
+++ b/src/runtime/CL/CLTensorAllocator.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -31,34 +31,33 @@
namespace arm_compute
{
const cl::Buffer CLTensorAllocator::_empty_buffer = cl::Buffer();
-
namespace
{
+/** Global user-defined allocator that can be used for all internal allocations of a CLTensor */
+static IAllocator *static_global_cl_allocator = nullptr;
+
/** Helper function used to allocate the backing memory of a tensor
*
- * @param[in] context OpenCL context to use
* @param[in] size Size of the allocation
* @param[in] alignment Alignment of the allocation
*
* @return A wrapped memory region
*/
-std::unique_ptr<ICLMemoryRegion> allocate_region(CLCoreRuntimeContext *ctx, size_t size, cl_uint alignment)
+std::unique_ptr<ICLMemoryRegion> allocate_region(size_t size, cl_uint alignment)
{
// Try fine-grain SVM
- std::unique_ptr<ICLMemoryRegion> region = support::cpp14::make_unique<CLFineSVMMemoryRegion>(ctx,
- CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER,
- size,
- alignment);
+ std::unique_ptr<ICLMemoryRegion> region =
+ std::make_unique<CLFineSVMMemoryRegion>(CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, size, alignment);
// Try coarse-grain SVM in case of failure
- if(region != nullptr && region->ptr() == nullptr)
+ if (region != nullptr && region->ptr() == nullptr)
{
- region = support::cpp14::make_unique<CLCoarseSVMMemoryRegion>(ctx, CL_MEM_READ_WRITE, size, alignment);
+ region = std::make_unique<CLCoarseSVMMemoryRegion>(CL_MEM_READ_WRITE, size, alignment);
}
// Try legacy buffer memory in case of failure
- if(region != nullptr && region->ptr() == nullptr)
+ if (region != nullptr && region->ptr() == nullptr)
{
- region = support::cpp14::make_unique<CLBufferMemoryRegion>(ctx, CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, size);
+ region = std::make_unique<CLBufferMemoryRegion>(CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, size);
}
return region;
}
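
Note: the new allocate_region() falls back from fine-grain SVM to coarse-grain SVM and finally to a plain CL_MEM_ALLOC_HOST_PTR buffer, using a null mapped pointer as the failure signal. For callers that want to know up front which path will likely be taken, a minimal sketch of a device capability query (assuming OpenCL 2.0 headers and the same scheduler context the allocator uses):

#include "arm_compute/runtime/CL/CLScheduler.h"

bool device_supports_fine_grain_svm()
{
    // Query the first device of the context that CLTensorAllocator allocates on.
    const cl::Device device = arm_compute::CLScheduler::get().context().getInfo<CL_CONTEXT_DEVICES>().front();
    const cl_device_svm_capabilities caps = device.getInfo<CL_DEVICE_SVM_CAPABILITIES>();
    return (caps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) != 0;
}
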
@@ -80,7 +79,10 @@ void clear_quantization_arrays(CLFloatArray &scale, CLInt32Array &offset)
* @param[in] qinfo Quantization info
* @param[in] pad_size Pad size to use in case array needs to be padded for computation purposes
*/
-void populate_quantization_info(CLFloatArray &scale, CLInt32Array &offset, const QuantizationInfo &qinfo, size_t pad_size)
+void populate_quantization_info(CLFloatArray &scale,
+ CLInt32Array &offset,
+ const QuantizationInfo &qinfo,
+ size_t pad_size)
{
clear_quantization_arrays(scale, offset);
@@ -90,16 +92,18 @@ void populate_quantization_info(CLFloatArray &scale, CLInt32Array &offset, const
const size_t element_size = sizeof(std::remove_reference<decltype(qscale)>::type::value_type);
scale = CLFloatArray(num_elements + pad_size);
scale.resize(num_elements);
- CLScheduler::get().queue().enqueueWriteBuffer(scale.cl_buffer(), CL_TRUE, 0, num_elements * element_size, qinfo.scale().data());
+ CLScheduler::get().queue().enqueueWriteBuffer(scale.cl_buffer(), CL_TRUE, 0, num_elements * element_size,
+ qinfo.scale().data());
- if(!qinfo.offset().empty())
+ if (!qinfo.offset().empty())
{
// Create offset array
- const std::vector<int32_t> &qoffset = qinfo.offset();
- const size_t offset_element_size = sizeof(std::remove_reference<decltype(qoffset)>::type::value_type);
- offset = CLInt32Array(num_elements + pad_size);
+ const std::vector<int32_t> &qoffset = qinfo.offset();
+ const size_t offset_element_size = sizeof(std::remove_reference<decltype(qoffset)>::type::value_type);
+ offset = CLInt32Array(num_elements + pad_size);
offset.resize(num_elements);
- CLScheduler::get().queue().enqueueWriteBuffer(offset.cl_buffer(), CL_TRUE, 0, num_elements * offset_element_size, qinfo.offset().data());
+ CLScheduler::get().queue().enqueueWriteBuffer(offset.cl_buffer(), CL_TRUE, 0,
+ num_elements * offset_element_size, qinfo.offset().data());
}
}
} // namespace
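
Note: populate_quantization_info() uploads the per-channel scale (and optional offset) vector into the CLFloatArray/CLInt32Array members with blocking enqueueWriteBuffer calls, and allocate() only invokes it for per-channel quantized data types. A usage sketch that would exercise this path; the shape, scales and the QSYMM8_PER_CHANNEL choice are illustrative, and the CLScheduler is assumed to be initialised already:

#include "arm_compute/core/QuantizationInfo.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include <vector>

void make_per_channel_weights()
{
    using namespace arm_compute;
    // 3x3 kernel with 3 output channels, one scale per channel.
    const std::vector<float> per_channel_scales = {0.5f, 0.25f, 0.125f};
    TensorInfo info(TensorShape(3U, 3U, 3U), 1, DataType::QSYMM8_PER_CHANNEL,
                    QuantizationInfo(per_channel_scales));

    CLTensor weights;
    weights.allocator()->init(info);
    weights.allocator()->allocate(); // fills the scale (and offset, if any) arrays as above
}
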
@@ -111,7 +115,7 @@ CLTensorAllocator::CLTensorAllocator(IMemoryManageable *owner, CLRuntimeContext
CLQuantization CLTensorAllocator::quantization() const
{
- return { &_scale, &_offset };
+ return {&_scale, &_offset};
}
uint8_t *CLTensorAllocator::data()
@@ -127,26 +131,26 @@ const cl::Buffer &CLTensorAllocator::cl_data() const
void CLTensorAllocator::allocate()
{
// Allocate tensor backing memory
- if(_associated_memory_group == nullptr)
+ if (_associated_memory_group == nullptr)
{
// Perform memory allocation
- if(_ctx == nullptr)
+ if (static_global_cl_allocator != nullptr)
{
- auto legacy_ctx = CLCoreRuntimeContext(nullptr, CLScheduler::get().context(), CLScheduler::get().queue());
- _memory.set_owned_region(allocate_region(&legacy_ctx, info().total_size(), 0));
+ _memory.set_owned_region(static_global_cl_allocator->make_region(info().total_size(), 0));
}
else
{
- _memory.set_owned_region(allocate_region(_ctx->core_runtime_context(), info().total_size(), 0));
+ _memory.set_owned_region(allocate_region(info().total_size(), 0));
}
}
else
{
+ // Finalize memory management instead
_associated_memory_group->finalize_memory(_owner, _memory, info().total_size(), alignment());
}
// Allocate and fill the quantization parameter arrays
- if(is_data_type_quantized_per_channel(info().data_type()))
+ if (is_data_type_quantized_per_channel(info().data_type()))
{
const size_t pad_size = 0;
populate_quantization_info(_scale, _offset, info().quantization_info(), pad_size);
@@ -171,15 +175,7 @@ Status CLTensorAllocator::import_memory(cl::Buffer buffer)
ARM_COMPUTE_RETURN_ERROR_ON(buffer.getInfo<CL_MEM_CONTEXT>().get() != CLScheduler::get().context().get());
ARM_COMPUTE_RETURN_ERROR_ON(_associated_memory_group != nullptr);
- if(_ctx == nullptr)
- {
- auto legacy_ctx = CLCoreRuntimeContext(nullptr, CLScheduler::get().context(), CLScheduler::get().queue());
- _memory.set_owned_region(support::cpp14::make_unique<CLBufferMemoryRegion>(buffer, &legacy_ctx));
- }
- else
- {
- _memory.set_owned_region(support::cpp14::make_unique<CLBufferMemoryRegion>(buffer, _ctx->core_runtime_context()));
- }
+ _memory.set_owned_region(std::make_unique<CLBufferMemoryRegion>(buffer));
info().set_is_resizable(false);
return Status{};
@@ -194,9 +190,14 @@ void CLTensorAllocator::set_associated_memory_group(IMemoryGroup *associated_mem
_associated_memory_group = associated_memory_group;
}
+void CLTensorAllocator::set_global_allocator(IAllocator *allocator)
+{
+ static_global_cl_allocator = allocator;
+}
+
uint8_t *CLTensorAllocator::lock()
{
- if(_ctx)
+ if (_ctx)
{
return map(_ctx->gpu_scheduler()->queue(), true);
}
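
Note: set_global_allocator() stores a process-wide IAllocator that allocate() now prefers over the SVM/buffer fallback. A sketch of registering one; CLBufferAllocator is used here only as a convenient IAllocator implementation, any user-defined allocator works the same way:

#include "arm_compute/runtime/CL/CLBufferAllocator.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"

void install_global_cl_allocator()
{
    // Keep the allocator alive for as long as tensors may allocate through it.
    static arm_compute::CLBufferAllocator buffer_allocator;
    arm_compute::CLTensorAllocator::set_global_allocator(&buffer_allocator);

    // Passing nullptr later restores the default allocate_region() path:
    // arm_compute::CLTensorAllocator::set_global_allocator(nullptr);
}
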
@@ -209,7 +210,7 @@ uint8_t *CLTensorAllocator::lock()
void CLTensorAllocator::unlock()
{
ARM_COMPUTE_ERROR_ON(_memory.region() == nullptr);
- if(_ctx)
+ if (_ctx)
{
unmap(_ctx->gpu_scheduler()->queue(), reinterpret_cast<uint8_t *>(_memory.region()->buffer()));
}
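
Note: import_memory() now wraps the caller's cl::Buffer in a CLBufferMemoryRegion directly, with no runtime-context plumbing. A usage sketch; the tensor's info is assumed to be initialised, and the buffer must be created on the CLScheduler's context and be at least info().total_size() bytes:

#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"

arm_compute::Status wrap_external_buffer(arm_compute::CLTensor &tensor)
{
    using namespace arm_compute;
    // Allocate a raw OpenCL buffer on the same context used by the scheduler.
    cl::Buffer external(CLScheduler::get().context(), CL_MEM_READ_WRITE, tensor.info()->total_size());
    // The tensor takes ownership of the region wrapper; cl::Buffer ref-counts the underlying cl_mem.
    return tensor.allocator()->import_memory(external);
}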