Diffstat (limited to 'src/runtime/CL/CLTensorAllocator.cpp')
-rw-r--r--  src/runtime/CL/CLTensorAllocator.cpp  75
1 file changed, 38 insertions(+), 37 deletions(-)
diff --git a/src/runtime/CL/CLTensorAllocator.cpp b/src/runtime/CL/CLTensorAllocator.cpp
index eaf46d42ca..e6457218c7 100644
--- a/src/runtime/CL/CLTensorAllocator.cpp
+++ b/src/runtime/CL/CLTensorAllocator.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -31,34 +31,33 @@
namespace arm_compute
{
const cl::Buffer CLTensorAllocator::_empty_buffer = cl::Buffer();
-
namespace
{
+/** Global user-defined allocator that can be used for all internal allocations of a CLTensor */
+static IAllocator *static_global_cl_allocator = nullptr;
+
/** Helper function used to allocate the backing memory of a tensor
*
- * @param[in] context OpenCL context to use
* @param[in] size Size of the allocation
* @param[in] alignment Alignment of the allocation
*
* @return A wrapped memory region
*/
-std::unique_ptr<ICLMemoryRegion> allocate_region(CLCoreRuntimeContext *ctx, size_t size, cl_uint alignment)
+std::unique_ptr<ICLMemoryRegion> allocate_region(size_t size, cl_uint alignment)
{
// Try fine-grain SVM
- std::unique_ptr<ICLMemoryRegion> region = support::cpp14::make_unique<CLFineSVMMemoryRegion>(ctx,
- CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER,
- size,
- alignment);
+ std::unique_ptr<ICLMemoryRegion> region =
+ std::make_unique<CLFineSVMMemoryRegion>(CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, size, alignment);
// Try coarse-grain SVM in case of failure
- if(region != nullptr && region->ptr() == nullptr)
+ if (region != nullptr && region->ptr() == nullptr)
{
- region = support::cpp14::make_unique<CLCoarseSVMMemoryRegion>(ctx, CL_MEM_READ_WRITE, size, alignment);
+ region = std::make_unique<CLCoarseSVMMemoryRegion>(CL_MEM_READ_WRITE, size, alignment);
}
// Try legacy buffer memory in case of failure
- if(region != nullptr && region->ptr() == nullptr)
+ if (region != nullptr && region->ptr() == nullptr)
{
- region = support::cpp14::make_unique<CLBufferMemoryRegion>(ctx, CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, size);
+ region = std::make_unique<CLBufferMemoryRegion>(CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, size);
}
return region;
}
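
Note: the new allocate_region() falls back from fine-grain SVM to coarse-grain SVM and finally to a plain CL_MEM_ALLOC_HOST_PTR buffer, using a null mapped pointer as the failure signal. For callers that want to know up front which path will likely be taken, a minimal sketch of a device capability query (assuming OpenCL 2.0 headers and the same scheduler context the allocator uses):

#include "arm_compute/runtime/CL/CLScheduler.h"

bool device_supports_fine_grain_svm()
{
    // Query the first device of the context that CLTensorAllocator allocates on.
    const cl::Device device = arm_compute::CLScheduler::get().context().getInfo<CL_CONTEXT_DEVICES>().front();
    const cl_device_svm_capabilities caps = device.getInfo<CL_DEVICE_SVM_CAPABILITIES>();
    return (caps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) != 0;
}
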
@@ -80,7 +79,10 @@ void clear_quantization_arrays(CLFloatArray &scale, CLInt32Array &offset)
* @param[in] qinfo Quantization info
* @param[in] pad_size Pad size to use in case array needs to be padded for computation purposes
*/
-void populate_quantization_info(CLFloatArray &scale, CLInt32Array &offset, const QuantizationInfo &qinfo, size_t pad_size)
+void populate_quantization_info(CLFloatArray &scale,
+ CLInt32Array &offset,
+ const QuantizationInfo &qinfo,
+ size_t pad_size)
{
clear_quantization_arrays(scale, offset);
@@ -90,16 +92,18 @@ void populate_quantization_info(CLFloatArray &scale, CLInt32Array &offset, const
const size_t element_size = sizeof(std::remove_reference<decltype(qscale)>::type::value_type);
scale = CLFloatArray(num_elements + pad_size);
scale.resize(num_elements);
- CLScheduler::get().queue().enqueueWriteBuffer(scale.cl_buffer(), CL_TRUE, 0, num_elements * element_size, qinfo.scale().data());
+ CLScheduler::get().queue().enqueueWriteBuffer(scale.cl_buffer(), CL_TRUE, 0, num_elements * element_size,
+ qinfo.scale().data());
- if(!qinfo.offset().empty())
+ if (!qinfo.offset().empty())
{
// Create offset array
- const std::vector<int32_t> &qoffset = qinfo.offset();
- const size_t offset_element_size = sizeof(std::remove_reference<decltype(qoffset)>::type::value_type);
- offset = CLInt32Array(num_elements + pad_size);
+ const std::vector<int32_t> &qoffset = qinfo.offset();
+ const size_t offset_element_size = sizeof(std::remove_reference<decltype(qoffset)>::type::value_type);
+ offset = CLInt32Array(num_elements + pad_size);
offset.resize(num_elements);
- CLScheduler::get().queue().enqueueWriteBuffer(offset.cl_buffer(), CL_TRUE, 0, num_elements * offset_element_size, qinfo.offset().data());
+ CLScheduler::get().queue().enqueueWriteBuffer(offset.cl_buffer(), CL_TRUE, 0,
+ num_elements * offset_element_size, qinfo.offset().data());
}
}
} // namespace
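
Note: populate_quantization_info() uploads the per-channel scale (and optional offset) vector into the CLFloatArray/CLInt32Array members with blocking enqueueWriteBuffer calls, and allocate() only invokes it for per-channel quantized data types. A usage sketch that would exercise this path; the shape, scales and the QSYMM8_PER_CHANNEL choice are illustrative, and the CLScheduler is assumed to be initialised already:

#include "arm_compute/core/QuantizationInfo.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include <vector>

void make_per_channel_weights()
{
    using namespace arm_compute;
    // 3x3 kernel with 3 output channels, one scale per channel.
    const std::vector<float> per_channel_scales = {0.5f, 0.25f, 0.125f};
    TensorInfo info(TensorShape(3U, 3U, 3U), 1, DataType::QSYMM8_PER_CHANNEL,
                    QuantizationInfo(per_channel_scales));

    CLTensor weights;
    weights.allocator()->init(info);
    weights.allocator()->allocate(); // fills the scale (and offset, if any) arrays as above
}
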
@@ -111,7 +115,7 @@ CLTensorAllocator::CLTensorAllocator(IMemoryManageable *owner, CLRuntimeContext
CLQuantization CLTensorAllocator::quantization() const
{
- return { &_scale, &_offset };
+ return {&_scale, &_offset};
}
uint8_t *CLTensorAllocator::data()
@@ -127,26 +131,26 @@ const cl::Buffer &CLTensorAllocator::cl_data() const
void CLTensorAllocator::allocate()
{
// Allocate tensor backing memory
- if(_associated_memory_group == nullptr)
+ if (_associated_memory_group == nullptr)
{
// Perform memory allocation
- if(_ctx == nullptr)
+ if (static_global_cl_allocator != nullptr)
{
- auto legacy_ctx = CLCoreRuntimeContext(nullptr, CLScheduler::get().context(), CLScheduler::get().queue());
- _memory.set_owned_region(allocate_region(&legacy_ctx, info().total_size(), 0));
+ _memory.set_owned_region(static_global_cl_allocator->make_region(info().total_size(), 0));
}
else
{
- _memory.set_owned_region(allocate_region(_ctx->core_runtime_context(), info().total_size(), 0));
+ _memory.set_owned_region(allocate_region(info().total_size(), 0));
}
}
else
{
+ // Finalize memory management instead
_associated_memory_group->finalize_memory(_owner, _memory, info().total_size(), alignment());
}
// Allocate and fill the quantization parameter arrays
- if(is_data_type_quantized_per_channel(info().data_type()))
+ if (is_data_type_quantized_per_channel(info().data_type()))
{
const size_t pad_size = 0;
populate_quantization_info(_scale, _offset, info().quantization_info(), pad_size);
@@ -171,15 +175,7 @@ Status CLTensorAllocator::import_memory(cl::Buffer buffer)
ARM_COMPUTE_RETURN_ERROR_ON(buffer.getInfo<CL_MEM_CONTEXT>().get() != CLScheduler::get().context().get());
ARM_COMPUTE_RETURN_ERROR_ON(_associated_memory_group != nullptr);
- if(_ctx == nullptr)
- {
- auto legacy_ctx = CLCoreRuntimeContext(nullptr, CLScheduler::get().context(), CLScheduler::get().queue());
- _memory.set_owned_region(support::cpp14::make_unique<CLBufferMemoryRegion>(buffer, &legacy_ctx));
- }
- else
- {
- _memory.set_owned_region(support::cpp14::make_unique<CLBufferMemoryRegion>(buffer, _ctx->core_runtime_context()));
- }
+ _memory.set_owned_region(std::make_unique<CLBufferMemoryRegion>(buffer));
info().set_is_resizable(false);
return Status{};
@@ -194,9 +190,14 @@ void CLTensorAllocator::set_associated_memory_group(IMemoryGroup *associated_mem
_associated_memory_group = associated_memory_group;
}
+void CLTensorAllocator::set_global_allocator(IAllocator *allocator)
+{
+ static_global_cl_allocator = allocator;
+}
+
uint8_t *CLTensorAllocator::lock()
{
- if(_ctx)
+ if (_ctx)
{
return map(_ctx->gpu_scheduler()->queue(), true);
}
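
Note: set_global_allocator() stores a process-wide IAllocator that allocate() now prefers over the SVM/buffer fallback. A sketch of registering one; CLBufferAllocator is used here only as a convenient IAllocator implementation, any user-defined allocator works the same way:

#include "arm_compute/runtime/CL/CLBufferAllocator.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"

void install_global_cl_allocator()
{
    // Keep the allocator alive for as long as tensors may allocate through it.
    static arm_compute::CLBufferAllocator buffer_allocator;
    arm_compute::CLTensorAllocator::set_global_allocator(&buffer_allocator);

    // Passing nullptr later restores the default allocate_region() path:
    // arm_compute::CLTensorAllocator::set_global_allocator(nullptr);
}
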
@@ -209,7 +210,7 @@ uint8_t *CLTensorAllocator::lock()
void CLTensorAllocator::unlock()
{
ARM_COMPUTE_ERROR_ON(_memory.region() == nullptr);
- if(_ctx)
+ if (_ctx)
{
unmap(_ctx->gpu_scheduler()->queue(), reinterpret_cast<uint8_t *>(_memory.region()->buffer()));
}
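
Note: import_memory() now wraps the caller's cl::Buffer in a CLBufferMemoryRegion directly, with no runtime-context plumbing. A usage sketch; the tensor's info is assumed to be initialised, and the buffer must be created on the CLScheduler's context and be at least info().total_size() bytes:

#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"

arm_compute::Status wrap_external_buffer(arm_compute::CLTensor &tensor)
{
    using namespace arm_compute;
    // Allocate a raw OpenCL buffer on the same context used by the scheduler.
    cl::Buffer external(CLScheduler::get().context(), CL_MEM_READ_WRITE, tensor.info()->total_size());
    // The tensor takes ownership of the region wrapper; cl::Buffer ref-counts the underlying cl_mem.
    return tensor.allocator()->import_memory(external);
}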