Diffstat (limited to 'src/runtime/CL/CLTensorAllocator.cpp')
-rw-r--r-- | src/runtime/CL/CLTensorAllocator.cpp | 75
1 file changed, 38 insertions(+), 37 deletions(-)
diff --git a/src/runtime/CL/CLTensorAllocator.cpp b/src/runtime/CL/CLTensorAllocator.cpp
index fc789fa4b9..e6457218c7 100644
--- a/src/runtime/CL/CLTensorAllocator.cpp
+++ b/src/runtime/CL/CLTensorAllocator.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -31,34 +31,33 @@ namespace arm_compute
 {
 const cl::Buffer CLTensorAllocator::_empty_buffer = cl::Buffer();
-
 namespace
 {
+/** Global user-defined allocator that can be used for all internal allocations of a CLTensor */
+static IAllocator *static_global_cl_allocator = nullptr;
+
 /** Helper function used to allocate the backing memory of a tensor
  *
- * @param[in] context   OpenCL context to use
  * @param[in] size      Size of the allocation
  * @param[in] alignment Alignment of the allocation
  *
  * @return A wrapped memory region
  */
-std::unique_ptr<ICLMemoryRegion> allocate_region(CLCoreRuntimeContext *ctx, size_t size, cl_uint alignment)
+std::unique_ptr<ICLMemoryRegion> allocate_region(size_t size, cl_uint alignment)
 {
     // Try fine-grain SVM
-    std::unique_ptr<ICLMemoryRegion> region = std::make_unique<CLFineSVMMemoryRegion>(ctx,
-                                                                                      CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER,
-                                                                                      size,
-                                                                                      alignment);
+    std::unique_ptr<ICLMemoryRegion> region =
+        std::make_unique<CLFineSVMMemoryRegion>(CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, size, alignment);

     // Try coarse-grain SVM in case of failure
-    if(region != nullptr && region->ptr() == nullptr)
+    if (region != nullptr && region->ptr() == nullptr)
     {
-        region = std::make_unique<CLCoarseSVMMemoryRegion>(ctx, CL_MEM_READ_WRITE, size, alignment);
+        region = std::make_unique<CLCoarseSVMMemoryRegion>(CL_MEM_READ_WRITE, size, alignment);
     }

     // Try legacy buffer memory in case of failure
-    if(region != nullptr && region->ptr() == nullptr)
+    if (region != nullptr && region->ptr() == nullptr)
     {
-        region = std::make_unique<CLBufferMemoryRegion>(ctx, CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, size);
+        region = std::make_unique<CLBufferMemoryRegion>(CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, size);
     }
     return region;
 }
@@ -80,7 +79,10 @@ void clear_quantization_arrays(CLFloatArray &scale, CLInt32Array &offset)
  * @param[in] qinfo    Quantization info
  * @param[in] pad_size Pad size to use in case array needs to be padded for computation purposes
  */
-void populate_quantization_info(CLFloatArray &scale, CLInt32Array &offset, const QuantizationInfo &qinfo, size_t pad_size)
+void populate_quantization_info(CLFloatArray           &scale,
+                                CLInt32Array           &offset,
+                                const QuantizationInfo &qinfo,
+                                size_t                  pad_size)
 {
     clear_quantization_arrays(scale, offset);

@@ -90,16 +92,18 @@ void populate_quantization_info(CLFloatArray &scale, CLInt32Array &offset, const
     const size_t element_size = sizeof(std::remove_reference<decltype(qscale)>::type::value_type);
     scale                     = CLFloatArray(num_elements + pad_size);
     scale.resize(num_elements);
-    CLScheduler::get().queue().enqueueWriteBuffer(scale.cl_buffer(), CL_TRUE, 0, num_elements * element_size, qinfo.scale().data());
+    CLScheduler::get().queue().enqueueWriteBuffer(scale.cl_buffer(), CL_TRUE, 0, num_elements * element_size,
+                                                  qinfo.scale().data());

-    if(!qinfo.offset().empty())
+    if (!qinfo.offset().empty())
     {
         // Create offset array
-        const std::vector<int32_t> &qoffset             = qinfo.offset();
-        const size_t                offset_element_size = sizeof(std::remove_reference<decltype(qoffset)>::type::value_type);
-        offset                                          = CLInt32Array(num_elements + pad_size);
+        const std::vector<int32_t> &qoffset = qinfo.offset();
+        const size_t offset_element_size    = sizeof(std::remove_reference<decltype(qoffset)>::type::value_type);
+        offset                              = CLInt32Array(num_elements + pad_size);
         offset.resize(num_elements);
-        CLScheduler::get().queue().enqueueWriteBuffer(offset.cl_buffer(), CL_TRUE, 0, num_elements * offset_element_size, qinfo.offset().data());
+        CLScheduler::get().queue().enqueueWriteBuffer(offset.cl_buffer(), CL_TRUE, 0,
+                                                      num_elements * offset_element_size, qinfo.offset().data());
     }
 }
 } // namespace
@@ -111,7 +115,7 @@ CLTensorAllocator::CLTensorAllocator(IMemoryManageable *owner, CLRuntimeContext

 CLQuantization CLTensorAllocator::quantization() const
 {
-    return { &_scale, &_offset };
+    return {&_scale, &_offset};
 }

 uint8_t *CLTensorAllocator::data()
@@ -127,26 +131,26 @@ const cl::Buffer &CLTensorAllocator::cl_data() const
 void CLTensorAllocator::allocate()
 {
     // Allocate tensor backing memory
-    if(_associated_memory_group == nullptr)
+    if (_associated_memory_group == nullptr)
     {
         // Perform memory allocation
-        if(_ctx == nullptr)
+        if (static_global_cl_allocator != nullptr)
        {
-            auto legacy_ctx = CLCoreRuntimeContext(nullptr, CLScheduler::get().context(), CLScheduler::get().queue());
-            _memory.set_owned_region(allocate_region(&legacy_ctx, info().total_size(), 0));
+            _memory.set_owned_region(static_global_cl_allocator->make_region(info().total_size(), 0));
         }
         else
         {
-            _memory.set_owned_region(allocate_region(_ctx->core_runtime_context(), info().total_size(), 0));
+            _memory.set_owned_region(allocate_region(info().total_size(), 0));
         }
     }
     else
     {
+        // Finalize memory management instead
         _associated_memory_group->finalize_memory(_owner, _memory, info().total_size(), alignment());
     }

     // Allocate and fill the quantization parameter arrays
-    if(is_data_type_quantized_per_channel(info().data_type()))
+    if (is_data_type_quantized_per_channel(info().data_type()))
     {
         const size_t pad_size = 0;
         populate_quantization_info(_scale, _offset, info().quantization_info(), pad_size);
@@ -171,15 +175,7 @@ Status CLTensorAllocator::import_memory(cl::Buffer buffer)
     ARM_COMPUTE_RETURN_ERROR_ON(buffer.getInfo<CL_MEM_CONTEXT>().get() != CLScheduler::get().context().get());
     ARM_COMPUTE_RETURN_ERROR_ON(_associated_memory_group != nullptr);

-    if(_ctx == nullptr)
-    {
-        auto legacy_ctx = CLCoreRuntimeContext(nullptr, CLScheduler::get().context(), CLScheduler::get().queue());
-        _memory.set_owned_region(std::make_unique<CLBufferMemoryRegion>(buffer, &legacy_ctx));
-    }
-    else
-    {
-        _memory.set_owned_region(std::make_unique<CLBufferMemoryRegion>(buffer, _ctx->core_runtime_context()));
-    }
+    _memory.set_owned_region(std::make_unique<CLBufferMemoryRegion>(buffer));

     info().set_is_resizable(false);
     return Status{};
@@ -194,9 +190,14 @@ void CLTensorAllocator::set_associated_memory_group(IMemoryGroup *associated_mem
     _associated_memory_group = associated_memory_group;
 }

+void CLTensorAllocator::set_global_allocator(IAllocator *allocator)
+{
+    static_global_cl_allocator = allocator;
+}
+
 uint8_t *CLTensorAllocator::lock()
 {
-    if(_ctx)
+    if (_ctx)
     {
         return map(_ctx->gpu_scheduler()->queue(), true);
     }
@@ -209,7 +210,7 @@ uint8_t *CLTensorAllocator::lock()
 void CLTensorAllocator::unlock()
 {
     ARM_COMPUTE_ERROR_ON(_memory.region() == nullptr);
-    if(_ctx)
+    if (_ctx)
     {
         unmap(_ctx->gpu_scheduler()->queue(), reinterpret_cast<uint8_t *>(_memory.region()->buffer()));
     }
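
The functional change in this diff is the new static entry point CLTensorAllocator::set_global_allocator(), which routes every internal CLTensor allocation through a user-supplied IAllocator instead of the fine-SVM -> coarse-SVM -> legacy-buffer fallback chain in allocate_region(). A minimal usage sketch follows; it is not part of this diff and assumes the library's stock CLBufferAllocator (an IAllocator implementation backed by cl::Buffer) plus the usual CLScheduler/CLTensor setup:

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/CL/CLBufferAllocator.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"

using namespace arm_compute;

int main()
{
    // Create the default OpenCL context and queue used by CLScheduler::get().
    CLScheduler::get().default_init();

    // Any IAllocator implementation can be plugged in here.
    CLBufferAllocator global_allocator{};

    // Route subsequent internal CLTensor allocations through it. After this
    // call, CLTensorAllocator::allocate() takes the
    // static_global_cl_allocator->make_region(...) branch shown in the diff.
    CLTensorAllocator::set_global_allocator(&global_allocator);

    CLTensor tensor;
    tensor.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::F32));
    tensor.allocator()->allocate(); // backing memory comes from the global allocator
    tensor.allocator()->free();

    // Unset to restore the default SVM/buffer fallback path.
    CLTensorAllocator::set_global_allocator(nullptr);
    return 0;
}

Note that the global allocator is process-wide state, and that it is not consulted for tensors managed by a memory group: as the allocate() branches above show, those still go through IMemoryGroup::finalize_memory().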