From 035004e65dbffb6534ad4183cf8f95da0544fd28 Mon Sep 17 00:00:00 2001
From: Georgios Pinitas
Date: Tue, 13 Apr 2021 19:44:17 +0100
Subject: Add support for a global allocator for OpenCL tensors

Give the user the ability to specify an allocator that is used for all
internal function tensors. Since this allocator is global, it needs to
outlive all the tensors/functions that use it.

Resolves: COMPMID-4212, COMPMID-4213

Signed-off-by: Georgios Pinitas
Change-Id: I251871c242879976819ebca1452404133a8e62d7
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5420
Tested-by: Arm Jenkins
Reviewed-by: Michele Di Giorgio
Comments-Addressed: Arm Jenkins
---
 arm_compute/runtime/CL/CLTensorAllocator.h   |  10 ++-
 src/runtime/CL/CLTensorAllocator.cpp         |  18 ++++-
 tests/validation/CL/UNIT/TensorAllocator.cpp | 103 ++++++++++++++++++++++++++-
 3 files changed, 126 insertions(+), 5 deletions(-)

diff --git a/arm_compute/runtime/CL/CLTensorAllocator.h b/arm_compute/runtime/CL/CLTensorAllocator.h
index 067c391489..1b061ee1d6 100644
--- a/arm_compute/runtime/CL/CLTensorAllocator.h
+++ b/arm_compute/runtime/CL/CLTensorAllocator.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -26,6 +26,7 @@
 
 #include "arm_compute/runtime/CL/CLArray.h"
 #include "arm_compute/runtime/CL/CLMemory.h"
+#include "arm_compute/runtime/IAllocator.h"
 #include "arm_compute/runtime/ITensorAllocator.h"
 #include "arm_compute/runtime/MemoryGroup.h"
 
@@ -127,6 +128,13 @@ public:
      */
     void set_associated_memory_group(IMemoryGroup *associated_memory_group);
 
+    /** Sets the global allocator that will be used by all CLTensor objects
+     *
+     * @note The allocator must outlive all tensors/functions that use it.
+     * @param[in] allocator Allocator to be used as a global allocator
+     */
+    static void set_global_allocator(IAllocator *allocator);
+
 protected:
     /** Call map() on the OpenCL buffer.
      *
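[Editor's note - not part of the patch: a minimal usage sketch of the new
API. It assumes a default-initialized scheduler and uses CLBufferAllocator
as the backing allocator; both are illustrative choices, not mandated by
the change.

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/CL/CLBufferAllocator.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/CLTensorAllocator.h"

    using namespace arm_compute;

    int main()
    {
        CLScheduler::get().default_init();

        // The allocator is global state: it must outlive every tensor and
        // function that allocates through it.
        CLBufferAllocator global_alloc{};
        CLTensorAllocator::set_global_allocator(&global_alloc);

        {
            CLTensor tensor;
            tensor.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::F32));
            tensor.allocator()->allocate(); // backing memory comes from global_alloc
        } // tensor is released while global_alloc is still alive

        // Unset the global allocator before it goes out of scope.
        CLTensorAllocator::set_global_allocator(nullptr);
        return 0;
    }

Tensors that are associated with a memory group keep going through the
group (see the allocate() change below); the global allocator only takes
over the plain allocation path.]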
diff --git a/src/runtime/CL/CLTensorAllocator.cpp b/src/runtime/CL/CLTensorAllocator.cpp
index fc789fa4b9..c82e9dfc67 100644
--- a/src/runtime/CL/CLTensorAllocator.cpp
+++ b/src/runtime/CL/CLTensorAllocator.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -31,9 +31,11 @@
 namespace arm_compute
 {
 const cl::Buffer CLTensorAllocator::_empty_buffer = cl::Buffer();
-
 namespace
 {
+/** Global user-defined allocator that can be used for all internal allocations of a CLTensor */
+static IAllocator *static_global_cl_allocator = nullptr;
+
 /** Helper function used to allocate the backing memory of a tensor
  *
  * @param[in] context OpenCL context to use
@@ -130,7 +132,11 @@ void CLTensorAllocator::allocate()
     if(_associated_memory_group == nullptr)
     {
         // Perform memory allocation
-        if(_ctx == nullptr)
+        if(static_global_cl_allocator != nullptr)
+        {
+            _memory.set_owned_region(static_global_cl_allocator->make_region(info().total_size(), 0));
+        }
+        else if(_ctx == nullptr)
         {
             auto legacy_ctx = CLCoreRuntimeContext(nullptr, CLScheduler::get().context(), CLScheduler::get().queue());
             _memory.set_owned_region(allocate_region(&legacy_ctx, info().total_size(), 0));
@@ -142,6 +148,7 @@ void CLTensorAllocator::allocate()
     }
     else
     {
+        // Finalize memory management instead
         _associated_memory_group->finalize_memory(_owner, _memory, info().total_size(), alignment());
     }
 
@@ -194,6 +201,11 @@ void CLTensorAllocator::set_associated_memory_group(IMemoryGroup *associated_memory_group)
     _associated_memory_group = associated_memory_group;
 }
 
+void CLTensorAllocator::set_global_allocator(IAllocator *allocator)
+{
+    static_global_cl_allocator = allocator;
+}
+
 uint8_t *CLTensorAllocator::lock()
 {
     if(_ctx)
diff --git a/tests/validation/CL/UNIT/TensorAllocator.cpp b/tests/validation/CL/UNIT/TensorAllocator.cpp
index 3ccdd99fe3..4ebd521bf0 100644
--- a/tests/validation/CL/UNIT/TensorAllocator.cpp
+++ b/tests/validation/CL/UNIT/TensorAllocator.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -24,9 +24,14 @@
 #include "arm_compute/runtime/CL/CLTensorAllocator.h"
 
 #include "arm_compute/core/utils/misc/MMappedFile.h"
+#include "arm_compute/runtime/BlobLifetimeManager.h"
+#include "arm_compute/runtime/CL/CLBufferAllocator.h"
 #include "arm_compute/runtime/CL/CLScheduler.h"
 #include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
+#include "arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h"
 #include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/runtime/MemoryManagerOnDemand.h"
+#include "arm_compute/runtime/PoolManager.h"
 #include "tests/CL/CLAccessor.h"
 #include "tests/Globals.h"
 #include "tests/framework/Asserts.h"
@@ -60,12 +65,108 @@ cl_mem import_malloc_memory_helper(void *ptr, size_t size)
 
     return buf;
 }
+
+class DummyAllocator final : public IAllocator
+{
+public:
+    DummyAllocator() = default;
+
+    void *allocate(size_t size, size_t alignment) override
+    {
+        ++_n_calls;
+        return _backend_allocator.allocate(size, alignment);
+    }
+    void free(void *ptr) override
+    {
+        return _backend_allocator.free(ptr);
+    }
+    std::unique_ptr<IMemoryRegion> make_region(size_t size, size_t alignment) override
+    {
+        // Needs to be implemented, as this is the method that CLTensorAllocator uses internally
+        ++_n_calls;
+        return _backend_allocator.make_region(size, alignment);
+    }
+    int get_n_calls() const
+    {
+        return _n_calls;
+    }
+
+private:
+    int               _n_calls{};
+    CLBufferAllocator _backend_allocator{};
+};
+
+void run_conv2d(std::shared_ptr<MemoryManagerOnDemand> mm, IAllocator &mm_allocator)
+{
+    // Create tensors
+    CLTensor src, weights, bias, dst;
+    src.allocator()->init(TensorInfo(TensorShape(16U, 32U, 32U, 2U), 1, DataType::F32, DataLayout::NHWC));
+    weights.allocator()->init(TensorInfo(TensorShape(16U, 3U, 3U, 32U), 1, DataType::F32, DataLayout::NHWC));
+    bias.allocator()->init(TensorInfo(TensorShape(32U), 1, DataType::F32, DataLayout::NHWC));
+    dst.allocator()->init(TensorInfo(TensorShape(32U, 32U, 32U, 2U), 1, DataType::F32, DataLayout::NHWC));
+
+    // Create and configure function
+    CLGEMMConvolutionLayer conv(mm);
+    conv.configure(&src, &weights, &bias, &dst, PadStrideInfo(1U, 1U, 1U, 1U));
+
+    // Allocate tensors
+    src.allocator()->allocate();
+    weights.allocator()->allocate();
+    bias.allocator()->allocate();
+    dst.allocator()->allocate();
+
+    // Finalize memory manager
+    if(mm != nullptr)
+    {
+        mm->populate(mm_allocator, 1 /* num_pools */);
+        ARM_COMPUTE_EXPECT(mm->lifetime_manager()->are_all_finalized(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(mm->pool_manager()->num_pools() == 1, framework::LogLevel::ERRORS);
+    }
+
+    conv.run();
+}
 } // namespace
 
 TEST_SUITE(CL)
 TEST_SUITE(UNIT)
 TEST_SUITE(TensorAllocator)
 
+/* Validate that an external global allocator can be used for all internal allocations */
+TEST_CASE(ExternalGlobalAllocator, framework::DatasetMode::ALL)
+{
+    DummyAllocator global_tensor_alloc;
+    CLTensorAllocator::set_global_allocator(&global_tensor_alloc);
+
+    // Run a convolution
+    run_conv2d(nullptr /* mm */, global_tensor_alloc);
+
+    // Check that the allocator has been called multiple times (> 4)
+    ARM_COMPUTE_EXPECT(global_tensor_alloc.get_n_calls() > 4, framework::LogLevel::ERRORS);
+
+    // Nullify global allocator
+    CLTensorAllocator::set_global_allocator(nullptr);
+}
+
+/* Validate that an external global allocator can be used for the pool manager */
+TEST_CASE(ExternalGlobalAllocatorMemoryPool, framework::DatasetMode::ALL)
+{
+    auto lifetime_mgr = std::make_shared<BlobLifetimeManager>();
+    auto pool_mgr     = std::make_shared<PoolManager>();
+    auto mm           = std::make_shared<MemoryManagerOnDemand>(lifetime_mgr, pool_mgr);
+
+    DummyAllocator global_tensor_alloc;
+    CLTensorAllocator::set_global_allocator(&global_tensor_alloc);
+
+    // Run a convolution
+    run_conv2d(mm, global_tensor_alloc);
+
+    // Check that the allocator has been called multiple times (> 4)
+    ARM_COMPUTE_EXPECT(global_tensor_alloc.get_n_calls() > 4, framework::LogLevel::ERRORS);
+
+    // Nullify global allocator
+    CLTensorAllocator::set_global_allocator(nullptr);
+}
+
 /** Validates import memory interface when importing cl buffer objects */
 TEST_CASE(ImportMemoryBuffer, framework::DatasetMode::ALL)
 {
-- 
cgit v1.2.1
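[Editor's note - not part of the patch: the DummyAllocator above is test
infrastructure, but the same pattern works for user-side instrumentation.
Below is a sketch of a tracking allocator that counts the bytes requested
through the new global-allocator hook; it assumes only the IAllocator
interface used in this patch, delegates the real work to CLBufferAllocator,
and the class name is hypothetical.

    #include "arm_compute/runtime/CL/CLBufferAllocator.h"
    #include "arm_compute/runtime/CL/CLTensorAllocator.h"
    #include "arm_compute/runtime/IAllocator.h"
    #include "arm_compute/runtime/IMemoryRegion.h"

    #include <atomic>
    #include <cstddef>
    #include <memory>

    class TrackingAllocator final : public arm_compute::IAllocator
    {
    public:
        void *allocate(size_t size, size_t alignment) override
        {
            _bytes += size;
            return _impl.allocate(size, alignment);
        }
        void free(void *ptr) override
        {
            _impl.free(ptr);
        }
        // make_region() is the entry point CLTensorAllocator calls internally
        std::unique_ptr<arm_compute::IMemoryRegion> make_region(size_t size, size_t alignment) override
        {
            _bytes += size;
            return _impl.make_region(size, alignment);
        }
        size_t allocated_bytes() const
        {
            return _bytes;
        }

    private:
        std::atomic<size_t>            _bytes{ 0 };
        arm_compute::CLBufferAllocator _impl{}; // delegate the actual OpenCL allocation
    };

Installed with CLTensorAllocator::set_global_allocator(&tracker), such an
allocator observes every internal CL tensor allocation, which is exactly
the behaviour the call-count checks in the two tests above rely on.]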