author    Georgios Pinitas <georgios.pinitas@arm.com>  2021-04-13 19:44:17 +0100
committer Georgios Pinitas <georgios.pinitas@arm.com>  2021-04-14 14:37:44 +0000
commit    035004e65dbffb6534ad4183cf8f95da0544fd28
tree      6ba1c45e6fc39c27672448cd5f5ed7e86cd61eed
parent    327225d3b2f716d5c62d801a7fafc7d377521f34
download  ComputeLibrary-035004e65dbffb6534ad4183cf8f95da0544fd28.tar.gz
Add support for a global allocator for OpenCL tensors
Give the user the ability to specify an allocator to be used for all internal
function tensors. Since this allocator is global, it needs to outlive all the
tensors/functions that use it.

Resolves: COMPMID-4212, COMPMID-4213

Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Change-Id: I251871c242879976819ebca1452404133a8e62d7
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5420
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
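A minimal usage sketch of the new entry point (not part of the patch), assuming a default CLScheduler initialization and the CLBufferAllocator backend that the tests below also use:

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLBufferAllocator.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"

using namespace arm_compute;

int main()
{
    // Set up the default OpenCL context and queue
    CLScheduler::get().default_init();

    // The global allocator must outlive every tensor/function that uses it
    CLBufferAllocator global_alloc;
    CLTensorAllocator::set_global_allocator(&global_alloc);

    // Any CLTensor allocated from now on gets its backing memory from global_alloc
    CLTensor tensor;
    tensor.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::F32));
    tensor.allocator()->allocate();

    // ... configure and run functions using the tensor ...

    // Release the tensor and unset the global allocator before it goes out of scope
    tensor.allocator()->free();
    CLTensorAllocator::set_global_allocator(nullptr);
    return 0;
}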
-rw-r--r--  arm_compute/runtime/CL/CLTensorAllocator.h    |  10
-rw-r--r--  src/runtime/CL/CLTensorAllocator.cpp          |  18
-rw-r--r--  tests/validation/CL/UNIT/TensorAllocator.cpp  | 103
3 files changed, 126 insertions(+), 5 deletions(-)
diff --git a/arm_compute/runtime/CL/CLTensorAllocator.h b/arm_compute/runtime/CL/CLTensorAllocator.h
index 067c391489..1b061ee1d6 100644
--- a/arm_compute/runtime/CL/CLTensorAllocator.h
+++ b/arm_compute/runtime/CL/CLTensorAllocator.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,6 +26,7 @@
#include "arm_compute/runtime/CL/CLArray.h"
#include "arm_compute/runtime/CL/CLMemory.h"
+#include "arm_compute/runtime/IAllocator.h"
#include "arm_compute/runtime/ITensorAllocator.h"
#include "arm_compute/runtime/MemoryGroup.h"
@@ -127,6 +128,13 @@ public:
*/
void set_associated_memory_group(IMemoryGroup *associated_memory_group);
+ /** Sets the global allocator that will be used by all CLTensor objects
+ *
+ * @note The allocator is expected to outlive all tensors/functions that use it.
+ * @param[in] allocator Allocator to be used as a global allocator
+ */
+ static void set_global_allocator(IAllocator *allocator);
+
protected:
/** Call map() on the OpenCL buffer.
*
diff --git a/src/runtime/CL/CLTensorAllocator.cpp b/src/runtime/CL/CLTensorAllocator.cpp
index fc789fa4b9..c82e9dfc67 100644
--- a/src/runtime/CL/CLTensorAllocator.cpp
+++ b/src/runtime/CL/CLTensorAllocator.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -31,9 +31,11 @@
namespace arm_compute
{
const cl::Buffer CLTensorAllocator::_empty_buffer = cl::Buffer();
-
namespace
{
+/** Global user-defined allocator that can be used for all internal allocations of a CLTensor */
+static IAllocator *static_global_cl_allocator = nullptr;
+
/** Helper function used to allocate the backing memory of a tensor
*
* @param[in] context OpenCL context to use
@@ -130,7 +132,11 @@ void CLTensorAllocator::allocate()
if(_associated_memory_group == nullptr)
{
// Perform memory allocation
- if(_ctx == nullptr)
+ if(static_global_cl_allocator != nullptr)
+ {
+ _memory.set_owned_region(static_global_cl_allocator->make_region(info().total_size(), 0));
+ }
+ else if(_ctx == nullptr)
{
auto legacy_ctx = CLCoreRuntimeContext(nullptr, CLScheduler::get().context(), CLScheduler::get().queue());
_memory.set_owned_region(allocate_region(&legacy_ctx, info().total_size(), 0));
@@ -142,6 +148,7 @@ void CLTensorAllocator::allocate()
}
else
{
+ // Let the associated memory group finalize the memory instead
_associated_memory_group->finalize_memory(_owner, _memory, info().total_size(), alignment());
}
@@ -194,6 +201,11 @@ void CLTensorAllocator::set_associated_memory_group(IMemoryGroup *associated_mem
_associated_memory_group = associated_memory_group;
}
+void CLTensorAllocator::set_global_allocator(IAllocator *allocator)
+{
+ static_global_cl_allocator = allocator;
+}
+
uint8_t *CLTensorAllocator::lock()
{
if(_ctx)
diff --git a/tests/validation/CL/UNIT/TensorAllocator.cpp b/tests/validation/CL/UNIT/TensorAllocator.cpp
index 3ccdd99fe3..4ebd521bf0 100644
--- a/tests/validation/CL/UNIT/TensorAllocator.cpp
+++ b/tests/validation/CL/UNIT/TensorAllocator.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,9 +24,14 @@
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
#include "arm_compute/core/utils/misc/MMappedFile.h"
+#include "arm_compute/runtime/BlobLifetimeManager.h"
+#include "arm_compute/runtime/CL/CLBufferAllocator.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
+#include "arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h"
#include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/runtime/MemoryManagerOnDemand.h"
+#include "arm_compute/runtime/PoolManager.h"
#include "tests/CL/CLAccessor.h"
#include "tests/Globals.h"
#include "tests/framework/Asserts.h"
@@ -60,12 +65,108 @@ cl_mem import_malloc_memory_helper(void *ptr, size_t size)
return buf;
}
+
+class DummyAllocator final : public IAllocator
+{
+public:
+ DummyAllocator() = default;
+
+ void *allocate(size_t size, size_t alignment) override
+ {
+ ++_n_calls;
+ return _backend_allocator.allocate(size, alignment);
+ }
+ void free(void *ptr) override
+ {
+ return _backend_allocator.free(ptr);
+ }
+ std::unique_ptr<IMemoryRegion> make_region(size_t size, size_t alignment) override
+ {
+ // Needs to be implemented as it is the one used internally by the CLTensorAllocator
+ ++_n_calls;
+ return std::move(_backend_allocator.make_region(size, alignment));
+ }
+ int get_n_calls() const
+ {
+ return _n_calls;
+ }
+
+private:
+ int _n_calls{};
+ CLBufferAllocator _backend_allocator{};
+};
+
+void run_conv2d(std::shared_ptr<IMemoryManager> mm, IAllocator &mm_allocator)
+{
+ // Create tensors
+ CLTensor src, weights, bias, dst;
+ src.allocator()->init(TensorInfo(TensorShape(16U, 32U, 32U, 2U), 1, DataType::F32, DataLayout::NHWC));
+ weights.allocator()->init(TensorInfo(TensorShape(16U, 3U, 3U, 32U), 1, DataType::F32, DataLayout::NHWC));
+ bias.allocator()->init(TensorInfo(TensorShape(32U), 1, DataType::F32, DataLayout::NHWC));
+ dst.allocator()->init(TensorInfo(TensorShape(32U, 32U, 32U, 2U), 1, DataType::F32, DataLayout::NHWC));
+
+ // Create and configure function
+ CLGEMMConvolutionLayer conv(mm);
+ conv.configure(&src, &weights, &bias, &dst, PadStrideInfo(1U, 1U, 1U, 1U));
+
+ // Allocate tensors
+ src.allocator()->allocate();
+ weights.allocator()->allocate();
+ bias.allocator()->allocate();
+ dst.allocator()->allocate();
+
+ // Finalize memory manager
+ if(mm != nullptr)
+ {
+ mm->populate(mm_allocator, 1 /* num_pools */);
+ ARM_COMPUTE_EXPECT(mm->lifetime_manager()->are_all_finalized(), framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(mm->pool_manager()->num_pools() == 1, framework::LogLevel::ERRORS);
+ }
+
+ conv.run();
+}
} // namespace
TEST_SUITE(CL)
TEST_SUITE(UNIT)
TEST_SUITE(TensorAllocator)
+/* Validate that an external global allocator can be used for all internal allocations */
+TEST_CASE(ExternalGlobalAllocator, framework::DatasetMode::ALL)
+{
+ DummyAllocator global_tensor_alloc;
+ CLTensorAllocator::set_global_allocator(&global_tensor_alloc);
+
+ // Run a convolution
+ run_conv2d(nullptr /* mm */, global_tensor_alloc);
+
+ // Check that the allocator has been called more than 4 times
+ ARM_COMPUTE_EXPECT(global_tensor_alloc.get_n_calls() > 4, framework::LogLevel::ERRORS);
+
+ // Nullify global allocator
+ CLTensorAllocator::set_global_allocator(nullptr);
+}
+
+/* Validate that an external global allocator can be used for the pool manager */
+TEST_CASE(ExternalGlobalAllocatorMemoryPool, framework::DatasetMode::ALL)
+{
+ auto lifetime_mgr = std::make_shared<BlobLifetimeManager>();
+ auto pool_mgr = std::make_shared<PoolManager>();
+ auto mm = std::make_shared<MemoryManagerOnDemand>(lifetime_mgr, pool_mgr);
+
+ DummyAllocator global_tensor_alloc;
+ CLTensorAllocator::set_global_allocator(&global_tensor_alloc);
+
+ // Run a convolution
+ run_conv2d(mm, global_tensor_alloc);
+
+ // Check that the allocator has been called more than 4 times
+ ARM_COMPUTE_EXPECT(global_tensor_alloc.get_n_calls() > 4, framework::LogLevel::ERRORS);
+
+ // Nullify global allocator
+ CLTensorAllocator::set_global_allocator(nullptr);
+}
+
/** Validates import memory interface when importing cl buffer objects */
TEST_CASE(ImportMemoryBuffer, framework::DatasetMode::ALL)
{