From 4d0351cf322df51baa5a445f637008992aa37809 Mon Sep 17 00:00:00 2001
From: Georgios Pinitas
Date: Wed, 3 Apr 2019 15:11:16 +0100
Subject: COMPMID-2057: Implement and test import memory interfaces.

Change-Id: I1559bea47ae6403177d248e2f7be47d5f1a6513f
Signed-off-by: Georgios Pinitas
Reviewed-on: https://review.mlplatform.org/c/956
Comments-Addressed: Arm Jenkins
Tested-by: Arm Jenkins
Reviewed-by: Michalis Spyrou
---
 arm_compute/core/CL/OpenCL.h                   |   5 +-
 arm_compute/runtime/CL/CLTensorAllocator.h     |  14 ++-
 arm_compute/runtime/TensorAllocator.h          |  13 ++-
 src/core/CL/OpenCL.cpp                         |  29 +++++-
 src/runtime/CL/CLTensorAllocator.cpp           |   6 +-
 src/runtime/TensorAllocator.cpp                |   6 +-
 tests/validation/CL/UNIT/TensorAllocator.cpp   | 121 ++++++++++++++++++++++---
 tests/validation/NEON/UNIT/TensorAllocator.cpp |  77 ++++++++++++++--
 8 files changed, 231 insertions(+), 40 deletions(-)

diff --git a/arm_compute/core/CL/OpenCL.h b/arm_compute/core/CL/OpenCL.h
index 65cbb3282b..fc7083d276 100644
--- a/arm_compute/core/CL/OpenCL.h
+++ b/arm_compute/core/CL/OpenCL.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
@@ -131,6 +131,9 @@ public:
     DECLARE_FUNCTION_PTR(clEnqueueMarker);
     DECLARE_FUNCTION_PTR(clWaitForEvents);
 
+    // Third-party extensions
+    DECLARE_FUNCTION_PTR(clImportMemoryARM);
+
 #undef DECLARE_FUNCTION_PTR
 
 private:
diff --git a/arm_compute/runtime/CL/CLTensorAllocator.h b/arm_compute/runtime/CL/CLTensorAllocator.h
index de5f482d05..302bd6d52a 100644
--- a/arm_compute/runtime/CL/CLTensorAllocator.h
+++ b/arm_compute/runtime/CL/CLTensorAllocator.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
@@ -103,13 +103,17 @@ public:
     void free() override;
     /** Import an existing memory as a tensor's backing memory
      *
-     * @warning ownership of memory is not transferred
+     * @warning memory should have been created under the same context that ACL uses.
+     * @warning memory is expected to be aligned with the device requirements.
+     * @warning tensor shouldn't be memory managed.
+     * @warning ownership of memory is not transferred.
+     * @note buffer size will be checked to be compliant with total_size reported by ITensorInfo.
      *
-     * @param[in] buffer Buffer to import
+     * @param[in] buffer Buffer to be used as backing memory
      *
-     * @return error status
+     * @return An error status
      */
-    arm_compute::Status import_memory(cl::Buffer buffer);
+    Status import_memory(cl::Buffer buffer);
     /** Associates the tensor with a memory group
      *
      * @param[in] associated_memory_group Memory group to associate the tensor with
diff --git a/arm_compute/runtime/TensorAllocator.h b/arm_compute/runtime/TensorAllocator.h
index ba9e5163ab..f296bc2e3d 100644
--- a/arm_compute/runtime/TensorAllocator.h
+++ b/arm_compute/runtime/TensorAllocator.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
@@ -95,11 +95,16 @@ public:
     void free() override;
     /** Import an existing memory as a tensor's backing memory
      *
-     * @warning ownership of memory is not transferred
+     * @warning size is expected to be compliant with total_size reported by ITensorInfo.
+     * @warning ownership of memory is not transferred.
+     * @warning tensor shouldn't be memory managed.
+     * @note buffer alignment will be checked to be compliant with alignment reported by ITensorInfo.
      *
-     * @return error status
+     * @param[in] memory Raw memory pointer to be used as backing memory
+     *
+     * @return An error status
      */
-    arm_compute::Status import_memory(void *memory, size_t size);
+    Status import_memory(void *memory);
     /** Associates the tensor with a memory group
      *
      * @param[in] associated_memory_group Memory group to associate the tensor with
diff --git a/src/core/CL/OpenCL.cpp b/src/core/CL/OpenCL.cpp
index 6725f36a5d..ef03a5a302 100644
--- a/src/core/CL/OpenCL.cpp
+++ b/src/core/CL/OpenCL.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
@@ -120,6 +120,9 @@ bool CLSymbols::load(const std::string &library)
     LOAD_FUNCTION_PTR(clEnqueueMarker, handle);
     LOAD_FUNCTION_PTR(clWaitForEvents, handle);
 
+    // Third-party extensions
+    LOAD_FUNCTION_PTR(clImportMemoryARM, handle);
+
 #undef LOAD_FUNCTION_PTR
 
     //Don't call dlclose(handle) or all the symbols will be unloaded !
@@ -919,3 +922,27 @@ clGetEventProfilingInfo(cl_event event,
         return CL_OUT_OF_RESOURCES;
     }
 }
+
+cl_mem
+clImportMemoryARM(cl_context context,
+                  cl_mem_flags flags,
+                  const cl_import_properties_arm *properties,
+                  void *memory,
+                  size_t size,
+                  cl_int *errcode_ret)
+{
+    arm_compute::CLSymbols::get().load_default();
+    auto func = arm_compute::CLSymbols::get().clImportMemoryARM_ptr;
+    if(func != nullptr)
+    {
+        return func(context, flags, properties, memory, size, errcode_ret);
+    }
+    else
+    {
+        if(errcode_ret != nullptr)
+        {
+            *errcode_ret = CL_OUT_OF_RESOURCES;
+        }
+        return nullptr;
+    }
+}
\ No newline at end of file
diff --git a/src/runtime/CL/CLTensorAllocator.cpp b/src/runtime/CL/CLTensorAllocator.cpp
index 0307498335..2ce64551ae 100644
--- a/src/runtime/CL/CLTensorAllocator.cpp
+++ b/src/runtime/CL/CLTensorAllocator.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
@@ -101,10 +101,10 @@ void CLTensorAllocator::free()
     info().set_is_resizable(true);
 }
 
-arm_compute::Status CLTensorAllocator::import_memory(cl::Buffer buffer)
+Status CLTensorAllocator::import_memory(cl::Buffer buffer)
 {
     ARM_COMPUTE_RETURN_ERROR_ON(buffer.get() == nullptr);
-    ARM_COMPUTE_RETURN_ERROR_ON(buffer.getInfo<CL_MEM_SIZE>() == 0);
+    ARM_COMPUTE_RETURN_ERROR_ON(buffer.getInfo<CL_MEM_SIZE>() < info().total_size());
     ARM_COMPUTE_RETURN_ERROR_ON(buffer.getInfo<CL_MEM_CONTEXT>().get() != CLScheduler::get().context().get());
     ARM_COMPUTE_RETURN_ERROR_ON(_associated_memory_group != nullptr);
 
diff --git a/src/runtime/TensorAllocator.cpp b/src/runtime/TensorAllocator.cpp
index 38edb8ba03..0612d751f0 100644
--- a/src/runtime/TensorAllocator.cpp
+++ b/src/runtime/TensorAllocator.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
@@ -149,11 +149,11 @@ void TensorAllocator::free()
     info().set_is_resizable(true);
 }
 
-arm_compute::Status TensorAllocator::import_memory(void *memory, size_t size)
+Status TensorAllocator::import_memory(void *memory)
 {
     ARM_COMPUTE_RETURN_ERROR_ON(memory == nullptr);
-    ARM_COMPUTE_RETURN_ERROR_ON(size == 0);
     ARM_COMPUTE_RETURN_ERROR_ON(_associated_memory_group != nullptr);
+    ARM_COMPUTE_RETURN_ERROR_ON(alignment() != 0 && !arm_compute::utility::check_aligned(memory, alignment()));
 
     _memory.set_owned_region(support::cpp14::make_unique<MemoryRegion>(memory, info().total_size()));
     info().set_is_resizable(false);
 
diff --git a/tests/validation/CL/UNIT/TensorAllocator.cpp b/tests/validation/CL/UNIT/TensorAllocator.cpp
index 849eee84d0..abe06c544b 100644
--- a/tests/validation/CL/UNIT/TensorAllocator.cpp
+++ b/tests/validation/CL/UNIT/TensorAllocator.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
@@ -25,10 +25,16 @@
 #include "arm_compute/runtime/CL/CLMemoryGroup.h"
 #include "arm_compute/runtime/CL/CLScheduler.h"
+#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/Globals.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Macros.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/reference/ActivationLayer.h"
 
 #include <memory>
+#include <random>
 
 namespace arm_compute
 {
@@ -36,14 +42,33 @@ namespace test
 {
 namespace validation
 {
+namespace
+{
+cl_mem import_malloc_memory_helper(void *ptr, size_t size)
+{
+    const cl_import_properties_arm import_properties[] =
+    {
+        CL_IMPORT_TYPE_ARM,
+        CL_IMPORT_TYPE_HOST_ARM,
+        0
+    };
+
+    cl_int err = CL_SUCCESS;
+    cl_mem buf = clImportMemoryARM(CLKernelLibrary::get().context().get(), CL_MEM_READ_WRITE, import_properties, ptr, size, &err);
+    ARM_COMPUTE_EXPECT(err == CL_SUCCESS, framework::LogLevel::ERRORS);
+
+    return buf;
+}
+} // namespace
+
 TEST_SUITE(CL)
 TEST_SUITE(UNIT)
 TEST_SUITE(TensorAllocator)
 
-TEST_CASE(ImportMemory, framework::DatasetMode::ALL)
+TEST_CASE(ImportMemoryBuffer, framework::DatasetMode::ALL)
 {
     // Init tensor info
-    TensorInfo info(TensorShape(24U, 16U, 3U), 1, DataType::F32);
+    const TensorInfo info(TensorShape(24U, 16U, 3U), 1, DataType::F32);
 
     // Allocate memory buffer
     const size_t total_size = info.total_size();
@@ -62,20 +87,88 @@ TEST_CASE(ImportMemory, framework::DatasetMode::ALL)
     ARM_COMPUTE_EXPECT(!bool(t2.allocator()->import_memory(buf)), framework::LogLevel::ERRORS);
     ARM_COMPUTE_EXPECT(t2.info()->is_resizable(), framework::LogLevel::ERRORS);
 
-    // Positive case : Set raw pointer
-    CLTensor t3;
-    t3.allocator()->init(info);
-    ARM_COMPUTE_EXPECT(bool(t3.allocator()->import_memory(buf)), framework::LogLevel::ERRORS);
-    ARM_COMPUTE_EXPECT(!t3.info()->is_resizable(), framework::LogLevel::ERRORS);
-    ARM_COMPUTE_EXPECT(t3.cl_buffer().get() == buf.get(), framework::LogLevel::ERRORS);
-    t3.allocator()->free();
+    // Negative case : Invalid buffer size
+    CLTensor         t3;
+    const TensorInfo info_neg(TensorShape(32U, 16U, 3U), 1, DataType::F32);
+    t3.allocator()->init(info_neg);
+    ARM_COMPUTE_EXPECT(!bool(t3.allocator()->import_memory(buf)), framework::LogLevel::ERRORS);
     ARM_COMPUTE_EXPECT(t3.info()->is_resizable(), framework::LogLevel::ERRORS);
-    ARM_COMPUTE_EXPECT(t3.cl_buffer().get() != buf.get(), framework::LogLevel::ERRORS);
+
+    // Positive case : Set raw pointer
+    CLTensor t4;
+    t4.allocator()->init(info);
+    ARM_COMPUTE_EXPECT(bool(t4.allocator()->import_memory(buf)), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(!t4.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(t4.cl_buffer().get() == buf.get(), framework::LogLevel::ERRORS);
+    t4.allocator()->free();
+    ARM_COMPUTE_EXPECT(t4.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(t4.cl_buffer().get() != buf.get(), framework::LogLevel::ERRORS);
+}
+
+TEST_CASE(ImportMemoryMalloc, framework::DatasetMode::ALL)
+{
+    // Check if import extension is supported
+    if(!device_supports_extension(CLKernelLibrary::get().get_device(), "cl_arm_import_memory"))
+    {
+        return;
+    }
+    else
+    {
+        const ActivationLayerInfo act_info(ActivationLayerInfo::ActivationFunction::RELU);
+        const TensorShape         shape     = TensorShape(24U, 16U, 3U);
+        const DataType            data_type = DataType::F32;
+
+        // Create tensor
+        const TensorInfo info(shape, 1, data_type);
+        CLTensor         tensor;
+        tensor.allocator()->init(info);
+
+        // Create and configure activation function
+        CLActivationLayer act_func;
+        act_func.configure(&tensor, nullptr, act_info);
+
+        // Allocate and import tensor
+        const size_t total_size_in_elems = tensor.info()->tensor_shape().total_size();
+        const size_t total_size_in_bytes = tensor.info()->total_size();
+        const size_t alignment           = CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
+        size_t       space               = total_size_in_bytes + alignment;
+        auto         raw_data            = support::cpp14::make_unique<uint8_t[]>(space);
+
+        void *aligned_ptr = raw_data.get();
+        support::cpp11::align(alignment, total_size_in_bytes, aligned_ptr, space);
+
+        cl::Buffer wrapped_buffer(import_malloc_memory_helper(aligned_ptr, total_size_in_bytes));
+        ARM_COMPUTE_EXPECT(bool(tensor.allocator()->import_memory(wrapped_buffer)), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!tensor.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Fill tensor
+        std::uniform_real_distribution<float> distribution(-5.f, 5.f);
+        std::mt19937                          gen(library->seed());
+        auto                                 *typed_ptr = reinterpret_cast<float *>(aligned_ptr);
+        for(unsigned int i = 0; i < total_size_in_elems; ++i)
+        {
+            typed_ptr[i] = distribution(gen);
+        }
+
+        // Execute function and sync
+        act_func.run();
+        CLScheduler::get().sync();
+
+        // Validate result by checking that the input has no negative values
+        for(unsigned int i = 0; i < total_size_in_elems; ++i)
+        {
+            ARM_COMPUTE_EXPECT(typed_ptr[i] >= 0, framework::LogLevel::ERRORS);
+        }
+
+        // Release resources
+        tensor.allocator()->free();
+        ARM_COMPUTE_EXPECT(tensor.info()->is_resizable(), framework::LogLevel::ERRORS);
+    }
 }
 
-TEST_SUITE_END()
-TEST_SUITE_END()
-TEST_SUITE_END()
+TEST_SUITE_END() // TensorAllocator
+TEST_SUITE_END() // UNIT
+TEST_SUITE_END() // CL
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
diff --git a/tests/validation/NEON/UNIT/TensorAllocator.cpp b/tests/validation/NEON/UNIT/TensorAllocator.cpp
index 384a00855b..7ba83c11b3 100644
--- a/tests/validation/NEON/UNIT/TensorAllocator.cpp
+++ b/tests/validation/NEON/UNIT/TensorAllocator.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
@@ -26,12 +26,19 @@
 #include "arm_compute/core/utils/misc/Utility.h"
 #include "arm_compute/runtime/MemoryGroup.h"
 #include "arm_compute/runtime/MemoryRegion.h"
+#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
 #include "support/ToolchainSupport.h"
+#include "tests/Globals.h"
 #include "tests/Utils.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Macros.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/reference/ActivationLayer.h"
+
+#include <memory>
+#include <random>
 
 namespace arm_compute
 {
@@ -52,29 +59,30 @@ TEST_CASE(ImportMemory, framework::DatasetMode::ALL)
     const size_t total_size = info.total_size();
     auto         data       = support::cpp14::make_unique<uint8_t[]>(total_size);
 
-    // Negative case : Import pointer with zero size
+    // Negative case : Import nullptr
     Tensor t1;
     t1.allocator()->init(info);
-    ARM_COMPUTE_EXPECT(!bool(t1.allocator()->import_memory(data.get(), 0)), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(!bool(t1.allocator()->import_memory(nullptr)), framework::LogLevel::ERRORS);
     ARM_COMPUTE_EXPECT(t1.info()->is_resizable(), framework::LogLevel::ERRORS);
 
-    // Negative case : Import nullptr
-    Tensor t2;
-    t2.allocator()->init(info);
-    ARM_COMPUTE_EXPECT(!bool(t2.allocator()->import_memory(nullptr, total_size)), framework::LogLevel::ERRORS);
+    // Negative case : Import misaligned pointer
+    Tensor       t2;
+    const size_t required_alignment = 339;
+    t2.allocator()->init(info, required_alignment);
+    ARM_COMPUTE_EXPECT(!bool(t2.allocator()->import_memory(data.get())), framework::LogLevel::ERRORS);
     ARM_COMPUTE_EXPECT(t2.info()->is_resizable(), framework::LogLevel::ERRORS);
 
     // Negative case : Import memory to a tensor that is memory managed
     Tensor      t3;
     MemoryGroup mg;
     t3.allocator()->set_associated_memory_group(&mg);
-    ARM_COMPUTE_EXPECT(!bool(t3.allocator()->import_memory(data.get(), total_size)), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(!bool(t3.allocator()->import_memory(data.get())), framework::LogLevel::ERRORS);
     ARM_COMPUTE_EXPECT(t3.info()->is_resizable(), framework::LogLevel::ERRORS);
 
     // Positive case : Set raw pointer
     Tensor t4;
     t4.allocator()->init(info);
-    ARM_COMPUTE_EXPECT(bool(t4.allocator()->import_memory(data.get(), total_size)), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(bool(t4.allocator()->import_memory(data.get())), framework::LogLevel::ERRORS);
     ARM_COMPUTE_EXPECT(!t4.info()->is_resizable(), framework::LogLevel::ERRORS);
     ARM_COMPUTE_EXPECT(t4.buffer() == reinterpret_cast<uint8_t *>(data.get()), framework::LogLevel::ERRORS);
     t4.allocator()->free();
@@ -82,6 +90,57 @@ TEST_CASE(ImportMemory, framework::DatasetMode::ALL)
     ARM_COMPUTE_EXPECT(t4.buffer() == nullptr, framework::LogLevel::ERRORS);
 }
 
+TEST_CASE(ImportMemoryMalloc, framework::DatasetMode::ALL)
+{
+    const ActivationLayerInfo act_info(ActivationLayerInfo::ActivationFunction::RELU);
+    const TensorShape         shape     = TensorShape(24U, 16U, 3U);
+    const DataType            data_type = DataType::F32;
+
+    // Create tensor
+    const TensorInfo info(shape, 1, data_type);
+    const size_t     required_alignment = 64;
+    Tensor           tensor;
+    tensor.allocator()->init(info, required_alignment);
+
+    // Create and configure activation function
+    NEActivationLayer act_func;
+    act_func.configure(&tensor, nullptr, act_info);
+
+    // Allocate and import tensor
+    const size_t total_size_in_elems = tensor.info()->tensor_shape().total_size();
+    const size_t total_size_in_bytes = tensor.info()->total_size();
+    size_t       space               = total_size_in_bytes + required_alignment;
+    auto         raw_data            = support::cpp14::make_unique<uint8_t[]>(space);
+
+    void *aligned_ptr = raw_data.get();
+    support::cpp11::align(required_alignment, total_size_in_bytes, aligned_ptr, space);
+
+    ARM_COMPUTE_EXPECT(bool(tensor.allocator()->import_memory(aligned_ptr)), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(!tensor.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Fill tensor
+    std::uniform_real_distribution<float> distribution(-5.f, 5.f);
+    std::mt19937                          gen(library->seed());
+    auto                                 *typed_ptr = reinterpret_cast<float *>(aligned_ptr);
+    for(unsigned int i = 0; i < total_size_in_elems; ++i)
+    {
+        typed_ptr[i] = distribution(gen);
+    }
+
+    // Execute function and sync
+    act_func.run();
+
+    // Validate result by checking that the input has no negative values
+    for(unsigned int i = 0; i < total_size_in_elems; ++i)
+    {
+        ARM_COMPUTE_EXPECT(typed_ptr[i] >= 0, framework::LogLevel::ERRORS);
+    }
+
+    // Release resources
+    tensor.allocator()->free();
+    ARM_COMPUTE_EXPECT(tensor.info()->is_resizable(), framework::LogLevel::ERRORS);
+}
+
 TEST_CASE(AlignedAlloc, framework::DatasetMode::ALL)
 {
     // Init tensor info
-- 
cgit v1.2.1
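
Usage sketch (not part of the patch): the snippet below shows how a caller is expected to drive the reworked NEON import_memory() interface, assuming only the post-patch API above. The size argument is gone because the required size now comes from the tensor's ITensorInfo, so size/shape mismatches are caught inside the allocator; ownership of the buffer stays with the caller throughout. The shapes and the RELU function mirror the tests; everything else (main(), error codes) is illustrative only.

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    #include <cstdint>
    #include <memory>

    using namespace arm_compute;

    int main()
    {
        // Describe a 24x16x3 F32 tensor, as in the tests above.
        const TensorInfo info(TensorShape(24U, 16U, 3U), 1, DataType::F32);

        Tensor tensor;
        tensor.allocator()->init(info); // default alignment of 0 skips the alignment check

        // Caller-owned backing memory; must be at least info.total_size() bytes.
        auto buffer = std::make_unique<uint8_t[]>(info.total_size());

        // Bind the caller's memory to the tensor. Ownership is not transferred;
        // per the patch, the call fails on nullptr, on a memory-managed tensor,
        // or on a misaligned pointer when a non-zero alignment was requested.
        if(!bool(tensor.allocator()->import_memory(buffer.get())))
        {
            return 1;
        }

        // Run any configured NEON function on the tensor, e.g. an in-place RELU.
        NEActivationLayer act;
        act.configure(&tensor, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
        act.run();

        // Detach the imported region before the buffer goes away; free() does
        // not deallocate imported memory, it only unbinds it from the tensor.
        tensor.allocator()->free();
        return 0;
    }

The CL path is analogous: wrap the cl_mem returned by clImportMemoryARM() in a cl::Buffer (as the test helper import_malloc_memory_helper() does) and hand it to CLTensorAllocator::import_memory(), which additionally verifies the buffer's size and context against the tensor info and the scheduler's context.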