From db8485ac24135f17e9882c76196924435abc064f Mon Sep 17 00:00:00 2001 From: Pablo Tello Date: Tue, 24 Sep 2019 11:03:47 +0100 Subject: COMPMID-2205: CL runtime context. CL Interfaces implemented. Concrete classes implemented. One test (ActivationLayer) ported to the new interface. Change-Id: I283808bec36ccfc2f13fe048c45cbbee698ce525 Signed-off-by: Pablo Tello Reviewed-on: https://review.mlplatform.org/c/1998 Tested-by: Arm Jenkins Reviewed-by: Georgios Pinitas Comments-Addressed: Arm Jenkins --- src/core/CL/CLCoreRuntimeContext.cpp | 52 ++++++++++++++++++ src/core/CL/CLHelpers.cpp | 16 ++++++ src/core/CL/CLKernelLibrary.cpp | 65 +++++++++++++++++++++++ src/core/CL/OpenCL.cpp | 5 +- src/core/CL/kernels/CLActivationLayerKernel.cpp | 8 +-- src/graph/backends/CL/CLDeviceBackend.cpp | 7 +-- src/runtime/CL/CLBufferAllocator.cpp | 27 +++++++--- src/runtime/CL/CLHelpers.cpp | 16 ++++++ src/runtime/CL/CLMemoryRegion.cpp | 39 ++++++++------ src/runtime/CL/CLRuntimeContext.cpp | 67 +++++++++++++++++++++++ src/runtime/CL/CLScheduler.cpp | 70 +++++++++++++++++++++++-- src/runtime/CL/CLTensor.cpp | 16 ++++-- src/runtime/CL/CLTensorAllocator.cpp | 54 +++++++++++++++---- src/runtime/CL/ICLSimpleFunction.cpp | 13 ++--- src/runtime/CL/functions/CLActivationLayer.cpp | 9 ++-- src/runtime/GLES_COMPUTE/GCTensor.cpp | 4 +- src/runtime/Tensor.cpp | 4 +- 17 files changed, 405 insertions(+), 67 deletions(-) create mode 100644 src/core/CL/CLCoreRuntimeContext.cpp create mode 100644 src/runtime/CL/CLRuntimeContext.cpp (limited to 'src') diff --git a/src/core/CL/CLCoreRuntimeContext.cpp b/src/core/CL/CLCoreRuntimeContext.cpp new file mode 100644 index 0000000000..f9efad2c0d --- /dev/null +++ b/src/core/CL/CLCoreRuntimeContext.cpp @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2019 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/CL/CLCoreRuntimeContext.h" + +namespace arm_compute +{ +cl::Context CLCoreRuntimeContext::context() +{ + return _ctx; +} + +cl::CommandQueue CLCoreRuntimeContext::queue() +{ + return _queue; +} + +CLCoreRuntimeContext::CLCoreRuntimeContext() + : _kernel_lib(nullptr), _ctx(), _queue() +{ +} + +CLCoreRuntimeContext::CLCoreRuntimeContext(CLKernelLibrary *kernel_lib, cl::Context ctx, cl::CommandQueue queue) + : _kernel_lib(kernel_lib), _ctx(ctx), _queue(queue) +{ +} + +CLKernelLibrary *CLCoreRuntimeContext::kernel_library() const +{ + return _kernel_lib; +} +} // namespace arm_compute diff --git a/src/core/CL/CLHelpers.cpp b/src/core/CL/CLHelpers.cpp index d051810090..a3c73677c7 100644 --- a/src/core/CL/CLHelpers.cpp +++ b/src/core/CL/CLHelpers.cpp @@ -22,6 +22,8 @@ * SOFTWARE. */ #include "arm_compute/core/CL/CLHelpers.h" +#include "arm_compute/core/CL/CLCoreRuntimeContext.h" +#include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/CLTypes.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Log.h" @@ -283,4 +285,18 @@ bool preferred_dummy_work_items_support(const cl::Device &device) // TODO (COMPMID-2044) return true; } + +cl::Kernel create_opencl_kernel(CLCoreRuntimeContext *ctx, const std::string &kernel_name, const CLBuildOptions &build_opts) +{ + if(ctx && ctx->kernel_library()) + { + //New api going through the core context + return static_cast(ctx->kernel_library()->create_kernel(kernel_name, build_opts.options())); + } + else + { + //Legacy code through the singleton + return static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); + } +} } // namespace arm_compute diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp index 7b7263fca7..c27f886129 100644 --- a/src/core/CL/CLKernelLibrary.cpp +++ b/src/core/CL/CLKernelLibrary.cpp @@ -1144,6 +1144,49 @@ Kernel CLKernelLibrary::create_kernel(const std::string &kernel_name, const Stri return Kernel(kernel_name, cl_program); } +void CLKernelLibrary::init(std::string kernel_path, cl::Context context, cl::Device device) +{ + _kernel_path = std::move(kernel_path); + _context = std::move(context); + _device = std::move(device); +} + +void CLKernelLibrary::set_kernel_path(const std::string &kernel_path) +{ + _kernel_path = kernel_path; +} + +cl::Context &CLKernelLibrary::context() +{ + return _context; +} + +cl::Device &CLKernelLibrary::get_device() +{ + return _device; +} + +void CLKernelLibrary::set_device(cl::Device device) +{ + _device = std::move(device); +} + +std::string CLKernelLibrary::get_kernel_path() +{ + return _kernel_path; +} + +void CLKernelLibrary::clear_programs_cache() +{ + _programs_map.clear(); + _built_programs_map.clear(); +} + +const std::map &CLKernelLibrary::get_built_programs() const +{ + return _built_programs_map; +} + void CLKernelLibrary::add_built_program(const std::string &built_program_name, const cl::Program &program) { _built_programs_map.emplace(built_program_name, program); @@ -1205,6 +1248,28 @@ const Program &CLKernelLibrary::load_program(const std::string &program_name) co return new_program.first->second; } +void CLKernelLibrary::set_context(cl::Context context) +{ + _context = std::move(context); + if(_context.get() == nullptr) + { + _device = cl::Device(); + } + else + { + const auto cl_devices = _context.getInfo(); + + if(cl_devices.empty()) + { + _device = cl::Device(); + } + else + { + _device = cl_devices[0]; + } + } +} + std::string CLKernelLibrary::stringify_set(const StringSet &s) const { std::string concat_set; diff --git a/src/core/CL/OpenCL.cpp b/src/core/CL/OpenCL.cpp index 1ce1b526d7..74c5b041d7 100644 --- a/src/core/CL/OpenCL.cpp +++ b/src/core/CL/OpenCL.cpp @@ -27,6 +27,8 @@ #include "arm_compute/core/CL/OpenCL.h" #pragma GCC diagnostic pop +#include "arm_compute/core/Error.h" + #include #include @@ -54,6 +56,7 @@ bool CLSymbols::load_default() { if(load(lib)) { + ARM_COMPUTE_ERROR_ON_MSG(this->clBuildProgram_ptr == nullptr, "Failed to load OpenCL symbols from shared library"); return true; } } @@ -948,4 +951,4 @@ clImportMemoryARM(cl_context context, } return nullptr; } -} \ No newline at end of file +} diff --git a/src/core/CL/kernels/CLActivationLayerKernel.cpp b/src/core/CL/kernels/CLActivationLayerKernel.cpp index 97a0ff6c6c..5062fd1801 100644 --- a/src/core/CL/kernels/CLActivationLayerKernel.cpp +++ b/src/core/CL/kernels/CLActivationLayerKernel.cpp @@ -23,8 +23,8 @@ */ #include "arm_compute/core/CL/kernels/CLActivationLayerKernel.h" +#include "arm_compute/core/CL/CLCoreRuntimeContext.h" #include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Helpers.h" @@ -111,8 +111,8 @@ std::pair validate_and_configure_window(ITensorInfo *input, ITen } } // namespace -CLActivationLayerKernel::CLActivationLayerKernel() - : _input(nullptr), _output(nullptr), _run_in_place(false) +CLActivationLayerKernel::CLActivationLayerKernel(CLCoreRuntimeContext *ctx) + : _input(nullptr), _output(nullptr), _run_in_place(false), _ctx(ctx) { } @@ -205,8 +205,8 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act { kernel_name += perform_activation_in_float ? std::string("_quant_f32") : std::string("_quant"); } - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); + _kernel = create_opencl_kernel(_ctx, kernel_name, build_opts); // Make sure _kernel is initialized before calling the parent's configure _input = input; _output = output; diff --git a/src/graph/backends/CL/CLDeviceBackend.cpp b/src/graph/backends/CL/CLDeviceBackend.cpp index ea3b6b801a..58c666c3cc 100644 --- a/src/graph/backends/CL/CLDeviceBackend.cpp +++ b/src/graph/backends/CL/CLDeviceBackend.cpp @@ -34,6 +34,7 @@ #include "arm_compute/graph/backends/CL/CLSubTensorHandle.h" #include "arm_compute/graph/backends/CL/CLTensorHandle.h" +#include "arm_compute/core/CL/CLCoreRuntimeContext.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/runtime/BlobLifetimeManager.h" #include "arm_compute/runtime/CL/CLBufferAllocator.h" @@ -64,7 +65,7 @@ bool file_exists(const std::string &filename) static detail::BackendRegistrar CLDeviceBackend_registrar(Target::CL); CLDeviceBackend::CLDeviceBackend() - : _context_count(0), _tuner(), _allocator(nullptr), _tuner_file() + : _context_count(0), _tuner(), _allocator(nullptr), _tuner_file(), _legacy_ctx() { } @@ -91,9 +92,9 @@ void CLDeviceBackend::initialize_backend() { // Setup Scheduler CLScheduler::get().default_init(&_tuner); - + _legacy_ctx = support::cpp14::make_unique(nullptr, CLScheduler::get().context(), CLScheduler::get().queue()); // Create allocator with new context - _allocator = support::cpp14::make_unique(); + _allocator = support::cpp14::make_unique(_legacy_ctx.get()); } void CLDeviceBackend::release_backend_context(GraphContext &ctx) diff --git a/src/runtime/CL/CLBufferAllocator.cpp b/src/runtime/CL/CLBufferAllocator.cpp index 84789e70d2..ed27320650 100644 --- a/src/runtime/CL/CLBufferAllocator.cpp +++ b/src/runtime/CL/CLBufferAllocator.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -22,25 +22,35 @@ * SOFTWARE. */ #include "arm_compute/runtime/CL/CLBufferAllocator.h" -#include "arm_compute/runtime/CL/CLMemoryRegion.h" +#include "arm_compute/core/CL/CLCoreRuntimeContext.h" #include "arm_compute/core/CL/OpenCL.h" #include "arm_compute/core/Error.h" +#include "arm_compute/runtime/CL/CLMemoryRegion.h" +#include "arm_compute/runtime/CL/CLScheduler.h" #include "support/ToolchainSupport.h" #include -using namespace arm_compute; - -CLBufferAllocator::CLBufferAllocator(cl::Context context) - : _context(std::move(context)) +namespace arm_compute +{ +CLBufferAllocator::CLBufferAllocator(CLCoreRuntimeContext *ctx) + : _ctx(ctx) { } void *CLBufferAllocator::allocate(size_t size, size_t alignment) { ARM_COMPUTE_UNUSED(alignment); - cl_mem buf = clCreateBuffer(_context.get(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, size, nullptr, nullptr); + cl_mem buf; + if(_ctx == nullptr) + { + buf = clCreateBuffer(CLScheduler::get().context().get(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, size, nullptr, nullptr); + } + else + { + buf = clCreateBuffer(_ctx->context().get(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, size, nullptr, nullptr); + } return static_cast(buf); } @@ -53,5 +63,6 @@ void CLBufferAllocator::free(void *ptr) std::unique_ptr CLBufferAllocator::make_region(size_t size, size_t alignment) { ARM_COMPUTE_UNUSED(alignment); - return arm_compute::support::cpp14::make_unique(_context, CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, size); + return arm_compute::support::cpp14::make_unique(_ctx, CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, size); } +} // namespace arm_compute diff --git a/src/runtime/CL/CLHelpers.cpp b/src/runtime/CL/CLHelpers.cpp index edfc8ed2aa..c4c7ee2107 100644 --- a/src/runtime/CL/CLHelpers.cpp +++ b/src/runtime/CL/CLHelpers.cpp @@ -26,6 +26,7 @@ #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/Error.h" +#include "arm_compute/runtime/CL/CLRuntimeContext.h" namespace { @@ -103,4 +104,19 @@ create_opencl_context_and_device() ARM_COMPUTE_ERROR_ON_MSG(err != CL_SUCCESS, "Failed to create OpenCL context"); return std::make_tuple(cl_context, device, err); } + +void schedule_kernel_on_ctx(CLRuntimeContext *ctx, ICLKernel *kernel, bool flush) +{ + ARM_COMPUTE_ERROR_ON_NULLPTR(kernel); + if(ctx) + { + ARM_COMPUTE_ERROR_ON(ctx->gpu_scheduler() == nullptr); + ctx->gpu_scheduler()->enqueue(*kernel, flush); + } + else + { + CLScheduler::get().enqueue(*kernel, flush); + } +} + } // namespace arm_compute diff --git a/src/runtime/CL/CLMemoryRegion.cpp b/src/runtime/CL/CLMemoryRegion.cpp index 2976903c93..52906a893f 100644 --- a/src/runtime/CL/CLMemoryRegion.cpp +++ b/src/runtime/CL/CLMemoryRegion.cpp @@ -23,13 +23,18 @@ */ #include "arm_compute/runtime/CL/CLMemoryRegion.h" +#include "arm_compute/core/CL/CLCoreRuntimeContext.h" #include "arm_compute/core/Error.h" #include "arm_compute/runtime/CL/CLScheduler.h" namespace arm_compute { -ICLMemoryRegion::ICLMemoryRegion(cl::Context ctx, size_t size) - : IMemoryRegion(size), _ctx(std::move(ctx)), _mapping(nullptr), _mem() +ICLMemoryRegion::ICLMemoryRegion(CLCoreRuntimeContext *ctx, size_t size) + : IMemoryRegion(size), + _queue((ctx != nullptr) ? ctx->queue() : CLScheduler::get().queue()), + _ctx((ctx != nullptr) ? ctx->context() : CLScheduler::get().context()), + _mapping(nullptr), + _mem() { } @@ -54,17 +59,17 @@ std::unique_ptr ICLMemoryRegion::extract_subregion(size_t offset, return nullptr; } -CLBufferMemoryRegion::CLBufferMemoryRegion(cl::Context ctx, cl_mem_flags flags, size_t size) - : ICLMemoryRegion(std::move(ctx), size) +CLBufferMemoryRegion::CLBufferMemoryRegion(CLCoreRuntimeContext *ctx, cl_mem_flags flags, size_t size) + : ICLMemoryRegion(ctx, size) { if(_size != 0) { - _mem = cl::Buffer(_ctx, flags, _size); + _mem = cl::Buffer((ctx != nullptr) ? ctx->context() : CLScheduler::get().context(), flags, _size); } } -CLBufferMemoryRegion::CLBufferMemoryRegion(const cl::Buffer &buffer) - : ICLMemoryRegion(buffer.getInfo(), buffer.getInfo()) +CLBufferMemoryRegion::CLBufferMemoryRegion(const cl::Buffer &buffer, CLCoreRuntimeContext *ctx) + : ICLMemoryRegion(ctx, buffer.getInfo()) { _mem = buffer; } @@ -88,15 +93,15 @@ void CLBufferMemoryRegion::unmap(cl::CommandQueue &q) _mapping = nullptr; } -ICLSVMMemoryRegion::ICLSVMMemoryRegion(cl::Context ctx, cl_mem_flags flags, size_t size, size_t alignment) - : ICLMemoryRegion(std::move(ctx), size), _ptr(nullptr) +ICLSVMMemoryRegion::ICLSVMMemoryRegion(CLCoreRuntimeContext *ctx, cl_mem_flags flags, size_t size, size_t alignment) + : ICLMemoryRegion(ctx, size), _ptr(nullptr) { if(size != 0) { - _ptr = clSVMAlloc(_ctx.get(), flags, size, alignment); + _ptr = clSVMAlloc((ctx != nullptr) ? ctx->context().get() : CLScheduler::get().context().get(), flags, size, alignment); if(_ptr != nullptr) { - _mem = cl::Buffer(_ctx, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, _size, _ptr); + _mem = cl::Buffer((ctx != nullptr) ? ctx->context() : CLScheduler::get().context(), CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, _size, _ptr); } } } @@ -107,7 +112,7 @@ ICLSVMMemoryRegion::~ICLSVMMemoryRegion() { try { - clFinish(CLScheduler::get().queue().get()); + clFinish(_queue.get()); _mem = cl::Buffer(); clSVMFree(_ctx.get(), _ptr); } @@ -122,8 +127,8 @@ void *ICLSVMMemoryRegion::ptr() return _ptr; } -CLCoarseSVMMemoryRegion::CLCoarseSVMMemoryRegion(cl::Context ctx, cl_mem_flags flags, size_t size, size_t alignment) - : ICLSVMMemoryRegion(std::move(ctx), flags, size, alignment) +CLCoarseSVMMemoryRegion::CLCoarseSVMMemoryRegion(CLCoreRuntimeContext *ctx, cl_mem_flags flags, size_t size, size_t alignment) + : ICLSVMMemoryRegion(ctx, flags, size, alignment) { } @@ -142,8 +147,8 @@ void CLCoarseSVMMemoryRegion::unmap(cl::CommandQueue &q) _mapping = nullptr; } -CLFineSVMMemoryRegion::CLFineSVMMemoryRegion(cl::Context ctx, cl_mem_flags flags, size_t size, size_t alignment) - : ICLSVMMemoryRegion(std::move(ctx), flags, size, alignment) +CLFineSVMMemoryRegion::CLFineSVMMemoryRegion(CLCoreRuntimeContext *ctx, cl_mem_flags flags, size_t size, size_t alignment) + : ICLSVMMemoryRegion(ctx, flags, size, alignment) { } @@ -162,4 +167,4 @@ void CLFineSVMMemoryRegion::unmap(cl::CommandQueue &q) ARM_COMPUTE_UNUSED(q); _mapping = nullptr; } -} // namespace arm_compute \ No newline at end of file +} // namespace arm_compute diff --git a/src/runtime/CL/CLRuntimeContext.cpp b/src/runtime/CL/CLRuntimeContext.cpp new file mode 100644 index 0000000000..49e4c10c84 --- /dev/null +++ b/src/runtime/CL/CLRuntimeContext.cpp @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2019 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/CLRuntimeContext.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/runtime/CL/CLHelpers.h" +#include "arm_compute/runtime/CL/CLScheduler.h" + +namespace arm_compute +{ +CLRuntimeContext::CLRuntimeContext() + : _gpu_owned_scheduler(support::cpp14::make_unique()), _gpu_scheduler(_gpu_owned_scheduler.get()), _symbols(), _core_context() +{ + _symbols.load_default(); + auto ctx_dev_err = create_opencl_context_and_device(); + ARM_COMPUTE_ERROR_ON_MSG(std::get<2>(ctx_dev_err) != CL_SUCCESS, "Failed to create OpenCL context"); + auto ctx = std::get<0>(ctx_dev_err); + auto dev = std::get<1>(ctx_dev_err); + cl::CommandQueue queue = cl::CommandQueue(ctx, dev); + _gpu_owned_scheduler->init(ctx, queue, dev, &_tuner); + const std::string cl_kernels_folder("./cl_kernels"); + _kernel_lib.init(cl_kernels_folder, ctx, dev); + _core_context = CLCoreRuntimeContext(&_kernel_lib, _gpu_owned_scheduler->context(), _gpu_owned_scheduler->queue()); +} + +CLKernelLibrary &CLRuntimeContext::kernel_library() +{ + return _kernel_lib; +} + +CLCoreRuntimeContext *CLRuntimeContext::core_runtime_context() +{ + return &_core_context; +} + +void CLRuntimeContext::set_gpu_scheduler(CLScheduler *scheduler) +{ + ARM_COMPUTE_ERROR_ON_NULLPTR(scheduler); + _gpu_scheduler = scheduler; +} + +CLScheduler *CLRuntimeContext::gpu_scheduler() +{ + return _gpu_scheduler; +} + +} // namespace arm_compute diff --git a/src/runtime/CL/CLScheduler.cpp b/src/runtime/CL/CLScheduler.cpp index 701ffe0ab1..e78eaa482f 100644 --- a/src/runtime/CL/CLScheduler.cpp +++ b/src/runtime/CL/CLScheduler.cpp @@ -23,13 +23,71 @@ */ #include "arm_compute/runtime/CL/CLScheduler.h" -#include "arm_compute/runtime/CL/CLHelpers.h" - +#include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLKernel.h" +#include "arm_compute/runtime/CL/CLHelpers.h" #include "arm_compute/runtime/CL/CLTuner.h" #include "arm_compute/runtime/CL/tuners/Tuners.h" -using namespace arm_compute; +namespace arm_compute +{ +cl::Context &CLScheduler::context() +{ + ARM_COMPUTE_ERROR_ON(!_is_initialised); + _context = CLKernelLibrary::get().context(); + return _context; +} + +cl::CommandQueue &CLScheduler::queue() +{ + ARM_COMPUTE_ERROR_ON(!_is_initialised); + return _queue; +} + +GPUTarget CLScheduler::target() const +{ + return _target; +} + +void CLScheduler::set_queue(cl::CommandQueue queue) +{ + _queue = std::move(queue); +} + +void CLScheduler::set_target(GPUTarget target) +{ + _target = target; +} + +void CLScheduler::set_tuner(ICLTuner *tuner) +{ + _cl_tuner = tuner; +} + +void CLScheduler::sync() +{ + _queue.finish(); +} + +cl::Event CLScheduler::enqueue_sync_event() +{ + cl::Event event; + _queue.enqueueMarker(&event); + return event; +} + +void CLScheduler::tune_kernel_static(ICLKernel &kernel) +{ + if(_cl_tuner != nullptr) + { + _cl_tuner->tune_kernel_static(kernel); + } +} + +bool CLScheduler::is_initialised() const +{ + return _is_initialised; +} std::once_flag CLScheduler::_initialize_symbols; @@ -49,8 +107,9 @@ void CLScheduler::default_init_with_context(cl::Device &device, cl::Context &ctx { if(!_is_initialised) { - cl::CommandQueue queue = cl::CommandQueue(ctx, device); - CLKernelLibrary::get().init("./cl_kernels/", ctx, device); + const std::string cl_kernels_folder("./cl_kernels/"); + cl::CommandQueue queue = cl::CommandQueue(ctx, device); + CLKernelLibrary::get().init(cl_kernels_folder, ctx, device); init(ctx, queue, device, cl_tuner); _cl_default_static_tuner = tuners::TunerFactory::create_tuner(_target); _cl_tuner = (cl_tuner == nullptr) ? _cl_default_static_tuner.get() : cl_tuner; @@ -113,3 +172,4 @@ void CLScheduler::enqueue(ICLKernel &kernel, bool flush) _queue.flush(); } } +} // namespace arm_compute diff --git a/src/runtime/CL/CLTensor.cpp b/src/runtime/CL/CLTensor.cpp index 9bbf926b58..a6d0cf77ca 100644 --- a/src/runtime/CL/CLTensor.cpp +++ b/src/runtime/CL/CLTensor.cpp @@ -23,15 +23,21 @@ */ #include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/CLRuntimeContext.h" #include "arm_compute/runtime/CL/CLScheduler.h" namespace arm_compute { -CLTensor::CLTensor() - : _allocator(this) +CLTensor::CLTensor(IRuntimeContext *ctx) + : _allocator(this, static_cast(ctx)), _ctx(static_cast(ctx)) { } +CLRuntimeContext *CLTensor::context() +{ + return _ctx; +} + TensorInfo *CLTensor::info() const { return &_allocator.info(); @@ -59,12 +65,12 @@ CLTensorAllocator *CLTensor::allocator() void CLTensor::map(bool blocking) { - ICLTensor::map(CLScheduler::get().queue(), blocking); + ICLTensor::map(_ctx == nullptr ? CLScheduler::get().queue() : _ctx->gpu_scheduler()->queue(), blocking); } void CLTensor::unmap() { - ICLTensor::unmap(CLScheduler::get().queue()); + ICLTensor::unmap(_ctx == nullptr ? CLScheduler::get().queue() : _ctx->gpu_scheduler()->queue()); } uint8_t *CLTensor::do_map(cl::CommandQueue &q, bool blocking) @@ -81,4 +87,4 @@ void CLTensor::associate_memory_group(arm_compute::IMemoryGroup *memory_group) { _allocator.set_associated_memory_group(memory_group); } -} // namespace arm_compute \ No newline at end of file +} // namespace arm_compute diff --git a/src/runtime/CL/CLTensorAllocator.cpp b/src/runtime/CL/CLTensorAllocator.cpp index 2b5fbb8241..eaf46d42ca 100644 --- a/src/runtime/CL/CLTensorAllocator.cpp +++ b/src/runtime/CL/CLTensorAllocator.cpp @@ -25,6 +25,7 @@ #include "arm_compute/core/Error.h" #include "arm_compute/core/TensorInfo.h" +#include "arm_compute/runtime/CL/CLRuntimeContext.h" #include "arm_compute/runtime/CL/CLScheduler.h" namespace arm_compute @@ -41,10 +42,10 @@ namespace * * @return A wrapped memory region */ -std::unique_ptr allocate_region(const cl::Context &context, size_t size, cl_uint alignment) +std::unique_ptr allocate_region(CLCoreRuntimeContext *ctx, size_t size, cl_uint alignment) { // Try fine-grain SVM - std::unique_ptr region = support::cpp14::make_unique(context, + std::unique_ptr region = support::cpp14::make_unique(ctx, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, size, alignment); @@ -52,12 +53,12 @@ std::unique_ptr allocate_region(const cl::Context &context, siz // Try coarse-grain SVM in case of failure if(region != nullptr && region->ptr() == nullptr) { - region = support::cpp14::make_unique(context, CL_MEM_READ_WRITE, size, alignment); + region = support::cpp14::make_unique(ctx, CL_MEM_READ_WRITE, size, alignment); } // Try legacy buffer memory in case of failure if(region != nullptr && region->ptr() == nullptr) { - region = support::cpp14::make_unique(context, CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, size); + region = support::cpp14::make_unique(ctx, CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, size); } return region; } @@ -103,8 +104,8 @@ void populate_quantization_info(CLFloatArray &scale, CLInt32Array &offset, const } } // namespace -CLTensorAllocator::CLTensorAllocator(IMemoryManageable *owner) - : _owner(owner), _associated_memory_group(nullptr), _memory(), _mapping(nullptr), _scale(), _offset() +CLTensorAllocator::CLTensorAllocator(IMemoryManageable *owner, CLRuntimeContext *ctx) + : _ctx(ctx), _owner(owner), _associated_memory_group(nullptr), _memory(), _mapping(nullptr), _scale(), _offset() { } @@ -129,7 +130,15 @@ void CLTensorAllocator::allocate() if(_associated_memory_group == nullptr) { // Perform memory allocation - _memory.set_owned_region(allocate_region(CLScheduler::get().context(), info().total_size(), 0)); + if(_ctx == nullptr) + { + auto legacy_ctx = CLCoreRuntimeContext(nullptr, CLScheduler::get().context(), CLScheduler::get().queue()); + _memory.set_owned_region(allocate_region(&legacy_ctx, info().total_size(), 0)); + } + else + { + _memory.set_owned_region(allocate_region(_ctx->core_runtime_context(), info().total_size(), 0)); + } } else { @@ -162,9 +171,17 @@ Status CLTensorAllocator::import_memory(cl::Buffer buffer) ARM_COMPUTE_RETURN_ERROR_ON(buffer.getInfo().get() != CLScheduler::get().context().get()); ARM_COMPUTE_RETURN_ERROR_ON(_associated_memory_group != nullptr); - _memory.set_owned_region(support::cpp14::make_unique(buffer)); - info().set_is_resizable(false); + if(_ctx == nullptr) + { + auto legacy_ctx = CLCoreRuntimeContext(nullptr, CLScheduler::get().context(), CLScheduler::get().queue()); + _memory.set_owned_region(support::cpp14::make_unique(buffer, &legacy_ctx)); + } + else + { + _memory.set_owned_region(support::cpp14::make_unique(buffer, _ctx->core_runtime_context())); + } + info().set_is_resizable(false); return Status{}; } @@ -179,13 +196,28 @@ void CLTensorAllocator::set_associated_memory_group(IMemoryGroup *associated_mem uint8_t *CLTensorAllocator::lock() { - return map(CLScheduler::get().queue(), true); + if(_ctx) + { + return map(_ctx->gpu_scheduler()->queue(), true); + } + else + { + return map(CLScheduler::get().queue(), true); + } } void CLTensorAllocator::unlock() { ARM_COMPUTE_ERROR_ON(_memory.region() == nullptr); - unmap(CLScheduler::get().queue(), reinterpret_cast(_memory.region()->buffer())); + if(_ctx) + { + unmap(_ctx->gpu_scheduler()->queue(), reinterpret_cast(_memory.region()->buffer())); + } + else + { + //Legacy singleton api + unmap(CLScheduler::get().queue(), reinterpret_cast(_memory.region()->buffer())); + } } uint8_t *CLTensorAllocator::map(cl::CommandQueue &q, bool blocking) diff --git a/src/runtime/CL/ICLSimpleFunction.cpp b/src/runtime/CL/ICLSimpleFunction.cpp index a1a56fd06c..fb8eba8aa4 100644 --- a/src/runtime/CL/ICLSimpleFunction.cpp +++ b/src/runtime/CL/ICLSimpleFunction.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -24,20 +24,21 @@ #include "arm_compute/runtime/CL/ICLSimpleFunction.h" #include "arm_compute/core/Error.h" +#include "arm_compute/runtime/CL/CLHelpers.h" #include "arm_compute/runtime/CL/CLScheduler.h" using namespace arm_compute; -ICLSimpleFunction::ICLSimpleFunction() // NOLINT +ICLSimpleFunction::ICLSimpleFunction(CLRuntimeContext *ctx) // NOLINT : _kernel(), - _border_handler() + _border_handler(), + _ctx(ctx) { } void ICLSimpleFunction::run() { ARM_COMPUTE_ERROR_ON_MSG(!_kernel, "The child class didn't set the CL kernel or function isn't configured"); - - CLScheduler::get().enqueue(_border_handler, false); - CLScheduler::get().enqueue(*_kernel); + schedule_kernel_on_ctx(_ctx, &_border_handler, false); + schedule_kernel_on_ctx(_ctx, _kernel.get()); } diff --git a/src/runtime/CL/functions/CLActivationLayer.cpp b/src/runtime/CL/functions/CLActivationLayer.cpp index 2b66795cf9..00dbb71f4c 100644 --- a/src/runtime/CL/functions/CLActivationLayer.cpp +++ b/src/runtime/CL/functions/CLActivationLayer.cpp @@ -25,18 +25,21 @@ #include "arm_compute/core/CL/kernels/CLActivationLayerKernel.h" #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLRuntimeContext.h" #include "support/ToolchainSupport.h" namespace arm_compute { -CLActivationLayer::CLActivationLayer(void *ctx) +CLActivationLayer::CLActivationLayer(CLRuntimeContext *ctx) + : ICLSimpleFunction(ctx) { - ARM_COMPUTE_UNUSED(ctx); } void CLActivationLayer::configure(ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info) { - auto k = arm_compute::support::cpp14::make_unique(); + auto core_ctx = _ctx ? _ctx->core_runtime_context() : /* Legacy */ nullptr; + + auto k = arm_compute::support::cpp14::make_unique(core_ctx); k->configure(input, output, act_info); _kernel = std::move(k); } diff --git a/src/runtime/GLES_COMPUTE/GCTensor.cpp b/src/runtime/GLES_COMPUTE/GCTensor.cpp index 66c1abdb6d..e05eb4c4ae 100644 --- a/src/runtime/GLES_COMPUTE/GCTensor.cpp +++ b/src/runtime/GLES_COMPUTE/GCTensor.cpp @@ -26,7 +26,7 @@ namespace arm_compute { -GCTensor::GCTensor() +GCTensor::GCTensor(IRuntimeContext *) : _allocator(this) { } @@ -80,4 +80,4 @@ void GCTensor::do_unmap() { _allocator.unmap(); } -} // namespace arm_compute \ No newline at end of file +} // namespace arm_compute diff --git a/src/runtime/Tensor.cpp b/src/runtime/Tensor.cpp index de08efd731..8f7ecd6ffa 100644 --- a/src/runtime/Tensor.cpp +++ b/src/runtime/Tensor.cpp @@ -25,7 +25,7 @@ namespace arm_compute { -Tensor::Tensor() +Tensor::Tensor(IRuntimeContext *) : _allocator(this) { } @@ -54,4 +54,4 @@ void Tensor::associate_memory_group(IMemoryGroup *memory_group) { _allocator.set_associated_memory_group(memory_group); } -} // namespace arm_compute \ No newline at end of file +} // namespace arm_compute -- cgit v1.2.1