aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/CL
diff options
context:
space:
mode:
Diffstat (limited to 'src/runtime/CL')
-rw-r--r-- src/runtime/CL/CLBufferAllocator.cpp 27
-rw-r--r-- src/runtime/CL/CLHelpers.cpp 16
-rw-r--r-- src/runtime/CL/CLMemoryRegion.cpp 39
-rw-r--r-- src/runtime/CL/CLRuntimeContext.cpp 67
-rw-r--r-- src/runtime/CL/CLScheduler.cpp 70
-rw-r--r-- src/runtime/CL/CLTensor.cpp 16
-rw-r--r-- src/runtime/CL/CLTensorAllocator.cpp 54
-rw-r--r-- src/runtime/CL/ICLSimpleFunction.cpp 13
-rw-r--r-- src/runtime/CL/functions/CLActivationLayer.cpp 9
9 files changed, 256 insertions, 55 deletions
diff --git a/src/runtime/CL/CLBufferAllocator.cpp b/src/runtime/CL/CLBufferAllocator.cpp
index 84789e70d2..ed27320650 100644
--- a/src/runtime/CL/CLBufferAllocator.cpp
+++ b/src/runtime/CL/CLBufferAllocator.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -22,25 +22,35 @@
* SOFTWARE.
*/
#include "arm_compute/runtime/CL/CLBufferAllocator.h"
-#include "arm_compute/runtime/CL/CLMemoryRegion.h"
+#include "arm_compute/core/CL/CLCoreRuntimeContext.h"
#include "arm_compute/core/CL/OpenCL.h"
#include "arm_compute/core/Error.h"
+#include "arm_compute/runtime/CL/CLMemoryRegion.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
#include "support/ToolchainSupport.h"
#include <cstddef>
-using namespace arm_compute;
-
-CLBufferAllocator::CLBufferAllocator(cl::Context context)
- : _context(std::move(context))
+namespace arm_compute
+{
+/** Create a buffer allocator.
+ *
+ * @param[in] ctx Core runtime context to allocate from. When nullptr, allocate()
+ *                uses the context of the CLScheduler singleton instead.
+ */
+CLBufferAllocator::CLBufferAllocator(CLCoreRuntimeContext *ctx)
+ : _ctx(ctx)
{
}
+/** Create a raw OpenCL buffer with CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE.
+ *
+ * The buffer is created on the injected core runtime context when one was supplied,
+ * otherwise on the context of the CLScheduler singleton.
+ *
+ * @param[in] size      Size in bytes passed to clCreateBuffer.
+ * @param[in] alignment Unused.
+ *
+ * @return The cl_mem handle returned by clCreateBuffer, cast to void *. No error code is
+ *         requested from clCreateBuffer (errcode_ret is nullptr).
+ */
void *CLBufferAllocator::allocate(size_t size, size_t alignment)
{
ARM_COMPUTE_UNUSED(alignment);
- cl_mem buf = clCreateBuffer(_context.get(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, size, nullptr, nullptr);
+    // Select the context once so that a single clCreateBuffer call serves both paths.
+    const cl::Context context = (_ctx != nullptr) ? _ctx->context() : CLScheduler::get().context();
+    cl_mem            buf     = clCreateBuffer(context.get(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, size, nullptr, nullptr);
return static_cast<void *>(buf);
}
@@ -53,5 +63,6 @@ void CLBufferAllocator::free(void *ptr)
+/** Create a memory region of the requested size.
+ *
+ * @param[in] size      Size forwarded to the CLBufferMemoryRegion constructor.
+ * @param[in] alignment Unused.
+ *
+ * @return A CLBufferMemoryRegion built from this allocator's context pointer with the
+ *         CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE flags.
+ */
std::unique_ptr<IMemoryRegion> CLBufferAllocator::make_region(size_t size, size_t alignment)
{
ARM_COMPUTE_UNUSED(alignment);
- return arm_compute::support::cpp14::make_unique<CLBufferMemoryRegion>(_context, CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, size);
+ return arm_compute::support::cpp14::make_unique<CLBufferMemoryRegion>(_ctx, CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, size);
}
+} // namespace arm_compute
diff --git a/src/runtime/CL/CLHelpers.cpp b/src/runtime/CL/CLHelpers.cpp
index edfc8ed2aa..c4c7ee2107 100644
--- a/src/runtime/CL/CLHelpers.cpp
+++ b/src/runtime/CL/CLHelpers.cpp
@@ -26,6 +26,7 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/Error.h"
+#include "arm_compute/runtime/CL/CLRuntimeContext.h"
namespace
{
@@ -103,4 +104,19 @@ create_opencl_context_and_device()
ARM_COMPUTE_ERROR_ON_MSG(err != CL_SUCCESS, "Failed to create OpenCL context");
return std::make_tuple(cl_context, device, err);
}
+
+/** Enqueue a kernel on the scheduler associated with a runtime context.
+ *
+ * @param[in] ctx    Runtime context whose GPU scheduler receives the kernel. When nullptr,
+ *                   the CLScheduler singleton is used instead.
+ * @param[in] kernel Kernel to enqueue. Must not be nullptr.
+ * @param[in] flush  Forwarded unchanged to CLScheduler::enqueue().
+ */
+void schedule_kernel_on_ctx(CLRuntimeContext *ctx, ICLKernel *kernel, bool flush)
+{
+    ARM_COMPUTE_ERROR_ON_NULLPTR(kernel);
+
+    // Legacy path: no runtime context supplied, so use the global scheduler.
+    if(ctx == nullptr)
+    {
+        CLScheduler::get().enqueue(*kernel, flush);
+        return;
+    }
+
+    // Context path: the context must expose a scheduler.
+    ARM_COMPUTE_ERROR_ON(ctx->gpu_scheduler() == nullptr);
+    ctx->gpu_scheduler()->enqueue(*kernel, flush);
+}
+
} // namespace arm_compute
diff --git a/src/runtime/CL/CLMemoryRegion.cpp b/src/runtime/CL/CLMemoryRegion.cpp
index 2976903c93..52906a893f 100644
--- a/src/runtime/CL/CLMemoryRegion.cpp
+++ b/src/runtime/CL/CLMemoryRegion.cpp
@@ -23,13 +23,18 @@
*/
#include "arm_compute/runtime/CL/CLMemoryRegion.h"
+#include "arm_compute/core/CL/CLCoreRuntimeContext.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
namespace arm_compute
{
-ICLMemoryRegion::ICLMemoryRegion(cl::Context ctx, size_t size)
- : IMemoryRegion(size), _ctx(std::move(ctx)), _mapping(nullptr), _mem()
+/** Initialise the common state of an OpenCL memory region.
+ *
+ * The command queue and context are copied out of @p ctx at construction time; when
+ * @p ctx is nullptr both are taken from the CLScheduler singleton. The pointer itself
+ * is not stored by this constructor.
+ *
+ * @param[in] ctx  Core runtime context, or nullptr for the legacy singleton path.
+ * @param[in] size Region size forwarded to IMemoryRegion.
+ */
+ICLMemoryRegion::ICLMemoryRegion(CLCoreRuntimeContext *ctx, size_t size)
+ : IMemoryRegion(size),
+ _queue((ctx != nullptr) ? ctx->queue() : CLScheduler::get().queue()),
+ _ctx((ctx != nullptr) ? ctx->context() : CLScheduler::get().context()),
+ _mapping(nullptr),
+ _mem()
{
}
@@ -54,17 +59,17 @@ std::unique_ptr<IMemoryRegion> ICLMemoryRegion::extract_subregion(size_t offset,
return nullptr;
}
-CLBufferMemoryRegion::CLBufferMemoryRegion(cl::Context ctx, cl_mem_flags flags, size_t size)
- : ICLMemoryRegion(std::move(ctx), size)
+/** Create a region backed by a new cl::Buffer.
+ *
+ * No buffer is created when the region size is zero.
+ *
+ * @param[in] ctx   Core runtime context providing the OpenCL context, or nullptr to use the
+ *                  CLScheduler singleton's context.
+ * @param[in] flags Memory flags passed to the cl::Buffer constructor.
+ * @param[in] size  Size of the region.
+ */
+CLBufferMemoryRegion::CLBufferMemoryRegion(CLCoreRuntimeContext *ctx, cl_mem_flags flags, size_t size)
+ : ICLMemoryRegion(ctx, size)
{
if(_size != 0)
{
- _mem = cl::Buffer(_ctx, flags, _size);
+    // Resolve the context first, then create the buffer on it.
+    const cl::Context context = (ctx != nullptr) ? ctx->context() : CLScheduler::get().context();
+    _mem                      = cl::Buffer(context, flags, _size);
}
}
-CLBufferMemoryRegion::CLBufferMemoryRegion(const cl::Buffer &buffer)
- : ICLMemoryRegion(buffer.getInfo<CL_MEM_CONTEXT>(), buffer.getInfo<CL_MEM_SIZE>())
+/** Wrap an existing cl::Buffer in a memory region.
+ *
+ * The region size is read from the buffer (CL_MEM_SIZE). The queue and context held by the
+ * base class come from @p ctx (or the CLScheduler singleton when nullptr); the buffer's own
+ * CL_MEM_CONTEXT is not queried here.
+ *
+ * @param[in] buffer Buffer to wrap; it is copied into the region.
+ * @param[in] ctx    Core runtime context, or nullptr for the legacy singleton path.
+ */
+CLBufferMemoryRegion::CLBufferMemoryRegion(const cl::Buffer &buffer, CLCoreRuntimeContext *ctx)
+ : ICLMemoryRegion(ctx, buffer.getInfo<CL_MEM_SIZE>())
{
_mem = buffer;
}
@@ -88,15 +93,15 @@ void CLBufferMemoryRegion::unmap(cl::CommandQueue &q)
_mapping = nullptr;
}
-ICLSVMMemoryRegion::ICLSVMMemoryRegion(cl::Context ctx, cl_mem_flags flags, size_t size, size_t alignment)
- : ICLMemoryRegion(std::move(ctx), size), _ptr(nullptr)
+/** Allocate an SVM region and wrap it in a cl::Buffer.
+ *
+ * Nothing is allocated when @p size is zero. If clSVMAlloc returns nullptr, no buffer is
+ * created and _ptr stays nullptr.
+ *
+ * @param[in] ctx       Core runtime context providing the OpenCL context, or nullptr to use
+ *                      the CLScheduler singleton's context.
+ * @param[in] flags     Flags passed to clSVMAlloc.
+ * @param[in] size      Size passed to clSVMAlloc.
+ * @param[in] alignment Alignment passed to clSVMAlloc.
+ */
+ICLSVMMemoryRegion::ICLSVMMemoryRegion(CLCoreRuntimeContext *ctx, cl_mem_flags flags, size_t size, size_t alignment)
+ : ICLMemoryRegion(ctx, size), _ptr(nullptr)
{
if(size != 0)
{
- _ptr = clSVMAlloc(_ctx.get(), flags, size, alignment);
+    // Resolve the context once; it serves both the SVM allocation and the wrapping buffer.
+    const cl::Context context = (ctx != nullptr) ? ctx->context() : CLScheduler::get().context();
+    _ptr                      = clSVMAlloc(context.get(), flags, size, alignment);
if(_ptr != nullptr)
{
- _mem = cl::Buffer(_ctx, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, _size, _ptr);
+    _mem = cl::Buffer(context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, _size, _ptr);
}
}
}
@@ -107,7 +112,7 @@ ICLSVMMemoryRegion::~ICLSVMMemoryRegion()
{
try
{
- clFinish(CLScheduler::get().queue().get());
+ clFinish(_queue.get());
_mem = cl::Buffer();
clSVMFree(_ctx.get(), _ptr);
}
@@ -122,8 +127,8 @@ void *ICLSVMMemoryRegion::ptr()
return _ptr;
}
-CLCoarseSVMMemoryRegion::CLCoarseSVMMemoryRegion(cl::Context ctx, cl_mem_flags flags, size_t size, size_t alignment)
- : ICLSVMMemoryRegion(std::move(ctx), flags, size, alignment)
+/** Create a coarse-grain SVM region; all arguments are forwarded to ICLSVMMemoryRegion.
+ *
+ * @param[in] ctx       Core runtime context, or nullptr for the legacy singleton path.
+ * @param[in] flags     Allocation flags.
+ * @param[in] size      Region size.
+ * @param[in] alignment Requested alignment.
+ */
+CLCoarseSVMMemoryRegion::CLCoarseSVMMemoryRegion(CLCoreRuntimeContext *ctx, cl_mem_flags flags, size_t size, size_t alignment)
+ : ICLSVMMemoryRegion(ctx, flags, size, alignment)
{
}
@@ -142,8 +147,8 @@ void CLCoarseSVMMemoryRegion::unmap(cl::CommandQueue &q)
_mapping = nullptr;
}
-CLFineSVMMemoryRegion::CLFineSVMMemoryRegion(cl::Context ctx, cl_mem_flags flags, size_t size, size_t alignment)
- : ICLSVMMemoryRegion(std::move(ctx), flags, size, alignment)
+/** Create a fine-grain SVM region; all arguments are forwarded to ICLSVMMemoryRegion.
+ *
+ * @param[in] ctx       Core runtime context, or nullptr for the legacy singleton path.
+ * @param[in] flags     Allocation flags.
+ * @param[in] size      Region size.
+ * @param[in] alignment Requested alignment.
+ */
+CLFineSVMMemoryRegion::CLFineSVMMemoryRegion(CLCoreRuntimeContext *ctx, cl_mem_flags flags, size_t size, size_t alignment)
+ : ICLSVMMemoryRegion(ctx, flags, size, alignment)
{
}
@@ -162,4 +167,4 @@ void CLFineSVMMemoryRegion::unmap(cl::CommandQueue &q)
ARM_COMPUTE_UNUSED(q);
_mapping = nullptr;
}
-} // namespace arm_compute \ No newline at end of file
+} // namespace arm_compute
diff --git a/src/runtime/CL/CLRuntimeContext.cpp b/src/runtime/CL/CLRuntimeContext.cpp
new file mode 100644
index 0000000000..49e4c10c84
--- /dev/null
+++ b/src/runtime/CL/CLRuntimeContext.cpp
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CL/CLRuntimeContext.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/runtime/CL/CLHelpers.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+
+namespace arm_compute
+{
+/** Create a self-contained OpenCL runtime context.
+ *
+ * Loads the OpenCL symbols, creates a context/device pair and a command queue on it, then
+ * initialises the owned CLScheduler and this object's kernel library with them. Finally the
+ * core runtime context is built from the kernel library and that same context and queue.
+ */
+CLRuntimeContext::CLRuntimeContext()
+    : _gpu_owned_scheduler(support::cpp14::make_unique<CLScheduler>()), _gpu_scheduler(_gpu_owned_scheduler.get()), _symbols(), _core_context()
+{
+    _symbols.load_default();
+    auto ctx_dev_err = create_opencl_context_and_device();
+    ARM_COMPUTE_ERROR_ON_MSG(std::get<2>(ctx_dev_err) != CL_SUCCESS, "Failed to create OpenCL context");
+    auto             ctx   = std::get<0>(ctx_dev_err);
+    auto             dev   = std::get<1>(ctx_dev_err);
+    cl::CommandQueue queue = cl::CommandQueue(ctx, dev);
+    _gpu_owned_scheduler->init(ctx, queue, dev, &_tuner);
+    // Keep the trailing separator, matching the folder used by
+    // CLScheduler::default_init_with_context ("./cl_kernels/").
+    // NOTE(review): presumably the kernel library concatenates folder and file name - confirm.
+    const std::string cl_kernels_folder("./cl_kernels/");
+    _kernel_lib.init(cl_kernels_folder, ctx, dev);
+    // Hand over the context and queue created above directly. CLScheduler::context() refreshes
+    // its cached context from the CLKernelLibrary singleton, which is not the library
+    // initialised here, so reading it back could yield a different context.
+    _core_context = CLCoreRuntimeContext(&_kernel_lib, ctx, queue);
+}
+
+/** Accessor for the kernel library owned by this runtime context.
+ *
+ * @return Reference to the context's own CLKernelLibrary member.
+ */
+CLKernelLibrary &CLRuntimeContext::kernel_library()
+{
+ return _kernel_lib;
+}
+
+/** Accessor for the core runtime context.
+ *
+ * @return Pointer to the CLCoreRuntimeContext member of this object; it is only valid while
+ *         this CLRuntimeContext is alive.
+ */
+CLCoreRuntimeContext *CLRuntimeContext::core_runtime_context()
+{
+ return &_core_context;
+}
+
+/** Replace the scheduler returned by gpu_scheduler().
+ *
+ * Only the non-owning pointer is updated; the scheduler created in the constructor stays
+ * owned by this object.
+ *
+ * @param[in] scheduler Scheduler to use. Must not be nullptr.
+ */
+void CLRuntimeContext::set_gpu_scheduler(CLScheduler *scheduler)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(scheduler);
+ _gpu_scheduler = scheduler;
+}
+
+/** Accessor for the scheduler currently associated with this context.
+ *
+ * @return The scheduler created in the constructor, or the one last passed to
+ *         set_gpu_scheduler().
+ */
+CLScheduler *CLRuntimeContext::gpu_scheduler()
+{
+ return _gpu_scheduler;
+}
+
+} // namespace arm_compute
diff --git a/src/runtime/CL/CLScheduler.cpp b/src/runtime/CL/CLScheduler.cpp
index 701ffe0ab1..e78eaa482f 100644
--- a/src/runtime/CL/CLScheduler.cpp
+++ b/src/runtime/CL/CLScheduler.cpp
@@ -23,13 +23,71 @@
*/
#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "arm_compute/runtime/CL/CLHelpers.h"
-
+#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLKernel.h"
+#include "arm_compute/runtime/CL/CLHelpers.h"
#include "arm_compute/runtime/CL/CLTuner.h"
#include "arm_compute/runtime/CL/tuners/Tuners.h"
-using namespace arm_compute;
+namespace arm_compute
+{
+/** Accessor for the OpenCL context.
+ *
+ * Checks with ARM_COMPUTE_ERROR_ON that the scheduler is initialised, then refreshes the
+ * cached context from the CLKernelLibrary singleton before returning it.
+ *
+ * @return Reference to the scheduler's cached context.
+ */
+cl::Context &CLScheduler::context()
+{
+ ARM_COMPUTE_ERROR_ON(!_is_initialised);
+ _context = CLKernelLibrary::get().context();
+ return _context;
+}
+
+/** Accessor for the command queue.
+ *
+ * Checks with ARM_COMPUTE_ERROR_ON that the scheduler is initialised.
+ *
+ * @return Reference to the scheduler's command queue.
+ */
+cl::CommandQueue &CLScheduler::queue()
+{
+ ARM_COMPUTE_ERROR_ON(!_is_initialised);
+ return _queue;
+}
+
+/** Accessor for the GPU target.
+ *
+ * @return The GPU target currently stored in the scheduler.
+ */
+GPUTarget CLScheduler::target() const
+{
+ return _target;
+}
+
+/** Replace the scheduler's command queue.
+ *
+ * @param[in] queue Command queue to use from now on; it is moved into the scheduler.
+ */
+void CLScheduler::set_queue(cl::CommandQueue queue)
+{
+ _queue = std::move(queue);
+}
+
+/** Override the GPU target stored in the scheduler.
+ *
+ * @param[in] target GPU target to store.
+ */
+void CLScheduler::set_target(GPUTarget target)
+{
+ _target = target;
+}
+
+/** Set the tuner used by the scheduler.
+ *
+ * @param[in] tuner Tuner to store; the pointer is kept as-is, and nullptr disables the
+ *                  forwarding done by tune_kernel_static().
+ */
+void CLScheduler::set_tuner(ICLTuner *tuner)
+{
+ _cl_tuner = tuner;
+}
+
+/** Call finish() on the scheduler's command queue. */
+void CLScheduler::sync()
+{
+ _queue.finish();
+}
+
+/** Enqueue a marker on the scheduler's command queue.
+ *
+ * @return The event passed to enqueueMarker() for that marker.
+ */
+cl::Event CLScheduler::enqueue_sync_event()
+{
+ cl::Event event;
+ _queue.enqueueMarker(&event);
+ return event;
+}
+
+/** Forward a kernel to the tuner's static tuning.
+ *
+ * Does nothing when no tuner is set.
+ *
+ * @param[in] kernel Kernel passed to ICLTuner::tune_kernel_static().
+ */
+void CLScheduler::tune_kernel_static(ICLKernel &kernel)
+{
+ if(_cl_tuner != nullptr)
+ {
+ _cl_tuner->tune_kernel_static(kernel);
+ }
+}
+
+/** Report whether the scheduler has been initialised.
+ *
+ * @return The value of the internal initialisation flag.
+ */
+bool CLScheduler::is_initialised() const
+{
+ return _is_initialised;
+}
std::once_flag CLScheduler::_initialize_symbols;
@@ -49,8 +107,9 @@ void CLScheduler::default_init_with_context(cl::Device &device, cl::Context &ctx
{
if(!_is_initialised)
{
- cl::CommandQueue queue = cl::CommandQueue(ctx, device);
- CLKernelLibrary::get().init("./cl_kernels/", ctx, device);
+ const std::string cl_kernels_folder("./cl_kernels/");
+ cl::CommandQueue queue = cl::CommandQueue(ctx, device);
+ CLKernelLibrary::get().init(cl_kernels_folder, ctx, device);
init(ctx, queue, device, cl_tuner);
_cl_default_static_tuner = tuners::TunerFactory::create_tuner(_target);
_cl_tuner = (cl_tuner == nullptr) ? _cl_default_static_tuner.get() : cl_tuner;
@@ -113,3 +172,4 @@ void CLScheduler::enqueue(ICLKernel &kernel, bool flush)
_queue.flush();
}
}
+} // namespace arm_compute
diff --git a/src/runtime/CL/CLTensor.cpp b/src/runtime/CL/CLTensor.cpp
index 9bbf926b58..a6d0cf77ca 100644
--- a/src/runtime/CL/CLTensor.cpp
+++ b/src/runtime/CL/CLTensor.cpp
@@ -23,15 +23,21 @@
*/
#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/CLRuntimeContext.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
namespace arm_compute
{
-CLTensor::CLTensor()
- : _allocator(this)
+/** Create a tensor bound to an optional runtime context.
+ *
+ * @param[in] ctx Runtime context, or nullptr for the legacy singleton path. The pointer is
+ *                converted with static_cast (no run-time type check), so a non-null value
+ *                must actually point to a CLRuntimeContext.
+ */
+CLTensor::CLTensor(IRuntimeContext *ctx)
+ : _allocator(this, static_cast<CLRuntimeContext *>(ctx)), _ctx(static_cast<CLRuntimeContext *>(ctx))
{
}
+/** Accessor for the runtime context the tensor was created with.
+ *
+ * @return The stored context pointer; nullptr when the tensor uses the legacy singleton path.
+ */
+CLRuntimeContext *CLTensor::context()
+{
+ return _ctx;
+}
+
TensorInfo *CLTensor::info() const
{
return &_allocator.info();
@@ -59,12 +65,12 @@ CLTensorAllocator *CLTensor::allocator()
+/** Map the tensor through ICLTensor::map().
+ *
+ * Uses the command queue of the runtime context's scheduler, or the CLScheduler singleton's
+ * queue when the tensor has no runtime context.
+ *
+ * @param[in] blocking Forwarded to ICLTensor::map().
+ */
void CLTensor::map(bool blocking)
{
- ICLTensor::map(CLScheduler::get().queue(), blocking);
+ ICLTensor::map(_ctx == nullptr ? CLScheduler::get().queue() : _ctx->gpu_scheduler()->queue(), blocking);
}
+/** Unmap the tensor through ICLTensor::unmap().
+ *
+ * Uses the command queue of the runtime context's scheduler, or the CLScheduler singleton's
+ * queue when the tensor has no runtime context.
+ */
void CLTensor::unmap()
{
- ICLTensor::unmap(CLScheduler::get().queue());
+ ICLTensor::unmap(_ctx == nullptr ? CLScheduler::get().queue() : _ctx->gpu_scheduler()->queue());
}
uint8_t *CLTensor::do_map(cl::CommandQueue &q, bool blocking)
@@ -81,4 +87,4 @@ void CLTensor::associate_memory_group(arm_compute::IMemoryGroup *memory_group)
{
_allocator.set_associated_memory_group(memory_group);
}
-} // namespace arm_compute \ No newline at end of file
+} // namespace arm_compute
diff --git a/src/runtime/CL/CLTensorAllocator.cpp b/src/runtime/CL/CLTensorAllocator.cpp
index 2b5fbb8241..eaf46d42ca 100644
--- a/src/runtime/CL/CLTensorAllocator.cpp
+++ b/src/runtime/CL/CLTensorAllocator.cpp
@@ -25,6 +25,7 @@
#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/runtime/CL/CLRuntimeContext.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
namespace arm_compute
@@ -41,10 +42,10 @@ namespace
*
* @return A wrapped memory region
*/
-std::unique_ptr<ICLMemoryRegion> allocate_region(const cl::Context &context, size_t size, cl_uint alignment)
+std::unique_ptr<ICLMemoryRegion> allocate_region(CLCoreRuntimeContext *ctx, size_t size, cl_uint alignment)
{
// Try fine-grain SVM
- std::unique_ptr<ICLMemoryRegion> region = support::cpp14::make_unique<CLFineSVMMemoryRegion>(context,
+ std::unique_ptr<ICLMemoryRegion> region = support::cpp14::make_unique<CLFineSVMMemoryRegion>(ctx,
CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER,
size,
alignment);
@@ -52,12 +53,12 @@ std::unique_ptr<ICLMemoryRegion> allocate_region(const cl::Context &context, siz
// Try coarse-grain SVM in case of failure
if(region != nullptr && region->ptr() == nullptr)
{
- region = support::cpp14::make_unique<CLCoarseSVMMemoryRegion>(context, CL_MEM_READ_WRITE, size, alignment);
+ region = support::cpp14::make_unique<CLCoarseSVMMemoryRegion>(ctx, CL_MEM_READ_WRITE, size, alignment);
}
// Try legacy buffer memory in case of failure
if(region != nullptr && region->ptr() == nullptr)
{
- region = support::cpp14::make_unique<CLBufferMemoryRegion>(context, CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, size);
+ region = support::cpp14::make_unique<CLBufferMemoryRegion>(ctx, CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, size);
}
return region;
}
@@ -103,8 +104,8 @@ void populate_quantization_info(CLFloatArray &scale, CLInt32Array &offset, const
}
} // namespace
-CLTensorAllocator::CLTensorAllocator(IMemoryManageable *owner)
- : _owner(owner), _associated_memory_group(nullptr), _memory(), _mapping(nullptr), _scale(), _offset()
+/** Create a tensor allocator.
+ *
+ * @param[in] owner Memory-manageable object that owns this allocator.
+ * @param[in] ctx   Runtime context to use, or nullptr. With nullptr, lock() and unlock() use
+ *                  the CLScheduler singleton's queue.
+ */
+CLTensorAllocator::CLTensorAllocator(IMemoryManageable *owner, CLRuntimeContext *ctx)
+ : _ctx(ctx), _owner(owner), _associated_memory_group(nullptr), _memory(), _mapping(nullptr), _scale(), _offset()
{
}
@@ -129,7 +130,15 @@ void CLTensorAllocator::allocate()
if(_associated_memory_group == nullptr)
{
// Perform memory allocation
- _memory.set_owned_region(allocate_region(CLScheduler::get().context(), info().total_size(), 0));
+ if(_ctx == nullptr)
+ {
+ auto legacy_ctx = CLCoreRuntimeContext(nullptr, CLScheduler::get().context(), CLScheduler::get().queue());
+ _memory.set_owned_region(allocate_region(&legacy_ctx, info().total_size(), 0));
+ }
+ else
+ {
+ _memory.set_owned_region(allocate_region(_ctx->core_runtime_context(), info().total_size(), 0));
+ }
}
else
{
@@ -162,9 +171,17 @@ Status CLTensorAllocator::import_memory(cl::Buffer buffer)
ARM_COMPUTE_RETURN_ERROR_ON(buffer.getInfo<CL_MEM_CONTEXT>().get() != CLScheduler::get().context().get());
ARM_COMPUTE_RETURN_ERROR_ON(_associated_memory_group != nullptr);
- _memory.set_owned_region(support::cpp14::make_unique<CLBufferMemoryRegion>(buffer));
- info().set_is_resizable(false);
+ if(_ctx == nullptr)
+ {
+ auto legacy_ctx = CLCoreRuntimeContext(nullptr, CLScheduler::get().context(), CLScheduler::get().queue());
+ _memory.set_owned_region(support::cpp14::make_unique<CLBufferMemoryRegion>(buffer, &legacy_ctx));
+ }
+ else
+ {
+ _memory.set_owned_region(support::cpp14::make_unique<CLBufferMemoryRegion>(buffer, _ctx->core_runtime_context()));
+ }
+ info().set_is_resizable(false);
return Status{};
}
@@ -179,13 +196,28 @@ void CLTensorAllocator::set_associated_memory_group(IMemoryGroup *associated_mem
+/** Map the allocation with a blocking map.
+ *
+ * @return The pointer returned by map() on the selected command queue.
+ */
uint8_t *CLTensorAllocator::lock()
{
- return map(CLScheduler::get().queue(), true);
+    // Use the runtime context's scheduler when present, otherwise the legacy singleton.
+    cl::CommandQueue &queue = (_ctx != nullptr) ? _ctx->gpu_scheduler()->queue() : CLScheduler::get().queue();
+    return map(queue, true);
}
+/** Unmap the allocation previously mapped by lock().
+ *
+ * The memory region must exist; the pointer handed to unmap() is the region's buffer.
+ */
void CLTensorAllocator::unlock()
{
ARM_COMPUTE_ERROR_ON(_memory.region() == nullptr);
- unmap(CLScheduler::get().queue(), reinterpret_cast<uint8_t *>(_memory.region()->buffer()));
+    // Use the runtime context's scheduler when present, otherwise the legacy singleton API.
+    cl::CommandQueue &queue = (_ctx != nullptr) ? _ctx->gpu_scheduler()->queue() : CLScheduler::get().queue();
+    unmap(queue, reinterpret_cast<uint8_t *>(_memory.region()->buffer()));
}
uint8_t *CLTensorAllocator::map(cl::CommandQueue &q, bool blocking)
diff --git a/src/runtime/CL/ICLSimpleFunction.cpp b/src/runtime/CL/ICLSimpleFunction.cpp
index a1a56fd06c..fb8eba8aa4 100644
--- a/src/runtime/CL/ICLSimpleFunction.cpp
+++ b/src/runtime/CL/ICLSimpleFunction.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,20 +24,21 @@
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
#include "arm_compute/core/Error.h"
+#include "arm_compute/runtime/CL/CLHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
using namespace arm_compute;
-ICLSimpleFunction::ICLSimpleFunction() // NOLINT
+/** Create a simple function bound to an optional runtime context.
+ *
+ * @param[in] ctx Runtime context passed to schedule_kernel_on_ctx() by run(); may be nullptr.
+ */
+ICLSimpleFunction::ICLSimpleFunction(CLRuntimeContext *ctx) // NOLINT
: _kernel(),
- _border_handler()
+ _border_handler(),
+ _ctx(ctx)
{
}
+/** Enqueue the border handler and then the configured kernel.
+ *
+ * Both are scheduled through schedule_kernel_on_ctx() with the stored context. The border
+ * handler is enqueued with flush set to false; the kernel uses that helper's default flush
+ * argument. A kernel must have been set beforehand.
+ */
void ICLSimpleFunction::run()
{
ARM_COMPUTE_ERROR_ON_MSG(!_kernel, "The child class didn't set the CL kernel or function isn't configured");
-
- CLScheduler::get().enqueue(_border_handler, false);
- CLScheduler::get().enqueue(*_kernel);
+ schedule_kernel_on_ctx(_ctx, &_border_handler, false);
+ schedule_kernel_on_ctx(_ctx, _kernel.get());
}
diff --git a/src/runtime/CL/functions/CLActivationLayer.cpp b/src/runtime/CL/functions/CLActivationLayer.cpp
index 2b66795cf9..00dbb71f4c 100644
--- a/src/runtime/CL/functions/CLActivationLayer.cpp
+++ b/src/runtime/CL/functions/CLActivationLayer.cpp
@@ -25,18 +25,21 @@
#include "arm_compute/core/CL/kernels/CLActivationLayerKernel.h"
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLRuntimeContext.h"
#include "support/ToolchainSupport.h"
namespace arm_compute
{
-CLActivationLayer::CLActivationLayer(void *ctx)
+/** Create an activation layer function.
+ *
+ * @param[in] ctx Runtime context forwarded to ICLSimpleFunction; may be nullptr.
+ */
+CLActivationLayer::CLActivationLayer(CLRuntimeContext *ctx)
+ : ICLSimpleFunction(ctx)
{
- ARM_COMPUTE_UNUSED(ctx);
}
+/** Create and configure the activation kernel run by this function.
+ *
+ * The kernel is constructed with the core runtime context of the stored runtime context, or
+ * with nullptr when no runtime context was supplied.
+ *
+ * @param[in] input    Forwarded to CLActivationLayerKernel::configure().
+ * @param[in] output   Forwarded to CLActivationLayerKernel::configure().
+ * @param[in] act_info Forwarded to CLActivationLayerKernel::configure().
+ */
void CLActivationLayer::configure(ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info)
{
- auto k = arm_compute::support::cpp14::make_unique<CLActivationLayerKernel>();
+ auto core_ctx = _ctx ? _ctx->core_runtime_context() : /* Legacy */ nullptr;
+
+ auto k = arm_compute::support::cpp14::make_unique<CLActivationLayerKernel>(core_ctx);
k->configure(input, output, act_info);
_kernel = std::move(k);
}