aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAnthony Barbier <anthony.barbier@arm.com>2018-04-20 11:31:52 +0100
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:49:37 +0000
commit48c19f1308ecdc7ea37a6bf5ce9459e0954e9007 (patch)
tree030de83723221b69de29146826884bebb8ec3939
parentb34690b71a0125c37d087b00becf2e60d66b46d1 (diff)
downloadComputeLibrary-48c19f1308ecdc7ea37a6bf5ce9459e0954e9007.tar.gz
COMPMID-959 Refactor OpenCL interceptors to use lambda functions
Change-Id: I29b73a311d7278255b77524f2a5eaaa4dccab711 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/128392 Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com> Tested-by: Jenkins <bsgcomp@arm.com>
-rw-r--r--arm_compute/core/CL/CLKernelLibrary.h3
-rw-r--r--src/runtime/CL/CLTuner.cpp75
-rw-r--r--tests/framework/instruments/OpenCLTimer.cpp83
3 files changed, 55 insertions, 106 deletions
diff --git a/arm_compute/core/CL/CLKernelLibrary.h b/arm_compute/core/CL/CLKernelLibrary.h
index 257ef7ded2..4c42d8e23a 100644
--- a/arm_compute/core/CL/CLKernelLibrary.h
+++ b/arm_compute/core/CL/CLKernelLibrary.h
@@ -300,8 +300,7 @@ public:
_built_programs_map.clear();
}
- /** Access the cache of built OpenCL programs
- */
+ /** Access the cache of built OpenCL programs */
const std::map<std::string, cl::Program> &get_built_programs() const
{
return _built_programs_map;
diff --git a/src/runtime/CL/CLTuner.cpp b/src/runtime/CL/CLTuner.cpp
index 17a62ab46e..5f82cd3fbe 100644
--- a/src/runtime/CL/CLTuner.cpp
+++ b/src/runtime/CL/CLTuner.cpp
@@ -35,61 +35,6 @@
using namespace arm_compute;
-namespace
-{
-/* Function to be used to intercept kernel enqueues and store their OpenCL Event */
-class Interceptor
-{
-public:
- explicit Interceptor(CLTuner &tuner);
-
- /** clEnqueueNDRangeKernel interface
- *
- * @param[in] command_queue A valid command-queue. The kernel will be queued for execution on the device associated with command_queue.
- * @param[in] kernel A valid kernel object. The OpenCL context associated with kernel and command_queue must be the same.
- * @param[in] work_dim The number of dimensions used to specify the global work-items and work-items in the work-group. work_dim must be greater than zero and less than or equal to CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS.
- * @param[in] gwo Global-Workgroup-Offset. It can be used to specify an array of work_dim unsigned values that describe the offset used to calculate the global ID of a work-item. If global_work_offset is NULL, the global IDs start at offset (0, 0, ... 0).
- * @param[in] gws Global-Workgroup-Size. Points to an array of work_dim unsigned values that describe the number of global work-items in work_dim dimensions that will execute the kernel function.
- * @param[in] lws Local-Workgroup-Size. Points to an array of work_dim unsigned values that describe the number of work-items that make up a work-group
- * @param[in] num_events_in_wait_list Number of events in the waiting list
- * @param[in] event_wait_list Event waiting list
- * @param[in] event OpenCL kernel event
- *
- * @return the OpenCL status
- */
- cl_int operator()(cl_command_queue command_queue, cl_kernel kernel, cl_uint work_dim, const size_t *gwo, const size_t *gws, const size_t *lws, cl_uint num_events_in_wait_list,
- const cl_event *event_wait_list, cl_event *event);
-
-private:
- CLTuner &_tuner;
-};
-
-Interceptor::Interceptor(CLTuner &tuner)
- : _tuner(tuner)
-{
-}
-
-cl_int Interceptor::operator()(cl_command_queue command_queue, cl_kernel kernel, cl_uint work_dim, const size_t *gwo, const size_t *gws, const size_t *lws, cl_uint num_events_in_wait_list,
- const cl_event *event_wait_list, cl_event *event)
-{
- ARM_COMPUTE_ERROR_ON_MSG(event != nullptr, "Not supported");
- ARM_COMPUTE_UNUSED(event);
- if(_tuner.kernel_event_is_set())
- {
- // If the event is already set it means the kernel enqueue is sliced: given that we only time the first slice we can save time by skipping the other enqueues.
- return CL_SUCCESS;
- }
- cl_event tmp;
- cl_int retval = _tuner.real_clEnqueueNDRangeKernel(command_queue, kernel, work_dim, gwo, gws, lws, num_events_in_wait_list, event_wait_list, &tmp);
-
- // Set OpenCL event
- _tuner.set_cl_kernel_event(tmp);
-
- return retval;
-}
-
-} // namespace
-
CLTuner::CLTuner(bool tune_new_kernels)
: real_clEnqueueNDRangeKernel(nullptr), _lws_table(), _queue(), _queue_profiler(), _kernel_event(), _tune_new_kernels(tune_new_kernels)
{
@@ -178,7 +123,25 @@ cl::NDRange CLTuner::find_optimal_lws(ICLKernel &kernel)
}
}
// Start intercepting enqueues:
- CLSymbols::get().clEnqueueNDRangeKernel_ptr = Interceptor(*this);
+ auto interceptor = [this](cl_command_queue command_queue, cl_kernel kernel, cl_uint work_dim, const size_t *gwo, const size_t *gws, const size_t *lws, cl_uint num_events_in_wait_list,
+ const cl_event * event_wait_list, cl_event * event)
+ {
+ ARM_COMPUTE_ERROR_ON_MSG(event != nullptr, "Not supported");
+ ARM_COMPUTE_UNUSED(event);
+ if(this->kernel_event_is_set())
+ {
+ // If the event is already set it means the kernel enqueue is sliced: given that we only time the first slice we can save time by skipping the other enqueues.
+ return CL_SUCCESS;
+ }
+ cl_event tmp;
+ cl_int retval = this->real_clEnqueueNDRangeKernel(command_queue, kernel, work_dim, gwo, gws, lws, num_events_in_wait_list, event_wait_list, &tmp);
+
+ // Set OpenCL event
+ this->set_cl_kernel_event(tmp);
+
+ return retval;
+ };
+ CLSymbols::get().clEnqueueNDRangeKernel_ptr = interceptor;
cl_ulong min_exec_time = std::numeric_limits<cl_ulong>::max();
diff --git a/tests/framework/instruments/OpenCLTimer.cpp b/tests/framework/instruments/OpenCLTimer.cpp
index 9743015cec..d9d16bc829 100644
--- a/tests/framework/instruments/OpenCLTimer.cpp
+++ b/tests/framework/instruments/OpenCLTimer.cpp
@@ -43,53 +43,6 @@ std::string OpenCLTimer::id() const
return "OpenCLTimer";
}
-/* Function to be used to intercept kernel enqueues and store their OpenCL Event */
-class Interceptor
-{
-public:
- explicit Interceptor(OpenCLTimer &timer)
- : _timer(timer)
- {
- }
-
- cl_int operator()(
- cl_command_queue command_queue,
- cl_kernel kernel,
- cl_uint work_dim,
- const size_t *gwo,
- const size_t *gws,
- const size_t *lws,
- cl_uint num_events_in_wait_list,
- const cl_event *event_wait_list,
- cl_event *event)
- {
- ARM_COMPUTE_ERROR_ON_MSG(event != nullptr, "Not supported");
- ARM_COMPUTE_UNUSED(event);
-
- OpenCLTimer::kernel_info info;
- cl::Kernel cpp_kernel(kernel, true);
- std::stringstream ss;
- ss << cpp_kernel.getInfo<CL_KERNEL_FUNCTION_NAME>();
- if(gws != nullptr)
- {
- ss << " GWS[" << gws[0] << "," << gws[1] << "," << gws[2] << "]";
- }
- if(lws != nullptr)
- {
- ss << " LWS[" << lws[0] << "," << lws[1] << "," << lws[2] << "]";
- }
- info.name = ss.str();
- cl_event tmp;
- cl_int retval = _timer.real_function(command_queue, kernel, work_dim, gwo, gws, lws, num_events_in_wait_list, event_wait_list, &tmp);
- info.event = tmp;
- _timer.kernels.push_back(std::move(info));
- return retval;
- }
-
-private:
- OpenCLTimer &_timer;
-};
-
OpenCLTimer::OpenCLTimer(ScaleFactor scale_factor)
: real_function(CLSymbols::get().clEnqueueNDRangeKernel_ptr)
{
@@ -127,7 +80,41 @@ void OpenCLTimer::start()
{
kernels.clear();
// Start intercepting enqueues:
- CLSymbols::get().clEnqueueNDRangeKernel_ptr = Interceptor(*this);
+ auto interceptor = [this](
+ cl_command_queue command_queue,
+ cl_kernel kernel,
+ cl_uint work_dim,
+ const size_t *gwo,
+ const size_t *gws,
+ const size_t *lws,
+ cl_uint num_events_in_wait_list,
+ const cl_event * event_wait_list,
+ cl_event * event)
+ {
+ ARM_COMPUTE_ERROR_ON_MSG(event != nullptr, "Not supported");
+ ARM_COMPUTE_UNUSED(event);
+
+ OpenCLTimer::kernel_info info;
+ cl::Kernel cpp_kernel(kernel, true);
+ std::stringstream ss;
+ ss << cpp_kernel.getInfo<CL_KERNEL_FUNCTION_NAME>();
+ if(gws != nullptr)
+ {
+ ss << " GWS[" << gws[0] << "," << gws[1] << "," << gws[2] << "]";
+ }
+ if(lws != nullptr)
+ {
+ ss << " LWS[" << lws[0] << "," << lws[1] << "," << lws[2] << "]";
+ }
+ info.name = ss.str();
+ cl_event tmp;
+ cl_int retval = this->real_function(command_queue, kernel, work_dim, gwo, gws, lws, num_events_in_wait_list, event_wait_list, &tmp);
+ info.event = tmp;
+ this->kernels.push_back(std::move(info));
+ return retval;
+ };
+
+ CLSymbols::get().clEnqueueNDRangeKernel_ptr = interceptor;
}
void OpenCLTimer::stop()