diff options
author | Anthony Barbier <anthony.barbier@arm.com> | 2018-04-20 11:31:52 +0100 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:49:37 +0000 |
commit | 48c19f1308ecdc7ea37a6bf5ce9459e0954e9007 (patch) | |
tree | 030de83723221b69de29146826884bebb8ec3939 | |
parent | b34690b71a0125c37d087b00becf2e60d66b46d1 (diff) | |
download | ComputeLibrary-48c19f1308ecdc7ea37a6bf5ce9459e0954e9007.tar.gz |
COMPMID-959 Refactor OpenCL interceptors to use lambda functions
Change-Id: I29b73a311d7278255b77524f2a5eaaa4dccab711
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/128392
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
-rw-r--r-- | arm_compute/core/CL/CLKernelLibrary.h | 3 | ||||
-rw-r--r-- | src/runtime/CL/CLTuner.cpp | 75 | ||||
-rw-r--r-- | tests/framework/instruments/OpenCLTimer.cpp | 83 |
3 files changed, 55 insertions, 106 deletions
diff --git a/arm_compute/core/CL/CLKernelLibrary.h b/arm_compute/core/CL/CLKernelLibrary.h index 257ef7ded2..4c42d8e23a 100644 --- a/arm_compute/core/CL/CLKernelLibrary.h +++ b/arm_compute/core/CL/CLKernelLibrary.h @@ -300,8 +300,7 @@ public: _built_programs_map.clear(); } - /** Access the cache of built OpenCL programs - */ + /** Access the cache of built OpenCL programs */ const std::map<std::string, cl::Program> &get_built_programs() const { return _built_programs_map; diff --git a/src/runtime/CL/CLTuner.cpp b/src/runtime/CL/CLTuner.cpp index 17a62ab46e..5f82cd3fbe 100644 --- a/src/runtime/CL/CLTuner.cpp +++ b/src/runtime/CL/CLTuner.cpp @@ -35,61 +35,6 @@ using namespace arm_compute; -namespace -{ -/* Function to be used to intercept kernel enqueues and store their OpenCL Event */ -class Interceptor -{ -public: - explicit Interceptor(CLTuner &tuner); - - /** clEnqueueNDRangeKernel interface - * - * @param[in] command_queue A valid command-queue. The kernel will be queued for execution on the device associated with command_queue. - * @param[in] kernel A valid kernel object. The OpenCL context associated with kernel and command_queue must be the same. - * @param[in] work_dim The number of dimensions used to specify the global work-items and work-items in the work-group. work_dim must be greater than zero and less than or equal to CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS. - * @param[in] gwo Global-Workgroup-Offset. It can be used to specify an array of work_dim unsigned values that describe the offset used to calculate the global ID of a work-item. If global_work_offset is NULL, the global IDs start at offset (0, 0, ... 0). - * @param[in] gws Global-Workgroup-Size. Points to an array of work_dim unsigned values that describe the number of global work-items in work_dim dimensions that will execute the kernel function. - * @param[in] lws Local-Workgroup-Size. Points to an array of work_dim unsigned values that describe the number of work-items that make up a work-group - * @param[in] num_events_in_wait_list Number of events in the waiting list - * @param[in] event_wait_list Event waiting list - * @param[in] event OpenCL kernel event - * - * @return the OpenCL status - */ - cl_int operator()(cl_command_queue command_queue, cl_kernel kernel, cl_uint work_dim, const size_t *gwo, const size_t *gws, const size_t *lws, cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, cl_event *event); - -private: - CLTuner &_tuner; -}; - -Interceptor::Interceptor(CLTuner &tuner) - : _tuner(tuner) -{ -} - -cl_int Interceptor::operator()(cl_command_queue command_queue, cl_kernel kernel, cl_uint work_dim, const size_t *gwo, const size_t *gws, const size_t *lws, cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, cl_event *event) -{ - ARM_COMPUTE_ERROR_ON_MSG(event != nullptr, "Not supported"); - ARM_COMPUTE_UNUSED(event); - if(_tuner.kernel_event_is_set()) - { - // If the event is already set it means the kernel enqueue is sliced: given that we only time the first slice we can save time by skipping the other enqueues. - return CL_SUCCESS; - } - cl_event tmp; - cl_int retval = _tuner.real_clEnqueueNDRangeKernel(command_queue, kernel, work_dim, gwo, gws, lws, num_events_in_wait_list, event_wait_list, &tmp); - - // Set OpenCL event - _tuner.set_cl_kernel_event(tmp); - - return retval; -} - -} // namespace - CLTuner::CLTuner(bool tune_new_kernels) : real_clEnqueueNDRangeKernel(nullptr), _lws_table(), _queue(), _queue_profiler(), _kernel_event(), _tune_new_kernels(tune_new_kernels) { @@ -178,7 +123,25 @@ cl::NDRange CLTuner::find_optimal_lws(ICLKernel &kernel) } } // Start intercepting enqueues: - CLSymbols::get().clEnqueueNDRangeKernel_ptr = Interceptor(*this); + auto interceptor = [this](cl_command_queue command_queue, cl_kernel kernel, cl_uint work_dim, const size_t *gwo, const size_t *gws, const size_t *lws, cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, cl_event * event) + { + ARM_COMPUTE_ERROR_ON_MSG(event != nullptr, "Not supported"); + ARM_COMPUTE_UNUSED(event); + if(this->kernel_event_is_set()) + { + // If the event is already set it means the kernel enqueue is sliced: given that we only time the first slice we can save time by skipping the other enqueues. + return CL_SUCCESS; + } + cl_event tmp; + cl_int retval = this->real_clEnqueueNDRangeKernel(command_queue, kernel, work_dim, gwo, gws, lws, num_events_in_wait_list, event_wait_list, &tmp); + + // Set OpenCL event + this->set_cl_kernel_event(tmp); + + return retval; + }; + CLSymbols::get().clEnqueueNDRangeKernel_ptr = interceptor; cl_ulong min_exec_time = std::numeric_limits<cl_ulong>::max(); diff --git a/tests/framework/instruments/OpenCLTimer.cpp b/tests/framework/instruments/OpenCLTimer.cpp index 9743015cec..d9d16bc829 100644 --- a/tests/framework/instruments/OpenCLTimer.cpp +++ b/tests/framework/instruments/OpenCLTimer.cpp @@ -43,53 +43,6 @@ std::string OpenCLTimer::id() const return "OpenCLTimer"; } -/* Function to be used to intercept kernel enqueues and store their OpenCL Event */ -class Interceptor -{ -public: - explicit Interceptor(OpenCLTimer &timer) - : _timer(timer) - { - } - - cl_int operator()( - cl_command_queue command_queue, - cl_kernel kernel, - cl_uint work_dim, - const size_t *gwo, - const size_t *gws, - const size_t *lws, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) - { - ARM_COMPUTE_ERROR_ON_MSG(event != nullptr, "Not supported"); - ARM_COMPUTE_UNUSED(event); - - OpenCLTimer::kernel_info info; - cl::Kernel cpp_kernel(kernel, true); - std::stringstream ss; - ss << cpp_kernel.getInfo<CL_KERNEL_FUNCTION_NAME>(); - if(gws != nullptr) - { - ss << " GWS[" << gws[0] << "," << gws[1] << "," << gws[2] << "]"; - } - if(lws != nullptr) - { - ss << " LWS[" << lws[0] << "," << lws[1] << "," << lws[2] << "]"; - } - info.name = ss.str(); - cl_event tmp; - cl_int retval = _timer.real_function(command_queue, kernel, work_dim, gwo, gws, lws, num_events_in_wait_list, event_wait_list, &tmp); - info.event = tmp; - _timer.kernels.push_back(std::move(info)); - return retval; - } - -private: - OpenCLTimer &_timer; -}; - OpenCLTimer::OpenCLTimer(ScaleFactor scale_factor) : real_function(CLSymbols::get().clEnqueueNDRangeKernel_ptr) { @@ -127,7 +80,41 @@ void OpenCLTimer::start() { kernels.clear(); // Start intercepting enqueues: - CLSymbols::get().clEnqueueNDRangeKernel_ptr = Interceptor(*this); + auto interceptor = [this]( + cl_command_queue command_queue, + cl_kernel kernel, + cl_uint work_dim, + const size_t *gwo, + const size_t *gws, + const size_t *lws, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) + { + ARM_COMPUTE_ERROR_ON_MSG(event != nullptr, "Not supported"); + ARM_COMPUTE_UNUSED(event); + + OpenCLTimer::kernel_info info; + cl::Kernel cpp_kernel(kernel, true); + std::stringstream ss; + ss << cpp_kernel.getInfo<CL_KERNEL_FUNCTION_NAME>(); + if(gws != nullptr) + { + ss << " GWS[" << gws[0] << "," << gws[1] << "," << gws[2] << "]"; + } + if(lws != nullptr) + { + ss << " LWS[" << lws[0] << "," << lws[1] << "," << lws[2] << "]"; + } + info.name = ss.str(); + cl_event tmp; + cl_int retval = this->real_function(command_queue, kernel, work_dim, gwo, gws, lws, num_events_in_wait_list, event_wait_list, &tmp); + info.event = tmp; + this->kernels.push_back(std::move(info)); + return retval; + }; + + CLSymbols::get().clEnqueueNDRangeKernel_ptr = interceptor; } void OpenCLTimer::stop() |