diff options
Diffstat (limited to 'src/core')
-rw-r--r-- | src/core/CL/CLCompileContext.cpp | 19 | ||||
-rw-r--r-- | src/core/CL/CLHelpers.cpp | 24 | ||||
-rw-r--r-- | src/core/CL/CLKernelLibrary.cpp | 5 | ||||
-rw-r--r-- | src/core/CL/ICLKernel.cpp | 11 | ||||
-rw-r--r-- | src/core/CL/ICLKernel.h | 51 | ||||
-rw-r--r-- | src/core/CL/OpenCL.cpp | 20 |
6 files changed, 110 insertions, 20 deletions
diff --git a/src/core/CL/CLCompileContext.cpp b/src/core/CL/CLCompileContext.cpp index 0afb7e5e0e..3db0fe515a 100644 --- a/src/core/CL/CLCompileContext.cpp +++ b/src/core/CL/CLCompileContext.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020 Arm Limited. + * Copyright (c) 2020-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -137,15 +137,16 @@ Kernel::Kernel(std::string name, const cl::Program &program) { } CLCompileContext::CLCompileContext() - : _context(), _device(), _programs_map(), _built_programs_map() + : _context(), _device(), _programs_map(), _built_programs_map(), _is_wbsm_supported() { } CLCompileContext::CLCompileContext(cl::Context context, const cl::Device &device) - : _context(), _device(), _programs_map(), _built_programs_map() + : _context(), _device(), _programs_map(), _built_programs_map(), _is_wbsm_supported() { - _context = std::move(context); - _device = CLDevice(device); + _context = std::move(context); + _device = CLDevice(device); + _is_wbsm_supported = get_wbsm_support_info(device); } Kernel CLCompileContext::create_kernel(const std::string &kernel_name, const std::string &program_name, const std::string &program_source, @@ -318,7 +319,8 @@ const cl::Device &CLCompileContext::get_device() const void CLCompileContext::set_device(cl::Device device) { - _device = std::move(device); + _device = std::move(device); + _is_wbsm_supported = get_wbsm_support_info(device); } cl::NDRange CLCompileContext::default_ndrange() const @@ -346,6 +348,11 @@ bool CLCompileContext::int64_base_atomics_supported() const return _device.supported("cl_khr_int64_base_atomics"); } +bool CLCompileContext::is_wbsm_supported() const +{ + return _is_wbsm_supported; +} + size_t CLCompileContext::max_local_workgroup_size(const cl::Kernel &kernel) const { size_t result; diff --git a/src/core/CL/CLHelpers.cpp b/src/core/CL/CLHelpers.cpp index 895bb72827..aff897738a 100644 --- a/src/core/CL/CLHelpers.cpp +++ b/src/core/CL/CLHelpers.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 Arm Limited. + * Copyright (c) 2016-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -415,4 +415,26 @@ cl::NDRange create_lws_hint_parallel_implementations(unsigned int input_dimensio const unsigned int num_of_threads = ((input_dimension + border_width) / 16); return cl::NDRange(std::min(8U, num_of_threads)); } + +bool get_wbsm_support_info(const cl::Device &device) +{ + cl_bitfield capabilities = 0; + cl_int err = clGetDeviceInfo(device.get(), ARM_COMPUTE_LIBRARY_OPENCL_DEVICE_CAPABILITIES_ARM, sizeof(cl_bitfield), &capabilities, nullptr); + if((err == CL_SUCCESS) && (capabilities & ARM_COMPUTE_LIBRARY_OPENCL_EXEC_WBSM_ARM)) + { + return true; + } + return false; +} + +void set_wbsm(cl::Kernel &kernel, cl_int wbsm_hint) +{ + cl_int err = clSetKernelExecInfo(kernel.get(), + ARM_COMPUTE_LIBRARY_OPENCL_EXEC_WBSM_ARM, + sizeof(cl_int), + &wbsm_hint); + ARM_COMPUTE_UNUSED(err); + ARM_COMPUTE_ERROR_ON(err != CL_SUCCESS); +} + } // namespace arm_compute diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp index cf1c52e463..75f76ea344 100644 --- a/src/core/CL/CLKernelLibrary.cpp +++ b/src/core/CL/CLKernelLibrary.cpp @@ -1206,6 +1206,11 @@ bool CLKernelLibrary::int64_base_atomics_supported() const return _compile_context.int64_base_atomics_supported(); } +bool CLKernelLibrary::is_wbsm_supported() +{ + return _compile_context.is_wbsm_supported(); +} + std::pair<std::string, bool> CLKernelLibrary::get_program(const std::string &program_name) const { #ifdef EMBEDDED_KERNELS diff --git a/src/core/CL/ICLKernel.cpp b/src/core/CL/ICLKernel.cpp index 2b259bf28a..1c6963f3f1 100644 --- a/src/core/CL/ICLKernel.cpp +++ b/src/core/CL/ICLKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 Arm Limited. + * Copyright (c) 2016-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -29,8 +29,6 @@ #include <cstddef> -using namespace arm_compute; - void arm_compute::enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Window &window, const cl::NDRange &lws_hint, bool use_dummy_work_items) { if(kernel.kernel()() == nullptr) @@ -77,9 +75,15 @@ void arm_compute::enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Wind lws = valid_lws; } + if(CLKernelLibrary::get().is_wbsm_supported()) + { + set_wbsm(kernel.kernel(), kernel.wbsm_hint()); + } queue.enqueueNDRangeKernel(kernel.kernel(), cl::NullRange, gws, lws); } +namespace arm_compute +{ template <unsigned int dimension_size> void ICLKernel::add_tensor_argument(unsigned &idx, const ICLTensor *tensor, const Window &window) { @@ -146,3 +150,4 @@ cl::NDRange ICLKernel::gws_from_window(const Window &window) return gws; } +} // namespace arm_compute
\ No newline at end of file diff --git a/src/core/CL/ICLKernel.h b/src/core/CL/ICLKernel.h index a24cd8c798..6737109f34 100644 --- a/src/core/CL/ICLKernel.h +++ b/src/core/CL/ICLKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 Arm Limited. + * Copyright (c) 2016-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -31,6 +31,7 @@ #include "arm_compute/core/IKernel.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/experimental/Types.h" +#include "arm_compute/runtime/CL/CLTuningParams.h" #include <string> @@ -67,19 +68,30 @@ private: protected: /** Configure the kernel's window and local workgroup size hint. * - * @param[in] window The maximum window which will be returned by window() - * @param[in] lws_hint (Optional) Local-Workgroup-Size to use. + * @param[in] window The maximum window which will be returned by window() + * @param[in] lws_hint Local-Workgroup-Size to use. + * @param[in] wbsm_hint (Optional) Workgroup-Batch-Size-Modifier to use. */ - void configure_internal(const Window &window, cl::NDRange lws_hint = CLKernelLibrary::get().default_ndrange()) + void configure_internal(const Window &window, cl::NDRange lws_hint, cl_int wbsm_hint = 0) { - _lws_hint = lws_hint; + configure_internal(window, CLTuningParams(lws_hint, wbsm_hint)); + } + + /** Configure the kernel's window and tuning parameters hints. + * + * @param[in] window The maximum window which will be returned by window() + * @param[in] tuning_params_hint (Optional) Tuning parameters to use. + */ + void configure_internal(const Window &window, CLTuningParams tuning_params_hint = CLTuningParams(CLKernelLibrary::get().default_ndrange(), 0)) + { + _tuning_params_hint = tuning_params_hint; IKernel::configure(window); } public: /** Constructor */ ICLKernel() - : _kernel(nullptr), _target(GPUTarget::MIDGARD), _config_id(arm_compute::default_config_id), _max_workgroup_size(0), _lws_hint() + : _kernel(nullptr), _target(GPUTarget::MIDGARD), _config_id(arm_compute::default_config_id), _max_workgroup_size(0), _tuning_params_hint() { } /** Returns a reference to the OpenCL kernel of this object. @@ -254,7 +266,7 @@ public: void set_lws_hint(const cl::NDRange &lws_hint) { ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); // lws_hint will be overwritten by configure() - _lws_hint = lws_hint; + _tuning_params_hint.set_lws(lws_hint); } /** Return the Local-Workgroup-Size hint @@ -263,7 +275,28 @@ public: */ cl::NDRange lws_hint() const { - return _lws_hint; + return _tuning_params_hint.get_lws(); + } + + /** Set the workgroup batch size modifier hint + * + * @note This method should be called after the configuration of the kernel + * + * @param[in] wbsm_hint workgroup batch size modifier value + */ + void set_wbsm_hint(const cl_int &wbsm_hint) + { + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); // wbsm_hint will be overwritten by configure() + _tuning_params_hint.set_wbsm(wbsm_hint); + } + + /** Return the workgroup batch size modifier hint + * + * @return Current wbsm hint + */ + cl_int wbsm_hint() const + { + return _tuning_params_hint.get_wbsm(); } /** Get the configuration ID @@ -344,7 +377,7 @@ protected: std::string _config_id; /**< Configuration ID */ size_t _max_workgroup_size; /**< The maximum workgroup size for this kernel */ private: - cl::NDRange _lws_hint; /**< Local workgroup size hint for the OpenCL kernel */ + CLTuningParams _tuning_params_hint; /**< Tuning parameters hint for the OpenCL kernel */ }; /** Add the kernel to the command queue with the given window. diff --git a/src/core/CL/OpenCL.cpp b/src/core/CL/OpenCL.cpp index 6c70861946..aff6285697 100644 --- a/src/core/CL/OpenCL.cpp +++ b/src/core/CL/OpenCL.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -134,6 +134,7 @@ bool CLSymbols::load(const std::string &library) LOAD_FUNCTION_PTR(clEnqueueMarker, handle); LOAD_FUNCTION_PTR(clWaitForEvents, handle); LOAD_FUNCTION_PTR(clCreateImage, handle); + LOAD_FUNCTION_PTR(clSetKernelExecInfo, handle); // Third-party extensions LOAD_FUNCTION_PTR(clImportMemoryARM, handle); @@ -962,6 +963,23 @@ clCreateImage(cl_context context, } } +cl_int clSetKernelExecInfo(cl_kernel kernel, + cl_kernel_exec_info param_name, + size_t param_value_size, + const void *param_value) +{ + arm_compute::CLSymbols::get().load_default(); + auto func = arm_compute::CLSymbols::get().clSetKernelExecInfo_ptr; + if(func != nullptr) + { + return func(kernel, param_name, param_value_size, param_value); + } + else + { + return CL_OUT_OF_RESOURCES; + } +} + cl_mem clImportMemoryARM(cl_context context, cl_mem_flags flags, |