aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/CL')
-rw-r--r--src/core/CL/CLCompileContext.cpp19
-rw-r--r--src/core/CL/CLHelpers.cpp24
-rw-r--r--src/core/CL/CLKernelLibrary.cpp5
-rw-r--r--src/core/CL/ICLKernel.cpp11
-rw-r--r--src/core/CL/ICLKernel.h51
-rw-r--r--src/core/CL/OpenCL.cpp20
6 files changed, 110 insertions, 20 deletions
diff --git a/src/core/CL/CLCompileContext.cpp b/src/core/CL/CLCompileContext.cpp
index 0afb7e5e0e..3db0fe515a 100644
--- a/src/core/CL/CLCompileContext.cpp
+++ b/src/core/CL/CLCompileContext.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020 Arm Limited.
+ * Copyright (c) 2020-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -137,15 +137,16 @@ Kernel::Kernel(std::string name, const cl::Program &program)
{
}
CLCompileContext::CLCompileContext()
- : _context(), _device(), _programs_map(), _built_programs_map()
+ : _context(), _device(), _programs_map(), _built_programs_map(), _is_wbsm_supported()
{
}
CLCompileContext::CLCompileContext(cl::Context context, const cl::Device &device)
- : _context(), _device(), _programs_map(), _built_programs_map()
+ : _context(), _device(), _programs_map(), _built_programs_map(), _is_wbsm_supported()
{
- _context = std::move(context);
- _device = CLDevice(device);
+ _context = std::move(context);
+ _device = CLDevice(device);
+ _is_wbsm_supported = get_wbsm_support_info(device);
}
Kernel CLCompileContext::create_kernel(const std::string &kernel_name, const std::string &program_name, const std::string &program_source,
@@ -318,7 +319,8 @@ const cl::Device &CLCompileContext::get_device() const
void CLCompileContext::set_device(cl::Device device)
{
- _device = std::move(device);
+ _device = std::move(device);
+ _is_wbsm_supported = get_wbsm_support_info(device);
}
cl::NDRange CLCompileContext::default_ndrange() const
@@ -346,6 +348,11 @@ bool CLCompileContext::int64_base_atomics_supported() const
return _device.supported("cl_khr_int64_base_atomics");
}
+bool CLCompileContext::is_wbsm_supported() const
+{
+ return _is_wbsm_supported;
+}
+
size_t CLCompileContext::max_local_workgroup_size(const cl::Kernel &kernel) const
{
size_t result;
diff --git a/src/core/CL/CLHelpers.cpp b/src/core/CL/CLHelpers.cpp
index 895bb72827..aff897738a 100644
--- a/src/core/CL/CLHelpers.cpp
+++ b/src/core/CL/CLHelpers.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -415,4 +415,26 @@ cl::NDRange create_lws_hint_parallel_implementations(unsigned int input_dimensio
const unsigned int num_of_threads = ((input_dimension + border_width) / 16);
return cl::NDRange(std::min(8U, num_of_threads));
}
+
+bool get_wbsm_support_info(const cl::Device &device)
+{
+ cl_bitfield capabilities = 0;
+ cl_int err = clGetDeviceInfo(device.get(), ARM_COMPUTE_LIBRARY_OPENCL_DEVICE_CAPABILITIES_ARM, sizeof(cl_bitfield), &capabilities, nullptr);
+ if((err == CL_SUCCESS) && (capabilities & ARM_COMPUTE_LIBRARY_OPENCL_EXEC_WBSM_ARM))
+ {
+ return true;
+ }
+ return false;
+}
+
+void set_wbsm(cl::Kernel &kernel, cl_int wbsm_hint)
+{
+ cl_int err = clSetKernelExecInfo(kernel.get(),
+ ARM_COMPUTE_LIBRARY_OPENCL_EXEC_WBSM_ARM,
+ sizeof(cl_int),
+ &wbsm_hint);
+ ARM_COMPUTE_UNUSED(err);
+ ARM_COMPUTE_ERROR_ON(err != CL_SUCCESS);
+}
+
} // namespace arm_compute
diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp
index cf1c52e463..75f76ea344 100644
--- a/src/core/CL/CLKernelLibrary.cpp
+++ b/src/core/CL/CLKernelLibrary.cpp
@@ -1206,6 +1206,11 @@ bool CLKernelLibrary::int64_base_atomics_supported() const
return _compile_context.int64_base_atomics_supported();
}
+bool CLKernelLibrary::is_wbsm_supported()
+{
+ return _compile_context.is_wbsm_supported();
+}
+
std::pair<std::string, bool> CLKernelLibrary::get_program(const std::string &program_name) const
{
#ifdef EMBEDDED_KERNELS
diff --git a/src/core/CL/ICLKernel.cpp b/src/core/CL/ICLKernel.cpp
index 2b259bf28a..1c6963f3f1 100644
--- a/src/core/CL/ICLKernel.cpp
+++ b/src/core/CL/ICLKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -29,8 +29,6 @@
#include <cstddef>
-using namespace arm_compute;
-
void arm_compute::enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Window &window, const cl::NDRange &lws_hint, bool use_dummy_work_items)
{
if(kernel.kernel()() == nullptr)
@@ -77,9 +75,15 @@ void arm_compute::enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Wind
lws = valid_lws;
}
+ if(CLKernelLibrary::get().is_wbsm_supported())
+ {
+ set_wbsm(kernel.kernel(), kernel.wbsm_hint());
+ }
queue.enqueueNDRangeKernel(kernel.kernel(), cl::NullRange, gws, lws);
}
+namespace arm_compute
+{
template <unsigned int dimension_size>
void ICLKernel::add_tensor_argument(unsigned &idx, const ICLTensor *tensor, const Window &window)
{
@@ -146,3 +150,4 @@ cl::NDRange ICLKernel::gws_from_window(const Window &window)
return gws;
}
+} // namespace arm_compute \ No newline at end of file
diff --git a/src/core/CL/ICLKernel.h b/src/core/CL/ICLKernel.h
index a24cd8c798..6737109f34 100644
--- a/src/core/CL/ICLKernel.h
+++ b/src/core/CL/ICLKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -31,6 +31,7 @@
#include "arm_compute/core/IKernel.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/experimental/Types.h"
+#include "arm_compute/runtime/CL/CLTuningParams.h"
#include <string>
@@ -67,19 +68,30 @@ private:
protected:
/** Configure the kernel's window and local workgroup size hint.
*
- * @param[in] window The maximum window which will be returned by window()
- * @param[in] lws_hint (Optional) Local-Workgroup-Size to use.
+ * @param[in] window The maximum window which will be returned by window()
+ * @param[in] lws_hint Local-Workgroup-Size to use.
+ * @param[in] wbsm_hint (Optional) Workgroup-Batch-Size-Modifier to use.
*/
- void configure_internal(const Window &window, cl::NDRange lws_hint = CLKernelLibrary::get().default_ndrange())
+ void configure_internal(const Window &window, cl::NDRange lws_hint, cl_int wbsm_hint = 0)
{
- _lws_hint = lws_hint;
+ configure_internal(window, CLTuningParams(lws_hint, wbsm_hint));
+ }
+
+ /** Configure the kernel's window and tuning parameters hints.
+ *
+ * @param[in] window The maximum window which will be returned by window()
+ * @param[in] tuning_params_hint (Optional) Tuning parameters to use.
+ */
+ void configure_internal(const Window &window, CLTuningParams tuning_params_hint = CLTuningParams(CLKernelLibrary::get().default_ndrange(), 0))
+ {
+ _tuning_params_hint = tuning_params_hint;
IKernel::configure(window);
}
public:
/** Constructor */
ICLKernel()
- : _kernel(nullptr), _target(GPUTarget::MIDGARD), _config_id(arm_compute::default_config_id), _max_workgroup_size(0), _lws_hint()
+ : _kernel(nullptr), _target(GPUTarget::MIDGARD), _config_id(arm_compute::default_config_id), _max_workgroup_size(0), _tuning_params_hint()
{
}
/** Returns a reference to the OpenCL kernel of this object.
@@ -254,7 +266,7 @@ public:
void set_lws_hint(const cl::NDRange &lws_hint)
{
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); // lws_hint will be overwritten by configure()
- _lws_hint = lws_hint;
+ _tuning_params_hint.set_lws(lws_hint);
}
/** Return the Local-Workgroup-Size hint
@@ -263,7 +275,28 @@ public:
*/
cl::NDRange lws_hint() const
{
- return _lws_hint;
+ return _tuning_params_hint.get_lws();
+ }
+
+ /** Set the workgroup batch size modifier hint
+ *
+ * @note This method should be called after the configuration of the kernel
+ *
+ * @param[in] wbsm_hint workgroup batch size modifier value
+ */
+ void set_wbsm_hint(const cl_int &wbsm_hint)
+ {
+ ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); // wbsm_hint will be overwritten by configure()
+ _tuning_params_hint.set_wbsm(wbsm_hint);
+ }
+
+ /** Return the workgroup batch size modifier hint
+ *
+ * @return Current wbsm hint
+ */
+ cl_int wbsm_hint() const
+ {
+ return _tuning_params_hint.get_wbsm();
}
/** Get the configuration ID
@@ -344,7 +377,7 @@ protected:
std::string _config_id; /**< Configuration ID */
size_t _max_workgroup_size; /**< The maximum workgroup size for this kernel */
private:
- cl::NDRange _lws_hint; /**< Local workgroup size hint for the OpenCL kernel */
+ CLTuningParams _tuning_params_hint; /**< Tuning parameters hint for the OpenCL kernel */
};
/** Add the kernel to the command queue with the given window.
diff --git a/src/core/CL/OpenCL.cpp b/src/core/CL/OpenCL.cpp
index 6c70861946..aff6285697 100644
--- a/src/core/CL/OpenCL.cpp
+++ b/src/core/CL/OpenCL.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -134,6 +134,7 @@ bool CLSymbols::load(const std::string &library)
LOAD_FUNCTION_PTR(clEnqueueMarker, handle);
LOAD_FUNCTION_PTR(clWaitForEvents, handle);
LOAD_FUNCTION_PTR(clCreateImage, handle);
+ LOAD_FUNCTION_PTR(clSetKernelExecInfo, handle);
// Third-party extensions
LOAD_FUNCTION_PTR(clImportMemoryARM, handle);
@@ -962,6 +963,23 @@ clCreateImage(cl_context context,
}
}
+cl_int clSetKernelExecInfo(cl_kernel kernel,
+ cl_kernel_exec_info param_name,
+ size_t param_value_size,
+ const void *param_value)
+{
+ arm_compute::CLSymbols::get().load_default();
+ auto func = arm_compute::CLSymbols::get().clSetKernelExecInfo_ptr;
+ if(func != nullptr)
+ {
+ return func(kernel, param_name, param_value_size, param_value);
+ }
+ else
+ {
+ return CL_OUT_OF_RESOURCES;
+ }
+}
+
cl_mem
clImportMemoryARM(cl_context context,
cl_mem_flags flags,