aboutsummaryrefslogtreecommitdiff
path: root/src/core
diff options
context:
space:
mode:
authorManuel Bottini <manuel.bottini@arm.com>2021-01-25 15:07:17 +0000
committerGeorgios Pinitas <georgios.pinitas@arm.com>2021-02-03 17:35:00 +0000
commitbe9f9f9139b759d314f4f2a6d2ee747079666504 (patch)
tree461690abb95caeaeca40261fd85816a906c8446c /src/core
parent7061eb283969f9a020c08349454447564e4dd5b3 (diff)
downloadComputeLibrary-be9f9f9139b759d314f4f2a6d2ee747079666504.tar.gz
Add WBSM tuning to CLTuner
Add WBSM as possible parameter to be tuned Add helper functions to check WBSM support and setting the value in the kernel Update tuning parameter lists to use WBSM Update CLTuner to use WBSM The WBSM tuning is exposed as a parameter to be set at compile time by setting the CLTuningInfo CLTuningInfo contains information about the tuning mode and if wbsm tuning enabled Resolves: COMPMID-3936 Change-Id: Id53697c9c6d2cef41c049f368002f6197351b3ed Signed-off-by: Manuel Bottini <manuel.bottini@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4914 Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com> Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Diffstat (limited to 'src/core')
-rw-r--r--src/core/CL/CLCompileContext.cpp19
-rw-r--r--src/core/CL/CLHelpers.cpp24
-rw-r--r--src/core/CL/CLKernelLibrary.cpp5
-rw-r--r--src/core/CL/ICLKernel.cpp11
-rw-r--r--src/core/CL/ICLKernel.h51
-rw-r--r--src/core/CL/OpenCL.cpp20
6 files changed, 110 insertions, 20 deletions
diff --git a/src/core/CL/CLCompileContext.cpp b/src/core/CL/CLCompileContext.cpp
index 0afb7e5e0e..3db0fe515a 100644
--- a/src/core/CL/CLCompileContext.cpp
+++ b/src/core/CL/CLCompileContext.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020 Arm Limited.
+ * Copyright (c) 2020-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -137,15 +137,16 @@ Kernel::Kernel(std::string name, const cl::Program &program)
{
}
CLCompileContext::CLCompileContext()
- : _context(), _device(), _programs_map(), _built_programs_map()
+ : _context(), _device(), _programs_map(), _built_programs_map(), _is_wbsm_supported()
{
}
CLCompileContext::CLCompileContext(cl::Context context, const cl::Device &device)
- : _context(), _device(), _programs_map(), _built_programs_map()
+ : _context(), _device(), _programs_map(), _built_programs_map(), _is_wbsm_supported()
{
- _context = std::move(context);
- _device = CLDevice(device);
+ _context = std::move(context);
+ _device = CLDevice(device);
+ _is_wbsm_supported = get_wbsm_support_info(device);
}
Kernel CLCompileContext::create_kernel(const std::string &kernel_name, const std::string &program_name, const std::string &program_source,
@@ -318,7 +319,8 @@ const cl::Device &CLCompileContext::get_device() const
void CLCompileContext::set_device(cl::Device device)
{
- _device = std::move(device);
+ _device = std::move(device);
+ _is_wbsm_supported = get_wbsm_support_info(device);
}
cl::NDRange CLCompileContext::default_ndrange() const
@@ -346,6 +348,11 @@ bool CLCompileContext::int64_base_atomics_supported() const
return _device.supported("cl_khr_int64_base_atomics");
}
+bool CLCompileContext::is_wbsm_supported() const
+{
+ return _is_wbsm_supported;
+}
+
size_t CLCompileContext::max_local_workgroup_size(const cl::Kernel &kernel) const
{
size_t result;
diff --git a/src/core/CL/CLHelpers.cpp b/src/core/CL/CLHelpers.cpp
index 895bb72827..aff897738a 100644
--- a/src/core/CL/CLHelpers.cpp
+++ b/src/core/CL/CLHelpers.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -415,4 +415,26 @@ cl::NDRange create_lws_hint_parallel_implementations(unsigned int input_dimensio
const unsigned int num_of_threads = ((input_dimension + border_width) / 16);
return cl::NDRange(std::min(8U, num_of_threads));
}
+
+bool get_wbsm_support_info(const cl::Device &device)
+{
+ cl_bitfield capabilities = 0;
+ cl_int err = clGetDeviceInfo(device.get(), ARM_COMPUTE_LIBRARY_OPENCL_DEVICE_CAPABILITIES_ARM, sizeof(cl_bitfield), &capabilities, nullptr);
+ if((err == CL_SUCCESS) && (capabilities & ARM_COMPUTE_LIBRARY_OPENCL_EXEC_WBSM_ARM))
+ {
+ return true;
+ }
+ return false;
+}
+
+void set_wbsm(cl::Kernel &kernel, cl_int wbsm_hint)
+{
+ cl_int err = clSetKernelExecInfo(kernel.get(),
+ ARM_COMPUTE_LIBRARY_OPENCL_EXEC_WBSM_ARM,
+ sizeof(cl_int),
+ &wbsm_hint);
+ ARM_COMPUTE_UNUSED(err);
+ ARM_COMPUTE_ERROR_ON(err != CL_SUCCESS);
+}
+
} // namespace arm_compute
diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp
index cf1c52e463..75f76ea344 100644
--- a/src/core/CL/CLKernelLibrary.cpp
+++ b/src/core/CL/CLKernelLibrary.cpp
@@ -1206,6 +1206,11 @@ bool CLKernelLibrary::int64_base_atomics_supported() const
return _compile_context.int64_base_atomics_supported();
}
+bool CLKernelLibrary::is_wbsm_supported()
+{
+ return _compile_context.is_wbsm_supported();
+}
+
std::pair<std::string, bool> CLKernelLibrary::get_program(const std::string &program_name) const
{
#ifdef EMBEDDED_KERNELS
diff --git a/src/core/CL/ICLKernel.cpp b/src/core/CL/ICLKernel.cpp
index 2b259bf28a..1c6963f3f1 100644
--- a/src/core/CL/ICLKernel.cpp
+++ b/src/core/CL/ICLKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -29,8 +29,6 @@
#include <cstddef>
-using namespace arm_compute;
-
void arm_compute::enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Window &window, const cl::NDRange &lws_hint, bool use_dummy_work_items)
{
if(kernel.kernel()() == nullptr)
@@ -77,9 +75,15 @@ void arm_compute::enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Wind
lws = valid_lws;
}
+ if(CLKernelLibrary::get().is_wbsm_supported())
+ {
+ set_wbsm(kernel.kernel(), kernel.wbsm_hint());
+ }
queue.enqueueNDRangeKernel(kernel.kernel(), cl::NullRange, gws, lws);
}
+namespace arm_compute
+{
template <unsigned int dimension_size>
void ICLKernel::add_tensor_argument(unsigned &idx, const ICLTensor *tensor, const Window &window)
{
@@ -146,3 +150,4 @@ cl::NDRange ICLKernel::gws_from_window(const Window &window)
return gws;
}
+} // namespace arm_compute \ No newline at end of file
diff --git a/src/core/CL/ICLKernel.h b/src/core/CL/ICLKernel.h
index a24cd8c798..6737109f34 100644
--- a/src/core/CL/ICLKernel.h
+++ b/src/core/CL/ICLKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -31,6 +31,7 @@
#include "arm_compute/core/IKernel.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/experimental/Types.h"
+#include "arm_compute/runtime/CL/CLTuningParams.h"
#include <string>
@@ -67,19 +68,30 @@ private:
protected:
/** Configure the kernel's window and local workgroup size hint.
*
- * @param[in] window The maximum window which will be returned by window()
- * @param[in] lws_hint (Optional) Local-Workgroup-Size to use.
+ * @param[in] window The maximum window which will be returned by window()
+ * @param[in] lws_hint Local-Workgroup-Size to use.
+ * @param[in] wbsm_hint (Optional) Workgroup-Batch-Size-Modifier to use.
*/
- void configure_internal(const Window &window, cl::NDRange lws_hint = CLKernelLibrary::get().default_ndrange())
+ void configure_internal(const Window &window, cl::NDRange lws_hint, cl_int wbsm_hint = 0)
{
- _lws_hint = lws_hint;
+ configure_internal(window, CLTuningParams(lws_hint, wbsm_hint));
+ }
+
+ /** Configure the kernel's window and tuning parameters hints.
+ *
+ * @param[in] window The maximum window which will be returned by window()
+ * @param[in] tuning_params_hint (Optional) Tuning parameters to use.
+ */
+ void configure_internal(const Window &window, CLTuningParams tuning_params_hint = CLTuningParams(CLKernelLibrary::get().default_ndrange(), 0))
+ {
+ _tuning_params_hint = tuning_params_hint;
IKernel::configure(window);
}
public:
/** Constructor */
ICLKernel()
- : _kernel(nullptr), _target(GPUTarget::MIDGARD), _config_id(arm_compute::default_config_id), _max_workgroup_size(0), _lws_hint()
+ : _kernel(nullptr), _target(GPUTarget::MIDGARD), _config_id(arm_compute::default_config_id), _max_workgroup_size(0), _tuning_params_hint()
{
}
/** Returns a reference to the OpenCL kernel of this object.
@@ -254,7 +266,7 @@ public:
void set_lws_hint(const cl::NDRange &lws_hint)
{
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); // lws_hint will be overwritten by configure()
- _lws_hint = lws_hint;
+ _tuning_params_hint.set_lws(lws_hint);
}
/** Return the Local-Workgroup-Size hint
@@ -263,7 +275,28 @@ public:
*/
cl::NDRange lws_hint() const
{
- return _lws_hint;
+ return _tuning_params_hint.get_lws();
+ }
+
+ /** Set the workgroup batch size modifier hint
+ *
+ * @note This method should be called after the configuration of the kernel
+ *
+ * @param[in] wbsm_hint workgroup batch size modifier value
+ */
+ void set_wbsm_hint(const cl_int &wbsm_hint)
+ {
+ ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); // wbsm_hint will be overwritten by configure()
+ _tuning_params_hint.set_wbsm(wbsm_hint);
+ }
+
+ /** Return the workgroup batch size modifier hint
+ *
+ * @return Current wbsm hint
+ */
+ cl_int wbsm_hint() const
+ {
+ return _tuning_params_hint.get_wbsm();
}
/** Get the configuration ID
@@ -344,7 +377,7 @@ protected:
std::string _config_id; /**< Configuration ID */
size_t _max_workgroup_size; /**< The maximum workgroup size for this kernel */
private:
- cl::NDRange _lws_hint; /**< Local workgroup size hint for the OpenCL kernel */
+ CLTuningParams _tuning_params_hint; /**< Tuning parameters hint for the OpenCL kernel */
};
/** Add the kernel to the command queue with the given window.
diff --git a/src/core/CL/OpenCL.cpp b/src/core/CL/OpenCL.cpp
index 6c70861946..aff6285697 100644
--- a/src/core/CL/OpenCL.cpp
+++ b/src/core/CL/OpenCL.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -134,6 +134,7 @@ bool CLSymbols::load(const std::string &library)
LOAD_FUNCTION_PTR(clEnqueueMarker, handle);
LOAD_FUNCTION_PTR(clWaitForEvents, handle);
LOAD_FUNCTION_PTR(clCreateImage, handle);
+ LOAD_FUNCTION_PTR(clSetKernelExecInfo, handle);
// Third-party extensions
LOAD_FUNCTION_PTR(clImportMemoryARM, handle);
@@ -962,6 +963,23 @@ clCreateImage(cl_context context,
}
}
+cl_int clSetKernelExecInfo(cl_kernel kernel,
+ cl_kernel_exec_info param_name,
+ size_t param_value_size,
+ const void *param_value)
+{
+ arm_compute::CLSymbols::get().load_default();
+ auto func = arm_compute::CLSymbols::get().clSetKernelExecInfo_ptr;
+ if(func != nullptr)
+ {
+ return func(kernel, param_name, param_value_size, param_value);
+ }
+ else
+ {
+ return CL_OUT_OF_RESOURCES;
+ }
+}
+
cl_mem
clImportMemoryARM(cl_context context,
cl_mem_flags flags,