about | summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/core/CL/CLKernelLibrary.cpp 5
-rw-r--r-- src/runtime/CL/CLTuner.cpp 8
2 files changed, 9 insertions, 4 deletions
diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp
index 16bcd50d06..2b843e7785 100644
--- a/src/core/CL/CLKernelLibrary.cpp
+++ b/src/core/CL/CLKernelLibrary.cpp
@@ -1242,3 +1242,8 @@ std::string CLKernelLibrary::get_device_version()
{
return _device.getInfo<CL_DEVICE_VERSION>();
}
+
+cl_uint CLKernelLibrary::get_num_compute_units()
+{
+ return _device.getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>();
+}
\ No newline at end of file
diff --git a/src/runtime/CL/CLTuner.cpp b/src/runtime/CL/CLTuner.cpp
index 2c3f9ce33e..a079503671 100644
--- a/src/runtime/CL/CLTuner.cpp
+++ b/src/runtime/CL/CLTuner.cpp
@@ -76,11 +76,11 @@ void CLTuner::tune_kernel_static(ICLKernel &kernel)
void CLTuner::tune_kernel_dynamic(ICLKernel &kernel)
{
- // Get the configuration ID from the kernel
- const std::string &config_id = kernel.config_id();
+ // Get the configuration ID from the kernel and append GPU target name and number of available compute units
+ const std::string config_id = kernel.config_id() + "_" + string_from_target(kernel.get_target()) + "_MP" + support::cpp11::to_string(CLKernelLibrary::get().get_num_compute_units());
- // Check if we need to find the Optimal LWS. If config_id is equal to default_config_id, the kernel does not require to be tuned
- if(config_id != arm_compute::default_config_id)
+ // Check if we need to find the Optimal LWS. If the kernel's config_id is equal to default_config_id, the kernel does not require to be tuned
+ if(kernel.config_id() != arm_compute::default_config_id)
{
auto p = _lws_table.find(config_id);