aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- arm_compute/core/CL/CLKernelLibrary.h 5
-rw-r--r-- src/core/CL/CLKernelLibrary.cpp 5
-rw-r--r-- src/runtime/CL/CLTuner.cpp 8
3 files changed, 14 insertions, 4 deletions
diff --git a/arm_compute/core/CL/CLKernelLibrary.h b/arm_compute/core/CL/CLKernelLibrary.h
index 741e47c65f..9f183f1232 100644
--- a/arm_compute/core/CL/CLKernelLibrary.h
+++ b/arm_compute/core/CL/CLKernelLibrary.h
@@ -297,6 +297,11 @@ public:
* @return The content of CL_DEVICE_VERSION
*/
std::string get_device_version();
+ /** Return the maximum number of compute units in the device
+ *
+ * @return The content of CL_DEVICE_MAX_COMPUTE_UNITS
+ */
+ cl_uint get_num_compute_units();
/** Creates a kernel from the kernel library.
*
* @param[in] kernel_name Kernel name.
diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp
index 16bcd50d06..2b843e7785 100644
--- a/src/core/CL/CLKernelLibrary.cpp
+++ b/src/core/CL/CLKernelLibrary.cpp
@@ -1242,3 +1242,8 @@ std::string CLKernelLibrary::get_device_version()
{
return _device.getInfo<CL_DEVICE_VERSION>();
}
+
+cl_uint CLKernelLibrary::get_num_compute_units()
+{
+ return _device.getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>();
+}
\ No newline at end of file
diff --git a/src/runtime/CL/CLTuner.cpp b/src/runtime/CL/CLTuner.cpp
index 2c3f9ce33e..a079503671 100644
--- a/src/runtime/CL/CLTuner.cpp
+++ b/src/runtime/CL/CLTuner.cpp
@@ -76,11 +76,11 @@ void CLTuner::tune_kernel_static(ICLKernel &kernel)
void CLTuner::tune_kernel_dynamic(ICLKernel &kernel)
{
- // Get the configuration ID from the kernel
- const std::string &config_id = kernel.config_id();
+ // Get the configuration ID from the kernel and append GPU target name and number of available compute units
+ const std::string config_id = kernel.config_id() + "_" + string_from_target(kernel.get_target()) + "_MP" + support::cpp11::to_string(CLKernelLibrary::get().get_num_compute_units());
- // Check if we need to find the Optimal LWS. If config_id is equal to default_config_id, the kernel does not require to be tuned
- if(config_id != arm_compute::default_config_id)
+ // Check if we need to find the Optimal LWS. If the kernel's config_id is equal to default_config_id, the kernel does not need to be tuned
+ if(kernel.config_id() != arm_compute::default_config_id)
{
auto p = _lws_table.find(config_id);