aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/OpenCL.cpp
diff options
context:
space:
mode:
authorAbel Bernabeu <abel.bernabeu@arm.com>2017-09-28 09:53:45 +0100
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:35:24 +0000
commit5a6e0532b39d674f8773014a0a553d9bc70a9baa (patch)
tree5cfb228a11ed903f9e2872dc86d5cd1fdf1edc08 /src/core/CL/OpenCL.cpp
parent53b405f1e08ad41cb9a527abfe0308ec1edf18ff (diff)
downloadComputeLibrary-5a6e0532b39d674f8773014a0a553d9bc70a9baa.tar.gz
COMPUTE-8024 Fixed the maximum OpenCL workgroup size
The maximum workgroup size depends on the kernel and the device, rather than being a property of the device. The present patch fixes the case when a kernel is queued with no workgroup size and the default workgroup size is used instead. A previous patch introduced a maximum workgroup size that depended on the device but ignored the kernel. In OpenCL the maximum workgroup size we query from the device is an upper bound of the actual maximum that we can query for a given kernel running on the same device. For some kernels the values will match, but for others we will get a lower value when querying for an specific kernel (i.e. if the kernel uses a high number of registers). Change-Id: I3bed6bde80ddc4f0ddb8f82c80903774aa1999b6 Reviewed-on: http://mpd-gerrit.cambridge.arm.com/89471 Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com> Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src/core/CL/OpenCL.cpp')
-rw-r--r--src/core/CL/OpenCL.cpp21
1 files changed, 21 insertions, 0 deletions
diff --git a/src/core/CL/OpenCL.cpp b/src/core/CL/OpenCL.cpp
index cc2391977b..287c5e2f5a 100644
--- a/src/core/CL/OpenCL.cpp
+++ b/src/core/CL/OpenCL.cpp
@@ -101,6 +101,7 @@ bool CLSymbols::load(const std::string &library)
clGetDeviceIDs = reinterpret_cast<clGetDeviceIDs_func>(dlsym(handle, "clGetDeviceIDs"));
clRetainEvent = reinterpret_cast<clRetainEvent_func>(dlsym(handle, "clRetainEvent"));
clGetPlatformIDs = reinterpret_cast<clGetPlatformIDs_func>(dlsym(handle, "clGetPlatformIDs"));
+ clGetKernelWorkGroupInfo = reinterpret_cast<clGetKernelWorkGroupInfo_func>(dlsym(handle, "clGetKernelWorkGroupInfo"));
dlclose(handle);
@@ -647,3 +648,23 @@ cl_int clGetPlatformIDs(cl_uint num_entries, cl_platform_id *platforms, cl_uint
return CL_OUT_OF_RESOURCES;
}
}
+
+cl_int
+clGetKernelWorkGroupInfo(cl_kernel kernel,
+ cl_device_id device,
+ cl_kernel_work_group_info param_name,
+ size_t param_value_size,
+ void *param_value,
+ size_t *param_value_size_ret)
+{
+ arm_compute::CLSymbols::get().load_default();
+ auto func = arm_compute::CLSymbols::get().clGetKernelWorkGroupInfo;
+ if(func != nullptr)
+ {
+ return func(kernel, device, param_name, param_value_size, param_value, param_value_size_ret);
+ }
+ else
+ {
+ return CL_OUT_OF_RESOURCES;
+ }
+}