From 5a6e0532b39d674f8773014a0a553d9bc70a9baa Mon Sep 17 00:00:00 2001 From: Abel Bernabeu Date: Thu, 28 Sep 2017 09:53:45 +0100 Subject: COMPUTE-8024 Fixed the maximum OpenCL workgroup size The maximum workgroup size depends on the kernel and the device, rather than being a property of the device. The present patch fixes the case when a kernel is queued with no workgroup size and the default workgroup size is used instead. A previous patch introduced a maximum workgroup size that depended on the device but ignored the kernel. In OpenCL the maximum workgroup size we query from the device is an upper bound on the actual maximum that we can query for a given kernel running on the same device. For some kernels the values will match, but for others we will get a lower value when querying for a specific kernel (e.g. if the kernel uses a high number of registers). Change-Id: I3bed6bde80ddc4f0ddb8f82c80903774aa1999b6 Reviewed-on: http://mpd-gerrit.cambridge.arm.com/89471 Tested-by: Kaizen Reviewed-by: Anthony Barbier --- arm_compute/core/CL/OpenCL.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arm_compute/core/CL/OpenCL.h') diff --git a/arm_compute/core/CL/OpenCL.h b/arm_compute/core/CL/OpenCL.h index 151cc9b53d..6780e23c2d 100644 --- a/arm_compute/core/CL/OpenCL.h +++ b/arm_compute/core/CL/OpenCL.h @@ -84,6 +84,7 @@ public: using clGetDeviceIDs_func = cl_int (*)(cl_platform_id, cl_device_type, cl_uint, cl_device_id *, cl_uint *); using clRetainEvent_func = cl_int (*)(cl_event); using clGetPlatformIDs_func = cl_int (*)(cl_uint, cl_platform_id *, cl_uint *); + using clGetKernelWorkGroupInfo_func = cl_int (*)(cl_kernel, cl_device_id, cl_kernel_work_group_info, size_t, void *, size_t *); clBuildProgram_func clBuildProgram = nullptr; clEnqueueNDRangeKernel_func clEnqueueNDRangeKernel = nullptr; @@ -115,6 +116,7 @@ public: clGetDeviceIDs_func clGetDeviceIDs = nullptr; clRetainEvent_func clRetainEvent = nullptr; clGetPlatformIDs_func clGetPlatformIDs = nullptr; + 
clGetKernelWorkGroupInfo_func clGetKernelWorkGroupInfo = nullptr; private: std::pair _loaded{ false, false }; -- cgit v1.2.1