about | summary | refs | log | tree | commit | diff
path: root/arm_compute/core/CL/CLKernelLibrary.h
diff options
context:
space:
mode:
author: Abel Bernabeu <abel.bernabeu@arm.com> 2017-09-28 09:53:45 +0100
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:35:24 +0000
commit: 5a6e0532b39d674f8773014a0a553d9bc70a9baa (patch)
tree: 5cfb228a11ed903f9e2872dc86d5cd1fdf1edc08 /arm_compute/core/CL/CLKernelLibrary.h
parent: 53b405f1e08ad41cb9a527abfe0308ec1edf18ff (diff)
download: ComputeLibrary-5a6e0532b39d674f8773014a0a553d9bc70a9baa.tar.gz
COMPUTE-8024 Fixed the maximum OpenCL workgroup size
The maximum workgroup size depends on the kernel and the device, rather than being a property of the device. The present patch fixes the case when a kernel is queued with no workgroup size and the default workgroup size is used instead. A previous patch introduced a maximum workgroup size that depended on the device but ignored the kernel. In OpenCL the maximum workgroup size we query from the device is an upper bound on the actual maximum that we can query for a given kernel running on the same device. For some kernels the values will match, but for others we will get a lower value when querying for a specific kernel (e.g. if the kernel uses a high number of registers). Change-Id: I3bed6bde80ddc4f0ddb8f82c80903774aa1999b6 Reviewed-on: http://mpd-gerrit.cambridge.arm.com/89471 Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com> Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'arm_compute/core/CL/CLKernelLibrary.h')
-rw-r--r-- arm_compute/core/CL/CLKernelLibrary.h 29
1 files changed, 10 insertions, 19 deletions
diff --git a/arm_compute/core/CL/CLKernelLibrary.h b/arm_compute/core/CL/CLKernelLibrary.h
index 4d4565d6fd..fc131cdcfe 100644
--- a/arm_compute/core/CL/CLKernelLibrary.h
+++ b/arm_compute/core/CL/CLKernelLibrary.h
@@ -162,11 +162,9 @@ public:
*/
void init(std::string kernel_path = ".", cl::Context context = cl::Context::getDefault(), cl::Device device = cl::Device::getDefault())
{
- _kernel_path = std::move(kernel_path);
- _context = std::move(context);
- _device = std::move(device);
- _max_workgroup_size = 0;
- max_local_workgroup_size();
+ _kernel_path = std::move(kernel_path);
+ _context = std::move(context);
+ _device = std::move(device);
}
/** Sets the path that the kernels reside in.
*
@@ -208,20 +206,15 @@ public:
{
_device = cl_devices[0];
}
-
- _max_workgroup_size = 0;
- max_local_workgroup_size();
- };
+ }
/** Sets the CL device for which the programs are created.
*
* @param[in] device A CL device.
*/
void set_device(cl::Device device)
{
- _device = std::move(device);
- _max_workgroup_size = 0;
- max_local_workgroup_size();
- };
+ _device = std::move(device);
+ }
/** Creates a kernel from the kernel library.
*
* @param[in] kernel_name Kernel name.
@@ -238,15 +231,14 @@ public:
*
*/
void load_binary();
- /** Find the maximum number of local work items in a workgroup can be supported by the device
+ /** Find the maximum number of local work items in a workgroup can be supported for the kernel.
*
*/
- size_t max_local_workgroup_size();
-
- /** Return the default NDRange that is suitable for the device.
+ size_t max_local_workgroup_size(const cl::Kernel &kernel) const;
+ /** Return the default NDRange for the device.
*
*/
- cl::NDRange default_ndrange();
+ cl::NDRange default_ndrange() const;
private:
/** Load program and its dependencies.
@@ -270,7 +262,6 @@ private:
static const std::map<std::string, std::string> _kernel_program_map; /**< Map that associates kernel names with programs. */
static const std::map<std::string, std::string> _program_source_map; /**< Contains sources for all programs.
Used for compile-time kernel inclusion. >*/
- size_t _max_workgroup_size; /** Maximum local workgroup size supported on the device */
};
}
#endif /* __ARM_COMPUTE_CLKERNELLIBRARY_H__ */