about | summary | refs | log | tree | commit | diff
path: root/arm_compute/core/CL
diff options
context:
space:
mode:
Diffstat (limited to 'arm_compute/core/CL')
-rw-r--r-- arm_compute/core/CL/CLKernelLibrary.h 29
-rw-r--r-- arm_compute/core/CL/ICLKernel.h 17
-rw-r--r-- arm_compute/core/CL/OpenCL.h 2
3 files changed, 24 insertions, 24 deletions
diff --git a/arm_compute/core/CL/CLKernelLibrary.h b/arm_compute/core/CL/CLKernelLibrary.h
index 4d4565d6fd..fc131cdcfe 100644
--- a/arm_compute/core/CL/CLKernelLibrary.h
+++ b/arm_compute/core/CL/CLKernelLibrary.h
@@ -162,11 +162,9 @@ public:
*/
void init(std::string kernel_path = ".", cl::Context context = cl::Context::getDefault(), cl::Device device = cl::Device::getDefault())
{
- _kernel_path = std::move(kernel_path);
- _context = std::move(context);
- _device = std::move(device);
- _max_workgroup_size = 0;
- max_local_workgroup_size();
+ _kernel_path = std::move(kernel_path);
+ _context = std::move(context);
+ _device = std::move(device);
}
/** Sets the path that the kernels reside in.
*
@@ -208,20 +206,15 @@ public:
{
_device = cl_devices[0];
}
-
- _max_workgroup_size = 0;
- max_local_workgroup_size();
- };
+ }
/** Sets the CL device for which the programs are created.
*
* @param[in] device A CL device.
*/
void set_device(cl::Device device)
{
- _device = std::move(device);
- _max_workgroup_size = 0;
- max_local_workgroup_size();
- };
+ _device = std::move(device);
+ }
/** Creates a kernel from the kernel library.
*
* @param[in] kernel_name Kernel name.
@@ -238,15 +231,14 @@ public:
*
*/
void load_binary();
- /** Find the maximum number of local work items in a workgroup can be supported by the device
+ /** Find the maximum number of local work items in a workgroup that can be supported for the kernel.
*
*/
- size_t max_local_workgroup_size();
-
- /** Return the default NDRange that is suitable for the device.
+ size_t max_local_workgroup_size(const cl::Kernel &kernel) const;
+ /** Return the default NDRange for the device.
*
*/
- cl::NDRange default_ndrange();
+ cl::NDRange default_ndrange() const;
private:
/** Load program and its dependencies.
@@ -270,7 +262,6 @@ private:
static const std::map<std::string, std::string> _kernel_program_map; /**< Map that associates kernel names with programs. */
static const std::map<std::string, std::string> _program_source_map; /**< Contains sources for all programs.
Used for compile-time kernel inclusion. >*/
- size_t _max_workgroup_size; /** Maximum local workgroup size supported on the device */
};
}
#endif /* __ARM_COMPUTE_CLKERNELLIBRARY_H__ */
diff --git a/arm_compute/core/CL/ICLKernel.h b/arm_compute/core/CL/ICLKernel.h
index d118d13f3f..9119940bc5 100644
--- a/arm_compute/core/CL/ICLKernel.h
+++ b/arm_compute/core/CL/ICLKernel.h
@@ -175,6 +175,12 @@ public:
*/
GPUTarget get_target() const;
+ /** Get the maximum workgroup size for the device the CLKernelLibrary uses.
+ *
+ * @return The maximum workgroup size value.
+ */
+ size_t get_max_workgroup_size();
+
private:
/** Add the passed array's parameters to the object's kernel's arguments starting from the index idx.
*
@@ -208,10 +214,11 @@ private:
unsigned int num_arguments_per_tensor() const;
protected:
- cl::Kernel _kernel; /**< OpenCL kernel to run */
- cl::NDRange _lws_hint; /**< Local workgroup size hint for the OpenCL kernel */
- GPUTarget _target; /**< The targeted GPU */
- std::string _config_id; /**< Configuration ID */
+ cl::Kernel _kernel; /**< OpenCL kernel to run */
+ cl::NDRange _lws_hint; /**< Local workgroup size hint for the OpenCL kernel */
+ GPUTarget _target; /**< The targeted GPU */
+ std::string _config_id; /**< Configuration ID */
+ size_t _max_workgroup_size; /**< The maximum workgroup size for this kernel */
};
/** Add the kernel to the command queue with the given window.
@@ -223,7 +230,7 @@ protected:
* @param[in,out] queue OpenCL command queue.
* @param[in] kernel Kernel to enqueue
* @param[in] window Window the kernel has to process.
- * @param[in] lws_hint Local workgroup size requested, by default (128,1)
+ * @param[in] lws_hint Local workgroup size requested, by default (128,1).
*
* @note If any dimension of the lws is greater than the global workgroup size then no lws will be passed.
*/
diff --git a/arm_compute/core/CL/OpenCL.h b/arm_compute/core/CL/OpenCL.h
index 151cc9b53d..6780e23c2d 100644
--- a/arm_compute/core/CL/OpenCL.h
+++ b/arm_compute/core/CL/OpenCL.h
@@ -84,6 +84,7 @@ public:
using clGetDeviceIDs_func = cl_int (*)(cl_platform_id, cl_device_type, cl_uint, cl_device_id *, cl_uint *);
using clRetainEvent_func = cl_int (*)(cl_event);
using clGetPlatformIDs_func = cl_int (*)(cl_uint, cl_platform_id *, cl_uint *);
+ using clGetKernelWorkGroupInfo_func = cl_int (*)(cl_kernel, cl_device_id, cl_kernel_work_group_info, size_t, void *, size_t *);
clBuildProgram_func clBuildProgram = nullptr;
clEnqueueNDRangeKernel_func clEnqueueNDRangeKernel = nullptr;
@@ -115,6 +116,7 @@ public:
clGetDeviceIDs_func clGetDeviceIDs = nullptr;
clRetainEvent_func clRetainEvent = nullptr;
clGetPlatformIDs_func clGetPlatformIDs = nullptr;
+ clGetKernelWorkGroupInfo_func clGetKernelWorkGroupInfo = nullptr;
private:
std::pair<bool, bool> _loaded{ false, false };