diff options
Diffstat (limited to 'arm_compute/core')
-rw-r--r-- | arm_compute/core/CL/CLKernelLibrary.h | 29 | ||||
-rw-r--r-- | arm_compute/core/CL/ICLKernel.h | 17 | ||||
-rw-r--r-- | arm_compute/core/CL/OpenCL.h | 2 |
3 files changed, 24 insertions, 24 deletions
diff --git a/arm_compute/core/CL/CLKernelLibrary.h b/arm_compute/core/CL/CLKernelLibrary.h index 4d4565d6fd..fc131cdcfe 100644 --- a/arm_compute/core/CL/CLKernelLibrary.h +++ b/arm_compute/core/CL/CLKernelLibrary.h @@ -162,11 +162,9 @@ public: */ void init(std::string kernel_path = ".", cl::Context context = cl::Context::getDefault(), cl::Device device = cl::Device::getDefault()) { - _kernel_path = std::move(kernel_path); - _context = std::move(context); - _device = std::move(device); - _max_workgroup_size = 0; - max_local_workgroup_size(); + _kernel_path = std::move(kernel_path); + _context = std::move(context); + _device = std::move(device); } /** Sets the path that the kernels reside in. * @@ -208,20 +206,15 @@ public: { _device = cl_devices[0]; } - - _max_workgroup_size = 0; - max_local_workgroup_size(); - }; + } /** Sets the CL device for which the programs are created. * * @param[in] device A CL device. */ void set_device(cl::Device device) { - _device = std::move(device); - _max_workgroup_size = 0; - max_local_workgroup_size(); - }; + _device = std::move(device); + } /** Creates a kernel from the kernel library. * * @param[in] kernel_name Kernel name. @@ -238,15 +231,14 @@ public: * */ void load_binary(); - /** Find the maximum number of local work items in a workgroup can be supported by the device + /** Find the maximum number of local work items in a workgroup can be supported for the kernel. * */ - size_t max_local_workgroup_size(); - - /** Return the default NDRange that is suitable for the device. + size_t max_local_workgroup_size(const cl::Kernel &kernel) const; + /** Return the default NDRange for the device. * */ - cl::NDRange default_ndrange(); + cl::NDRange default_ndrange() const; private: /** Load program and its dependencies. @@ -270,7 +262,6 @@ private: static const std::map<std::string, std::string> _kernel_program_map; /**< Map that associates kernel names with programs. */ static const std::map<std::string, std::string> _program_source_map; /**< Contains sources for all programs. Used for compile-time kernel inclusion. >*/ - size_t _max_workgroup_size; /** Maximum local workgroup size supported on the device */ }; } #endif /* __ARM_COMPUTE_CLKERNELLIBRARY_H__ */ diff --git a/arm_compute/core/CL/ICLKernel.h b/arm_compute/core/CL/ICLKernel.h index d118d13f3f..9119940bc5 100644 --- a/arm_compute/core/CL/ICLKernel.h +++ b/arm_compute/core/CL/ICLKernel.h @@ -175,6 +175,12 @@ public: */ GPUTarget get_target() const; + /** Get the maximum workgroup size for the device the CLKernelLibrary uses. + * + * @return The maximum workgroup size value. + */ + size_t get_max_workgroup_size(); + private: /** Add the passed array's parameters to the object's kernel's arguments starting from the index idx. * @@ -208,10 +214,11 @@ private: unsigned int num_arguments_per_tensor() const; protected: - cl::Kernel _kernel; /**< OpenCL kernel to run */ - cl::NDRange _lws_hint; /**< Local workgroup size hint for the OpenCL kernel */ - GPUTarget _target; /**< The targeted GPU */ - std::string _config_id; /**< Configuration ID */ + cl::Kernel _kernel; /**< OpenCL kernel to run */ + cl::NDRange _lws_hint; /**< Local workgroup size hint for the OpenCL kernel */ + GPUTarget _target; /**< The targeted GPU */ + std::string _config_id; /**< Configuration ID */ + size_t _max_workgroup_size; /**< The maximum workgroup size for this kernel */ }; /** Add the kernel to the command queue with the given window. @@ -223,7 +230,7 @@ protected: * @param[in,out] queue OpenCL command queue. * @param[in] kernel Kernel to enqueue * @param[in] window Window the kernel has to process. - * @param[in] lws_hint Local workgroup size requested, by default (128,1) + * @param[in] lws_hint Local workgroup size requested, by default (128,1). * * @note If any dimension of the lws is greater than the global workgroup size then no lws will be passed. */ diff --git a/arm_compute/core/CL/OpenCL.h b/arm_compute/core/CL/OpenCL.h index 151cc9b53d..6780e23c2d 100644 --- a/arm_compute/core/CL/OpenCL.h +++ b/arm_compute/core/CL/OpenCL.h @@ -84,6 +84,7 @@ public: using clGetDeviceIDs_func = cl_int (*)(cl_platform_id, cl_device_type, cl_uint, cl_device_id *, cl_uint *); using clRetainEvent_func = cl_int (*)(cl_event); using clGetPlatformIDs_func = cl_int (*)(cl_uint, cl_platform_id *, cl_uint *); + using clGetKernelWorkGroupInfo_func = cl_int (*)(cl_kernel, cl_device_id, cl_kernel_work_group_info, size_t, void *, size_t *); clBuildProgram_func clBuildProgram = nullptr; clEnqueueNDRangeKernel_func clEnqueueNDRangeKernel = nullptr; @@ -115,6 +116,7 @@ public: clGetDeviceIDs_func clGetDeviceIDs = nullptr; clRetainEvent_func clRetainEvent = nullptr; clGetPlatformIDs_func clGetPlatformIDs = nullptr; + clGetKernelWorkGroupInfo_func clGetKernelWorkGroupInfo = nullptr; private: std::pair<bool, bool> _loaded{ false, false }; |