aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/ICLKernel.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/CL/ICLKernel.h')
-rw-r--r-- src/core/CL/ICLKernel.h 52
1 files changed, 47 insertions, 5 deletions
diff --git a/src/core/CL/ICLKernel.h b/src/core/CL/ICLKernel.h
index 6737109f34..ae3077a564 100644
--- a/src/core/CL/ICLKernel.h
+++ b/src/core/CL/ICLKernel.h
@@ -37,6 +37,26 @@
namespace arm_compute
{
+/** Check whether two local work-group sizes (LWS) are identical.
+ *
+ * Declared inline at namespace scope instead of inside an unnamed namespace: this is a header, and an
+ * unnamed namespace would give every including translation unit its own internal copy of the function.
+ *
+ * @param[in] lws0 First local work-group size
+ * @param[in] lws1 Second local work-group size
+ *
+ * @return True if both ranges have the same number of dimensions and the same size in each dimension
+ */
+inline bool is_same_lws(cl::NDRange lws0, cl::NDRange lws1)
+{
+    const size_t num_dims = lws0.dimensions();
+    if(num_dims != lws1.dimensions())
+    {
+        return false;
+    }
+
+    // Fetch the size arrays once rather than on every iteration
+    const auto *sizes0 = lws0.get();
+    const auto *sizes1 = lws1.get();
+    for(size_t i = 0; i < num_dims; ++i)
+    {
+        if(sizes0[i] != sizes1[i])
+        {
+            return false;
+        }
+    }
+
+    return true;
+}
template <typename T>
class ICLArray;
class ICLTensor;
@@ -64,6 +84,13 @@ private:
{
return 2 + 2 * dimension_size;
}
+
+ /** Provide the local work-group size used in place of the library default.
+ *
+ * The window is not consulted: the range reported by CLKernelLibrary::default_ndrange() is returned as is.
+ *
+ * @param[in] window Execution window of the kernel (currently unused)
+ *
+ * @return The NDRange returned by CLKernelLibrary::get().default_ndrange()
+ */
+ cl::NDRange default_lws_tune(const Window &window)
+ {
+     // The window does not take part in the selection
+     ARM_COMPUTE_UNUSED(window);
+
+     cl::NDRange library_default_lws = CLKernelLibrary::get().default_ndrange();
+     return library_default_lws;
+ }
+
using IKernel::configure; //Prevent children from calling IKernel::configure() directly
protected:
/** Configure the kernel's window and local workgroup size hint.
@@ -85,13 +112,19 @@ protected:
void configure_internal(const Window &window, CLTuningParams tuning_params_hint = CLTuningParams(CLKernelLibrary::get().default_ndrange(), 0))
{
_tuning_params_hint = tuning_params_hint;
+
+ if(is_same_lws(_tuning_params_hint.get_lws(), CLKernelLibrary::get().default_ndrange()))
+ {
+ _tuning_params_hint.set_lws(default_lws_tune(window));
+ }
+
IKernel::configure(window);
}
public:
/** Default constructor
 *
 * Initialises the kernel handle from nullptr, the target to GPUTarget::MIDGARD, the configuration ID to
 * arm_compute::default_config_id and the maximum workgroup size to 0, and default-constructs the tuning
 * parameters hint. The updated initialiser list additionally sets the kernel type to CLKernelType::UNKNOWN.
 */
ICLKernel()
- : _kernel(nullptr), _target(GPUTarget::MIDGARD), _config_id(arm_compute::default_config_id), _max_workgroup_size(0), _tuning_params_hint()
+ : _kernel(nullptr), _target(GPUTarget::MIDGARD), _config_id(arm_compute::default_config_id), _max_workgroup_size(0), _type(CLKernelType::UNKNOWN), _tuning_params_hint()
{
}
/** Returns a reference to the OpenCL kernel of this object.
@@ -102,6 +135,14 @@ public:
{
return _kernel;
}
+ /** Get the type of this OpenCL kernel.
+ *
+ * @return The value currently stored in the kernel's type member
+ */
+ CLKernelType type() const
+ {
+     return this->_type;
+ }
/** Add the passed 1D array's parameters to the object's kernel's arguments starting from the index idx.
*
* @param[in,out] idx Index at which to start adding the array's arguments. Will be incremented by the number of kernel arguments set.
@@ -372,10 +413,11 @@ private:
void add_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window);
protected:
- cl::Kernel _kernel; /**< OpenCL kernel to run */
- GPUTarget _target; /**< The targeted GPU */
- std::string _config_id; /**< Configuration ID */
- size_t _max_workgroup_size; /**< The maximum workgroup size for this kernel */
+ cl::Kernel _kernel; /**< OpenCL kernel to run */
+ GPUTarget _target; /**< The targeted GPU */
+ std::string _config_id; /**< Configuration ID */
+ size_t _max_workgroup_size; /**< The maximum workgroup size for this kernel */
+ CLKernelType _type; /**< The CL kernel type */
private:
CLTuningParams _tuning_params_hint; /**< Tuning parameters hint for the OpenCL kernel */
};