From 47f177e679874dc901888973c5fc237b756b38cb Mon Sep 17 00:00:00 2001 From: SiCong Li Date: Wed, 22 Feb 2023 17:24:09 +0000 Subject: Fix LWS search space used by CLTuner * Ensure CLTuner uses the real GWS used by run(), instead of the static GWS (which is usually changed at run time), by caching GWS in each kernel Note this is a somewhat inelegant workaround. The real issue stems from the fact that execution window and scheduler are very much coupled with our operator run() / run_op() method. (Please see COMPMID-5934) * Restrict LWS values to explore within GWS bound for exhaustive mode * Refactor gws_from_window() to include all the information required to calculate GWS * Log lws search space used for tuning * Fix ClDirectConv2dKernel config id Resolves COMPMID-5892 Signed-off-by: SiCong Li Change-Id: I420490d8b94d13ada2e44eb0a12078f883379334 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9193 Reviewed-by: Gian Marco Iodice Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins Benchmark: Arm Jenkins --- src/core/CL/ICLKernel.h | 37 ++++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) (limited to 'src/core/CL/ICLKernel.h') diff --git a/src/core/CL/ICLKernel.h b/src/core/CL/ICLKernel.h index 5d5b636cf4..c82809cef3 100644 --- a/src/core/CL/ICLKernel.h +++ b/src/core/CL/ICLKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2022 Arm Limited. + * Copyright (c) 2016-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -86,9 +86,16 @@ private: return 2 + 2 * dimension_size; } - cl::NDRange default_lws_tune(const Window &window) + /** Get default lws for the kernel + * + * @param[in] window Execution window used by the kernel + * @param[in] use_dummy_work_items If the kernel uses dummy workloads + * + * @return cl::NDRange + */ + cl::NDRange default_lws_tune(const Window &window, bool use_dummy_work_items) { - return get_default_lws_for_type(_type, gws_from_window(window)); + return get_default_lws_for_type(_type, gws_from_window(window, use_dummy_work_items)); } using IKernel::configure; //Prevent children from calling IKernel::configure() directly @@ -115,7 +122,9 @@ protected: if(is_same_lws(_tuning_params_hint.get_lws(), CLKernelLibrary::get().default_ndrange())) { - _tuning_params_hint.set_lws(default_lws_tune(window)); + // Disable use_dummy_work_items at configure time. Because dummy work items only affect gws size, which + // will be recalculated with use_dummy_work_items flag at run time again anyway. + _tuning_params_hint.set_lws(default_lws_tune(window, false /* use_dummy_work_items */)); } IKernel::configure(window); @@ -124,7 +133,7 @@ protected: public: /** Constructor */ ICLKernel() - : _kernel(nullptr), _target(GPUTarget::MIDGARD), _config_id(arm_compute::default_config_id), _max_workgroup_size(0), _type(CLKernelType::UNKNOWN), _tuning_params_hint() + : _kernel(nullptr), _target(GPUTarget::MIDGARD), _config_id(arm_compute::default_config_id), _max_workgroup_size(0), _type(CLKernelType::UNKNOWN), _tuning_params_hint(), _cached_gws(cl::NullRange) { } /** Returns a reference to the OpenCL kernel of this object. @@ -431,11 +440,24 @@ public: size_t get_max_workgroup_size(); /** Get the global work size given an execution window * - * @param[in] window Execution window + * @param[in] window Execution window + * @param[in] use_dummy_work_items If the kernel uses dummy work items * * @return Global work size of the given execution window */ - static cl::NDRange gws_from_window(const Window &window); + static cl::NDRange gws_from_window(const Window &window, bool use_dummy_work_items); + + /** Get the cached gws used to enqueue this kernel + * + * @return Latest global work size of the kernel + */ + cl::NDRange get_cached_gws() const; + + /** Cache the latest gws used to enqueue this kernel + * + * @param[in] gws Latest global work size of the kernel + */ + void cache_gws(const cl::NDRange &gws); private: /** Add the passed array's parameters to the object's kernel's arguments starting from the index idx. @@ -465,6 +487,7 @@ protected: CLKernelType _type; /**< The CL kernel type */ private: CLTuningParams _tuning_params_hint; /**< Tuning parameters hint for the OpenCL kernel */ + cl::NDRange _cached_gws; /**< Latest GWS used to enqueue this kernel */ }; /** Add the kernel to the command queue with the given window. -- cgit v1.2.1