From 47f177e679874dc901888973c5fc237b756b38cb Mon Sep 17 00:00:00 2001 From: SiCong Li Date: Wed, 22 Feb 2023 17:24:09 +0000 Subject: Fix LWS search space used by CLTuner * Ensure CLTuner uses the real GWS used by run(), instead of the static GWS (which is usually changed at run time), by caching GWS in each kernel Note this is a somewhat inelegant workaround. The real issue stems from the fact that execution window and scheduler are very much coupled with our operator run() / run_op() method. (Please see COMPMID-5934) * Restrict LWS values to explore within GWS bound for exhaustive mode * Refactor gws_from_window() to include all the information required to calculate GWS * Log lws search space used for tuning * Fix ClDirectConv2dKernel config id Resolves COMPMID-5892 Signed-off-by: SiCong Li Change-Id: I420490d8b94d13ada2e44eb0a12078f883379334 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9193 Reviewed-by: Gian Marco Iodice Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins Benchmark: Arm Jenkins --- src/runtime/CL/tuners/CLTuningParametersList.cpp | 27 +++++++++++++----------- 1 file changed, 15 insertions(+), 12 deletions(-) (limited to 'src/runtime/CL/tuners') diff --git a/src/runtime/CL/tuners/CLTuningParametersList.cpp b/src/runtime/CL/tuners/CLTuningParametersList.cpp index 6cb2212794..6f3e32491a 100644 --- a/src/runtime/CL/tuners/CLTuningParametersList.cpp +++ b/src/runtime/CL/tuners/CLTuningParametersList.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2021, 2023 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -31,7 +31,7 @@ constexpr unsigned int max_lws_supported_x{ 64u }; constexpr unsigned int max_lws_supported_y{ 32u }; constexpr unsigned int max_lws_supported_z{ 32u }; -/** Non instantiable base class for Tuning parameters combinations that use Index2Cooard mapping */ +/** Non instantiable base class for Tuning parameters combinations that use Index2Coord mapping */ class CLTuningParametersList : public ICLTuningParametersList { protected: @@ -162,10 +162,13 @@ CLTuningParams CLTuningParametersListExhaustive::operator[](size_t index) CLTuningParametersListExhaustive::CLTuningParametersListExhaustive(const cl::NDRange &gws, CLTuningInfo tuning_info) { - ARM_COMPUTE_UNUSED(gws); - search_space_shape[0] = max_lws_supported_x; - search_space_shape[1] = max_lws_supported_y; - search_space_shape[2] = max_lws_supported_z; + const auto lws_x_max = std::min(static_cast<unsigned int>(gws[0]), max_lws_supported_x); + const auto lws_y_max = std::min(static_cast<unsigned int>(gws[1]), max_lws_supported_y); + const auto lws_z_max = std::min(static_cast<unsigned int>(gws[2]), max_lws_supported_z); + + search_space_shape[0] = lws_x_max; + search_space_shape[1] = lws_y_max; + search_space_shape[2] = lws_z_max; search_space_shape[3] = 1; if(tuning_info.tune_wbsm) { @@ -183,9 +186,9 @@ CLTuningParams CLTuningParametersListNormal::operator[](size_t index) CLTuningParametersListNormal::CLTuningParametersListNormal(const cl::NDRange &gws, CLTuningInfo tuning_info) { - auto lws_x_max = std::min(static_cast<unsigned int>(gws[0]), max_lws_supported_x); - auto lws_y_max = std::min(static_cast<unsigned int>(gws[1]), max_lws_supported_y); - auto lws_z_max = std::min(static_cast<unsigned int>(gws[2]), max_lws_supported_z); + const auto lws_x_max = std::min(static_cast<unsigned int>(gws[0]), max_lws_supported_x); + const auto lws_y_max = std::min(static_cast<unsigned int>(gws[1]), max_lws_supported_y); + const auto lws_z_max = std::min(static_cast<unsigned int>(gws[2]), max_lws_supported_z); // Initialize the tuning parameters values to test _lws_x = {}; @@ -227,9 +230,9 @@ void
CLTuningParametersListNormal::initialize_lws_values(std::vector<unsigned int> […] - auto lws_x_max = std::min(static_cast<unsigned int>(gws[0]), 8u); // Limit exploration to 1 - 8 - auto lws_y_max = std::min(static_cast<unsigned int>(gws[1]), 4u); // Limit exploration to 1 - 4 - auto lws_z_max = std::min(static_cast<unsigned int>(gws[2]), 4u); // Limit exploration to 1 - 4 + const auto lws_x_max = std::min(static_cast<unsigned int>(gws[0]), 8u); // Limit exploration to 1 - 8 + const auto lws_y_max = std::min(static_cast<unsigned int>(gws[1]), 4u); // Limit exploration to 1 - 4 + const auto lws_z_max = std::min(static_cast<unsigned int>(gws[2]), 4u); // Limit exploration to 1 - 4 // Initialize the LWS values to test _lws_x = {}; -- cgit v1.2.1