aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/CL/tuners
diff options
context:
space:
mode:
author: SiCong Li <sicong.li@arm.com> 2023-02-22 17:24:09 +0000
committer: SiCong Li <sicong.li@arm.com> 2023-03-06 16:19:11 +0000
commit: 47f177e679874dc901888973c5fc237b756b38cb (patch)
tree: 130386717101d0c2440111cb288faa21df8ab151 /src/runtime/CL/tuners
parent: adfcacc8e39888a9a62e33c178041642d0a3047a (diff)
download: ComputeLibrary-47f177e679874dc901888973c5fc237b756b38cb.tar.gz
Fix LWS search space used by CLTuner
* Ensure CLTuner uses the real GWS used by run(), instead of the static GWS (which is usually changed at run time), by caching GWS in each kernel.
  Note this is a somewhat inelegant workaround. The real issue stems from the fact that execution window and scheduler are very much coupled with our operator run() / run_op() method. (Please see COMPMID-5934)
* Restrict LWS values to explore within GWS bound for exhaustive mode
* Refactor gws_from_window() to include all the information required to calculate GWS
* Log lws search space used for tuning
* Fix ClDirectConv2dKernel config id

Resolves COMPMID-5892

Signed-off-by: SiCong Li <sicong.li@arm.com>
Change-Id: I420490d8b94d13ada2e44eb0a12078f883379334
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9193
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/runtime/CL/tuners')
-rw-r--r-- src/runtime/CL/tuners/CLTuningParametersList.cpp | 27
1 files changed, 15 insertions, 12 deletions
diff --git a/src/runtime/CL/tuners/CLTuningParametersList.cpp b/src/runtime/CL/tuners/CLTuningParametersList.cpp
index 6cb2212794..6f3e32491a 100644
--- a/src/runtime/CL/tuners/CLTuningParametersList.cpp
+++ b/src/runtime/CL/tuners/CLTuningParametersList.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -31,7 +31,7 @@ constexpr unsigned int max_lws_supported_x{ 64u };
constexpr unsigned int max_lws_supported_y{ 32u };
constexpr unsigned int max_lws_supported_z{ 32u };
-/** Non instantiable base class for Tuning parameters combinations that use Index2Cooard mapping */
+/** Non instantiable base class for Tuning parameters combinations that use Index2Coord mapping */
class CLTuningParametersList : public ICLTuningParametersList
{
protected:
@@ -162,10 +162,13 @@ CLTuningParams CLTuningParametersListExhaustive::operator[](size_t index)
CLTuningParametersListExhaustive::CLTuningParametersListExhaustive(const cl::NDRange &gws, CLTuningInfo tuning_info)
{
- ARM_COMPUTE_UNUSED(gws);
- search_space_shape[0] = max_lws_supported_x;
- search_space_shape[1] = max_lws_supported_y;
- search_space_shape[2] = max_lws_supported_z;
+ const auto lws_x_max = std::min(static_cast<unsigned int>(gws[0]), max_lws_supported_x);
+ const auto lws_y_max = std::min(static_cast<unsigned int>(gws[1]), max_lws_supported_y);
+ const auto lws_z_max = std::min(static_cast<unsigned int>(gws[2]), max_lws_supported_z);
+
+ search_space_shape[0] = lws_x_max;
+ search_space_shape[1] = lws_y_max;
+ search_space_shape[2] = lws_z_max;
search_space_shape[3] = 1;
if(tuning_info.tune_wbsm)
{
@@ -183,9 +186,9 @@ CLTuningParams CLTuningParametersListNormal::operator[](size_t index)
CLTuningParametersListNormal::CLTuningParametersListNormal(const cl::NDRange &gws, CLTuningInfo tuning_info)
{
- auto lws_x_max = std::min(static_cast<unsigned int>(gws[0]), max_lws_supported_x);
- auto lws_y_max = std::min(static_cast<unsigned int>(gws[1]), max_lws_supported_y);
- auto lws_z_max = std::min(static_cast<unsigned int>(gws[2]), max_lws_supported_z);
+ const auto lws_x_max = std::min(static_cast<unsigned int>(gws[0]), max_lws_supported_x);
+ const auto lws_y_max = std::min(static_cast<unsigned int>(gws[1]), max_lws_supported_y);
+ const auto lws_z_max = std::min(static_cast<unsigned int>(gws[2]), max_lws_supported_z);
// Initialize the tuning parameters values to test
_lws_x = {};
@@ -227,9 +230,9 @@ void CLTuningParametersListNormal::initialize_lws_values(std::vector<unsigned in
CLTuningParametersListRapid::CLTuningParametersListRapid(const cl::NDRange &gws, CLTuningInfo tuning_info)
{
- auto lws_x_max = std::min(static_cast<unsigned int>(gws[0]), 8u); // Limit exploration to 1 - 8
- auto lws_y_max = std::min(static_cast<unsigned int>(gws[1]), 4u); // Limit exploration to 1 - 4
- auto lws_z_max = std::min(static_cast<unsigned int>(gws[2]), 4u); // Limit exploration to 1 - 4
+ const auto lws_x_max = std::min(static_cast<unsigned int>(gws[0]), 8u); // Limit exploration to 1 - 8
+ const auto lws_y_max = std::min(static_cast<unsigned int>(gws[1]), 4u); // Limit exploration to 1 - 4
+ const auto lws_z_max = std::min(static_cast<unsigned int>(gws[2]), 4u); // Limit exploration to 1 - 4
// Initialize the LWS values to test
_lws_x = {};