aboutsummaryrefslogtreecommitdiff
path: root/arm_compute
diff options
context:
space:
mode:
authorManuel Bottini <manuel.bottini@arm.com>2021-01-25 15:07:17 +0000
committerGeorgios Pinitas <georgios.pinitas@arm.com>2021-02-03 17:35:00 +0000
commitbe9f9f9139b759d314f4f2a6d2ee747079666504 (patch)
tree461690abb95caeaeca40261fd85816a906c8446c /arm_compute
parent7061eb283969f9a020c08349454447564e4dd5b3 (diff)
downloadComputeLibrary-be9f9f9139b759d314f4f2a6d2ee747079666504.tar.gz
Add WBSM tuning to CLTuner
Add WBSM as a possible parameter to be tuned. Add helper functions to check WBSM support and to set the value in the kernel. Update tuning parameter lists to use WBSM. Update CLTuner to use WBSM. The WBSM tuning is exposed as a parameter to be set at compile time by setting the CLTuningInfo. CLTuningInfo contains information about the tuning mode and whether WBSM tuning is enabled. Resolves: COMPMID-3936 Change-Id: Id53697c9c6d2cef41c049f368002f6197351b3ed Signed-off-by: Manuel Bottini <manuel.bottini@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4914 Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com> Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Diffstat (limited to 'arm_compute')
-rw-r--r--arm_compute/core/CL/CLCompileContext.h9
-rw-r--r--arm_compute/core/CL/CLHelpers.h22
-rw-r--r--arm_compute/core/CL/CLKernelLibrary.h6
-rw-r--r--arm_compute/core/CL/OpenCL.h3
-rw-r--r--arm_compute/runtime/CL/CLTuner.h1
-rw-r--r--arm_compute/runtime/CL/CLTunerTypes.h5
-rw-r--r--arm_compute/runtime/CL/CLTuningParams.h85
-rw-r--r--arm_compute/runtime/CL/tuners/CLTuningParametersList.h5
8 files changed, 124 insertions, 12 deletions
diff --git a/arm_compute/core/CL/CLCompileContext.h b/arm_compute/core/CL/CLCompileContext.h
index 6f6dc18b85..46a8c9b341 100644
--- a/arm_compute/core/CL/CLCompileContext.h
+++ b/arm_compute/core/CL/CLCompileContext.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020 Arm Limited.
+ * Copyright (c) 2020-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -296,6 +296,12 @@ public:
*/
bool int64_base_atomics_supported() const;
+ /* Returns true if the workgroup batch size modifier parameter is supported on the cl device
+ *
+ * @return true if the workgroup batch size modifier parameter is supported, false otherwise
+ */
+ bool is_wbsm_supported() const;
+
private:
/** Load program and its dependencies.
*
@@ -327,6 +333,7 @@ private:
CLDevice _device; /**< Underlying CL device. */
mutable std::map<std::string, const Program> _programs_map; /**< Map with all already loaded program data. */
mutable std::map<std::string, cl::Program> _built_programs_map; /**< Map with all already built program data. */
+ bool _is_wbsm_supported; /**< Support of worksize batch size modifier support boolean*/
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLCOMPILECONTEXT_H */
diff --git a/arm_compute/core/CL/CLHelpers.h b/arm_compute/core/CL/CLHelpers.h
index cf18e16e34..0e9aa5d6e5 100644
--- a/arm_compute/core/CL/CLHelpers.h
+++ b/arm_compute/core/CL/CLHelpers.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -30,6 +30,11 @@
#include <set>
#include <string>
+/* CL Device capabilities */
+#define ARM_COMPUTE_LIBRARY_OPENCL_DEVICE_CAPABILITIES_ARM 0x41E4
+/* Workgroup Batch Size Modifier */
+#define ARM_COMPUTE_LIBRARY_OPENCL_EXEC_WBSM_ARM 0x41E6
+
namespace arm_compute
{
class CLCoreRuntimeContext;
@@ -226,5 +231,20 @@ cl::Kernel create_kernel(const CLCompileContext &ctx, const std::string &kernel_
*/
cl::NDRange create_lws_hint_parallel_implementations(unsigned int input_dimension, unsigned int vector_size);
+/* Helper function to check if the workgroup batch size modifier parameter is supported on the cl device
+ *
+ * @param[in] device cl device to check for support
+ *
+ * @return true if the workgroup batch size modifier parameter is supported, false otherwise
+ */
+bool get_wbsm_support_info(const cl::Device &device);
+
+/* Helper function to set the workgroup batch size modifier parameter in the kernel
+ *
+ * @param[in] kernel cl kernel to set the workgroup batch size modifier parameter
+ * @param[in] wbsm_hint workgroup batch size modifier to use
+ */
+void set_wbsm(cl::Kernel &kernel, cl_int wbsm_hint);
+
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLHELPERS_H */
diff --git a/arm_compute/core/CL/CLKernelLibrary.h b/arm_compute/core/CL/CLKernelLibrary.h
index 193389388e..0d8e4a6164 100644
--- a/arm_compute/core/CL/CLKernelLibrary.h
+++ b/arm_compute/core/CL/CLKernelLibrary.h
@@ -148,6 +148,12 @@ public:
*/
std::string get_program_name(const std::string &kernel_name) const;
+ /* Returns true if the workgroup batch size modifier parameter is supported on the cl device
+ *
+ * @return true if the workgroup batch size modifier parameter is supported, false otherwise
+ */
+ bool is_wbsm_supported();
+
/** Sets the CL context used to create programs.
*
* @note Setting the context also resets the device to the
diff --git a/arm_compute/core/CL/OpenCL.h b/arm_compute/core/CL/OpenCL.h
index f9796d7e95..155c3e4eef 100644
--- a/arm_compute/core/CL/OpenCL.h
+++ b/arm_compute/core/CL/OpenCL.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -135,6 +135,7 @@ public:
DECLARE_FUNCTION_PTR(clEnqueueMarker);
DECLARE_FUNCTION_PTR(clWaitForEvents);
DECLARE_FUNCTION_PTR(clCreateImage);
+ DECLARE_FUNCTION_PTR(clSetKernelExecInfo);
// Third-party extensions
DECLARE_FUNCTION_PTR(clImportMemoryARM);
diff --git a/arm_compute/runtime/CL/CLTuner.h b/arm_compute/runtime/CL/CLTuner.h
index 9814867142..e1c98bf411 100644
--- a/arm_compute/runtime/CL/CLTuner.h
+++ b/arm_compute/runtime/CL/CLTuner.h
@@ -182,7 +182,6 @@ private:
cl::Event _kernel_event;
bool _tune_new_kernels;
CLTuningInfo _tuning_info;
- CLTunerMode _tuner_mode;
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_CLTUNER_H */
diff --git a/arm_compute/runtime/CL/CLTunerTypes.h b/arm_compute/runtime/CL/CLTunerTypes.h
index 49e2d615ea..e93ef5b2b3 100644
--- a/arm_compute/runtime/CL/CLTunerTypes.h
+++ b/arm_compute/runtime/CL/CLTunerTypes.h
@@ -42,7 +42,10 @@ enum class CLTunerMode
/**< OpenCL tuner tuning information */
struct CLTuningInfo
{
- bool tune_lws = true;
+ CLTunerMode tuner_mode = CLTunerMode::NORMAL; /**< Parameter to select the level (granularity) of the tuning */
+ bool tune_wbsm = false; /**< Flag to tune the batches of work groups distributed to compute units.
+ Internally, the library will check if this feature is available on
+ the target platform */
};
/** Converts a string to a strong types enumeration @ref CLTunerMode
diff --git a/arm_compute/runtime/CL/CLTuningParams.h b/arm_compute/runtime/CL/CLTuningParams.h
index 99a386638d..b50481336b 100644
--- a/arm_compute/runtime/CL/CLTuningParams.h
+++ b/arm_compute/runtime/CL/CLTuningParams.h
@@ -25,6 +25,10 @@
#define ARM_COMPUTE_CLTUNING_PARAMS_H
#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/runtime/CL/CLTunerTypes.h"
+#include "support/StringSupport.h"
+
+#include <ostream>
namespace arm_compute
{
@@ -34,26 +38,95 @@ class CLTuningParams
public:
CLTuningParams(const CLTuningParams &) = default;
- CLTuningParams(unsigned int lws_x = 0, unsigned int lws_y = 0, unsigned int lws_z = 0)
- : _lws(lws_x, lws_y, lws_z)
+ CLTuningParams(unsigned int lws_x = 0, unsigned int lws_y = 0, unsigned int lws_z = 0, int wbsm = 0)
+ : _lws(lws_x, lws_y, lws_z), _wbsm(wbsm)
{
}
- CLTuningParams(cl::NDRange lws)
- : _lws(lws)
+ CLTuningParams(cl::NDRange lws, cl_int wbsm = 0)
+ : _lws(lws), _wbsm(wbsm)
{
}
- void set_lws(cl::NDRange &lws)
+
+ CLTuningParams(cl_int wbsm)
+ : CLTuningParams(cl::NullRange, wbsm)
+ {
+ }
+
+ void set_lws(cl::NDRange lws)
{
_lws = lws;
}
- cl::NDRange get_lws()
+ cl::NDRange get_lws() const
{
return _lws;
}
+ void set_wbsm(cl_int wbsm)
+ {
+ _wbsm = wbsm;
+ }
+
+ cl_int get_wbsm() const
+ {
+ return _wbsm;
+ }
+
+ std::string to_string(CLTuningInfo tuning_info)
+ {
+ std::string tuning_params_string = "";
+ tuning_params_string += ";" + support::cpp11::to_string(_lws[0]) + ";" + support::cpp11::to_string(_lws[1]) + ";" + support::cpp11::to_string(_lws[2]);
+ if(tuning_info.tune_wbsm)
+ {
+ tuning_params_string += ";" + support::cpp11::to_string(_wbsm);
+ }
+ return tuning_params_string;
+ }
+
+ bool from_string(CLTuningInfo tuning_info, std::string tuning_params_string)
+ {
+ std::replace(tuning_params_string.begin(), tuning_params_string.end(), ';', ' ');
+ std::vector<std::string> array;
+ std::stringstream ss(tuning_params_string);
+ std::string temp;
+ while(ss >> temp)
+ {
+ array.push_back(temp);
+ }
+ // Read 3 values for lws
+ if(array.size() < 3)
+ {
+ return false;
+ }
+ const unsigned int lws_0 = support::cpp11::stoi(array[0]);
+ const unsigned int lws_1 = support::cpp11::stoi(array[1]);
+ const unsigned int lws_2 = support::cpp11::stoi(array[2]);
+ if(lws_0 == 0 && lws_1 == 0 && lws_2 == 0)
+ {
+ // If lws values are 0, cl::NullRange has to be used
+ // otherwise the lws object will be badly created
+ _lws = cl::NullRange;
+ }
+ else
+ {
+ _lws = cl::NDRange(lws_0, lws_1, lws_2);
+ }
+ array.erase(array.begin(), array.begin() + 3);
+ if(tuning_info.tune_wbsm)
+ {
+ if(array.size() < 1)
+ {
+ return false;
+ }
+ _wbsm = support::cpp11::stoi(array[0]);
+ array.erase(array.begin());
+ }
+ return true;
+ }
+
private:
cl::NDRange _lws;
+ cl_int _wbsm;
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_CLTUNING_PARAMS_H */
diff --git a/arm_compute/runtime/CL/tuners/CLTuningParametersList.h b/arm_compute/runtime/CL/tuners/CLTuningParametersList.h
index c51b9901ef..69572c98d2 100644
--- a/arm_compute/runtime/CL/tuners/CLTuningParametersList.h
+++ b/arm_compute/runtime/CL/tuners/CLTuningParametersList.h
@@ -77,9 +77,12 @@ public:
/** Construct an ICLTuningParametersList object for the given tuner mode and gws configuration.
*
+ * @param[in] tuning_info Tuning info containing which parameters to tune and the tuner mode
+ * @param[in] gws Global worksize values
+ *
* @return unique_ptr to the requested ICLTuningParametersList implementation.
*/
-std::unique_ptr<ICLTuningParametersList> get_tuning_parameters_list(CLTunerMode mode, const cl::NDRange &gws);
+std::unique_ptr<ICLTuningParametersList> get_tuning_parameters_list(CLTuningInfo tuning_info, const cl::NDRange &gws);
} // namespace cl_tuner
} // namespace arm_compute