diff options
Diffstat (limited to 'arm_compute')
-rw-r--r-- | arm_compute/core/CL/CLCompileContext.h | 9 | ||||
-rw-r--r-- | arm_compute/core/CL/CLHelpers.h | 22 | ||||
-rw-r--r-- | arm_compute/core/CL/CLKernelLibrary.h | 6 | ||||
-rw-r--r-- | arm_compute/core/CL/OpenCL.h | 3 | ||||
-rw-r--r-- | arm_compute/runtime/CL/CLTuner.h | 1 | ||||
-rw-r--r-- | arm_compute/runtime/CL/CLTunerTypes.h | 5 | ||||
-rw-r--r-- | arm_compute/runtime/CL/CLTuningParams.h | 85 | ||||
-rw-r--r-- | arm_compute/runtime/CL/tuners/CLTuningParametersList.h | 5 |
8 files changed, 124 insertions, 12 deletions
diff --git a/arm_compute/core/CL/CLCompileContext.h b/arm_compute/core/CL/CLCompileContext.h index 6f6dc18b85..46a8c9b341 100644 --- a/arm_compute/core/CL/CLCompileContext.h +++ b/arm_compute/core/CL/CLCompileContext.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020 Arm Limited. + * Copyright (c) 2020-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -296,6 +296,12 @@ public: */ bool int64_base_atomics_supported() const; + /* Returns true if the workgroup batch size modifier parameter is supported on the cl device + * + * @return true if the workgroup batch size modifier parameter is supported, false otherwise + */ + bool is_wbsm_supported() const; + private: /** Load program and its dependencies. * @@ -327,6 +333,7 @@ private: CLDevice _device; /**< Underlying CL device. */ mutable std::map<std::string, const Program> _programs_map; /**< Map with all already loaded program data. */ mutable std::map<std::string, cl::Program> _built_programs_map; /**< Map with all already built program data. */ + bool _is_wbsm_supported; /**< Support of worksize batch size modifier support boolean*/ }; } // namespace arm_compute #endif /* ARM_COMPUTE_CLCOMPILECONTEXT_H */ diff --git a/arm_compute/core/CL/CLHelpers.h b/arm_compute/core/CL/CLHelpers.h index cf18e16e34..0e9aa5d6e5 100644 --- a/arm_compute/core/CL/CLHelpers.h +++ b/arm_compute/core/CL/CLHelpers.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 Arm Limited. + * Copyright (c) 2016-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -30,6 +30,11 @@ #include <set> #include <string> +/* CL Device capabilities */ +#define ARM_COMPUTE_LIBRARY_OPENCL_DEVICE_CAPABILITIES_ARM 0x41E4 +/* Workgroup Batch Size Modifier */ +#define ARM_COMPUTE_LIBRARY_OPENCL_EXEC_WBSM_ARM 0x41E6 + namespace arm_compute { class CLCoreRuntimeContext; @@ -226,5 +231,20 @@ cl::Kernel create_kernel(const CLCompileContext &ctx, const std::string &kernel_ */ cl::NDRange create_lws_hint_parallel_implementations(unsigned int input_dimension, unsigned int vector_size); +/* Helper function to check if the workgroup batch size modifier parameter is supported on the cl device + * + * @param[in] device cl device to check for support + * + * @return true if the workgroup batch size modifier parameter is supported, false otherwise + */ +bool get_wbsm_support_info(const cl::Device &device); + +/* Helper function to set the workgroup batch size modifier parameter in the kernel + * + * @param[in] kernel cl kernel to set the workgroup batch size modifier parameter + * @param[in] wbsm_hint workgroup batch size modifier to use + */ +void set_wbsm(cl::Kernel &kernel, cl_int wbsm_hint); + } // namespace arm_compute #endif /* ARM_COMPUTE_CLHELPERS_H */ diff --git a/arm_compute/core/CL/CLKernelLibrary.h b/arm_compute/core/CL/CLKernelLibrary.h index 193389388e..0d8e4a6164 100644 --- a/arm_compute/core/CL/CLKernelLibrary.h +++ b/arm_compute/core/CL/CLKernelLibrary.h @@ -148,6 +148,12 @@ public: */ std::string get_program_name(const std::string &kernel_name) const; + /* Returns true if the workgroup batch size modifier parameter is supported on the cl device + * + * @return true if the workgroup batch size modifier parameter is supported, false otherwise + */ + bool is_wbsm_supported(); + /** Sets the CL context used to create programs. 
* * @note Setting the context also resets the device to the diff --git a/arm_compute/core/CL/OpenCL.h b/arm_compute/core/CL/OpenCL.h index f9796d7e95..155c3e4eef 100644 --- a/arm_compute/core/CL/OpenCL.h +++ b/arm_compute/core/CL/OpenCL.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 Arm Limited. + * Copyright (c) 2016-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -135,6 +135,7 @@ public: DECLARE_FUNCTION_PTR(clEnqueueMarker); DECLARE_FUNCTION_PTR(clWaitForEvents); DECLARE_FUNCTION_PTR(clCreateImage); + DECLARE_FUNCTION_PTR(clSetKernelExecInfo); // Third-party extensions DECLARE_FUNCTION_PTR(clImportMemoryARM); diff --git a/arm_compute/runtime/CL/CLTuner.h b/arm_compute/runtime/CL/CLTuner.h index 9814867142..e1c98bf411 100644 --- a/arm_compute/runtime/CL/CLTuner.h +++ b/arm_compute/runtime/CL/CLTuner.h @@ -182,7 +182,6 @@ private: cl::Event _kernel_event; bool _tune_new_kernels; CLTuningInfo _tuning_info; - CLTunerMode _tuner_mode; }; } // namespace arm_compute #endif /*ARM_COMPUTE_CLTUNER_H */ diff --git a/arm_compute/runtime/CL/CLTunerTypes.h b/arm_compute/runtime/CL/CLTunerTypes.h index 49e2d615ea..e93ef5b2b3 100644 --- a/arm_compute/runtime/CL/CLTunerTypes.h +++ b/arm_compute/runtime/CL/CLTunerTypes.h @@ -42,7 +42,10 @@ enum class CLTunerMode /**< OpenCL tuner tuning information */ struct CLTuningInfo { - bool tune_lws = true; + CLTunerMode tuner_mode = CLTunerMode::NORMAL; /**< Parameter to select the level (granularity) of the tuning */ + bool tune_wbsm = false; /**< Flag to tune the batches of work groups distributed to compute units. 
+ Internally, the library will check if this feature is available on + the target platform */ }; /** Converts a string to a strong types enumeration @ref CLTunerMode diff --git a/arm_compute/runtime/CL/CLTuningParams.h b/arm_compute/runtime/CL/CLTuningParams.h index 99a386638d..b50481336b 100644 --- a/arm_compute/runtime/CL/CLTuningParams.h +++ b/arm_compute/runtime/CL/CLTuningParams.h @@ -25,6 +25,10 @@ #define ARM_COMPUTE_CLTUNING_PARAMS_H #include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/runtime/CL/CLTunerTypes.h" +#include "support/StringSupport.h" + +#include <ostream> namespace arm_compute { @@ -34,26 +38,95 @@ class CLTuningParams public: CLTuningParams(const CLTuningParams &) = default; - CLTuningParams(unsigned int lws_x = 0, unsigned int lws_y = 0, unsigned int lws_z = 0) - : _lws(lws_x, lws_y, lws_z) + CLTuningParams(unsigned int lws_x = 0, unsigned int lws_y = 0, unsigned int lws_z = 0, int wbsm = 0) + : _lws(lws_x, lws_y, lws_z), _wbsm(wbsm) { } - CLTuningParams(cl::NDRange lws) - : _lws(lws) + CLTuningParams(cl::NDRange lws, cl_int wbsm = 0) + : _lws(lws), _wbsm(wbsm) { } - void set_lws(cl::NDRange &lws) + + CLTuningParams(cl_int wbsm) + : CLTuningParams(cl::NullRange, wbsm) + { + } + + void set_lws(cl::NDRange lws) { _lws = lws; } - cl::NDRange get_lws() + cl::NDRange get_lws() const { return _lws; } + void set_wbsm(cl_int wbsm) + { + _wbsm = wbsm; + } + + cl_int get_wbsm() const + { + return _wbsm; + } + + std::string to_string(CLTuningInfo tuning_info) + { + std::string tuning_params_string = ""; + tuning_params_string += ";" + support::cpp11::to_string(_lws[0]) + ";" + support::cpp11::to_string(_lws[1]) + ";" + support::cpp11::to_string(_lws[2]); + if(tuning_info.tune_wbsm) + { + tuning_params_string += ";" + support::cpp11::to_string(_wbsm); + } + return tuning_params_string; + } + + bool from_string(CLTuningInfo tuning_info, std::string tuning_params_string) + { + std::replace(tuning_params_string.begin(), 
tuning_params_string.end(), ';', ' '); + std::vector<std::string> array; + std::stringstream ss(tuning_params_string); + std::string temp; + while(ss >> temp) + { + array.push_back(temp); + } + // Read 3 values for lws + if(array.size() < 3) + { + return false; + } + const unsigned int lws_0 = support::cpp11::stoi(array[0]); + const unsigned int lws_1 = support::cpp11::stoi(array[1]); + const unsigned int lws_2 = support::cpp11::stoi(array[2]); + if(lws_0 == 0 && lws_1 == 0 && lws_2 == 0) + { + // If lws values are 0, cl::NullRange has to be used + // otherwise the lws object will be badly created + _lws = cl::NullRange; + } + else + { + _lws = cl::NDRange(lws_0, lws_1, lws_2); + } + array.erase(array.begin(), array.begin() + 3); + if(tuning_info.tune_wbsm) + { + if(array.size() < 1) + { + return false; + } + _wbsm = support::cpp11::stoi(array[0]); + array.erase(array.begin()); + } + return true; + } + private: cl::NDRange _lws; + cl_int _wbsm; }; } // namespace arm_compute #endif /*ARM_COMPUTE_CLTUNING_PARAMS_H */ diff --git a/arm_compute/runtime/CL/tuners/CLTuningParametersList.h b/arm_compute/runtime/CL/tuners/CLTuningParametersList.h index c51b9901ef..69572c98d2 100644 --- a/arm_compute/runtime/CL/tuners/CLTuningParametersList.h +++ b/arm_compute/runtime/CL/tuners/CLTuningParametersList.h @@ -77,9 +77,12 @@ public: /** Construct an ICLTuningParametersList object for the given tuner mode and gws configuration. * + * @param[in] tuning_info Tuning info containing which parameters to tune and the tuner mode + * @param[in] gws Global worksize values + * * @return unique_ptr to the requested ICLTuningParametersList implementation. */ -std::unique_ptr<ICLTuningParametersList> get_tuning_parameters_list(CLTunerMode mode, const cl::NDRange &gws); +std::unique_ptr<ICLTuningParametersList> get_tuning_parameters_list(CLTuningInfo tuning_info, const cl::NDRange &gws); } // namespace cl_tuner } // namespace arm_compute |