aboutsummaryrefslogtreecommitdiff
path: root/arm_compute
diff options
context:
space:
mode:
Diffstat (limited to 'arm_compute')
-rw-r--r--arm_compute/core/CL/CLCompileContext.h9
-rw-r--r--arm_compute/core/CL/CLHelpers.h22
-rw-r--r--arm_compute/core/CL/CLKernelLibrary.h6
-rw-r--r--arm_compute/core/CL/OpenCL.h3
-rw-r--r--arm_compute/runtime/CL/CLTuner.h1
-rw-r--r--arm_compute/runtime/CL/CLTunerTypes.h5
-rw-r--r--arm_compute/runtime/CL/CLTuningParams.h85
-rw-r--r--arm_compute/runtime/CL/tuners/CLTuningParametersList.h5
8 files changed, 124 insertions, 12 deletions
diff --git a/arm_compute/core/CL/CLCompileContext.h b/arm_compute/core/CL/CLCompileContext.h
index 6f6dc18b85..46a8c9b341 100644
--- a/arm_compute/core/CL/CLCompileContext.h
+++ b/arm_compute/core/CL/CLCompileContext.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020 Arm Limited.
+ * Copyright (c) 2020-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -296,6 +296,12 @@ public:
*/
bool int64_base_atomics_supported() const;
+ /** Returns true if the workgroup batch size modifier parameter is supported on the cl device
+ *
+ * @return true if the workgroup batch size modifier parameter is supported, false otherwise
+ */
+ bool is_wbsm_supported() const;
+
private:
/** Load program and its dependencies.
*
@@ -327,6 +333,7 @@ private:
CLDevice _device; /**< Underlying CL device. */
mutable std::map<std::string, const Program> _programs_map; /**< Map with all already loaded program data. */
mutable std::map<std::string, cl::Program> _built_programs_map; /**< Map with all already built program data. */
+ bool _is_wbsm_supported; /**< Whether the workgroup batch size modifier parameter is supported on the cl device */
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLCOMPILECONTEXT_H */
diff --git a/arm_compute/core/CL/CLHelpers.h b/arm_compute/core/CL/CLHelpers.h
index cf18e16e34..0e9aa5d6e5 100644
--- a/arm_compute/core/CL/CLHelpers.h
+++ b/arm_compute/core/CL/CLHelpers.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -30,6 +30,11 @@
#include <set>
#include <string>
+/* CL Device capabilities */
+#define ARM_COMPUTE_LIBRARY_OPENCL_DEVICE_CAPABILITIES_ARM 0x41E4
+/* Workgroup Batch Size Modifier */
+#define ARM_COMPUTE_LIBRARY_OPENCL_EXEC_WBSM_ARM 0x41E6
+
namespace arm_compute
{
class CLCoreRuntimeContext;
@@ -226,5 +231,20 @@ cl::Kernel create_kernel(const CLCompileContext &ctx, const std::string &kernel_
*/
cl::NDRange create_lws_hint_parallel_implementations(unsigned int input_dimension, unsigned int vector_size);
+/** Helper function to check if the workgroup batch size modifier parameter is supported on the cl device
+ *
+ * @param[in] device cl device to check for support
+ *
+ * @return true if the workgroup batch size modifier parameter is supported, false otherwise
+ */
+bool get_wbsm_support_info(const cl::Device &device);
+
+/** Helper function to set the workgroup batch size modifier parameter in the kernel
+ *
+ * @param[in] kernel cl kernel to set the workgroup batch size modifier parameter
+ * @param[in] wbsm_hint workgroup batch size modifier to use
+ */
+void set_wbsm(cl::Kernel &kernel, cl_int wbsm_hint);
+
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLHELPERS_H */
diff --git a/arm_compute/core/CL/CLKernelLibrary.h b/arm_compute/core/CL/CLKernelLibrary.h
index 193389388e..0d8e4a6164 100644
--- a/arm_compute/core/CL/CLKernelLibrary.h
+++ b/arm_compute/core/CL/CLKernelLibrary.h
@@ -148,6 +148,12 @@ public:
*/
std::string get_program_name(const std::string &kernel_name) const;
+ /** Returns true if the workgroup batch size modifier parameter is supported on the cl device
+ *
+ * @return true if the workgroup batch size modifier parameter is supported, false otherwise
+ */
+ bool is_wbsm_supported();
+
/** Sets the CL context used to create programs.
*
* @note Setting the context also resets the device to the
diff --git a/arm_compute/core/CL/OpenCL.h b/arm_compute/core/CL/OpenCL.h
index f9796d7e95..155c3e4eef 100644
--- a/arm_compute/core/CL/OpenCL.h
+++ b/arm_compute/core/CL/OpenCL.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -135,6 +135,7 @@ public:
DECLARE_FUNCTION_PTR(clEnqueueMarker);
DECLARE_FUNCTION_PTR(clWaitForEvents);
DECLARE_FUNCTION_PTR(clCreateImage);
+ DECLARE_FUNCTION_PTR(clSetKernelExecInfo);
// Third-party extensions
DECLARE_FUNCTION_PTR(clImportMemoryARM);
diff --git a/arm_compute/runtime/CL/CLTuner.h b/arm_compute/runtime/CL/CLTuner.h
index 9814867142..e1c98bf411 100644
--- a/arm_compute/runtime/CL/CLTuner.h
+++ b/arm_compute/runtime/CL/CLTuner.h
@@ -182,7 +182,6 @@ private:
cl::Event _kernel_event;
bool _tune_new_kernels;
CLTuningInfo _tuning_info;
- CLTunerMode _tuner_mode;
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_CLTUNER_H */
diff --git a/arm_compute/runtime/CL/CLTunerTypes.h b/arm_compute/runtime/CL/CLTunerTypes.h
index 49e2d615ea..e93ef5b2b3 100644
--- a/arm_compute/runtime/CL/CLTunerTypes.h
+++ b/arm_compute/runtime/CL/CLTunerTypes.h
@@ -42,7 +42,10 @@ enum class CLTunerMode
/**< OpenCL tuner tuning information */
struct CLTuningInfo
{
- bool tune_lws = true;
+ CLTunerMode tuner_mode = CLTunerMode::NORMAL; /**< Parameter to select the level (granularity) of the tuning */
+ bool tune_wbsm = false; /**< Flag to tune the batches of work groups distributed to compute units.
+ Internally, the library will check if this feature is available on
+ the target platform */
};
/** Converts a string to a strong types enumeration @ref CLTunerMode
diff --git a/arm_compute/runtime/CL/CLTuningParams.h b/arm_compute/runtime/CL/CLTuningParams.h
index 99a386638d..b50481336b 100644
--- a/arm_compute/runtime/CL/CLTuningParams.h
+++ b/arm_compute/runtime/CL/CLTuningParams.h
@@ -25,6 +25,10 @@
#define ARM_COMPUTE_CLTUNING_PARAMS_H
#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/runtime/CL/CLTunerTypes.h"
+#include "support/StringSupport.h"
+
+#include <ostream>
namespace arm_compute
{
@@ -34,26 +38,95 @@ class CLTuningParams
public:
CLTuningParams(const CLTuningParams &) = default;
- CLTuningParams(unsigned int lws_x = 0, unsigned int lws_y = 0, unsigned int lws_z = 0)
- : _lws(lws_x, lws_y, lws_z)
+ CLTuningParams(unsigned int lws_x = 0, unsigned int lws_y = 0, unsigned int lws_z = 0, int wbsm = 0)
+ : _lws(lws_x, lws_y, lws_z), _wbsm(wbsm)
{
}
- CLTuningParams(cl::NDRange lws)
- : _lws(lws)
+ CLTuningParams(cl::NDRange lws, cl_int wbsm = 0)
+ : _lws(lws), _wbsm(wbsm)
{
}
- void set_lws(cl::NDRange &lws)
+
+ CLTuningParams(cl_int wbsm)
+ : CLTuningParams(cl::NullRange, wbsm)
+ {
+ }
+
+ void set_lws(cl::NDRange lws)
{
_lws = lws;
}
- cl::NDRange get_lws()
+ cl::NDRange get_lws() const
{
return _lws;
}
+ void set_wbsm(cl_int wbsm)
+ {
+ _wbsm = wbsm;
+ }
+
+ cl_int get_wbsm() const
+ {
+ return _wbsm;
+ }
+
+ std::string to_string(CLTuningInfo tuning_info)
+ {
+ std::string tuning_params_string = "";
+ tuning_params_string += ";" + support::cpp11::to_string(_lws[0]) + ";" + support::cpp11::to_string(_lws[1]) + ";" + support::cpp11::to_string(_lws[2]);
+ if(tuning_info.tune_wbsm)
+ {
+ tuning_params_string += ";" + support::cpp11::to_string(_wbsm);
+ }
+ return tuning_params_string;
+ }
+
+ bool from_string(CLTuningInfo tuning_info, std::string tuning_params_string)
+ {
+ std::replace(tuning_params_string.begin(), tuning_params_string.end(), ';', ' ');
+ std::vector<std::string> array;
+ std::stringstream ss(tuning_params_string);
+ std::string temp;
+ while(ss >> temp)
+ {
+ array.push_back(temp);
+ }
+ // Read 3 values for lws
+ if(array.size() < 3)
+ {
+ return false;
+ }
+ const unsigned int lws_0 = support::cpp11::stoi(array[0]);
+ const unsigned int lws_1 = support::cpp11::stoi(array[1]);
+ const unsigned int lws_2 = support::cpp11::stoi(array[2]);
+ if(lws_0 == 0 && lws_1 == 0 && lws_2 == 0)
+ {
+ // If lws values are 0, cl::NullRange has to be used
+ // otherwise the lws object will be badly created
+ _lws = cl::NullRange;
+ }
+ else
+ {
+ _lws = cl::NDRange(lws_0, lws_1, lws_2);
+ }
+ array.erase(array.begin(), array.begin() + 3);
+ if(tuning_info.tune_wbsm)
+ {
+ if(array.size() < 1)
+ {
+ return false;
+ }
+ _wbsm = support::cpp11::stoi(array[0]);
+ array.erase(array.begin());
+ }
+ return true;
+ }
+
private:
cl::NDRange _lws;
+ cl_int _wbsm;
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_CLTUNING_PARAMS_H */
diff --git a/arm_compute/runtime/CL/tuners/CLTuningParametersList.h b/arm_compute/runtime/CL/tuners/CLTuningParametersList.h
index c51b9901ef..69572c98d2 100644
--- a/arm_compute/runtime/CL/tuners/CLTuningParametersList.h
+++ b/arm_compute/runtime/CL/tuners/CLTuningParametersList.h
@@ -77,9 +77,12 @@ public:
/** Construct an ICLTuningParametersList object for the given tuner mode and gws configuration.
*
+ * @param[in] tuning_info Tuning info containing which parameters to tune and the tuner mode
+ * @param[in] gws Global worksize values
+ *
* @return unique_ptr to the requested ICLTuningParametersList implementation.
*/
-std::unique_ptr<ICLTuningParametersList> get_tuning_parameters_list(CLTunerMode mode, const cl::NDRange &gws);
+std::unique_ptr<ICLTuningParametersList> get_tuning_parameters_list(CLTuningInfo tuning_info, const cl::NDRange &gws);
} // namespace cl_tuner
} // namespace arm_compute