aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/CL/CLTuner.cpp
diff options
context:
space:
mode:
author Georgios Pinitas <georgios.pinitas@arm.com> 2020-07-17 12:47:56 +0100
committer Gian Marco Iodice <gianmarco.iodice@arm.com> 2020-07-20 07:34:56 +0000
commit 9c82e014260a997fe784affc7e0545972c3511e5 (patch)
tree 404b1ab89ab9b62b9883c182f81b4bf4b53448ad /src/runtime/CL/CLTuner.cpp
parent ba2cc1aea6bcd16b3ad81b55be18911af83d2113 (diff)
download ComputeLibrary-9c82e014260a997fe784affc7e0545972c3511e5.tar.gz
COMPMID-3604: Graph failures during tuning
Update the ICLTuner interface to account for the new memory injection interface. Redirect to the appropriate kernel execution interface depending on whether the kernel supports memory injection. Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com> Change-Id: I8ce29f5c22f1865c9e688d12b65e68ee4486f99c Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3588 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/runtime/CL/CLTuner.cpp')
-rw-r--r-- src/runtime/CL/CLTuner.cpp 16
1 files changed, 11 insertions, 5 deletions
diff --git a/src/runtime/CL/CLTuner.cpp b/src/runtime/CL/CLTuner.cpp
index e3119c1db9..b2e3476e20 100644
--- a/src/runtime/CL/CLTuner.cpp
+++ b/src/runtime/CL/CLTuner.cpp
@@ -77,6 +77,11 @@ void CLTuner::tune_kernel_static(ICLKernel &kernel)
void CLTuner::tune_kernel_dynamic(ICLKernel &kernel)
{
+ tune_kernel_dynamic(kernel, {}, {});
+}
+
+void CLTuner::tune_kernel_dynamic(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs)
+{
// Get the configuration ID from the kernel and append GPU target name and number of available compute units
const std::string config_id = kernel.config_id() + "_" + string_from_target(kernel.get_target()) + "_MP" + support::cpp11::to_string(CLKernelLibrary::get().get_num_compute_units());
@@ -90,7 +95,7 @@ void CLTuner::tune_kernel_dynamic(ICLKernel &kernel)
if(_tune_new_kernels)
{
// Find the optimal LWS for the kernel
- cl::NDRange opt_lws = find_optimal_lws(kernel);
+ cl::NDRange opt_lws = find_optimal_lws(kernel, inputs, outputs);
// Insert the optimal LWS in the table
add_lws_to_table(config_id, opt_lws);
@@ -112,7 +117,7 @@ void CLTuner::add_lws_to_table(const std::string &kernel_id, cl::NDRange optimal
_lws_table.emplace(kernel_id, optimal_lws);
}
-cl::NDRange CLTuner::find_optimal_lws(ICLKernel &kernel)
+cl::NDRange CLTuner::find_optimal_lws(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs)
{
// Profiling queue
cl::CommandQueue queue_profiler;
@@ -167,7 +172,8 @@ cl::NDRange CLTuner::find_optimal_lws(ICLKernel &kernel)
cl::NDRange gws = ICLKernel::gws_from_window(kernel.window());
// Run the kernel with default lws to be used as baseline
- kernel.run(kernel.window(), queue_profiler);
+ const bool inject_memory = !inputs.empty();
+ inject_memory ? kernel.run_op(inputs, outputs, kernel.window(), queue_profiler) : kernel.run(kernel.window(), queue_profiler);
queue_profiler.finish();
@@ -178,7 +184,7 @@ cl::NDRange CLTuner::find_optimal_lws(ICLKernel &kernel)
cl::NDRange opt_lws = cl::NullRange;
- //Construct the list of LWS values to be tested based on the tuner mode.
+ // Construct the list of LWS values to be tested based on the tuner mode.
auto lws_list = cl_tuner::CLLWSListFactory::get_lws_list(_tuner_mode, gws);
for(size_t i = 0; i < lws_list->size(); ++i)
{
@@ -197,7 +203,7 @@ cl::NDRange CLTuner::find_optimal_lws(ICLKernel &kernel)
kernel.set_lws_hint(lws_test);
// Run the kernel
- kernel.run(kernel.window(), queue_profiler);
+ inject_memory ? kernel.run_op(inputs, outputs, kernel.window(), queue_profiler) : kernel.run(kernel.window(), queue_profiler);
queue_profiler.finish();