From 9c82e014260a997fe784affc7e0545972c3511e5 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Fri, 17 Jul 2020 12:47:56 +0100 Subject: COMPMID-3604: Graph failures during tuning Update ICLTuner interface to account for the new memory injection interface. Redirect to appropriate kernel execution interface depending on if the kernel supports memory injection or not. Signed-off-by: Georgios Pinitas Change-Id: I8ce29f5c22f1865c9e688d12b65e68ee4486f99c Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3588 Tested-by: Arm Jenkins Reviewed-by: Michele Di Giorgio Comments-Addressed: Arm Jenkins --- arm_compute/runtime/CL/CLTuner.h | 5 +++-- arm_compute/runtime/CL/ICLTuner.h | 13 +++++++++++-- arm_compute/runtime/CL/tuners/BifrostTuner.h | 3 ++- arm_compute/runtime/CL/tuners/MidgardTuner.h | 3 ++- src/runtime/CL/CLScheduler.cpp | 14 ++++---------- src/runtime/CL/CLTuner.cpp | 16 +++++++++++----- src/runtime/CL/tuners/BifrostTuner.cpp | 5 +++++ src/runtime/CL/tuners/MidgardTuner.cpp | 7 ++++++- 8 files changed, 44 insertions(+), 22 deletions(-) diff --git a/arm_compute/runtime/CL/CLTuner.h b/arm_compute/runtime/CL/CLTuner.h index 745d57a959..aa31181d2d 100644 --- a/arm_compute/runtime/CL/CLTuner.h +++ b/arm_compute/runtime/CL/CLTuner.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -116,6 +116,7 @@ public: // Inherited methods overridden: void tune_kernel_static(ICLKernel &kernel) override; void tune_kernel_dynamic(ICLKernel &kernel) override; + void tune_kernel_dynamic(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs) override; /** Is the kernel_event set ? * @@ -130,7 +131,7 @@ private: * * @return The optimal LWS to use */ - cl::NDRange find_optimal_lws(ICLKernel &kernel); + cl::NDRange find_optimal_lws(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs); std::unordered_map _lws_table; cl::Event _kernel_event; diff --git a/arm_compute/runtime/CL/ICLTuner.h b/arm_compute/runtime/CL/ICLTuner.h index 0b238180eb..4bc8ddf632 100644 --- a/arm_compute/runtime/CL/ICLTuner.h +++ b/arm_compute/runtime/CL/ICLTuner.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,6 +24,8 @@ #ifndef ARM_COMPUTE_ICLTUNER_H #define ARM_COMPUTE_ICLTUNER_H +#include "arm_compute/core/experimental/Types.h" + namespace arm_compute { class ICLKernel; @@ -49,6 +51,13 @@ public: * @param[in] kernel Kernel to tune */ virtual void tune_kernel_dynamic(ICLKernel &kernel) = 0; + /** Tune OpenCL kernel dynamically + * + * @param[in] kernel Kernel to tune + * @param[in] inputs Inputs for the kernel to use + * @param[in, out] outputs Outputs for the kernel to use + */ + virtual void tune_kernel_dynamic(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs) = 0; }; -} +} // namespace arm_compute #endif /*ARM_COMPUTE_ICLTUNER_H */ diff --git a/arm_compute/runtime/CL/tuners/BifrostTuner.h b/arm_compute/runtime/CL/tuners/BifrostTuner.h index b7ce6e96f9..830f7d9067 100644 --- a/arm_compute/runtime/CL/tuners/BifrostTuner.h +++ b/arm_compute/runtime/CL/tuners/BifrostTuner.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -37,6 +37,7 @@ public: // Inherited overriden methods void tune_kernel_static(ICLKernel &kernel) override; void tune_kernel_dynamic(ICLKernel &kernel) override; + void tune_kernel_dynamic(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs) override; }; } // namespace tuners } // namespace arm_compute diff --git a/arm_compute/runtime/CL/tuners/MidgardTuner.h b/arm_compute/runtime/CL/tuners/MidgardTuner.h index 418b80728d..c702e7a2aa 100644 --- a/arm_compute/runtime/CL/tuners/MidgardTuner.h +++ b/arm_compute/runtime/CL/tuners/MidgardTuner.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -37,6 +37,7 @@ public: // Inherited overriden methods void tune_kernel_static(ICLKernel &kernel) override; void tune_kernel_dynamic(ICLKernel &kernel) override; + void tune_kernel_dynamic(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs) override; }; } // namespace tuners } // namespace arm_compute diff --git a/src/runtime/CL/CLScheduler.cpp b/src/runtime/CL/CLScheduler.cpp index 56f5f212a8..5ef66f456a 100644 --- a/src/runtime/CL/CLScheduler.cpp +++ b/src/runtime/CL/CLScheduler.cpp @@ -157,22 +157,16 @@ void CLScheduler::enqueue_common(ICLKernel &kernel, const InputTensorMap &inputs "The CLScheduler is not initialised yet! Please call the CLScheduler::get().default_init(), \ or CLScheduler::get()::init() and CLKernelLibrary::get()::init() function before running functions!"); + const bool inject_memory = !inputs.empty(); + // Tune the kernel if the CLTuner has been provided if(_cl_tuner != nullptr) { - // Tune the OpenCL kernel - _cl_tuner->tune_kernel_dynamic(kernel); + inject_memory ? _cl_tuner->tune_kernel_dynamic(kernel, inputs, outputs) : _cl_tuner->tune_kernel_dynamic(kernel); } // Run kernel - if(inputs.empty()) - { - kernel.run(kernel.window(), _queue); - } - else - { - kernel.run_op(inputs, outputs, kernel.window(), _queue); - } + inject_memory ? kernel.run_op(inputs, outputs, kernel.window(), _queue) : kernel.run(kernel.window(), _queue); if(flush) { diff --git a/src/runtime/CL/CLTuner.cpp b/src/runtime/CL/CLTuner.cpp index e3119c1db9..b2e3476e20 100644 --- a/src/runtime/CL/CLTuner.cpp +++ b/src/runtime/CL/CLTuner.cpp @@ -76,6 +76,11 @@ void CLTuner::tune_kernel_static(ICLKernel &kernel) } void CLTuner::tune_kernel_dynamic(ICLKernel &kernel) +{ + tune_kernel_dynamic(kernel, {}, {}); +} + +void CLTuner::tune_kernel_dynamic(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs) { // Get the configuration ID from the kernel and append GPU target name and number of available compute units const std::string config_id = kernel.config_id() + "_" + string_from_target(kernel.get_target()) + "_MP" + support::cpp11::to_string(CLKernelLibrary::get().get_num_compute_units()); @@ -90,7 +95,7 @@ void CLTuner::tune_kernel_dynamic(ICLKernel &kernel) if(_tune_new_kernels) { // Find the optimal LWS for the kernel - cl::NDRange opt_lws = find_optimal_lws(kernel); + cl::NDRange opt_lws = find_optimal_lws(kernel, inputs, outputs); // Insert the optimal LWS in the table add_lws_to_table(config_id, opt_lws); @@ -112,7 +117,7 @@ void CLTuner::add_lws_to_table(const std::string &kernel_id, cl::NDRange optimal _lws_table.emplace(kernel_id, optimal_lws); } -cl::NDRange CLTuner::find_optimal_lws(ICLKernel &kernel) +cl::NDRange CLTuner::find_optimal_lws(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs) { // Profiling queue cl::CommandQueue queue_profiler; @@ -167,7 +172,8 @@ cl::NDRange CLTuner::find_optimal_lws(ICLKernel &kernel) cl::NDRange gws = ICLKernel::gws_from_window(kernel.window()); // Run the kernel with default lws to be used as baseline - kernel.run(kernel.window(), queue_profiler); + const bool inject_memory = !inputs.empty(); + inject_memory ? kernel.run_op(inputs, outputs, kernel.window(), queue_profiler) : kernel.run(kernel.window(), queue_profiler); queue_profiler.finish(); @@ -178,7 +184,7 @@ cl::NDRange CLTuner::find_optimal_lws(ICLKernel &kernel) cl::NDRange opt_lws = cl::NullRange; - //Construct the list of LWS values to be tested based on the tuner mode. + // Construct the list of LWS values to be tested based on the tuner mode. auto lws_list = cl_tuner::CLLWSListFactory::get_lws_list(_tuner_mode, gws); for(size_t i = 0; i < lws_list->size(); ++i) { @@ -197,7 +203,7 @@ cl::NDRange CLTuner::find_optimal_lws(ICLKernel &kernel) kernel.set_lws_hint(lws_test); // Run the kernel - kernel.run(kernel.window(), queue_profiler); + inject_memory ? kernel.run_op(inputs, outputs, kernel.window(), queue_profiler) : kernel.run(kernel.window(), queue_profiler); queue_profiler.finish(); diff --git a/src/runtime/CL/tuners/BifrostTuner.cpp b/src/runtime/CL/tuners/BifrostTuner.cpp index 3fecd04455..1797c2ceb1 100644 --- a/src/runtime/CL/tuners/BifrostTuner.cpp +++ b/src/runtime/CL/tuners/BifrostTuner.cpp @@ -315,5 +315,10 @@ void BifrostTuner::tune_kernel_dynamic(ICLKernel &kernel) { ARM_COMPUTE_UNUSED(kernel); } + +void BifrostTuner::tune_kernel_dynamic(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs) +{ + ARM_COMPUTE_UNUSED(kernel, inputs, outputs); +} } // namespace tuners } // namespace arm_compute \ No newline at end of file diff --git a/src/runtime/CL/tuners/MidgardTuner.cpp b/src/runtime/CL/tuners/MidgardTuner.cpp index a95ca1998e..68c98cebe7 100644 --- a/src/runtime/CL/tuners/MidgardTuner.cpp +++ b/src/runtime/CL/tuners/MidgardTuner.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -73,5 +73,10 @@ void MidgardTuner::tune_kernel_dynamic(ICLKernel &kernel) { ARM_COMPUTE_UNUSED(kernel); } + +void MidgardTuner::tune_kernel_dynamic(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs) +{ + ARM_COMPUTE_UNUSED(kernel, inputs, outputs); +} } // namespace tuners } // namespace arm_compute -- cgit v1.2.1