aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arm_compute/runtime/CL/CLTuner.h5
-rw-r--r--arm_compute/runtime/CL/ICLTuner.h13
-rw-r--r--arm_compute/runtime/CL/tuners/BifrostTuner.h3
-rw-r--r--arm_compute/runtime/CL/tuners/MidgardTuner.h3
-rw-r--r--src/runtime/CL/CLScheduler.cpp14
-rw-r--r--src/runtime/CL/CLTuner.cpp16
-rw-r--r--src/runtime/CL/tuners/BifrostTuner.cpp5
-rw-r--r--src/runtime/CL/tuners/MidgardTuner.cpp7
8 files changed, 44 insertions, 22 deletions
diff --git a/arm_compute/runtime/CL/CLTuner.h b/arm_compute/runtime/CL/CLTuner.h
index 745d57a959..aa31181d2d 100644
--- a/arm_compute/runtime/CL/CLTuner.h
+++ b/arm_compute/runtime/CL/CLTuner.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -116,6 +116,7 @@ public:
// Inherited methods overridden:
void tune_kernel_static(ICLKernel &kernel) override;
void tune_kernel_dynamic(ICLKernel &kernel) override;
+ void tune_kernel_dynamic(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs) override;
/** Is the kernel_event set ?
*
@@ -130,7 +131,7 @@ private:
*
* @return The optimal LWS to use
*/
- cl::NDRange find_optimal_lws(ICLKernel &kernel);
+ cl::NDRange find_optimal_lws(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs);
std::unordered_map<std::string, cl::NDRange> _lws_table;
cl::Event _kernel_event;
diff --git a/arm_compute/runtime/CL/ICLTuner.h b/arm_compute/runtime/CL/ICLTuner.h
index 0b238180eb..4bc8ddf632 100644
--- a/arm_compute/runtime/CL/ICLTuner.h
+++ b/arm_compute/runtime/CL/ICLTuner.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,6 +24,8 @@
#ifndef ARM_COMPUTE_ICLTUNER_H
#define ARM_COMPUTE_ICLTUNER_H
+#include "arm_compute/core/experimental/Types.h"
+
namespace arm_compute
{
class ICLKernel;
@@ -49,6 +51,13 @@ public:
* @param[in] kernel Kernel to tune
*/
virtual void tune_kernel_dynamic(ICLKernel &kernel) = 0;
+ /** Tune OpenCL kernel dynamically
+ *
+ * @param[in] kernel Kernel to tune
+ * @param[in] inputs Inputs for the kernel to use
+ * @param[in, out] outputs Outputs for the kernel to use
+ */
+ virtual void tune_kernel_dynamic(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs) = 0;
};
-}
+} // namespace arm_compute
#endif /*ARM_COMPUTE_ICLTUNER_H */
diff --git a/arm_compute/runtime/CL/tuners/BifrostTuner.h b/arm_compute/runtime/CL/tuners/BifrostTuner.h
index b7ce6e96f9..830f7d9067 100644
--- a/arm_compute/runtime/CL/tuners/BifrostTuner.h
+++ b/arm_compute/runtime/CL/tuners/BifrostTuner.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -37,6 +37,7 @@ public:
// Inherited overriden methods
void tune_kernel_static(ICLKernel &kernel) override;
void tune_kernel_dynamic(ICLKernel &kernel) override;
+ void tune_kernel_dynamic(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs) override;
};
} // namespace tuners
} // namespace arm_compute
diff --git a/arm_compute/runtime/CL/tuners/MidgardTuner.h b/arm_compute/runtime/CL/tuners/MidgardTuner.h
index 418b80728d..c702e7a2aa 100644
--- a/arm_compute/runtime/CL/tuners/MidgardTuner.h
+++ b/arm_compute/runtime/CL/tuners/MidgardTuner.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -37,6 +37,7 @@ public:
// Inherited overriden methods
void tune_kernel_static(ICLKernel &kernel) override;
void tune_kernel_dynamic(ICLKernel &kernel) override;
+ void tune_kernel_dynamic(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs) override;
};
} // namespace tuners
} // namespace arm_compute
diff --git a/src/runtime/CL/CLScheduler.cpp b/src/runtime/CL/CLScheduler.cpp
index 56f5f212a8..5ef66f456a 100644
--- a/src/runtime/CL/CLScheduler.cpp
+++ b/src/runtime/CL/CLScheduler.cpp
@@ -157,22 +157,16 @@ void CLScheduler::enqueue_common(ICLKernel &kernel, const InputTensorMap &inputs
"The CLScheduler is not initialised yet! Please call the CLScheduler::get().default_init(), \
or CLScheduler::get()::init() and CLKernelLibrary::get()::init() function before running functions!");
+ const bool inject_memory = !inputs.empty();
+
// Tune the kernel if the CLTuner has been provided
if(_cl_tuner != nullptr)
{
- // Tune the OpenCL kernel
- _cl_tuner->tune_kernel_dynamic(kernel);
+ inject_memory ? _cl_tuner->tune_kernel_dynamic(kernel, inputs, outputs) : _cl_tuner->tune_kernel_dynamic(kernel);
}
// Run kernel
- if(inputs.empty())
- {
- kernel.run(kernel.window(), _queue);
- }
- else
- {
- kernel.run_op(inputs, outputs, kernel.window(), _queue);
- }
+ inject_memory ? kernel.run_op(inputs, outputs, kernel.window(), _queue) : kernel.run(kernel.window(), _queue);
if(flush)
{
diff --git a/src/runtime/CL/CLTuner.cpp b/src/runtime/CL/CLTuner.cpp
index e3119c1db9..b2e3476e20 100644
--- a/src/runtime/CL/CLTuner.cpp
+++ b/src/runtime/CL/CLTuner.cpp
@@ -77,6 +77,11 @@ void CLTuner::tune_kernel_static(ICLKernel &kernel)
void CLTuner::tune_kernel_dynamic(ICLKernel &kernel)
{
+ tune_kernel_dynamic(kernel, {}, {}); // Delegate to the tensor-map overload with empty maps; any profiling done in find_optimal_lws() then goes through kernel.run() rather than kernel.run_op()
+}
+
+void CLTuner::tune_kernel_dynamic(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs)
+{
// Get the configuration ID from the kernel and append GPU target name and number of available compute units
const std::string config_id = kernel.config_id() + "_" + string_from_target(kernel.get_target()) + "_MP" + support::cpp11::to_string(CLKernelLibrary::get().get_num_compute_units());
@@ -90,7 +95,7 @@ void CLTuner::tune_kernel_dynamic(ICLKernel &kernel)
if(_tune_new_kernels)
{
// Find the optimal LWS for the kernel
- cl::NDRange opt_lws = find_optimal_lws(kernel);
+ cl::NDRange opt_lws = find_optimal_lws(kernel, inputs, outputs);
// Insert the optimal LWS in the table
add_lws_to_table(config_id, opt_lws);
@@ -112,7 +117,7 @@ void CLTuner::add_lws_to_table(const std::string &kernel_id, cl::NDRange optimal
_lws_table.emplace(kernel_id, optimal_lws);
}
-cl::NDRange CLTuner::find_optimal_lws(ICLKernel &kernel)
+cl::NDRange CLTuner::find_optimal_lws(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs)
{
// Profiling queue
cl::CommandQueue queue_profiler;
@@ -167,7 +172,8 @@ cl::NDRange CLTuner::find_optimal_lws(ICLKernel &kernel)
cl::NDRange gws = ICLKernel::gws_from_window(kernel.window());
// Run the kernel with default lws to be used as baseline
- kernel.run(kernel.window(), queue_profiler);
+ const bool inject_memory = !inputs.empty();
+ inject_memory ? kernel.run_op(inputs, outputs, kernel.window(), queue_profiler) : kernel.run(kernel.window(), queue_profiler);
queue_profiler.finish();
@@ -178,7 +184,7 @@ cl::NDRange CLTuner::find_optimal_lws(ICLKernel &kernel)
cl::NDRange opt_lws = cl::NullRange;
- //Construct the list of LWS values to be tested based on the tuner mode.
+ // Construct the list of LWS values to be tested based on the tuner mode.
auto lws_list = cl_tuner::CLLWSListFactory::get_lws_list(_tuner_mode, gws);
for(size_t i = 0; i < lws_list->size(); ++i)
{
@@ -197,7 +203,7 @@ cl::NDRange CLTuner::find_optimal_lws(ICLKernel &kernel)
kernel.set_lws_hint(lws_test);
// Run the kernel
- kernel.run(kernel.window(), queue_profiler);
+ inject_memory ? kernel.run_op(inputs, outputs, kernel.window(), queue_profiler) : kernel.run(kernel.window(), queue_profiler);
queue_profiler.finish();
diff --git a/src/runtime/CL/tuners/BifrostTuner.cpp b/src/runtime/CL/tuners/BifrostTuner.cpp
index 3fecd04455..1797c2ceb1 100644
--- a/src/runtime/CL/tuners/BifrostTuner.cpp
+++ b/src/runtime/CL/tuners/BifrostTuner.cpp
@@ -315,5 +315,10 @@ void BifrostTuner::tune_kernel_dynamic(ICLKernel &kernel)
{
ARM_COMPUTE_UNUSED(kernel);
}
+
+void BifrostTuner::tune_kernel_dynamic(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs)
+{
+ ARM_COMPUTE_UNUSED(kernel, inputs, outputs); // Only statement in this overload: the arguments are not used beyond this macro (presumably an unused-parameter marker - confirm against its definition)
+}
} // namespace tuners
} // namespace arm_compute
\ No newline at end of file
diff --git a/src/runtime/CL/tuners/MidgardTuner.cpp b/src/runtime/CL/tuners/MidgardTuner.cpp
index a95ca1998e..68c98cebe7 100644
--- a/src/runtime/CL/tuners/MidgardTuner.cpp
+++ b/src/runtime/CL/tuners/MidgardTuner.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -73,5 +73,10 @@ void MidgardTuner::tune_kernel_dynamic(ICLKernel &kernel)
{
ARM_COMPUTE_UNUSED(kernel);
}
+
+void MidgardTuner::tune_kernel_dynamic(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs)
+{
+ ARM_COMPUTE_UNUSED(kernel, inputs, outputs); // Only statement in this overload: the arguments are not used beyond this macro (presumably an unused-parameter marker - confirm against its definition)
+}
} // namespace tuners
} // namespace arm_compute