aboutsummaryrefslogtreecommitdiff
path: root/arm_compute/runtime/IScheduler.h
diff options
context:
space:
mode:
Diffstat (limited to 'arm_compute/runtime/IScheduler.h')
-rw-r--r--arm_compute/runtime/IScheduler.h68
1 files changed, 60 insertions, 8 deletions
diff --git a/arm_compute/runtime/IScheduler.h b/arm_compute/runtime/IScheduler.h
index a5e20ee627..ae204c8560 100644
--- a/arm_compute/runtime/IScheduler.h
+++ b/arm_compute/runtime/IScheduler.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 ARM Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,6 +25,8 @@
#define ARM_COMPUTE_ISCHEDULER_H
#include "arm_compute/core/CPP/CPPTypes.h"
+#include "arm_compute/core/experimental/Types.h"
+#include "arm_compute/core/Types.h"
#include <functional>
#include <limits>
@@ -32,6 +34,8 @@
namespace arm_compute
{
class ICPPKernel;
+class ITensor;
+class Window;
/** Scheduler interface to run kernels */
class IScheduler
@@ -44,6 +48,13 @@ public:
DYNAMIC, /**< Split the workload dynamically using a bucket system */
};
+ /** Function used to map a given thread id to a logical core id
+ *
+ * Mapping function expects the thread index and total number of cores as input,
+ * and returns the logical core index to bind against
+ */
+ using BindFunc = std::function<int(int, int)>;
+
/** When arm_compute::IScheduler::Hints::_split_dimension is initialized with this value
* then the scheduler is free to break down the problem space over as many dimensions
* as it wishes
@@ -116,9 +127,9 @@ public:
}
private:
- unsigned int _split_dimension;
- StrategyHint _strategy;
- int _threshold;
+ unsigned int _split_dimension{};
+ StrategyHint _strategy{};
+ int _threshold{};
};
/** Signature for the workloads to execute */
using Workload = std::function<void(const ThreadInfo &)>;
@@ -134,7 +145,14 @@ public:
*/
virtual void set_num_threads(unsigned int num_threads) = 0;
- /** Returns the number of threads that the SingleThreadScheduler has in his pool.
+ /** Sets the number of threads the scheduler will use to run the kernels and uses a binding function to pin the threads to given logical cores
+ *
+ * @param[in] num_threads If set to 0, then one thread per CPU core available on the system will be used, otherwise the number of threads specified.
+ * @param[in] func Binding function to use.
+ */
+ virtual void set_num_threads_with_affinity(unsigned int num_threads, BindFunc func);
+
+ /** Returns the number of threads that the SingleThreadScheduler has in its pool.
*
* @return Number of threads available in SingleThreadScheduler.
*/
@@ -147,11 +165,20 @@ public:
*/
virtual void schedule(ICPPKernel *kernel, const Hints &hints) = 0;
+ /** Runs the kernel in the same thread as the caller synchronously.
+ *
+ * @param[in] kernel Kernel to execute.
+ * @param[in] hints Hints for the scheduler.
+ * @param[in] window Window to use for kernel execution.
+ * @param[in] tensors Vector containing the tensors to operate on.
+ */
+ virtual void schedule_op(ICPPKernel *kernel, const Hints &hints, const Window &window, ITensorPack &tensors) = 0;
+
/** Execute all the passed workloads
*
- * @note there is no guarantee regarding the order in which the workloads will be executed or whether or not they will be executed in parallel.
+ * @note There is no guarantee regarding the order in which the workloads will be executed or whether or not they will be executed in parallel.
*
- * @param[in] workloads Array of workloads to run
+ * @param[in] workloads List of workloads to run
* @param[in] tag String that can be used by profiling tools to identify the workloads run by the scheduler (Can be null).
*/
virtual void run_tagged_workloads(std::vector<Workload> &workloads, const char *tag);
@@ -178,7 +205,32 @@ protected:
* @param[in] workloads Array of workloads to run
*/
virtual void run_workloads(std::vector<Workload> &workloads) = 0;
- CPUInfo _cpu_info;
+
+ /** Common scheduler logic to execute the given kernel
+ *
+ * @param[in] kernel Kernel to execute.
+ * @param[in] hints Hints for the scheduler.
+ * @param[in] window Window to use for kernel execution.
+ * @param[in] tensors Vector containing the tensors to operate on.
+ */
+ void schedule_common(ICPPKernel *kernel, const Hints &hints, const Window &window, ITensorPack &tensors);
+
+ /** Adjust the number of windows to optimize performance
+ * (used for small workloads where a smaller number of threads might improve the performance)
+ *
+ * @param[in] window Window to use for kernel execution
+ * @param[in] split_dimension Axis of dimension to split
+ * @param[in] init_num_windows Initial number of sub-windows to split
+ * @param[in] kernel Kernel to execute
+ * @param[in] cpu_info The CPU platform used to create the context.
+ *
+ * @return Adjusted number of windows
+ */
+ std::size_t adjust_num_of_windows(const Window &window,
+ std::size_t split_dimension,
+ std::size_t init_num_windows,
+ const ICPPKernel &kernel,
+ const CPUInfo &cpu_info);
private:
unsigned int _num_threads_hint = {};