1 files changed, 60 insertions, 8 deletions
diff --git a/arm_compute/runtime/IScheduler.h b/arm_compute/runtime/IScheduler.h
index a5e20ee627..ae204c8560 100644
--- a/arm_compute/runtime/IScheduler.h
+++ b/arm_compute/runtime/IScheduler.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020 ARM Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -25,6 +25,8 @@
 #define ARM_COMPUTE_ISCHEDULER_H
 
 #include "arm_compute/core/CPP/CPPTypes.h"
+#include "arm_compute/core/experimental/Types.h"
+#include "arm_compute/core/Types.h"
 
 #include <functional>
 #include <limits>
@@ -32,6 +34,8 @@
 namespace arm_compute
 {
 class ICPPKernel;
+class ITensor;
+class Window;
 
 /** Scheduler interface to run kernels */
 class IScheduler
@@ -44,6 +48,13 @@ public:
         DYNAMIC, /**< Split the workload dynamically using a bucket system */
     };
 
+    /** Function used to map a given thread id to a logical core id
+     *
+     * Mapping function expects the thread index and total number of cores as input,
+     * and returns the logical core index to bind against
+     */
+    using BindFunc = std::function<int(int, int)>;
+
     /** When arm_compute::ISchedular::Hints::_split_dimension is initialized with this value
      * then the schedular is free to break down the problem space over as many dimensions
      * as it wishes
@@ -116,9 +127,9 @@ public:
         }
 
     private:
-        unsigned int _split_dimension;
-        StrategyHint _strategy;
-        int          _threshold;
+        unsigned int _split_dimension{};
+        StrategyHint _strategy{};
+        int          _threshold{};
     };
     /** Signature for the workloads to execute */
     using Workload = std::function<void(const ThreadInfo &)>;
@@ -134,7 +145,14 @@ public:
      */
     virtual void set_num_threads(unsigned int num_threads) = 0;
 
-    /** Returns the number of threads that the SingleThreadScheduler has in his pool.
+    /** Sets the number of threads the scheduler will use to run the kernels, using a binding function to pin the threads to given logical cores
+     *
+     * @param[in] num_threads If set to 0, then one thread per CPU core available on the system will be used, otherwise the number of threads specified.
+     * @param[in] func        Binding function to use.
+     */
+    virtual void set_num_threads_with_affinity(unsigned int num_threads, BindFunc func);
+
+    /** Returns the number of threads that the SingleThreadScheduler has in its pool.
      *
      * @return Number of threads available in SingleThreadScheduler.
      */
@@ -147,11 +165,20 @@ public:
      */
     virtual void schedule(ICPPKernel *kernel, const Hints &hints) = 0;
 
+    /** Runs the kernel in the same thread as the caller synchronously.
+     *
+     * @param[in] kernel  Kernel to execute.
+     * @param[in] hints   Hints for the scheduler.
+     * @param[in] window  Window to use for kernel execution.
+     * @param[in] tensors Tensor pack containing the tensors to operate on.
+     */
+    virtual void schedule_op(ICPPKernel *kernel, const Hints &hints, const Window &window, ITensorPack &tensors) = 0;
+
     /** Execute all the passed workloads
      *
-     * @note there is no guarantee regarding the order in which the workloads will be executed or whether or not they will be executed in parallel.
+     * @note There is no guarantee regarding the order in which the workloads will be executed or whether or not they will be executed in parallel.
      *
-     * @param[in] workloads Array of workloads to run
+     * @param[in] workloads List of workloads to run
      * @param[in] tag       String that can be used by profiling tools to identify the workloads run by the scheduler (Can be null).
      */
     virtual void run_tagged_workloads(std::vector<Workload> &workloads, const char *tag);
@@ -178,7 +205,32 @@ protected:
      * @param[in] workloads Array of workloads to run
      */
     virtual void run_workloads(std::vector<Workload> &workloads) = 0;
-    CPUInfo _cpu_info;
+
+    /** Common scheduler logic to execute the given kernel
+     *
+     * @param[in] kernel  Kernel to execute.
+     * @param[in] hints   Hints for the scheduler.
+     * @param[in] window  Window to use for kernel execution.
+     * @param[in] tensors Tensor pack containing the tensors to operate on.
+     */
+    void schedule_common(ICPPKernel *kernel, const Hints &hints, const Window &window, ITensorPack &tensors);
+
+    /** Adjust the number of windows to optimize the performance
+     * (used for small workloads where a smaller number of threads might improve the performance)
+     *
+     * @param[in] window           Window to use for kernel execution
+     * @param[in] split_dimension  Axis of dimension to split
+     * @param[in] init_num_windows Initial number of sub-windows to split
+     * @param[in] kernel           Kernel to execute
+     * @param[in] cpu_info         The CPU platform used to create the context.
+     *
+     * @return Adjusted number of windows
+     */
+    std::size_t adjust_num_of_windows(const Window     &window,
+                                      std::size_t       split_dimension,
+                                      std::size_t       init_num_windows,
+                                      const ICPPKernel &kernel,
+                                      const CPUInfo    &cpu_info);
 
 private:
     unsigned int _num_threads_hint = {};