diff options
author | David Svantesson <david.svantesson@arm.com> | 2023-08-02 14:23:00 +0000 |
---|---|---|
committer | David Svantesson <david.svantesson-yeung@arm.com> | 2023-11-24 15:03:05 +0000 |
commit | ded5b182675e3166e947a8eb637b5b1e925816ab (patch) | |
tree | c7bc082e7e8d80f0d33c69fc903be9dcb342e7f3 /src/cpu/operators/CpuWinogradConv2d.cpp | |
parent | e30c8740d2da7af52ae1320f4d597ffc73d41c5e (diff) | |
download | ComputeLibrary-ded5b182675e3166e947a8eb637b5b1e925816ab.tar.gz |
thread_local _custom_scheduler
Resolves ONCPUML-1331
This patch adds an option to make _custom_scheduler thread_local to
support usage of multiple schedulers handled outside of ACL.
It also adds a num_threads() function to Scheduler, which falls back to
querying CPUInfo if no scheduler has been set.
Change-Id: Iff706165d8d091895331a5bb3a76f6cabe048912
Signed-off-by: David Svantesson-Yeung <david.svantesson-yeung@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10748
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: SiCong Li <sicong.li@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/cpu/operators/CpuWinogradConv2d.cpp')
-rw-r--r-- | src/cpu/operators/CpuWinogradConv2d.cpp | 6 |
1 files changed, 3 insertions, 3 deletions
diff --git a/src/cpu/operators/CpuWinogradConv2d.cpp b/src/cpu/operators/CpuWinogradConv2d.cpp index e4bcdc0b64..1fb6d33a61 100644 --- a/src/cpu/operators/CpuWinogradConv2d.cpp +++ b/src/cpu/operators/CpuWinogradConv2d.cpp @@ -103,7 +103,7 @@ bool get_winograd_kernel_implementation(const ITensorInfo Tensor4DShape in_shape{internal_get_shape(src)}; Tensor4DShape out_shape{internal_get_shape(dst)}; Tensor4DShape kernel_shape{internal_get_shape(weights)}; - uint32_t nthreads = NEScheduler::get().num_threads(); + uint32_t nthreads = NEScheduler::num_threads(); // Get configuration arguments for Winograd winograd_cfg.output_rows = 0; winograd_cfg.output_cols = 0; @@ -183,7 +183,7 @@ void CpuWinogradConv2d::configure(const ITensorInfo *src, ARM_COMPUTE_LOG_PARAMS(src, weights, biases, dst, conv_info, act_info, enable_fast_math); ARM_COMPUTE_UNUSED(biases); const DataType data_type = src->data_type(); - uint32_t nthreads = NEScheduler::get().num_threads(); + uint32_t nthreads = NEScheduler::num_threads(); _data_layout = src->data_layout(); const Tensor4DShape kernel_shape{internal_get_shape(weights)}; @@ -361,7 +361,7 @@ void CpuWinogradConv2d::run(ITensorPack &tensors) auto output = tensors.get_tensor(ACL_DST); Window win; - const uint32_t nthreads = NEScheduler::get().num_threads(); + const uint32_t nthreads = NEScheduler::num_threads(); // The Winograd transform implementation does fine-grain threading inside the transforms. Just pass thread_id and nthreads. win.set(Window::DimX, Window::Dimension(0, nthreads, 1)); |