diff options
author | Pablo Marquez Tello <pablo.tello@arm.com> | 2023-12-05 15:44:50 +0000 |
---|---|---|
committer | Pablo Marquez Tello <pablo.tello@arm.com> | 2023-12-06 15:05:15 +0000 |
commit | 17e116e90e6b962a09c133c646b6ad7884e94693 (patch) | |
tree | 7c0bba172a510fb0793d2052b3ce5f1d3174abc9 /src | |
parent | fadc9b1e0bba90d6a91beb65466b2a0895b3a5e4 (diff) | |
download | ComputeLibrary-17e116e90e6b962a09c133c646b6ad7884e94693.tar.gz |
Revert "thread_local _custom_scheduler"
This reverts commit ded5b182675e3166e947a8eb637b5b1e925816ab.
Resolves COMPMID-6735
Signed-off-by: Pablo Marquez Tello <pablo.tello@arm.com>
Change-Id: I9b69ca1ec80a671171d3f52081c4b8c61a676617
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10838
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: <felixjohnny.thomasmathibalan@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src')
-rw-r--r-- | src/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp | 4 | ||||
-rw-r--r-- | src/cpu/operators/CpuPool2d.cpp | 4 | ||||
-rw-r--r-- | src/cpu/operators/CpuWinogradConv2d.cpp | 6 | ||||
-rw-r--r-- | src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp | 24 | ||||
-rw-r--r-- | src/runtime/Scheduler.cpp | 32 |
5 files changed, 22 insertions, 48 deletions
diff --git a/src/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp b/src/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp index 8507c59e6b..8d3741de96 100644 --- a/src/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp +++ b/src/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp @@ -60,8 +60,8 @@ void CpuDepthwiseConv2dAssemblyDispatch::configure(const ITensorInfo *src, const ConvolutionInfo &info) { ARM_COMPUTE_LOG_PARAMS(src, weights, bias, dst, info); - const CPUInfo &ci = CPUInfo::get(); - const unsigned int num_threads = NEScheduler::num_threads(); + const CPUInfo &ci = NEScheduler::get().cpu_info(); + const unsigned int num_threads = NEScheduler::get().num_threads(); _pImpl->is_prepared = false; _pImpl->are_weights_const = weights->are_values_constant(); diff --git a/src/cpu/operators/CpuPool2d.cpp b/src/cpu/operators/CpuPool2d.cpp index d00efd191d..b72bde6978 100644 --- a/src/cpu/operators/CpuPool2d.cpp +++ b/src/cpu/operators/CpuPool2d.cpp @@ -69,8 +69,8 @@ void CpuPool2d::configure(ITensorInfo *src, ITensorInfo *dst, const PoolingLayer if (run_optimised) { - const CPUInfo &ci = CPUInfo::get(); - const unsigned int num_threads = NEScheduler::num_threads(); + const CPUInfo &ci = NEScheduler::get().cpu_info(); + const unsigned int num_threads = NEScheduler::get().num_threads(); auto pooling_wrapper = std::make_unique<kernels::CpuPool2dAssemblyWrapperKernel>(); ARM_COMPUTE_ERROR_ON(pooling_wrapper == nullptr); diff --git a/src/cpu/operators/CpuWinogradConv2d.cpp b/src/cpu/operators/CpuWinogradConv2d.cpp index 1fb6d33a61..e4bcdc0b64 100644 --- a/src/cpu/operators/CpuWinogradConv2d.cpp +++ b/src/cpu/operators/CpuWinogradConv2d.cpp @@ -103,7 +103,7 @@ bool get_winograd_kernel_implementation(const ITensorInfo Tensor4DShape in_shape{internal_get_shape(src)}; Tensor4DShape out_shape{internal_get_shape(dst)}; Tensor4DShape kernel_shape{internal_get_shape(weights)}; - uint32_t nthreads = NEScheduler::num_threads(); + uint32_t nthreads = NEScheduler::get().num_threads(); // Get configuration arguments for Winograd winograd_cfg.output_rows = 0; winograd_cfg.output_cols = 0; @@ -183,7 +183,7 @@ void CpuWinogradConv2d::configure(const ITensorInfo *src, ARM_COMPUTE_LOG_PARAMS(src, weights, biases, dst, conv_info, act_info, enable_fast_math); ARM_COMPUTE_UNUSED(biases); const DataType data_type = src->data_type(); - uint32_t nthreads = NEScheduler::num_threads(); + uint32_t nthreads = NEScheduler::get().num_threads(); _data_layout = src->data_layout(); const Tensor4DShape kernel_shape{internal_get_shape(weights)}; @@ -361,7 +361,7 @@ void CpuWinogradConv2d::run(ITensorPack &tensors) auto output = tensors.get_tensor(ACL_DST); Window win; - const uint32_t nthreads = NEScheduler::num_threads(); + const uint32_t nthreads = NEScheduler::get().num_threads(); // The Winograd transform implementation does fine-grain threading inside the transforms. Just pass thread_id and nthreads. win.set(Window::DimX, Window::Dimension(0, nthreads, 1)); diff --git a/src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp b/src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp index 7f851aa755..611bc76463 100644 --- a/src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp +++ b/src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp @@ -579,8 +579,9 @@ void Fallback<TypeInput, TypeOutput, OutputStage>::prepare(ITensorPack &tensors) CpuAuxTensorHandler pretranspose(offset_int_vec(Pretranspose), _pretranspose_info, tensors, false); ARM_COMPUTE_ERROR_ON(pretranspose.get()->buffer() == nullptr); - run_parallel_pretranspose_B_array<TypeInput, TypeOutput>( - _gemm_kernel_asm.get(), pretranspose.get(), in1_ptr, ldb, multi_stride_b, NEScheduler::num_threads()); + run_parallel_pretranspose_B_array<TypeInput, TypeOutput>(_gemm_kernel_asm.get(), pretranspose.get(), + in1_ptr, ldb, multi_stride_b, + NEScheduler::get().num_threads()); b->mark_as_unused(); // Note that we don't need to mark b_to_use as unused, as if it's been assigned to pre_pretransposed_b, its memory will be auto-managed by the handler @@ -690,8 +691,9 @@ void Fallback<TypeInput, TypeOutput, OutputStage>::run(ITensorPack &tensors) } else { - run_parallel_pretranspose_B_array<TypeInput, TypeOutput>( - _gemm_kernel_asm.get(), pretranspose.get(), b_ptr, ldb, multi_stride_b, NEScheduler::num_threads()); + run_parallel_pretranspose_B_array<TypeInput, TypeOutput>(_gemm_kernel_asm.get(), pretranspose.get(), + b_ptr, ldb, multi_stride_b, + NEScheduler::get().num_threads()); } } } @@ -705,7 +707,7 @@ void Fallback<TypeInput, TypeOutput, OutputStage>::run(ITensorPack &tensors) _gemm_kernel_asm->set_working_space(reinterpret_cast<void *>(workspace.get()->buffer())); const unsigned int split_dim = scheduling_hint.split_dimension(); const unsigned int window_size = _gemm_kernel_asm->get_window_size().total_size(); - unsigned int num_threads = NEScheduler::num_threads(); + unsigned int num_threads = NEScheduler::get().num_threads(); if (window_size < num_threads) { num_threads = window_size; @@ -754,8 +756,8 @@ void create_arm_gemm(std::unique_ptr<CpuGemmAssemblyDispatch::IFallback> &arm_ge const AsmGemmInfo &info) { Params p = extract_parameters(a, b, d, info); - const CPUInfo &ci = CPUInfo::get(); - unsigned int num_threads = NEScheduler::num_threads(); + const CPUInfo &ci = NEScheduler::get().cpu_info(); + unsigned int num_threads = NEScheduler::get().num_threads(); arm_gemm::GemmConfig cfg; cfg.weight_format = assembly_utils::map_to_arm_gemm_weight_format(info.weight_format); @@ -779,8 +781,8 @@ void create_arm_gemm_quant(std::unique_ptr<CpuGemmAssemblyDispatch::IFallback> & { ARM_COMPUTE_UNUSED(activation); Params p = extract_parameters(a, b, d, info); - const CPUInfo &ci = CPUInfo::get(); - const unsigned int num_threads = NEScheduler::num_threads(); + const CPUInfo &ci = NEScheduler::get().cpu_info(); + const unsigned int num_threads = NEScheduler::get().num_threads(); arm_gemm::GemmConfig cfg; cfg.weight_format = assembly_utils::map_to_arm_gemm_weight_format(info.weight_format); @@ -834,8 +836,8 @@ Status CpuGemmAssemblyDispatch::has_opt_impl(arm_compute::WeightFormat &expected ARM_COMPUTE_UNUSED(c); arm_gemm::Activation act = assembly_utils::map_to_arm_gemm_activation(info.activation_info); Params p = extract_parameters(a, b, d, info); - const CPUInfo &ci = CPUInfo::get(); - unsigned int num_threads = NEScheduler::num_threads(); + const CPUInfo &ci = NEScheduler::get().cpu_info(); + unsigned int num_threads = NEScheduler::get().num_threads(); arm_gemm::GemmConfig cfg; cfg.weight_format = assembly_utils::map_to_arm_gemm_weight_format(info.weight_format); arm_gemm::WeightFormat arm_gemm_expected_wf = assembly_utils::map_to_arm_gemm_weight_format(expected_weight_format); diff --git a/src/runtime/Scheduler.cpp b/src/runtime/Scheduler.cpp index 6d961f29a5..3f1e96968a 100644 --- a/src/runtime/Scheduler.cpp +++ b/src/runtime/Scheduler.cpp @@ -1,5 +1,5 @@ /* -* Copyright (c) 2017-2020, 2023 Arm Limited. + * Copyright (c) 2017-2020, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -40,18 +40,14 @@ using namespace arm_compute; #if !ARM_COMPUTE_CPP_SCHEDULER && ARM_COMPUTE_OPENMP_SCHEDULER Scheduler::Type Scheduler::_scheduler_type = Scheduler::Type::OMP; #elif ARM_COMPUTE_CPP_SCHEDULER && !ARM_COMPUTE_OPENMP_SCHEDULER -Scheduler::Type Scheduler::_scheduler_type = Scheduler::Type::CPP; +Scheduler::Type Scheduler::_scheduler_type = Scheduler::Type::CPP; #elif ARM_COMPUTE_CPP_SCHEDULER && ARM_COMPUTE_OPENMP_SCHEDULER Scheduler::Type Scheduler::_scheduler_type = Scheduler::Type::CPP; #else /* ARM_COMPUTE_*_SCHEDULER */ Scheduler::Type Scheduler::_scheduler_type = Scheduler::Type::ST; #endif /* ARM_COMPUTE_*_SCHEDULER */ -#ifndef ARM_COMPUTE_THREAD_LOCAL_SCHEDULER std::shared_ptr<IScheduler> Scheduler::_custom_scheduler = nullptr; -#else // ARM_COMPUTE_THREAD_LOCAL_SCHEDULER -std::shared_ptr<IScheduler> thread_local Scheduler::_custom_scheduler = nullptr; -#endif // ARM_COMPUTE_THREAD_LOCAL_SCHEDULER namespace { @@ -78,30 +74,6 @@ void Scheduler::set(Type t) _scheduler_type = t; } -bool Scheduler::is_set() -{ - if (_scheduler_type == Type::CUSTOM) - { - return _custom_scheduler != nullptr; - } - else - { - return !_schedulers.empty(); - } -} - -unsigned int Scheduler::num_threads() -{ - if (Scheduler::is_set()) - { - return Scheduler::get().num_threads(); - } - else - { - return CPUInfo::get().get_cpu_num(); - } -} - bool Scheduler::is_available(Type t) { if (t == Type::CUSTOM) |