aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Pablo Marquez Tello <pablo.tello@arm.com> 2023-12-05 15:44:50 +0000
committer: Pablo Marquez Tello <pablo.tello@arm.com> 2023-12-06 15:05:15 +0000
commit: 17e116e90e6b962a09c133c646b6ad7884e94693 (patch)
tree: 7c0bba172a510fb0793d2052b3ce5f1d3174abc9
parent: fadc9b1e0bba90d6a91beb65466b2a0895b3a5e4 (diff)
download: ComputeLibrary-17e116e90e6b962a09c133c646b6ad7884e94693.tar.gz
Revert "thread_local _custom_scheduler"
This reverts commit ded5b182675e3166e947a8eb637b5b1e925816ab.

Resolves COMPMID-6735

Signed-off-by: Pablo Marquez Tello <pablo.tello@arm.com>
Change-Id: I9b69ca1ec80a671171d3f52081c4b8c61a676617
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10838
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: <felixjohnny.thomasmathibalan@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r-- .bazelrc 1
-rw-r--r-- BUILD.bazel 16
-rw-r--r-- arm_compute/runtime/Scheduler.h 20
-rw-r--r-- cmake/Options.cmake 4
-rw-r--r-- src/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp 4
-rw-r--r-- src/cpu/operators/CpuPool2d.cpp 4
-rw-r--r-- src/cpu/operators/CpuWinogradConv2d.cpp 6
-rw-r--r-- src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp 24
-rw-r--r-- src/runtime/Scheduler.cpp 32
9 files changed, 25 insertions, 86 deletions
diff --git a/.bazelrc b/.bazelrc
index f74649d731..1dbbedc8bd 100644
--- a/.bazelrc
+++ b/.bazelrc
@@ -39,4 +39,3 @@ build --flag_alias=cppthreads=//:cppthreads
build --flag_alias=enable_bf16_validation=//:enable_bf16_validation
build --flag_alias=enable_sve_validation=//:enable_sve_validation
build --flag_alias=arch=//:arch
-build --flag_alias=thread_local_scheduler=//:thread_local_scheduler
diff --git a/BUILD.bazel b/BUILD.bazel
index 50340c6c39..3a7d941a0e 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -78,12 +78,6 @@ bool_flag(
visibility = ["//visibility:public"],
)
-bool_flag(
- name = "thread_local_scheduler",
- build_setting_default = False,
- visibility = ["//visibility:public"],
-)
-
string_flag(
name = "arch",
build_setting_default = "armv8-a",
@@ -158,12 +152,6 @@ config_setting(
}
)
-config_setting(
- name = "thread_local_scheduler_flag",
- flag_values = {
- ":thread_local_scheduler": "true",
- },
-)
#---------------------------------------------------------------------
# Common defines used for all targets
@@ -208,10 +196,6 @@ cc_library(
"//:arch_armv8-a": [],
"//:arch_armv8.2-a+fp16": ["ENABLE_FP16_KERNELS", "ARM_COMPUTE_ENABLE_FP16"],
"//conditions:default": [],
- }) +
- select({
- "//:thread_local_scheduler_flag": ["ARM_COMPUTE_THREAD_LOCAL_SCHEDULER"],
- "//conditions:default": [],
}),
visibility = ["//visibility:public"],
)
diff --git a/arm_compute/runtime/Scheduler.h b/arm_compute/runtime/Scheduler.h
index 481e5e9b60..7c83f86caa 100644
--- a/arm_compute/runtime/Scheduler.h
+++ b/arm_compute/runtime/Scheduler.h
@@ -1,5 +1,5 @@
/*
-* Copyright (c) 2017-2019, 2023 Arm Limited.
+ * Copyright (c) 2017-2019, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -72,24 +72,10 @@ public:
* @return true if the given scheduler type is supported. False otherwise.
*/
static bool is_available(Type t);
- /** Returns true if a scheduler has been set.
- *
- * @return true if a scheduler has been set. False otherwise.
- */
- static bool is_set();
- /** Returns number of threads from scheduler if scheduler is set, otherwise queries CPUInfo.
- *
- * @return number of threads from scheduler if scheduler is set, otherwise queries CPUInfo.
- */
- static unsigned int num_threads();
private:
- static Type _scheduler_type;
-#ifndef ARM_COMPUTE_THREAD_LOCAL_SCHEDULER
- static std::shared_ptr<IScheduler> _custom_scheduler;
-#else // ARM_COMPUTE_THREAD_LOCAL_SCHEDULER
- static std::shared_ptr<IScheduler> thread_local _custom_scheduler;
-#endif // ARM_COMPUTE_THREAD_LOCAL_SCHEDULER
+ static Type _scheduler_type;
+ static std::shared_ptr<IScheduler> _custom_scheduler;
static std::map<Type, std::unique_ptr<IScheduler>> _schedulers;
Scheduler();
diff --git a/cmake/Options.cmake b/cmake/Options.cmake
index 722c55c90a..e5c8cb8efe 100644
--- a/cmake/Options.cmake
+++ b/cmake/Options.cmake
@@ -65,7 +65,6 @@ option(ENABLE_NCHW_KERNELS "" ON)
option(ARM_COMPUTE_GRAPH_ENABLED "" ON)
option(ARM_COMPUTE_ENABLE_SVEF32MM "" ON)
option(ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS "" ON)
-option(ARM_COMPUTE_THREAD_LOCAL_SCHEDULER "" OFF)
option(ENABLE_FP16_KERNELS "" OFF)
option(ARM_COMPUTE_ENABLE_FP16 "" OFF)
@@ -117,7 +116,4 @@ endif()
if(ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS)
add_definitions(-DARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS)
endif()
-if(ARM_COMPUTE_THREAD_LOCAL_SCHEDULER)
- add_definitions(-DARM_COMPUTE_THREAD_LOCAL_SCHEDULER)
-endif()
add_definitions(-D_GLIBCXX_USE_NANOSLEEP)
diff --git a/src/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp b/src/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp
index 8507c59e6b..8d3741de96 100644
--- a/src/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp
+++ b/src/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp
@@ -60,8 +60,8 @@ void CpuDepthwiseConv2dAssemblyDispatch::configure(const ITensorInfo *src,
const ConvolutionInfo &info)
{
ARM_COMPUTE_LOG_PARAMS(src, weights, bias, dst, info);
- const CPUInfo &ci = CPUInfo::get();
- const unsigned int num_threads = NEScheduler::num_threads();
+ const CPUInfo &ci = NEScheduler::get().cpu_info();
+ const unsigned int num_threads = NEScheduler::get().num_threads();
_pImpl->is_prepared = false;
_pImpl->are_weights_const = weights->are_values_constant();
diff --git a/src/cpu/operators/CpuPool2d.cpp b/src/cpu/operators/CpuPool2d.cpp
index d00efd191d..b72bde6978 100644
--- a/src/cpu/operators/CpuPool2d.cpp
+++ b/src/cpu/operators/CpuPool2d.cpp
@@ -69,8 +69,8 @@ void CpuPool2d::configure(ITensorInfo *src, ITensorInfo *dst, const PoolingLayer
if (run_optimised)
{
- const CPUInfo &ci = CPUInfo::get();
- const unsigned int num_threads = NEScheduler::num_threads();
+ const CPUInfo &ci = NEScheduler::get().cpu_info();
+ const unsigned int num_threads = NEScheduler::get().num_threads();
auto pooling_wrapper = std::make_unique<kernels::CpuPool2dAssemblyWrapperKernel>();
ARM_COMPUTE_ERROR_ON(pooling_wrapper == nullptr);
diff --git a/src/cpu/operators/CpuWinogradConv2d.cpp b/src/cpu/operators/CpuWinogradConv2d.cpp
index 1fb6d33a61..e4bcdc0b64 100644
--- a/src/cpu/operators/CpuWinogradConv2d.cpp
+++ b/src/cpu/operators/CpuWinogradConv2d.cpp
@@ -103,7 +103,7 @@ bool get_winograd_kernel_implementation(const ITensorInfo
Tensor4DShape in_shape{internal_get_shape(src)};
Tensor4DShape out_shape{internal_get_shape(dst)};
Tensor4DShape kernel_shape{internal_get_shape(weights)};
- uint32_t nthreads = NEScheduler::num_threads();
+ uint32_t nthreads = NEScheduler::get().num_threads();
// Get configuration arguments for Winograd
winograd_cfg.output_rows = 0;
winograd_cfg.output_cols = 0;
@@ -183,7 +183,7 @@ void CpuWinogradConv2d::configure(const ITensorInfo *src,
ARM_COMPUTE_LOG_PARAMS(src, weights, biases, dst, conv_info, act_info, enable_fast_math);
ARM_COMPUTE_UNUSED(biases);
const DataType data_type = src->data_type();
- uint32_t nthreads = NEScheduler::num_threads();
+ uint32_t nthreads = NEScheduler::get().num_threads();
_data_layout = src->data_layout();
const Tensor4DShape kernel_shape{internal_get_shape(weights)};
@@ -361,7 +361,7 @@ void CpuWinogradConv2d::run(ITensorPack &tensors)
auto output = tensors.get_tensor(ACL_DST);
Window win;
- const uint32_t nthreads = NEScheduler::num_threads();
+ const uint32_t nthreads = NEScheduler::get().num_threads();
// The Winograd transform implementation does fine-grain threading inside the transforms. Just pass thread_id and nthreads.
win.set(Window::DimX, Window::Dimension(0, nthreads, 1));
diff --git a/src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp b/src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp
index 7f851aa755..611bc76463 100644
--- a/src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp
+++ b/src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp
@@ -579,8 +579,9 @@ void Fallback<TypeInput, TypeOutput, OutputStage>::prepare(ITensorPack &tensors)
CpuAuxTensorHandler pretranspose(offset_int_vec(Pretranspose), _pretranspose_info, tensors, false);
ARM_COMPUTE_ERROR_ON(pretranspose.get()->buffer() == nullptr);
- run_parallel_pretranspose_B_array<TypeInput, TypeOutput>(
- _gemm_kernel_asm.get(), pretranspose.get(), in1_ptr, ldb, multi_stride_b, NEScheduler::num_threads());
+ run_parallel_pretranspose_B_array<TypeInput, TypeOutput>(_gemm_kernel_asm.get(), pretranspose.get(),
+ in1_ptr, ldb, multi_stride_b,
+ NEScheduler::get().num_threads());
b->mark_as_unused();
// Note that we don't need to mark b_to_use as unused, as if it's been assigned to pre_pretransposed_b, its memory will be auto-managed by the handler
@@ -690,8 +691,9 @@ void Fallback<TypeInput, TypeOutput, OutputStage>::run(ITensorPack &tensors)
}
else
{
- run_parallel_pretranspose_B_array<TypeInput, TypeOutput>(
- _gemm_kernel_asm.get(), pretranspose.get(), b_ptr, ldb, multi_stride_b, NEScheduler::num_threads());
+ run_parallel_pretranspose_B_array<TypeInput, TypeOutput>(_gemm_kernel_asm.get(), pretranspose.get(),
+ b_ptr, ldb, multi_stride_b,
+ NEScheduler::get().num_threads());
}
}
}
@@ -705,7 +707,7 @@ void Fallback<TypeInput, TypeOutput, OutputStage>::run(ITensorPack &tensors)
_gemm_kernel_asm->set_working_space(reinterpret_cast<void *>(workspace.get()->buffer()));
const unsigned int split_dim = scheduling_hint.split_dimension();
const unsigned int window_size = _gemm_kernel_asm->get_window_size().total_size();
- unsigned int num_threads = NEScheduler::num_threads();
+ unsigned int num_threads = NEScheduler::get().num_threads();
if (window_size < num_threads)
{
num_threads = window_size;
@@ -754,8 +756,8 @@ void create_arm_gemm(std::unique_ptr<CpuGemmAssemblyDispatch::IFallback> &arm_ge
const AsmGemmInfo &info)
{
Params p = extract_parameters(a, b, d, info);
- const CPUInfo &ci = CPUInfo::get();
- unsigned int num_threads = NEScheduler::num_threads();
+ const CPUInfo &ci = NEScheduler::get().cpu_info();
+ unsigned int num_threads = NEScheduler::get().num_threads();
arm_gemm::GemmConfig cfg;
cfg.weight_format = assembly_utils::map_to_arm_gemm_weight_format(info.weight_format);
@@ -779,8 +781,8 @@ void create_arm_gemm_quant(std::unique_ptr<CpuGemmAssemblyDispatch::IFallback> &
{
ARM_COMPUTE_UNUSED(activation);
Params p = extract_parameters(a, b, d, info);
- const CPUInfo &ci = CPUInfo::get();
- const unsigned int num_threads = NEScheduler::num_threads();
+ const CPUInfo &ci = NEScheduler::get().cpu_info();
+ const unsigned int num_threads = NEScheduler::get().num_threads();
arm_gemm::GemmConfig cfg;
cfg.weight_format = assembly_utils::map_to_arm_gemm_weight_format(info.weight_format);
@@ -834,8 +836,8 @@ Status CpuGemmAssemblyDispatch::has_opt_impl(arm_compute::WeightFormat &expected
ARM_COMPUTE_UNUSED(c);
arm_gemm::Activation act = assembly_utils::map_to_arm_gemm_activation(info.activation_info);
Params p = extract_parameters(a, b, d, info);
- const CPUInfo &ci = CPUInfo::get();
- unsigned int num_threads = NEScheduler::num_threads();
+ const CPUInfo &ci = NEScheduler::get().cpu_info();
+ unsigned int num_threads = NEScheduler::get().num_threads();
arm_gemm::GemmConfig cfg;
cfg.weight_format = assembly_utils::map_to_arm_gemm_weight_format(info.weight_format);
arm_gemm::WeightFormat arm_gemm_expected_wf = assembly_utils::map_to_arm_gemm_weight_format(expected_weight_format);
diff --git a/src/runtime/Scheduler.cpp b/src/runtime/Scheduler.cpp
index 6d961f29a5..3f1e96968a 100644
--- a/src/runtime/Scheduler.cpp
+++ b/src/runtime/Scheduler.cpp
@@ -1,5 +1,5 @@
/*
-* Copyright (c) 2017-2020, 2023 Arm Limited.
+ * Copyright (c) 2017-2020, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -40,18 +40,14 @@ using namespace arm_compute;
#if !ARM_COMPUTE_CPP_SCHEDULER && ARM_COMPUTE_OPENMP_SCHEDULER
Scheduler::Type Scheduler::_scheduler_type = Scheduler::Type::OMP;
#elif ARM_COMPUTE_CPP_SCHEDULER && !ARM_COMPUTE_OPENMP_SCHEDULER
-Scheduler::Type Scheduler::_scheduler_type = Scheduler::Type::CPP;
+Scheduler::Type Scheduler::_scheduler_type = Scheduler::Type::CPP;
#elif ARM_COMPUTE_CPP_SCHEDULER && ARM_COMPUTE_OPENMP_SCHEDULER
Scheduler::Type Scheduler::_scheduler_type = Scheduler::Type::CPP;
#else /* ARM_COMPUTE_*_SCHEDULER */
Scheduler::Type Scheduler::_scheduler_type = Scheduler::Type::ST;
#endif /* ARM_COMPUTE_*_SCHEDULER */
-#ifndef ARM_COMPUTE_THREAD_LOCAL_SCHEDULER
std::shared_ptr<IScheduler> Scheduler::_custom_scheduler = nullptr;
-#else // ARM_COMPUTE_THREAD_LOCAL_SCHEDULER
-std::shared_ptr<IScheduler> thread_local Scheduler::_custom_scheduler = nullptr;
-#endif // ARM_COMPUTE_THREAD_LOCAL_SCHEDULER
namespace
{
@@ -78,30 +74,6 @@ void Scheduler::set(Type t)
_scheduler_type = t;
}
-bool Scheduler::is_set()
-{
- if (_scheduler_type == Type::CUSTOM)
- {
- return _custom_scheduler != nullptr;
- }
- else
- {
- return !_schedulers.empty();
- }
-}
-
-unsigned int Scheduler::num_threads()
-{
- if (Scheduler::is_set())
- {
- return Scheduler::get().num_threads();
- }
- else
- {
- return CPUInfo::get().get_cpu_num();
- }
-}
-
bool Scheduler::is_available(Type t)
{
if (t == Type::CUSTOM)