diff options
author | Omar Al Khatib <omar.alkhatib@arm.com> | 2024-05-09 16:06:23 +0100 |
---|---|---|
committer | Omar Al Khatib <omar.alkhatib@arm.com> | 2024-05-17 14:20:06 +0000 |
commit | f5053f782daa942126bd61ac1bcfc0af627b7b31 (patch) | |
tree | eb96c568d3c95021860e06f06271345f68969260 | |
parent | 8710385a9feb050f1b5a422ed57df691e8ba078f (diff) | |
download | ComputeLibrary-f5053f782daa942126bd61ac1bcfc0af627b7b31.tar.gz |
Update logic in the OpenMP scheduler to exclude LITTLE cores
On systems with BIG/MID/LITTLE cores, we need to exclude the LITTLE cores.
This change updates CPUInfo to detect the number of LITTLE cores and sets num_threads to TOTAL_CORES - NUM_LITTLE cores.
Resolves [COMPMID-7014]
Signed-off-by: Omar Al Khatib <omar.alkhatib@arm.com>
Change-Id: I3e1772e5b64d1c45304860be43233b7e5dd8dba1
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/11565
Reviewed-by: Pablo Marquez Tello <pablo.tello@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r-- | arm_compute/core/CPP/CPPTypes.h | 12 | ||||
-rw-r--r-- | arm_compute/runtime/OMP/OMPScheduler.h | 10 | ||||
-rw-r--r-- | docs/user_guide/release_version_and_change_log.dox | 1 | ||||
-rw-r--r-- | src/common/cpuinfo/CpuInfo.cpp | 100 | ||||
-rw-r--r-- | src/common/cpuinfo/CpuInfo.h | 10 | ||||
-rw-r--r-- | src/core/CPP/CPPTypes.cpp | 17 | ||||
-rw-r--r-- | src/runtime/OMP/OMPScheduler.cpp | 28 | ||||
-rw-r--r-- | tests/validation/NEON/UNIT/RuntimeContext.cpp | 20 |
8 files changed, 185 insertions, 13 deletions
diff --git a/arm_compute/core/CPP/CPPTypes.h b/arm_compute/core/CPP/CPPTypes.h index 139d630fd7..e5322bdcb1 100644 --- a/arm_compute/core/CPP/CPPTypes.h +++ b/arm_compute/core/CPP/CPPTypes.h @@ -170,6 +170,18 @@ public: * @return Number of CPUs */ unsigned int get_cpu_num() const; + /** Return the maximum number of CPUs present excluding the little cores + * in case of an Android device + * + * @return Number of CPUs excluding little + */ + unsigned int get_cpu_num_excluding_little() const; + /** Return whether the device has little, medium and big CPUs in case + * of an Android device, returns false otherwise + * + * @return Whether the device has little, medium and big CPUs + */ + bool cpu_has_little_mid_big() const; /** Return the vector length in bytes for sme2 * diff --git a/arm_compute/runtime/OMP/OMPScheduler.h b/arm_compute/runtime/OMP/OMPScheduler.h index b522b403a9..c718e74359 100644 --- a/arm_compute/runtime/OMP/OMPScheduler.h +++ b/arm_compute/runtime/OMP/OMPScheduler.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_OMPSCHEDULER_H -#define ARM_COMPUTE_OMPSCHEDULER_H +#ifndef ACL_ARM_COMPUTE_RUNTIME_OMP_OMPSCHEDULER_H +#define ACL_ARM_COMPUTE_RUNTIME_OMP_OMPSCHEDULER_H #include "arm_compute/runtime/IScheduler.h" @@ -79,6 +79,8 @@ protected: private: unsigned int _num_threads; + bool _has_lmb; + unsigned int _nonlittle_num_cpus; }; } // namespace arm_compute -#endif /* ARM_COMPUTE_OMPSCHEDULER_H */ +#endif // ACL_ARM_COMPUTE_RUNTIME_OMP_OMPSCHEDULER_H diff --git a/docs/user_guide/release_version_and_change_log.dox b/docs/user_guide/release_version_and_change_log.dox index f493ff631e..a5f61d669d 100644 --- a/docs/user_guide/release_version_and_change_log.dox +++ b/docs/user_guide/release_version_and_change_log.dox @@ -44,6 +44,7 @@ If there is more than one release in a month then an extra sequential number is v24.05 Public major release - Add @ref CLScatter operator for FP32/16, S32/16/8, U32/16/8 data types - Various fixes to enable FP16 kernels in armv8a multi_isa builds. + - Updated logic in the OpenMP scheduler to exclude LITTLE cores. v24.04 Public major release - Add Bfloat16 data type support for @ref NEMatMul. 
diff --git a/src/common/cpuinfo/CpuInfo.cpp b/src/common/cpuinfo/CpuInfo.cpp index 809ab3e2c3..92ba5223c9 100644 --- a/src/common/cpuinfo/CpuInfo.cpp +++ b/src/common/cpuinfo/CpuInfo.cpp @@ -29,6 +29,7 @@ #include "support/StringSupport.h" #include "support/ToolchainSupport.h" +#include <map> #include <sstream> #if !defined(BARE_METAL) @@ -269,6 +270,87 @@ int get_max_cpus() } return max_cpus; } + +const static std::map<std::string, std::vector<uint32_t>> known_configurations_with_little_cores = { + {"xiaomi14-pro", {379, 379, 923, 923, 923, 867, 867, 1024}}}; + +const static std::map<std::string, uint32_t> number_of_cores_to_use = {{"xiaomi14-pro", 6}}; + +#if defined(__ANDROID__) +std::vector<uint32_t> get_cpu_capacities() +{ + std::vector<uint32_t> cpu_capacities; + for (int i = 0; i < get_max_cpus(); ++i) + { + std::stringstream str; + str << "/sys/devices/system/cpu/cpu" << i << "/cpu_capacity"; + std::ifstream file(str.str(), std::ios::in); + if (file.is_open()) + { + std::string line; + if (bool(getline(file, line))) + { + cpu_capacities.emplace_back(support::cpp11::stoul(line)); + } + } + } + + return cpu_capacities; +} + +uint32_t not_little_num_cpus_internal() +{ + std::vector<uint32_t> cpus_all = get_cpu_capacities(); + std::vector<uint32_t> cpus_not_little; + + for (auto &it : known_configurations_with_little_cores) + { + if (it.second == cpus_all) + { + return number_of_cores_to_use.find(it.first)->second; + } + } + + std::vector<uint32_t>::iterator result = std::max_element(cpus_all.begin(), cpus_all.end()); + uint32_t max_capacity = *result; + uint32_t threshold = max_capacity / 2; + for (unsigned int i = 0; i < cpus_all.size(); i++) + { + if (!(cpus_all[i] < threshold)) + { + cpus_not_little.emplace_back(cpus_all[i]); + } + } + return cpus_not_little.size(); +} + +bool has_little_mid_big_internal() +{ + std::vector<uint32_t> cpus_all = get_cpu_capacities(); + std::vector<uint32_t> cpus_not_little; + + for (auto &it : 
known_configurations_with_little_cores) + { + if (it.second == cpus_all) + { + return true; + } + } + std::sort(cpus_all.begin(), cpus_all.end()); + std::vector<uint32_t>::iterator ip; + ip = std::unique(cpus_all.begin(), cpus_all.end()); + cpus_all.resize(std::distance(cpus_all.begin(), ip)); + + if (cpus_all.size() == 3) + { + return true; + } + else + { + return false; + } +} +#endif /* defined(__ANDROID__) */ #elif defined(__aarch64__) && \ defined(__APPLE__) /* !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) */ /** Query features through sysctlbyname @@ -402,6 +484,24 @@ uint32_t CpuInfo::num_cpus() const return _cpus.size(); } +uint32_t CpuInfo::not_little_num_cpus() const +{ +#if defined(__ANDROID__) + return not_little_num_cpus_internal(); +#else /* defined(__ANDROID__) */ + return num_cpus(); +#endif /* defined(__ANDROID__) */ +} + +bool CpuInfo::has_little_mid_big() const +{ +#if defined(__ANDROID__) + return has_little_mid_big_internal(); +#else /* defined(__ANDROID__) */ + return false; +#endif /* defined(__ANDROID__) */ +} + uint32_t num_threads_hint() { unsigned int num_threads_hint = 1; diff --git a/src/common/cpuinfo/CpuInfo.h b/src/common/cpuinfo/CpuInfo.h index 953e4883c3..506830aa81 100644 --- a/src/common/cpuinfo/CpuInfo.h +++ b/src/common/cpuinfo/CpuInfo.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022 Arm Limited. + * Copyright (c) 2021-2022, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef SRC_COMMON_CPUINFO_H -#define SRC_COMMON_CPUINFO_H +#ifndef ACL_SRC_COMMON_CPUINFO_CPUINFO_H +#define ACL_SRC_COMMON_CPUINFO_CPUINFO_H #include "src/common/cpuinfo/CpuIsaInfo.h" #include "src/common/cpuinfo/CpuModel.h" @@ -120,6 +120,8 @@ public: CpuModel cpu_model(uint32_t cpuid) const; CpuModel cpu_model() const; uint32_t num_cpus() const; + uint32_t not_little_num_cpus() const; + bool has_little_mid_big() const; private: CpuIsaInfo _isa{}; @@ -135,4 +137,4 @@ private: uint32_t num_threads_hint(); } // namespace cpuinfo } // namespace arm_compute -#endif /* SRC_COMMON_CPUINFO_H */ +#endif // ACL_SRC_COMMON_CPUINFO_CPUINFO_H diff --git a/src/core/CPP/CPPTypes.cpp b/src/core/CPP/CPPTypes.cpp index f6761f27b0..ef0518ed3d 100644 --- a/src/core/CPP/CPPTypes.cpp +++ b/src/core/CPP/CPPTypes.cpp @@ -145,5 +145,20 @@ unsigned long CPUInfo::get_sme2_vector_length() const return 0; #endif // ARM_COMPUTE_ENABLE_SME2 } - +bool CPUInfo::cpu_has_little_mid_big() const +{ +#if defined(__ANDROID__) + return _impl->info.has_little_mid_big(); +#else /* defined(__ANDROID__) */ + return false; +#endif /* defined(__ANDROID__) */ +} +unsigned int CPUInfo::get_cpu_num_excluding_little() const +{ +#if defined(__ANDROID__) + return _impl->info.not_little_num_cpus(); +#else /* defined(__ANDROID__) */ + return get_cpu_num(); +#endif /* defined(__ANDROID__) */ +} } // namespace arm_compute diff --git a/src/runtime/OMP/OMPScheduler.cpp b/src/runtime/OMP/OMPScheduler.cpp index d4d6193fce..2a5abb5f7a 100644 --- a/src/runtime/OMP/OMPScheduler.cpp +++ b/src/runtime/OMP/OMPScheduler.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2023 Arm Limited. + * Copyright (c) 2017-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -32,10 +32,24 @@ namespace arm_compute { +#if !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \ + (defined(__arm__) || defined(__aarch64__)) && defined(__ANDROID__) OMPScheduler::OMPScheduler() // NOLINT - : _num_threads(omp_get_max_threads()) + : _num_threads(cpu_info().get_cpu_num_excluding_little()), + _has_lmb(cpu_info().cpu_has_little_mid_big()), + _nonlittle_num_cpus(cpu_info().get_cpu_num_excluding_little()) { } +#else /* !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \ + (defined(__arm__) || defined(__aarch64__)) && defined(__ANDROID__)*/ +OMPScheduler::OMPScheduler() // NOLINT + : _num_threads(omp_get_max_threads()), + _has_lmb(cpu_info().cpu_has_little_mid_big()), + _nonlittle_num_cpus(cpu_info().get_cpu_num_excluding_little()) +{ +} +#endif /* !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \ + (defined(__arm__) || defined(__aarch64__)) && defined(__ANDROID__)*/ unsigned int OMPScheduler::num_threads() const { @@ -45,7 +59,15 @@ unsigned int OMPScheduler::num_threads() const void OMPScheduler::set_num_threads(unsigned int num_threads) { const unsigned int num_cores = omp_get_max_threads(); - _num_threads = (num_threads == 0) ? num_cores : num_threads; +#if !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \ + (defined(__arm__) || defined(__aarch64__)) && defined(__ANDROID__) + const unsigned int adjusted_num_threads = (_has_lmb) ? _nonlittle_num_cpus : num_threads; + _num_threads = (num_threads == 0) ? num_cores : adjusted_num_threads; +#else /* !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \ + (defined(__arm__) || defined(__aarch64__)) && defined(__ANDROID__)*/ + _num_threads = (num_threads == 0) ? 
num_cores : num_threads; +#endif /* !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \ + (defined(__arm__) || defined(__aarch64__)) && defined(__ANDROID__)*/ } void OMPScheduler::schedule(ICPPKernel *kernel, const Hints &hints) diff --git a/tests/validation/NEON/UNIT/RuntimeContext.cpp b/tests/validation/NEON/UNIT/RuntimeContext.cpp index 819811943d..e126aded28 100644 --- a/tests/validation/NEON/UNIT/RuntimeContext.cpp +++ b/tests/validation/NEON/UNIT/RuntimeContext.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -48,6 +48,24 @@ namespace validation { TEST_SUITE(NEON) TEST_SUITE(UNIT) +#if defined(ARM_COMPUTE_OPENMP_SCHEDULER) && !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \ + (defined(__arm__) || defined(__aarch64__)) && defined(__ANDROID__) +TEST_CASE(CpuCapacity, framework::DatasetMode::ALL) +{ + CPUInfo& ci = arm_compute::Scheduler::get().cpu_info(); + const uint32_t total_num_cpus = ci.get_cpu_num(); + const uint32_t nonlittle_num_cpus = ci.get_cpu_num_excluding_little(); + const bool has_lmb = ci.cpu_has_little_mid_big(); + const uint32_t num_threads = arm_compute::Scheduler::get().num_threads(); + + if(has_lmb){ + ARM_COMPUTE_EXPECT(total_num_cpus!=nonlittle_num_cpus , framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(num_threads==nonlittle_num_cpus , framework::LogLevel::ERRORS); + } +} +#endif /* defined(ARM_COMPUTE_OPENMP_SCHEDULER) && !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \ + (defined(__arm__) || defined(__aarch64__)) && defined(__ANDROID__)*/ + TEST_SUITE(RuntimeContext) TEST_CASE(Scheduler, framework::DatasetMode::ALL) |