aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorOmar Al Khatib <omar.alkhatib@arm.com>2024-05-09 16:06:23 +0100
committerOmar Al Khatib <omar.alkhatib@arm.com>2024-05-17 14:20:06 +0000
commitf5053f782daa942126bd61ac1bcfc0af627b7b31 (patch)
treeeb96c568d3c95021860e06f06271345f68969260
parent8710385a9feb050f1b5a422ed57df691e8ba078f (diff)
downloadComputeLibrary-f5053f782daa942126bd61ac1bcfc0af627b7b31.tar.gz
Update logic in the OpenMP scheduler to exclude LITTLE cores
On systems with BIG/MID/LITTLE cores, we need to exclude the LITTLE cores. This is make changes to CPUInfo to detect number of LITTLE cores and set the num_threads to TOTAL_CORES-NUM_LITTLE cores Resolves [COMPMID-7014] Signed-off-by: Omar Al Khatib <omar.alkhatib@arm.com> Change-Id: I3e1772e5b64d1c45304860be43233b7e5dd8dba1 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/11565 Reviewed-by: Pablo Marquez Tello <pablo.tello@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Benchmark: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r--arm_compute/core/CPP/CPPTypes.h12
-rw-r--r--arm_compute/runtime/OMP/OMPScheduler.h10
-rw-r--r--docs/user_guide/release_version_and_change_log.dox1
-rw-r--r--src/common/cpuinfo/CpuInfo.cpp100
-rw-r--r--src/common/cpuinfo/CpuInfo.h10
-rw-r--r--src/core/CPP/CPPTypes.cpp17
-rw-r--r--src/runtime/OMP/OMPScheduler.cpp28
-rw-r--r--tests/validation/NEON/UNIT/RuntimeContext.cpp20
8 files changed, 185 insertions, 13 deletions
diff --git a/arm_compute/core/CPP/CPPTypes.h b/arm_compute/core/CPP/CPPTypes.h
index 139d630fd7..e5322bdcb1 100644
--- a/arm_compute/core/CPP/CPPTypes.h
+++ b/arm_compute/core/CPP/CPPTypes.h
@@ -170,6 +170,18 @@ public:
* @return Number of CPUs
*/
unsigned int get_cpu_num() const;
+ /** Return the maximum number of CPUs present excluding the little cores
+ * in case of an Android device
+ *
+ * @return Number of CPUs excluding little
+ */
+ unsigned int get_cpu_num_excluding_little() const;
+ /** Return whether the device has little, medium and big CPUs in case
+ * of an Android device, returns false otherwise
+ *
+ * @return Whether the device has little, medium and big CPUs
+ */
+ bool cpu_has_little_mid_big() const;
/** Return the vector length in bytes for sme2
*
diff --git a/arm_compute/runtime/OMP/OMPScheduler.h b/arm_compute/runtime/OMP/OMPScheduler.h
index b522b403a9..c718e74359 100644
--- a/arm_compute/runtime/OMP/OMPScheduler.h
+++ b/arm_compute/runtime/OMP/OMPScheduler.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2021, 2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_OMPSCHEDULER_H
-#define ARM_COMPUTE_OMPSCHEDULER_H
+#ifndef ACL_ARM_COMPUTE_RUNTIME_OMP_OMPSCHEDULER_H
+#define ACL_ARM_COMPUTE_RUNTIME_OMP_OMPSCHEDULER_H
#include "arm_compute/runtime/IScheduler.h"
@@ -79,6 +79,8 @@ protected:
private:
unsigned int _num_threads;
+ bool _has_lmb;
+ unsigned int _nonlittle_num_cpus;
};
} // namespace arm_compute
-#endif /* ARM_COMPUTE_OMPSCHEDULER_H */
+#endif // ACL_ARM_COMPUTE_RUNTIME_OMP_OMPSCHEDULER_H
diff --git a/docs/user_guide/release_version_and_change_log.dox b/docs/user_guide/release_version_and_change_log.dox
index f493ff631e..a5f61d669d 100644
--- a/docs/user_guide/release_version_and_change_log.dox
+++ b/docs/user_guide/release_version_and_change_log.dox
@@ -44,6 +44,7 @@ If there is more than one release in a month then an extra sequential number is
v24.05 Public major release
- Add @ref CLScatter operator for FP32/16, S32/16/8, U32/16/8 data types
- Various fixes to enable FP16 kernels in armv8a multi_isa builds.
+ - Updated logic in the OpenMP scheduler to exclude LITTLE cores.
v24.04 Public major release
- Add Bfloat16 data type support for @ref NEMatMul.
diff --git a/src/common/cpuinfo/CpuInfo.cpp b/src/common/cpuinfo/CpuInfo.cpp
index 809ab3e2c3..92ba5223c9 100644
--- a/src/common/cpuinfo/CpuInfo.cpp
+++ b/src/common/cpuinfo/CpuInfo.cpp
@@ -29,6 +29,7 @@
#include "support/StringSupport.h"
#include "support/ToolchainSupport.h"
+#include <map>
#include <sstream>
#if !defined(BARE_METAL)
@@ -269,6 +270,87 @@ int get_max_cpus()
}
return max_cpus;
}
+
+const static std::map<std::string, std::vector<uint32_t>> known_configurations_with_little_cores = {
+ {"xiaomi14-pro", {379, 379, 923, 923, 923, 867, 867, 1024}}};
+
+const static std::map<std::string, uint32_t> number_of_cores_to_use = {{"xiaomi14-pro", 6}};
+
+#if defined(__ANDROID__)
+std::vector<uint32_t> get_cpu_capacities()
+{
+ std::vector<uint32_t> cpu_capacities;
+ for (int i = 0; i < get_max_cpus(); ++i)
+ {
+ std::stringstream str;
+ str << "/sys/devices/system/cpu/cpu" << i << "/cpu_capacity";
+ std::ifstream file(str.str(), std::ios::in);
+ if (file.is_open())
+ {
+ std::string line;
+ if (bool(getline(file, line)))
+ {
+ cpu_capacities.emplace_back(support::cpp11::stoul(line));
+ }
+ }
+ }
+
+ return cpu_capacities;
+}
+
+uint32_t not_little_num_cpus_internal()
+{
+ std::vector<uint32_t> cpus_all = get_cpu_capacities();
+ std::vector<uint32_t> cpus_not_little;
+
+ for (auto &it : known_configurations_with_little_cores)
+ {
+ if (it.second == cpus_all)
+ {
+ return number_of_cores_to_use.find(it.first)->second;
+ }
+ }
+
+ std::vector<uint32_t>::iterator result = std::max_element(cpus_all.begin(), cpus_all.end());
+ uint32_t max_capacity = *result;
+ uint32_t threshold = max_capacity / 2;
+ for (unsigned int i = 0; i < cpus_all.size(); i++)
+ {
+ if (!(cpus_all[i] < threshold))
+ {
+ cpus_not_little.emplace_back(cpus_all[i]);
+ }
+ }
+ return cpus_not_little.size();
+}
+
+bool has_little_mid_big_internal()
+{
+ std::vector<uint32_t> cpus_all = get_cpu_capacities();
+ std::vector<uint32_t> cpus_not_little;
+
+ for (auto &it : known_configurations_with_little_cores)
+ {
+ if (it.second == cpus_all)
+ {
+ return true;
+ }
+ }
+ std::sort(cpus_all.begin(), cpus_all.end());
+ std::vector<uint32_t>::iterator ip;
+ ip = std::unique(cpus_all.begin(), cpus_all.end());
+ cpus_all.resize(std::distance(cpus_all.begin(), ip));
+
+ if (cpus_all.size() == 3)
+ {
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+}
+#endif /* defined(__ANDROID__) */
#elif defined(__aarch64__) && \
defined(__APPLE__) /* !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) */
/** Query features through sysctlbyname
@@ -402,6 +484,24 @@ uint32_t CpuInfo::num_cpus() const
return _cpus.size();
}
+uint32_t CpuInfo::not_little_num_cpus() const
+{
+#if defined(__ANDROID__)
+ return not_little_num_cpus_internal();
+#else /* defined(__ANDROID__) */
+ return num_cpus();
+#endif /* defined(__ANDROID__) */
+}
+
+bool CpuInfo::has_little_mid_big() const
+{
+#if defined(__ANDROID__)
+ return has_little_mid_big_internal();
+#else /* defined(__ANDROID__) */
+ return false;
+#endif /* defined(__ANDROID__) */
+}
+
uint32_t num_threads_hint()
{
unsigned int num_threads_hint = 1;
diff --git a/src/common/cpuinfo/CpuInfo.h b/src/common/cpuinfo/CpuInfo.h
index 953e4883c3..506830aa81 100644
--- a/src/common/cpuinfo/CpuInfo.h
+++ b/src/common/cpuinfo/CpuInfo.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021-2022 Arm Limited.
+ * Copyright (c) 2021-2022, 2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef SRC_COMMON_CPUINFO_H
-#define SRC_COMMON_CPUINFO_H
+#ifndef ACL_SRC_COMMON_CPUINFO_CPUINFO_H
+#define ACL_SRC_COMMON_CPUINFO_CPUINFO_H
#include "src/common/cpuinfo/CpuIsaInfo.h"
#include "src/common/cpuinfo/CpuModel.h"
@@ -120,6 +120,8 @@ public:
CpuModel cpu_model(uint32_t cpuid) const;
CpuModel cpu_model() const;
uint32_t num_cpus() const;
+ uint32_t not_little_num_cpus() const;
+ bool has_little_mid_big() const;
private:
CpuIsaInfo _isa{};
@@ -135,4 +137,4 @@ private:
uint32_t num_threads_hint();
} // namespace cpuinfo
} // namespace arm_compute
-#endif /* SRC_COMMON_CPUINFO_H */
+#endif // ACL_SRC_COMMON_CPUINFO_CPUINFO_H
diff --git a/src/core/CPP/CPPTypes.cpp b/src/core/CPP/CPPTypes.cpp
index f6761f27b0..ef0518ed3d 100644
--- a/src/core/CPP/CPPTypes.cpp
+++ b/src/core/CPP/CPPTypes.cpp
@@ -145,5 +145,20 @@ unsigned long CPUInfo::get_sme2_vector_length() const
return 0;
#endif // ARM_COMPUTE_ENABLE_SME2
}
-
+bool CPUInfo::cpu_has_little_mid_big() const
+{
+#if defined(__ANDROID__)
+ return _impl->info.has_little_mid_big();
+#else /* defined(__ANDROID__) */
+ return false;
+#endif /* defined(__ANDROID__) */
+}
+unsigned int CPUInfo::get_cpu_num_excluding_little() const
+{
+#if defined(__ANDROID__)
+ return _impl->info.not_little_num_cpus();
+#else /* defined(__ANDROID__) */
+ return get_cpu_num();
+#endif /* defined(__ANDROID__) */
+}
} // namespace arm_compute
diff --git a/src/runtime/OMP/OMPScheduler.cpp b/src/runtime/OMP/OMPScheduler.cpp
index d4d6193fce..2a5abb5f7a 100644
--- a/src/runtime/OMP/OMPScheduler.cpp
+++ b/src/runtime/OMP/OMPScheduler.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2023 Arm Limited.
+ * Copyright (c) 2017-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -32,10 +32,24 @@
namespace arm_compute
{
+#if !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \
+ (defined(__arm__) || defined(__aarch64__)) && defined(__ANDROID__)
OMPScheduler::OMPScheduler() // NOLINT
- : _num_threads(omp_get_max_threads())
+ : _num_threads(cpu_info().get_cpu_num_excluding_little()),
+ _has_lmb(cpu_info().cpu_has_little_mid_big()),
+ _nonlittle_num_cpus(cpu_info().get_cpu_num_excluding_little())
{
}
+#else /* !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \
+ (defined(__arm__) || defined(__aarch64__)) && defined(__ANDROID__)*/
+OMPScheduler::OMPScheduler() // NOLINT
+ : _num_threads(omp_get_max_threads()),
+ _has_lmb(cpu_info().cpu_has_little_mid_big()),
+ _nonlittle_num_cpus(cpu_info().get_cpu_num_excluding_little())
+{
+}
+#endif /* !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \
+ (defined(__arm__) || defined(__aarch64__)) && defined(__ANDROID__)*/
unsigned int OMPScheduler::num_threads() const
{
@@ -45,7 +59,15 @@ unsigned int OMPScheduler::num_threads() const
void OMPScheduler::set_num_threads(unsigned int num_threads)
{
const unsigned int num_cores = omp_get_max_threads();
- _num_threads = (num_threads == 0) ? num_cores : num_threads;
+#if !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \
+ (defined(__arm__) || defined(__aarch64__)) && defined(__ANDROID__)
+ const unsigned int adjusted_num_threads = (_has_lmb) ? _nonlittle_num_cpus : num_threads;
+ _num_threads = (num_threads == 0) ? num_cores : adjusted_num_threads;
+#else /* !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \
+ (defined(__arm__) || defined(__aarch64__)) && defined(__ANDROID__)*/
+ _num_threads = (num_threads == 0) ? num_cores : num_threads;
+#endif /* !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \
+ (defined(__arm__) || defined(__aarch64__)) && defined(__ANDROID__)*/
}
void OMPScheduler::schedule(ICPPKernel *kernel, const Hints &hints)
diff --git a/tests/validation/NEON/UNIT/RuntimeContext.cpp b/tests/validation/NEON/UNIT/RuntimeContext.cpp
index 819811943d..e126aded28 100644
--- a/tests/validation/NEON/UNIT/RuntimeContext.cpp
+++ b/tests/validation/NEON/UNIT/RuntimeContext.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2021, 2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -48,6 +48,24 @@ namespace validation
{
TEST_SUITE(NEON)
TEST_SUITE(UNIT)
+#if defined(ARM_COMPUTE_OPENMP_SCHEDULER) && !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \
+ (defined(__arm__) || defined(__aarch64__)) && defined(__ANDROID__)
+TEST_CASE(CpuCapacity, framework::DatasetMode::ALL)
+{
+ CPUInfo& ci = arm_compute::Scheduler::get().cpu_info();
+ const uint32_t total_num_cpus = ci.get_cpu_num();
+ const uint32_t nonlittle_num_cpus = ci.get_cpu_num_excluding_little();
+ const bool has_lmb = ci.cpu_has_little_mid_big();
+ const uint32_t num_threads = arm_compute::Scheduler::get().num_threads();
+
+ if(has_lmb){
+ ARM_COMPUTE_EXPECT(total_num_cpus!=nonlittle_num_cpus , framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(num_threads==nonlittle_num_cpus , framework::LogLevel::ERRORS);
+ }
+}
+#endif /* defined(ARM_COMPUTE_OPENMP_SCHEDULER) && !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \
+ (defined(__arm__) || defined(__aarch64__)) && defined(__ANDROID__)*/
+
TEST_SUITE(RuntimeContext)
TEST_CASE(Scheduler, framework::DatasetMode::ALL)