diff options
Diffstat (limited to 'src/common/cpuinfo/CpuInfo.cpp')
-rw-r--r-- | src/common/cpuinfo/CpuInfo.cpp | 164 |
1 files changed, 109 insertions, 55 deletions
diff --git a/src/common/cpuinfo/CpuInfo.cpp b/src/common/cpuinfo/CpuInfo.cpp index 436e7ea803..93f51e599a 100644 --- a/src/common/cpuinfo/CpuInfo.cpp +++ b/src/common/cpuinfo/CpuInfo.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,7 +25,7 @@ #include "arm_compute/core/Error.h" #include "arm_compute/core/Log.h" -#include "src/common/cpuinfo/target/CpuInfoSveUtils.h" + #include "support/StringSupport.h" #include "support/ToolchainSupport.h" @@ -35,27 +35,35 @@ #include <algorithm> #include <cstring> #include <fstream> +#if !defined(_WIN64) #include <regex.h> /* C++ std::regex takes up a lot of space in the standalone builds */ #include <sched.h> +#endif /* !defined(_WIN64) */ + #include <thread> #include <unordered_map> #endif /* !defined(BARE_METAL) */ -#if !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) +#if !defined(_WIN64) +#if !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && (defined(__arm__) || defined(__aarch64__)) #include <asm/hwcap.h> /* Get HWCAP bits from asm/hwcap.h */ #include <sys/auxv.h> -#endif /* !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) */ - -#define ARM_COMPUTE_CPU_FEATURE_HWCAP_CPUID (1 << 11) -#define ARM_COMPUTE_GET_FEATURE_REG(var, freg) __asm __volatile("MRS %0, " #freg \ - : "=r"(var)) +#elif defined(__APPLE__) && defined(__aarch64__) +#include <sys/sysctl.h> +#include <sys/types.h> +#endif /* defined(__APPLE__) && defined(__aarch64__)) */ +#endif /* !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && (defined(__arm__) || defined(__aarch64__)) */ + +#define ARM_COMPUTE_CPU_FEATURE_HWCAP_CPUID (1 << 11) +#define ARM_COMPUTE_GET_FEATURE_REG(var, freg) __asm __volatile("MRS %0, " #freg : "=r"(var)) namespace arm_compute { namespace cpuinfo { namespace { -#if !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) +#if !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \ + (defined(__arm__) || defined(__aarch64__)) /** Extract MIDR using CPUID information that are exposed to user-space * * @param[in] max_num_cpus Maximum number of possible CPUs @@ -65,15 +73,15 @@ namespace std::vector<uint32_t> midr_from_cpuid(uint32_t max_num_cpus) { std::vector<uint32_t> cpus; - for(unsigned int i = 0; i < max_num_cpus; ++i) + for (unsigned int i = 0; i < max_num_cpus; ++i) { std::stringstream str; str << "/sys/devices/system/cpu/cpu" << i << "/regs/identification/midr_el1"; std::ifstream file(str.str(), std::ios::in); - if(file.is_open()) + if (file.is_open()) { std::string line; - if(bool(getline(file, line))) + if (bool(getline(file, line))) { cpus.emplace_back(support::cpp11::stoul(line, nullptr, support::cpp11::NumericBase::BASE_16)); } @@ -115,34 +123,35 @@ std::vector<uint32_t> midr_from_proc_cpuinfo(int max_num_cpus) ARM_COMPUTE_ERROR_ON_MSG(ret_status != 0, "Regex compilation failed."); std::ifstream file("/proc/cpuinfo", std::ios::in); - if(file.is_open()) + if (file.is_open()) { std::string line; int midr = 0; int curcpu = -1; - while(bool(getline(file, line))) + while (bool(getline(file, line))) { std::array<regmatch_t, 2> match; ret_status = regexec(&proc_regex, line.c_str(), 2, match.data(), 0); - if(ret_status == 0) + if (ret_status == 0) { std::string id = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so)); int newcpu = support::cpp11::stoi(id, nullptr); - if(curcpu >= 0 && midr == 0) + if (curcpu >= 0 && midr == 0) { // Matched a new CPU ID without any description of the previous one - looks like old format. return {}; } - if(curcpu >= 0 && curcpu < max_num_cpus) + if (curcpu >= 0 && curcpu < max_num_cpus) { cpus.emplace_back(midr); } else { - ARM_COMPUTE_LOG_INFO_MSG_CORE("Trying to populate a core id with id greater than the expected number of cores!"); + ARM_COMPUTE_LOG_INFO_MSG_CORE( + "Trying to populate a core id with id greater than the expected number of cores!"); } midr = 0; @@ -152,7 +161,7 @@ std::vector<uint32_t> midr_from_proc_cpuinfo(int max_num_cpus) } ret_status = regexec(&imp_regex, line.c_str(), 2, match.data(), 0); - if(ret_status == 0) + if (ret_status == 0) { std::string subexp = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so)); int impv = support::cpp11::stoi(subexp, nullptr, support::cpp11::NumericBase::BASE_16); @@ -162,7 +171,7 @@ std::vector<uint32_t> midr_from_proc_cpuinfo(int max_num_cpus) } ret_status = regexec(&var_regex, line.c_str(), 2, match.data(), 0); - if(ret_status == 0) + if (ret_status == 0) { std::string subexp = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so)); int varv = support::cpp11::stoi(subexp, nullptr, support::cpp11::NumericBase::BASE_16); @@ -172,7 +181,7 @@ std::vector<uint32_t> midr_from_proc_cpuinfo(int max_num_cpus) } ret_status = regexec(&part_regex, line.c_str(), 2, match.data(), 0); - if(ret_status == 0) + if (ret_status == 0) { std::string subexp = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so)); int partv = support::cpp11::stoi(subexp, nullptr, support::cpp11::NumericBase::BASE_16); @@ -182,7 +191,7 @@ std::vector<uint32_t> midr_from_proc_cpuinfo(int max_num_cpus) } ret_status = regexec(&rev_regex, line.c_str(), 2, match.data(), 0); - if(ret_status == 0) + if (ret_status == 0) { std::string subexp = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so)); int regv = support::cpp11::stoi(subexp, nullptr); @@ -193,13 +202,14 @@ std::vector<uint32_t> midr_from_proc_cpuinfo(int max_num_cpus) } } - if(curcpu >= 0 && curcpu < max_num_cpus) + if (curcpu >= 0 && curcpu < max_num_cpus) { cpus.emplace_back(midr); } else { - ARM_COMPUTE_LOG_INFO_MSG_CORE("Trying to populate a core id with id greater than the expected number of cores!"); + ARM_COMPUTE_LOG_INFO_MSG_CORE( + "Trying to populate a core id with id greater than the expected number of cores!"); } } @@ -224,11 +234,11 @@ int get_max_cpus() CPUspresent.open("/sys/devices/system/cpu/present", std::ios::in); bool success = false; - if(CPUspresent.is_open()) + if (CPUspresent.is_open()) { std::string line; - if(bool(getline(CPUspresent, line))) + if (bool(getline(CPUspresent, line))) { /* The content of this file is a list of ranges or single values, e.g. * 0-5, or 1-3,5,7 or similar. As we are interested in the @@ -237,9 +247,9 @@ int get_max_cpus() */ auto startfrom = line.begin(); - for(auto i = line.begin(); i < line.end(); ++i) + for (auto i = line.begin(); i < line.end(); ++i) { - if(*i == '-' || *i == ',') + if (*i == '-' || *i == ',') { startfrom = i + 1; } @@ -253,38 +263,64 @@ int get_max_cpus() } // Return std::thread::hardware_concurrency() as a fallback. - if(!success) + if (!success) { max_cpus = std::thread::hardware_concurrency(); } return max_cpus; } -#endif /* !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) */ +#elif defined(__aarch64__) && \ + defined(__APPLE__) /* !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) */ +/** Query features through sysctlbyname + * + * @return int value queried + */ +int get_hw_capability(const std::string &cap) +{ + int64_t result(0); + size_t size = sizeof(result); + sysctlbyname(cap.c_str(), &result, &size, NULL, 0); + return result; +} +#endif /* !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && (defined(__arm__) || defined(__aarch64__)) */ + +#if defined(BARE_METAL) && defined(__aarch64__) +uint64_t get_sve_feature_reg() +{ + uint64_t svefr0 = 0; + __asm __volatile(".inst 0xd5380483 // mrs x3, ID_AA64ZFR0_EL1\n" + "MOV %0, X3" + : "=r"(svefr0) + : + : "x3"); + return svefr0; +} +#endif /* defined(BARE_METAL) && defined(__aarch64__) */ } // namespace -CpuInfo::CpuInfo(CpuIsaInfo isa, std::vector<CpuModel> cpus) - : _isa(std::move(isa)), _cpus(std::move(cpus)) +CpuInfo::CpuInfo(CpuIsaInfo isa, std::vector<CpuModel> cpus) : _isa(std::move(isa)), _cpus(std::move(cpus)) { } CpuInfo CpuInfo::build() { -#if !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) +#if !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \ + (defined(__arm__) || defined(__aarch64__)) const uint32_t hwcaps = getauxval(AT_HWCAP); const uint32_t hwcaps2 = getauxval(AT_HWCAP2); const uint32_t max_cpus = get_max_cpus(); // Populate midr values std::vector<uint32_t> cpus_midr; - if(hwcaps & ARM_COMPUTE_CPU_FEATURE_HWCAP_CPUID) + if (hwcaps & ARM_COMPUTE_CPU_FEATURE_HWCAP_CPUID) { cpus_midr = midr_from_cpuid(max_cpus); } - if(cpus_midr.empty()) + if (cpus_midr.empty()) { cpus_midr = midr_from_proc_cpuinfo(max_cpus); } - if(cpus_midr.empty()) + if (cpus_midr.empty()) { cpus_midr.resize(max_cpus, 0); } @@ -300,32 +336,49 @@ CpuInfo CpuInfo::build() CpuInfo info(isa, cpus_model); return info; -#elif(BARE_METAL) && defined(__aarch64__) /* !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) */ +#elif (BARE_METAL) && \ + defined( \ + __aarch64__) /* !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && (defined(__arm__) || defined(__aarch64__)) */ // Assume single CPU in bare metal mode. Just read the ID register and feature bits directly. - uint64_t isar0 = 0, isar1 = 0, pfr0 = 0, svefr0 = 0, midr = 0; + uint64_t isar0 = 0, isar1 = 0, pfr0 = 0, pfr1 = 0, svefr0 = 0, midr = 0; ARM_COMPUTE_GET_FEATURE_REG(isar0, ID_AA64ISAR0_EL1); ARM_COMPUTE_GET_FEATURE_REG(isar1, ID_AA64ISAR1_EL1); ARM_COMPUTE_GET_FEATURE_REG(pfr0, ID_AA64PFR0_EL1); + ARM_COMPUTE_GET_FEATURE_REG(pfr1, ID_AA64PFR1_EL1); ARM_COMPUTE_GET_FEATURE_REG(midr, MIDR_EL1); - if((pfr0 >> 32) & 0xf) + if ((pfr0 >> 32) & 0xf) { svefr0 = get_sve_feature_reg(); } - CpuIsaInfo isa = init_cpu_isa_from_regs(isar0, isar1, pfr0, svefr0, midr); + CpuIsaInfo isa = init_cpu_isa_from_regs(isar0, isar1, pfr0, pfr1, svefr0, midr); std::vector<CpuModel> cpus_model(1, midr_to_model(midr)); CpuInfo info(isa, cpus_model); return info; -#else /* #elif(BARE_METAL) && defined(__aarch64__) */ - CpuInfo info(CpuIsaInfo(), { CpuModel::GENERIC }); +#elif defined(__aarch64__) && defined(__APPLE__) /* #elif(BARE_METAL) && defined(__aarch64__) */ + int ncpus = get_hw_capability("hw.perflevel0.logicalcpu"); + CpuIsaInfo isainfo; + std::vector<CpuModel> cpus_model(ncpus); + isainfo.neon = get_hw_capability("hw.optional.neon"); + isainfo.fp16 = get_hw_capability("hw.optional.neon_fp16"); + isainfo.dot = get_hw_capability("hw.optional.arm.FEAT_DotProd"); + CpuInfo info(isainfo, cpus_model); + return info; +#elif defined(__aarch64__) && defined(_WIN64) /* #elif defined(__aarch64__) && defined(__APPLE__) */ + CpuIsaInfo isainfo; + isainfo.neon = true; + CpuInfo info(isainfo, {CpuModel::GENERIC}); return info; -#endif /* !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) */ +#else /* #elif defined(__aarch64__) && defined(_WIN64) */ + CpuInfo info(CpuIsaInfo(), {CpuModel::GENERIC}); + return info; +#endif /* !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && (defined(__arm__) || defined(__aarch64__)) */ } CpuModel CpuInfo::cpu_model(uint32_t cpuid) const { - if(cpuid < _cpus.size()) + if (cpuid < _cpus.size()) { return _cpus[cpuid]; } @@ -334,11 +387,12 @@ CpuModel CpuInfo::cpu_model(uint32_t cpuid) const CpuModel CpuInfo::cpu_model() const { -#if defined(BARE_METAL) || defined(__APPLE__) || (!defined(__arm__) && !defined(__aarch64__)) +#if defined(_WIN64) || defined(BARE_METAL) || defined(__APPLE__) || defined(__OpenBSD__) || \ + (!defined(__arm__) && !defined(__aarch64__)) return cpu_model(0); -#else /* defined(BARE_METAL) || defined(__APPLE__) || (!defined(__arm__) && !defined(__aarch64__)) */ +#else /* defined(BARE_METAL) || defined(__APPLE__) || defined(__OpenBSD__) || (!defined(__arm__) && !defined(__aarch64__)) */ return cpu_model(sched_getcpu()); -#endif /* defined(BARE_METAL) || defined(__APPLE__) || (!defined(__arm__) && !defined(__aarch64__)) */ +#endif /* defined(BARE_METAL) || defined(__APPLE__) || defined(__OpenBSD__) || (!defined(__arm__) && !defined(__aarch64__)) */ } uint32_t CpuInfo::num_cpus() const @@ -350,7 +404,7 @@ uint32_t num_threads_hint() { unsigned int num_threads_hint = 1; -#if !defined(BARE_METAL) +#if !defined(BARE_METAL) && !defined(_WIN64) && !defined(ARM_COMPUTE_DISABLE_THREADS_HINT) std::vector<std::string> cpus; cpus.reserve(64); @@ -363,13 +417,13 @@ uint32_t num_threads_hint() // Read cpuinfo and get occurrence of each core std::ifstream cpuinfo_file("/proc/cpuinfo", std::ios::in); - if(cpuinfo_file.is_open()) + if (cpuinfo_file.is_open()) { std::string line; - while(bool(getline(cpuinfo_file, line))) + while (bool(getline(cpuinfo_file, line))) { std::array<regmatch_t, 2> match; - if(regexec(&cpu_part_rgx, line.c_str(), 2, match.data(), 0) == 0) + if (regexec(&cpu_part_rgx, line.c_str(), 2, match.data(), 0) == 0) { cpus.emplace_back(line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so))); } @@ -382,13 +436,13 @@ uint32_t num_threads_hint() auto least_frequent_cpu_occurences = [](const std::vector<std::string> &cpus) -> uint32_t { std::unordered_map<std::string, uint32_t> cpus_freq; - for(const auto &cpu : cpus) + for (const auto &cpu : cpus) { cpus_freq[cpu]++; } uint32_t vmin = cpus.size() + 1; - for(const auto &cpu_freq : cpus_freq) + for (const auto &cpu_freq : cpus_freq) { vmin = std::min(vmin, cpu_freq.second); } @@ -397,9 +451,9 @@ uint32_t num_threads_hint() // Set thread hint num_threads_hint = cpus.empty() ? std::thread::hardware_concurrency() : least_frequent_cpu_occurences(cpus); -#endif /* !defined(BARE_METAL) */ +#endif /* !defined(BARE_METAL) && !defined(_WIN64) && !defined(ARM_COMPUTE_DISABLE_THREADS_HINT) */ return num_threads_hint; } } // namespace cpuinfo -} // namespace arm_compute
\ No newline at end of file +} // namespace arm_compute |