diff options
Diffstat (limited to 'src/common/cpuinfo')
-rw-r--r-- | src/common/cpuinfo/CpuInfo.cpp | 266 | ||||
-rw-r--r-- | src/common/cpuinfo/CpuInfo.h | 43 | ||||
-rw-r--r-- | src/common/cpuinfo/CpuIsaInfo.cpp | 159 | ||||
-rw-r--r-- | src/common/cpuinfo/CpuIsaInfo.h | 28 | ||||
-rw-r--r-- | src/common/cpuinfo/CpuModel.cpp | 83 | ||||
-rw-r--r-- | src/common/cpuinfo/CpuModel.h | 16 | ||||
-rw-r--r-- | src/common/cpuinfo/target/CpuInfoSveUtils.cpp | 40 | ||||
-rw-r--r-- | src/common/cpuinfo/target/CpuInfoSveUtils.h | 40 |
8 files changed, 379 insertions, 296 deletions
diff --git a/src/common/cpuinfo/CpuInfo.cpp b/src/common/cpuinfo/CpuInfo.cpp index 436e7ea803..92ba5223c9 100644 --- a/src/common/cpuinfo/CpuInfo.cpp +++ b/src/common/cpuinfo/CpuInfo.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Arm Limited. + * Copyright (c) 2021-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,37 +25,46 @@ #include "arm_compute/core/Error.h" #include "arm_compute/core/Log.h" -#include "src/common/cpuinfo/target/CpuInfoSveUtils.h" + #include "support/StringSupport.h" #include "support/ToolchainSupport.h" +#include <map> #include <sstream> #if !defined(BARE_METAL) #include <algorithm> #include <cstring> #include <fstream> +#if !defined(_WIN64) #include <regex.h> /* C++ std::regex takes up a lot of space in the standalone builds */ #include <sched.h> +#endif /* !defined(_WIN64) */ + #include <thread> #include <unordered_map> #endif /* !defined(BARE_METAL) */ -#if !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) +#if !defined(_WIN64) +#if !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && (defined(__arm__) || defined(__aarch64__)) #include <asm/hwcap.h> /* Get HWCAP bits from asm/hwcap.h */ #include <sys/auxv.h> -#endif /* !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) */ - -#define ARM_COMPUTE_CPU_FEATURE_HWCAP_CPUID (1 << 11) -#define ARM_COMPUTE_GET_FEATURE_REG(var, freg) __asm __volatile("MRS %0, " #freg \ - : "=r"(var)) +#elif defined(__APPLE__) && defined(__aarch64__) +#include <sys/sysctl.h> +#include <sys/types.h> +#endif /* defined(__APPLE__) && defined(__aarch64__)) */ +#endif /* !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && (defined(__arm__) || defined(__aarch64__)) */ + +#define ARM_COMPUTE_CPU_FEATURE_HWCAP_CPUID (1 << 11) +#define ARM_COMPUTE_GET_FEATURE_REG(var, freg) __asm __volatile("MRS %0, " #freg : "=r"(var)) namespace arm_compute { namespace cpuinfo { namespace { -#if !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) +#if !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \ + (defined(__arm__) || defined(__aarch64__)) /** Extract MIDR using CPUID information that are exposed to user-space * * @param[in] max_num_cpus Maximum number of possible CPUs @@ -65,15 +74,15 @@ namespace std::vector<uint32_t> midr_from_cpuid(uint32_t max_num_cpus) { std::vector<uint32_t> cpus; - for(unsigned int i = 0; i < max_num_cpus; ++i) + for (unsigned int i = 0; i < max_num_cpus; ++i) { std::stringstream str; str << "/sys/devices/system/cpu/cpu" << i << "/regs/identification/midr_el1"; std::ifstream file(str.str(), std::ios::in); - if(file.is_open()) + if (file.is_open()) { std::string line; - if(bool(getline(file, line))) + if (bool(getline(file, line))) { cpus.emplace_back(support::cpp11::stoul(line, nullptr, support::cpp11::NumericBase::BASE_16)); } @@ -115,34 +124,35 @@ std::vector<uint32_t> midr_from_proc_cpuinfo(int max_num_cpus) ARM_COMPUTE_ERROR_ON_MSG(ret_status != 0, "Regex compilation failed."); std::ifstream file("/proc/cpuinfo", std::ios::in); - if(file.is_open()) + if (file.is_open()) { std::string line; int midr = 0; int curcpu = -1; - while(bool(getline(file, line))) + while (bool(getline(file, line))) { std::array<regmatch_t, 2> match; ret_status = regexec(&proc_regex, line.c_str(), 2, match.data(), 0); - if(ret_status == 0) + if (ret_status == 0) { std::string id = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so)); int newcpu = support::cpp11::stoi(id, nullptr); - if(curcpu >= 0 && midr == 0) + if (curcpu >= 0 && midr == 0) { // Matched a new CPU ID without any description of the previous one - looks like old format. return {}; } - if(curcpu >= 0 && curcpu < max_num_cpus) + if (curcpu >= 0 && curcpu < max_num_cpus) { cpus.emplace_back(midr); } else { - ARM_COMPUTE_LOG_INFO_MSG_CORE("Trying to populate a core id with id greater than the expected number of cores!"); + ARM_COMPUTE_LOG_INFO_MSG_CORE( + "Trying to populate a core id with id greater than the expected number of cores!"); } midr = 0; @@ -152,7 +162,7 @@ std::vector<uint32_t> midr_from_proc_cpuinfo(int max_num_cpus) } ret_status = regexec(&imp_regex, line.c_str(), 2, match.data(), 0); - if(ret_status == 0) + if (ret_status == 0) { std::string subexp = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so)); int impv = support::cpp11::stoi(subexp, nullptr, support::cpp11::NumericBase::BASE_16); @@ -162,7 +172,7 @@ std::vector<uint32_t> midr_from_proc_cpuinfo(int max_num_cpus) } ret_status = regexec(&var_regex, line.c_str(), 2, match.data(), 0); - if(ret_status == 0) + if (ret_status == 0) { std::string subexp = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so)); int varv = support::cpp11::stoi(subexp, nullptr, support::cpp11::NumericBase::BASE_16); @@ -172,7 +182,7 @@ std::vector<uint32_t> midr_from_proc_cpuinfo(int max_num_cpus) } ret_status = regexec(&part_regex, line.c_str(), 2, match.data(), 0); - if(ret_status == 0) + if (ret_status == 0) { std::string subexp = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so)); int partv = support::cpp11::stoi(subexp, nullptr, support::cpp11::NumericBase::BASE_16); @@ -182,7 +192,7 @@ std::vector<uint32_t> midr_from_proc_cpuinfo(int max_num_cpus) } ret_status = regexec(&rev_regex, line.c_str(), 2, match.data(), 0); - if(ret_status == 0) + if (ret_status == 0) { std::string subexp = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so)); int regv = support::cpp11::stoi(subexp, nullptr); @@ -193,13 +203,14 @@ std::vector<uint32_t> midr_from_proc_cpuinfo(int max_num_cpus) } } - if(curcpu >= 0 && curcpu < max_num_cpus) + if (curcpu >= 0 && curcpu < max_num_cpus) { cpus.emplace_back(midr); } else { - ARM_COMPUTE_LOG_INFO_MSG_CORE("Trying to populate a core id with id greater than the expected number of cores!"); + ARM_COMPUTE_LOG_INFO_MSG_CORE( + "Trying to populate a core id with id greater than the expected number of cores!"); } } @@ -224,11 +235,11 @@ int get_max_cpus() CPUspresent.open("/sys/devices/system/cpu/present", std::ios::in); bool success = false; - if(CPUspresent.is_open()) + if (CPUspresent.is_open()) { std::string line; - if(bool(getline(CPUspresent, line))) + if (bool(getline(CPUspresent, line))) { /* The content of this file is a list of ranges or single values, e.g. * 0-5, or 1-3,5,7 or similar. As we are interested in the @@ -237,9 +248,9 @@ int get_max_cpus() */ auto startfrom = line.begin(); - for(auto i = line.begin(); i < line.end(); ++i) + for (auto i = line.begin(); i < line.end(); ++i) { - if(*i == '-' || *i == ',') + if (*i == '-' || *i == ',') { startfrom = i + 1; } @@ -253,38 +264,145 @@ int get_max_cpus() } // Return std::thread::hardware_concurrency() as a fallback. - if(!success) + if (!success) { max_cpus = std::thread::hardware_concurrency(); } return max_cpus; } -#endif /* !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) */ + +const static std::map<std::string, std::vector<uint32_t>> known_configurations_with_little_cores = { + {"xiaomi14-pro", {379, 379, 923, 923, 923, 867, 867, 1024}}}; + +const static std::map<std::string, uint32_t> number_of_cores_to_use = {{"xiaomi14-pro", 6}}; + +#if defined(__ANDROID__) +std::vector<uint32_t> get_cpu_capacities() +{ + std::vector<uint32_t> cpu_capacities; + for (int i = 0; i < get_max_cpus(); ++i) + { + std::stringstream str; + str << "/sys/devices/system/cpu/cpu" << i << "/cpu_capacity"; + std::ifstream file(str.str(), std::ios::in); + if (file.is_open()) + { + std::string line; + if (bool(getline(file, line))) + { + cpu_capacities.emplace_back(support::cpp11::stoul(line)); + } + } + } + + return cpu_capacities; +} + +uint32_t not_little_num_cpus_internal() +{ + std::vector<uint32_t> cpus_all = get_cpu_capacities(); + std::vector<uint32_t> cpus_not_little; + + for (auto &it : known_configurations_with_little_cores) + { + if (it.second == cpus_all) + { + return number_of_cores_to_use.find(it.first)->second; + } + } + + std::vector<uint32_t>::iterator result = std::max_element(cpus_all.begin(), cpus_all.end()); + uint32_t max_capacity = *result; + uint32_t threshold = max_capacity / 2; + for (unsigned int i = 0; i < cpus_all.size(); i++) + { + if (!(cpus_all[i] < threshold)) + { + cpus_not_little.emplace_back(cpus_all[i]); + } + } + return cpus_not_little.size(); +} + +bool has_little_mid_big_internal() +{ + std::vector<uint32_t> cpus_all = get_cpu_capacities(); + std::vector<uint32_t> cpus_not_little; + + for (auto &it : known_configurations_with_little_cores) + { + if (it.second == cpus_all) + { + return true; + } + } + std::sort(cpus_all.begin(), cpus_all.end()); + std::vector<uint32_t>::iterator ip; + ip = std::unique(cpus_all.begin(), cpus_all.end()); + cpus_all.resize(std::distance(cpus_all.begin(), ip)); + + if (cpus_all.size() == 3) + { + return true; + } + else + { + return false; + } +} +#endif /* defined(__ANDROID__) */ +#elif defined(__aarch64__) && \ + defined(__APPLE__) /* !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) */ +/** Query features through sysctlbyname + * + * @return int value queried + */ +int get_hw_capability(const std::string &cap) +{ + int64_t result(0); + size_t size = sizeof(result); + sysctlbyname(cap.c_str(), &result, &size, NULL, 0); + return result; +} +#endif /* !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && (defined(__arm__) || defined(__aarch64__)) */ + +#if defined(BARE_METAL) && defined(__aarch64__) +uint64_t get_sve_feature_reg() +{ + uint64_t svefr0 = 0; + __asm __volatile(".inst 0xd5380483 // mrs x3, ID_AA64ZFR0_EL1\n" + "MOV %0, X3" + : "=r"(svefr0) + : + : "x3"); + return svefr0; +} +#endif /* defined(BARE_METAL) && defined(__aarch64__) */ } // namespace -CpuInfo::CpuInfo(CpuIsaInfo isa, std::vector<CpuModel> cpus) - : _isa(std::move(isa)), _cpus(std::move(cpus)) +CpuInfo::CpuInfo(CpuIsaInfo isa, std::vector<CpuModel> cpus) : _isa(std::move(isa)), _cpus(std::move(cpus)) { } CpuInfo CpuInfo::build() { -#if !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) +#if !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \ + (defined(__arm__) || defined(__aarch64__)) const uint32_t hwcaps = getauxval(AT_HWCAP); const uint32_t hwcaps2 = getauxval(AT_HWCAP2); const uint32_t max_cpus = get_max_cpus(); // Populate midr values std::vector<uint32_t> cpus_midr; - if(hwcaps & ARM_COMPUTE_CPU_FEATURE_HWCAP_CPUID) + if (hwcaps & ARM_COMPUTE_CPU_FEATURE_HWCAP_CPUID) { cpus_midr = midr_from_cpuid(max_cpus); } - if(cpus_midr.empty()) + if (cpus_midr.empty()) { cpus_midr = midr_from_proc_cpuinfo(max_cpus); } - if(cpus_midr.empty()) + if (cpus_midr.empty()) { cpus_midr.resize(max_cpus, 0); } @@ -300,32 +418,51 @@ CpuInfo CpuInfo::build() CpuInfo info(isa, cpus_model); return info; -#elif(BARE_METAL) && defined(__aarch64__) /* !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) */ +#elif (BARE_METAL) && \ + defined( \ + __aarch64__) /* !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && (defined(__arm__) || defined(__aarch64__)) */ // Assume single CPU in bare metal mode. Just read the ID register and feature bits directly. - uint64_t isar0 = 0, isar1 = 0, pfr0 = 0, svefr0 = 0, midr = 0; + uint64_t isar0 = 0, isar1 = 0, pfr0 = 0, pfr1 = 0, svefr0 = 0, midr = 0; ARM_COMPUTE_GET_FEATURE_REG(isar0, ID_AA64ISAR0_EL1); ARM_COMPUTE_GET_FEATURE_REG(isar1, ID_AA64ISAR1_EL1); ARM_COMPUTE_GET_FEATURE_REG(pfr0, ID_AA64PFR0_EL1); + ARM_COMPUTE_GET_FEATURE_REG(pfr1, ID_AA64PFR1_EL1); ARM_COMPUTE_GET_FEATURE_REG(midr, MIDR_EL1); - if((pfr0 >> 32) & 0xf) + if ((pfr0 >> 32) & 0xf) { svefr0 = get_sve_feature_reg(); } - CpuIsaInfo isa = init_cpu_isa_from_regs(isar0, isar1, pfr0, svefr0, midr); + CpuIsaInfo isa = init_cpu_isa_from_regs(isar0, isar1, pfr0, pfr1, svefr0, midr); std::vector<CpuModel> cpus_model(1, midr_to_model(midr)); CpuInfo info(isa, cpus_model); return info; -#else /* #elif(BARE_METAL) && defined(__aarch64__) */ - CpuInfo info(CpuIsaInfo(), { CpuModel::GENERIC }); +#elif defined(__aarch64__) && defined(__APPLE__) /* #elif(BARE_METAL) && defined(__aarch64__) */ + int ncpus = get_hw_capability("hw.perflevel0.logicalcpu"); + CpuIsaInfo isainfo; + std::vector<CpuModel> cpus_model(ncpus); + isainfo.neon = get_hw_capability("hw.optional.neon"); + isainfo.fp16 = get_hw_capability("hw.optional.neon_fp16"); + isainfo.dot = get_hw_capability("hw.optional.arm.FEAT_DotProd"); + isainfo.bf16 = get_hw_capability("hw.optional.arm.FEAT_BF16"); + isainfo.i8mm = get_hw_capability("hw.optional.arm.FEAT_I8MM"); + CpuInfo info(isainfo, cpus_model); return info; -#endif /* !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) */ +#elif defined(__aarch64__) && defined(_WIN64) /* #elif defined(__aarch64__) && defined(__APPLE__) */ + CpuIsaInfo isainfo; + isainfo.neon = true; + CpuInfo info(isainfo, {CpuModel::GENERIC}); + return info; +#else /* #elif defined(__aarch64__) && defined(_WIN64) */ + CpuInfo info(CpuIsaInfo(), {CpuModel::GENERIC}); + return info; +#endif /* !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && (defined(__arm__) || defined(__aarch64__)) */ } CpuModel CpuInfo::cpu_model(uint32_t cpuid) const { - if(cpuid < _cpus.size()) + if (cpuid < _cpus.size()) { return _cpus[cpuid]; } @@ -334,11 +471,12 @@ CpuModel CpuInfo::cpu_model(uint32_t cpuid) const CpuModel CpuInfo::cpu_model() const { -#if defined(BARE_METAL) || defined(__APPLE__) || (!defined(__arm__) && !defined(__aarch64__)) +#if defined(_WIN64) || defined(BARE_METAL) || defined(__APPLE__) || defined(__OpenBSD__) || \ + (!defined(__arm__) && !defined(__aarch64__)) return cpu_model(0); -#else /* defined(BARE_METAL) || defined(__APPLE__) || (!defined(__arm__) && !defined(__aarch64__)) */ +#else /* defined(BARE_METAL) || defined(__APPLE__) || defined(__OpenBSD__) || (!defined(__arm__) && !defined(__aarch64__)) */ return cpu_model(sched_getcpu()); -#endif /* defined(BARE_METAL) || defined(__APPLE__) || (!defined(__arm__) && !defined(__aarch64__)) */ +#endif /* defined(BARE_METAL) || defined(__APPLE__) || defined(__OpenBSD__) || (!defined(__arm__) && !defined(__aarch64__)) */ } uint32_t CpuInfo::num_cpus() const @@ -346,11 +484,29 @@ uint32_t CpuInfo::num_cpus() const return _cpus.size(); } +uint32_t CpuInfo::not_little_num_cpus() const +{ +#if defined(__ANDROID__) + return not_little_num_cpus_internal(); +#else /* defined(__ANDROID__) */ + return num_cpus(); +#endif /* defined(__ANDROID__) */ +} + +bool CpuInfo::has_little_mid_big() const +{ +#if defined(__ANDROID__) + return has_little_mid_big_internal(); +#else /* defined(__ANDROID__) */ + return false; +#endif /* defined(__ANDROID__) */ +} + uint32_t num_threads_hint() { unsigned int num_threads_hint = 1; -#if !defined(BARE_METAL) +#if !defined(BARE_METAL) && !defined(_WIN64) && !defined(ARM_COMPUTE_DISABLE_THREADS_HINT) std::vector<std::string> cpus; cpus.reserve(64); @@ -363,13 +519,13 @@ uint32_t num_threads_hint() // Read cpuinfo and get occurrence of each core std::ifstream cpuinfo_file("/proc/cpuinfo", std::ios::in); - if(cpuinfo_file.is_open()) + if (cpuinfo_file.is_open()) { std::string line; - while(bool(getline(cpuinfo_file, line))) + while (bool(getline(cpuinfo_file, line))) { std::array<regmatch_t, 2> match; - if(regexec(&cpu_part_rgx, line.c_str(), 2, match.data(), 0) == 0) + if (regexec(&cpu_part_rgx, line.c_str(), 2, match.data(), 0) == 0) { cpus.emplace_back(line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so))); } @@ -382,13 +538,13 @@ uint32_t num_threads_hint() auto least_frequent_cpu_occurences = [](const std::vector<std::string> &cpus) -> uint32_t { std::unordered_map<std::string, uint32_t> cpus_freq; - for(const auto &cpu : cpus) + for (const auto &cpu : cpus) { cpus_freq[cpu]++; } uint32_t vmin = cpus.size() + 1; - for(const auto &cpu_freq : cpus_freq) + for (const auto &cpu_freq : cpus_freq) { vmin = std::min(vmin, cpu_freq.second); } @@ -397,9 +553,9 @@ uint32_t num_threads_hint() // Set thread hint num_threads_hint = cpus.empty() ? std::thread::hardware_concurrency() : least_frequent_cpu_occurences(cpus); -#endif /* !defined(BARE_METAL) */ +#endif /* !defined(BARE_METAL) && !defined(_WIN64) && !defined(ARM_COMPUTE_DISABLE_THREADS_HINT) */ return num_threads_hint; } } // namespace cpuinfo -} // namespace arm_compute
\ No newline at end of file +} // namespace arm_compute diff --git a/src/common/cpuinfo/CpuInfo.h b/src/common/cpuinfo/CpuInfo.h index f3056d2faf..506830aa81 100644 --- a/src/common/cpuinfo/CpuInfo.h +++ b/src/common/cpuinfo/CpuInfo.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Arm Limited. + * Copyright (c) 2021-2022, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef SRC_COMMON_CPUINFO_H -#define SRC_COMMON_CPUINFO_H +#ifndef ACL_SRC_COMMON_CPUINFO_CPUINFO_H +#define ACL_SRC_COMMON_CPUINFO_CPUINFO_H #include "src/common/cpuinfo/CpuIsaInfo.h" #include "src/common/cpuinfo/CpuModel.h" @@ -71,6 +71,14 @@ public: { return _isa.sve2; } + bool has_sme() const + { + return _isa.sme; + } + bool has_sme2() const + { + return _isa.sme2; + } bool has_fp16() const { return _isa.fp16; @@ -79,22 +87,41 @@ public: { return _isa.bf16; } + bool has_svebf16() const + { + return _isa.svebf16; + } bool has_dotprod() const { return _isa.dot; } - bool has_immla() const + bool has_i8mm() const + { + return _isa.i8mm; + } + bool has_svei8mm() const + { + return _isa.svei8mm; + } + bool has_svef32mm() const + { + return _isa.svef32mm; + } + + const CpuIsaInfo &isa() const { - return _isa.immla; + return _isa; } - bool has_fmmla() const + const std::vector<CpuModel> &cpus() const { - return _isa.fmmla; + return _cpus; } CpuModel cpu_model(uint32_t cpuid) const; CpuModel cpu_model() const; uint32_t num_cpus() const; + uint32_t not_little_num_cpus() const; + bool has_little_mid_big() const; private: CpuIsaInfo _isa{}; @@ -110,4 +137,4 @@ private: uint32_t num_threads_hint(); } // namespace cpuinfo } // namespace arm_compute -#endif /* SRC_COMMON_CPUINFO_H */ +#endif // ACL_SRC_COMMON_CPUINFO_CPUINFO_H diff --git a/src/common/cpuinfo/CpuIsaInfo.cpp b/src/common/cpuinfo/CpuIsaInfo.cpp index d99f9aec29..c9e39b9a08 100644 --- a/src/common/cpuinfo/CpuIsaInfo.cpp +++ b/src/common/cpuinfo/CpuIsaInfo.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Arm Limited. + * Copyright (c) 2021-2022, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,6 +24,7 @@ #include "src/common/cpuinfo/CpuIsaInfo.h" #include "arm_compute/core/Error.h" + #include "src/common/cpuinfo/CpuModel.h" /* Arm Feature flags */ @@ -31,17 +32,18 @@ #define ARM_COMPUTE_CPU_FEATURE_HWCAP_NEON (1 << 12) /* Arm64 Feature flags */ -#define ARM_COMPUTE_CPU_FEATURE_HWCAP_ASIMD (1 << 1) -#define ARM_COMPUTE_CPU_FEATURE_HWCAP_FPHP (1 << 9) -#define ARM_COMPUTE_CPU_FEATURE_HWCAP_ASIMDHP (1 << 10) -#define ARM_COMPUTE_CPU_FEATURE_HWCAP_ASIMDDP (1 << 20) -#define ARM_COMPUTE_CPU_FEATURE_HWCAP_SVE (1 << 22) -#define ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVE2 (1 << 1) -#define ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVEI8MM (1 << 9) +#define ARM_COMPUTE_CPU_FEATURE_HWCAP_ASIMD (1 << 1) +#define ARM_COMPUTE_CPU_FEATURE_HWCAP_FPHP (1 << 9) +#define ARM_COMPUTE_CPU_FEATURE_HWCAP_ASIMDHP (1 << 10) +#define ARM_COMPUTE_CPU_FEATURE_HWCAP_ASIMDDP (1 << 20) +#define ARM_COMPUTE_CPU_FEATURE_HWCAP_SVE (1 << 22) +#define ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVE2 (1 << 1) +#define ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVEI8MM (1 << 9) #define ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVEF32MM (1 << 10) -#define ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVEBF16 (1 << 12) -#define ARM_COMPUTE_CPU_FEATURE_HWCAP2_I8MM (1 << 13) -#define ARM_COMPUTE_CPU_FEATURE_HWCAP2_BF16 (1 << 14) +#define ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVEBF16 (1 << 12) +#define ARM_COMPUTE_CPU_FEATURE_HWCAP2_I8MM (1 << 13) +#define ARM_COMPUTE_CPU_FEATURE_HWCAP2_BF16 (1 << 14) +#define ARM_COMPUTE_CPU_FEATURE_HWCAP2_SME (1 << 23) namespace arm_compute { @@ -49,61 +51,40 @@ namespace cpuinfo { namespace { +inline bool is_feature_supported(uint64_t features, uint64_t feature_mask) +{ + return (features & feature_mask); +} + #if defined(__arm__) void decode_hwcaps(CpuIsaInfo &isa, const uint32_t hwcaps, const uint32_t hwcaps2) { ARM_COMPUTE_UNUSED(hwcaps2); - - if(hwcaps & ARM_COMPUTE_CPU_FEATURE_HWCAP_HALF) - { - isa.fp16 = true; - } - if(hwcaps & ARM_COMPUTE_CPU_FEATURE_HWCAP_NEON) - { - isa.neon = true; - } + isa.fp16 = false; + isa.neon = is_feature_supported(hwcaps, ARM_COMPUTE_CPU_FEATURE_HWCAP_NEON); } #elif defined(__aarch64__) void decode_hwcaps(CpuIsaInfo &isa, const uint32_t hwcaps, const uint32_t hwcaps2) { // High-level SIMD support - if(hwcaps & ARM_COMPUTE_CPU_FEATURE_HWCAP_ASIMD) - { - isa.neon = true; - } - if(hwcaps & ARM_COMPUTE_CPU_FEATURE_HWCAP_SVE) - { - isa.sve = true; - } - if(hwcaps2 & ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVE2) - { - isa.sve2 = true; - } + isa.neon = is_feature_supported(hwcaps, ARM_COMPUTE_CPU_FEATURE_HWCAP_ASIMD); + isa.sve = is_feature_supported(hwcaps, ARM_COMPUTE_CPU_FEATURE_HWCAP_SVE); + isa.sve2 = is_feature_supported(hwcaps2, ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVE2); + + // Detection of SME from type HWCAP2 in the auxillary vector + isa.sme = is_feature_supported(hwcaps2, ARM_COMPUTE_CPU_FEATURE_HWCAP2_SME); + isa.sme2 = isa.sme; // Needs to be set properly // Data-type support - const uint32_t fp16_support_mask = ARM_COMPUTE_CPU_FEATURE_HWCAP_FPHP | ARM_COMPUTE_CPU_FEATURE_HWCAP_ASIMDHP; - if(hwcaps & fp16_support_mask) - { - isa.fp16 = true; - } - if(hwcaps2 & ARM_COMPUTE_CPU_FEATURE_HWCAP2_BF16) - { - isa.bf16 = true; - } + isa.fp16 = is_feature_supported(hwcaps, ARM_COMPUTE_CPU_FEATURE_HWCAP_FPHP | ARM_COMPUTE_CPU_FEATURE_HWCAP_ASIMDHP); + isa.bf16 = is_feature_supported(hwcaps2, ARM_COMPUTE_CPU_FEATURE_HWCAP2_BF16); + isa.svebf16 = is_feature_supported(hwcaps2, ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVEBF16); // Instruction extensions - if(hwcaps & ARM_COMPUTE_CPU_FEATURE_HWCAP_ASIMDDP) - { - isa.dot = true; - } - if(hwcaps2 & ARM_COMPUTE_CPU_FEATURE_HWCAP2_I8MM) - { - isa.immla = true; - } - if(hwcaps2 & ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVEF32MM) - { - isa.fmmla = true; - } + isa.dot = is_feature_supported(hwcaps, ARM_COMPUTE_CPU_FEATURE_HWCAP_ASIMDDP); + isa.i8mm = is_feature_supported(hwcaps2, ARM_COMPUTE_CPU_FEATURE_HWCAP2_I8MM); + isa.svei8mm = is_feature_supported(hwcaps2, ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVEI8MM); + isa.svef32mm = is_feature_supported(hwcaps2, ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVEF32MM); } #else /* defined(__aarch64__) */ void decode_hwcaps(CpuIsaInfo &isa, const uint32_t hwcaps, const uint32_t hwcaps2) @@ -112,62 +93,49 @@ void decode_hwcaps(CpuIsaInfo &isa, const uint32_t hwcaps, const uint32_t hwcaps } #endif /* defined(__aarch64__) */ -void decode_regs(CpuIsaInfo &isa, const uint64_t isar0, const uint64_t isar1, const uint64_t pfr0, const uint64_t svefr0) +void decode_regs(CpuIsaInfo &isa, + const uint64_t isar0, + const uint64_t isar1, + const uint64_t pfr0, + const uint64_t pfr1, + const uint64_t svefr0) { + auto is_supported = [](uint64_t feature_reg, uint8_t feature_pos) -> bool + { return ((feature_reg >> feature_pos) & 0xf); }; + // High-level SIMD support - if((pfr0 >> 32) & 0xf) - { - isa.sve = true; - } - if(svefr0 & 0xf) - { - isa.sve2 = true; - } + isa.sve = is_supported(pfr0, 32); + isa.sve2 = is_supported(svefr0, 0); + isa.sme = is_supported(pfr1, 24); + isa.sme2 = (((pfr1 >> 24) & 0xf) > 1); // Data-type support - if((pfr0 >> 16) & 0xf) - { - isa.fp16 = true; - } - if((isar1 >> 44) & 0xf) - { - isa.bf16 = true; - } + isa.fp16 = is_supported(pfr0, 16); + isa.bf16 = is_supported(isar1, 44); + isa.svebf16 = is_supported(svefr0, 20); // Instruction extensions - if((isar0 >> 44) & 0xf) - { - isa.dot = true; - } - if((isar1 >> 48) & 0xf) - { - isa.immla = true; - } - if((svefr0 >> 52) & 0xf) - { - isa.fmmla = true; - } + isa.dot = is_supported(isar0, 44); + isa.i8mm = is_supported(isar1, 48); + isa.svei8mm = is_supported(svefr0, 44); + isa.svef32mm = is_supported(svefr0, 52); } -/** Handle features from whitelisted models in case of problematic kernels +/** Handle features from allow-listed models in case of problematic kernels * * @param[in, out] isa ISA to update * @param[in] model CPU model type */ -void whitelisted_model_features(CpuIsaInfo &isa, CpuModel model) +void allowlisted_model_features(CpuIsaInfo &isa, CpuModel model) { - if(isa.dot == false) + if (isa.dot == false) { isa.dot = model_supports_dot(model); } - if(isa.fp16 == false) + if (isa.fp16 == false) { isa.fp16 = model_supports_fp16(model); } - if(isa.sve == false) - { - isa.sve = model_supports_sve(model); - } } } // namespace @@ -178,21 +146,22 @@ CpuIsaInfo init_cpu_isa_from_hwcaps(uint32_t hwcaps, uint32_t hwcaps2, uint32_t decode_hwcaps(isa, hwcaps, hwcaps2); const CpuModel model = midr_to_model(midr); - whitelisted_model_features(isa, model); + allowlisted_model_features(isa, model); return isa; } -CpuIsaInfo init_cpu_isa_from_regs(uint64_t isar0, uint64_t isar1, uint64_t pfr0, uint64_t svefr0, uint64_t midr) +CpuIsaInfo +init_cpu_isa_from_regs(uint64_t isar0, uint64_t isar1, uint64_t pfr0, uint64_t pfr1, uint64_t svefr0, uint64_t midr) { CpuIsaInfo isa; - decode_regs(isa, isar0, isar1, pfr0, svefr0); + decode_regs(isa, isar0, isar1, pfr0, pfr1, svefr0); const CpuModel model = midr_to_model(midr); - whitelisted_model_features(isa, model); + allowlisted_model_features(isa, model); return isa; } } // namespace cpuinfo -} // namespace arm_compute
\ No newline at end of file +} // namespace arm_compute diff --git a/src/common/cpuinfo/CpuIsaInfo.h b/src/common/cpuinfo/CpuIsaInfo.h index 1125f766dd..9d6bc07b67 100644 --- a/src/common/cpuinfo/CpuIsaInfo.h +++ b/src/common/cpuinfo/CpuIsaInfo.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Arm Limited. + * Copyright (c) 2021-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -37,18 +37,22 @@ namespace cpuinfo struct CpuIsaInfo { /* SIMD extension support */ - bool neon{ false }; - bool sve{ false }; - bool sve2{ false }; + bool neon{false}; + bool sve{false}; + bool sve2{false}; + bool sme{false}; + bool sme2{false}; /* Data-type extensions support */ - bool fp16{ false }; - bool bf16{ false }; + bool fp16{false}; + bool bf16{false}; + bool svebf16{false}; /* Instruction support */ - bool dot{ false }; - bool immla{ false }; - bool fmmla{ false }; + bool dot{false}; + bool i8mm{false}; + bool svei8mm{false}; + bool svef32mm{false}; }; /** Identify ISA related information through system information @@ -65,13 +69,15 @@ CpuIsaInfo init_cpu_isa_from_hwcaps(uint32_t hwcaps, uint32_t hwcaps2, uint32_t * * @param[in] isar0 Value of Instruction Set Attribute Register 0 (ID_AA64ISAR0_EL1) * @param[in] isar1 Value of Instruction Set Attribute Register 1 (ID_AA64ISAR1_EL1) - * @param[in] pfr0 Value of Processor Feature Register 0 (ID_AA64PFR0_EL1) + * @param[in] pfr0 Value of Processor Feature Register 0 (ID_AA64PFR0_EL1) + * @param[in] pfr1 Value of Processor Feature Register 1 (ID_AA64PFR1_EL1) * @param[in] svefr0 Value of SVE feature ID register 0 (ID_AA64ZFR0_EL1) * @param[in] midr Value of Main ID Register (MIDR) * * @return CpuIsaInfo A populated ISA feature structure */ -CpuIsaInfo init_cpu_isa_from_regs(uint64_t isar0, uint64_t isar1, uint64_t pfr0, uint64_t svefr0, uint64_t midr); +CpuIsaInfo +init_cpu_isa_from_regs(uint64_t isar0, uint64_t isar1, uint64_t pfr0, uint64_t pfr1, uint64_t svefr0, uint64_t midr); } // namespace cpuinfo } // namespace arm_compute diff --git a/src/common/cpuinfo/CpuModel.cpp b/src/common/cpuinfo/CpuModel.cpp index 9f4d5d1433..8c3f8a8faf 100644 --- a/src/common/cpuinfo/CpuModel.cpp +++ b/src/common/cpuinfo/CpuModel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Arm Limited. + * Copyright (c) 2021-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -29,12 +29,12 @@ namespace cpuinfo { std::string cpu_model_to_string(CpuModel model) { - switch(model) + switch (model) { #define X(MODEL) \ -case CpuModel::MODEL: \ - return #MODEL; - ARM_COMPUTE_CPU_MODEL_LIST + case CpuModel::MODEL: \ + return #MODEL; + ARM_COMPUTE_CPU_MODEL_LIST #undef X default: { @@ -45,13 +45,16 @@ case CpuModel::MODEL: \ bool model_supports_fp16(CpuModel model) { - switch(model) + switch (model) { case CpuModel::GENERIC_FP16: case CpuModel::GENERIC_FP16_DOT: case CpuModel::A55r1: + case CpuModel::A510: case CpuModel::X1: - case CpuModel::KLEIN: + case CpuModel::V1: + case CpuModel::A64FX: + case CpuModel::N1: return true; default: return false; @@ -60,23 +63,14 @@ bool model_supports_fp16(CpuModel model) bool model_supports_dot(CpuModel model) { - switch(model) + switch (model) { case CpuModel::GENERIC_FP16_DOT: case CpuModel::A55r1: + case CpuModel::A510: case CpuModel::X1: - case CpuModel::KLEIN: - return true; - default: - return false; - } -} - -bool model_supports_sve(CpuModel model) -{ - switch(model) - { - case CpuModel::KLEIN: + case CpuModel::V1: + case CpuModel::N1: return true; default: return false; @@ -92,17 +86,17 @@ CpuModel midr_to_model(uint32_t midr) const int variant = (midr >> 20) & 0xF; const int cpunum = (midr >> 4) & 0xFFF; - if(implementer == 0x41) // Arm CPUs + // Only CPUs we have code paths for are detected. All other CPUs can be safely classed as "GENERIC" + if (implementer == 0x41) // Arm CPUs { - // Only CPUs we have code paths for are detected. All other CPUs can be safely classed as "GENERIC" - switch(cpunum) + switch (cpunum) { case 0xd03: // A53 case 0xd04: // A35 model = CpuModel::A53; break; case 0xd05: // A55 - if(variant != 0) + if (variant != 0) { model = CpuModel::A55r1; } @@ -115,7 +109,7 @@ CpuModel midr_to_model(uint32_t midr) model = CpuModel::A73; break; case 0xd0a: // A75 - if(variant != 0) + if (variant != 0) { model = CpuModel::GENERIC_FP16_DOT; } @@ -124,9 +118,11 @@ CpuModel midr_to_model(uint32_t midr) model = CpuModel::GENERIC_FP16; } break; + case 0xd0c: // N1 + model = CpuModel::N1; + break; case 0xd06: // A65 case 0xd0b: // A76 - case 0xd0c: // N1 case 0xd0d: // A77 case 0xd0e: // A76AE case 0xd41: // A78 @@ -134,21 +130,39 @@ CpuModel midr_to_model(uint32_t midr) case 0xd4a: // E1 model = CpuModel::GENERIC_FP16_DOT; break; + case 0xd40: // V1 + model = CpuModel::V1; + break; case 0xd44: // X1 model = CpuModel::X1; break; - case 0xd46: - model = CpuModel::KLEIN; + case 0xd46: // A510 + case 0xd80: // A520 + model = CpuModel::A510; + break; + case 0xd15: // R82 + model = CpuModel::A55r1; + break; + default: + model = CpuModel::GENERIC; + break; + } + } + else if (implementer == 0x46) + { + switch (cpunum) + { + case 0x001: // A64FX + model = CpuModel::A64FX; break; default: model = CpuModel::GENERIC; break; } } - else if(implementer == 0x48) + else if (implementer == 0x48) { - // Only CPUs we have code paths for are detected. All other CPUs can be safely classed as "GENERIC" - switch(cpunum) + switch (cpunum) { case 0xd40: // A76 model = CpuModel::GENERIC_FP16_DOT; @@ -158,10 +172,9 @@ CpuModel midr_to_model(uint32_t midr) break; } } - else if(implementer == 0x51) + else if (implementer == 0x51) { - // Only CPUs we have code paths for are detected. All other CPUs can be safely classed as "GENERIC" - switch(cpunum) + switch (cpunum) { case 0x800: // A73 model = CpuModel::A73; @@ -187,4 +200,4 @@ CpuModel midr_to_model(uint32_t midr) return model; } } // namespace cpuinfo -} // namespace arm_compute
\ No newline at end of file +} // namespace arm_compute diff --git a/src/common/cpuinfo/CpuModel.h b/src/common/cpuinfo/CpuModel.h index 071efc4b3f..3b9d9e3494 100644 --- a/src/common/cpuinfo/CpuModel.h +++ b/src/common/cpuinfo/CpuModel.h @@ -24,11 +24,11 @@ #ifndef SRC_COMMON_CPUINFO_CPUMODEL_H #define SRC_COMMON_CPUINFO_CPUMODEL_H +#include "arm_compute/core/CPP/CPPTypes.h" + #include <cstdint> #include <string> -#include "arm_compute/core/CPP/CPPTypes.h" - namespace arm_compute { namespace cpuinfo @@ -55,23 +55,15 @@ CpuModel midr_to_model(uint32_t midr); * * @note This is used in case of old kernel configurations where some capabilities are not exposed. * - * @param[in] model Model to check for whitelisted capabilities + * @param[in] model Model to check for allowlisted capabilities */ bool model_supports_fp16(CpuModel model); -/** Check if a model supports SVE - * - * @note This is used in case of old kernel configurations where some capabilities are not exposed. - * - * @param[in] model Model to check for whitelisted capabilities - */ -bool model_supports_sve(CpuModel model); - /** Check if a model supports dot product * * @note This is used in case of old kernel configurations where some capabilities are not exposed. * - * @param[in] model Model to check for whitelisted capabilities + * @param[in] model Model to check for allowlisted capabilities */ bool model_supports_dot(CpuModel model); } // namespace cpuinfo diff --git a/src/common/cpuinfo/target/CpuInfoSveUtils.cpp b/src/common/cpuinfo/target/CpuInfoSveUtils.cpp deleted file mode 100644 index 750a1b01d1..0000000000 --- a/src/common/cpuinfo/target/CpuInfoSveUtils.cpp +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/common/cpuinfo/target/CpuInfoSveUtils.h" - -namespace arm_compute -{ -namespace cpuinfo -{ -uint64_t get_sve_feature_reg() -{ - uint64_t reg = 0; -#if defined(ENABLE_SVE) - __asm __volatile("MRS %0, ID_AA64ZFR0_EL1" - : "=r"(reg)); -#endif /* defined(DENABLE_SVE) */ - return reg; -} -} // namespace cpuinfo -} // namespace arm_compute diff --git a/src/common/cpuinfo/target/CpuInfoSveUtils.h b/src/common/cpuinfo/target/CpuInfoSveUtils.h deleted file mode 100644 index 73862b131c..0000000000 --- a/src/common/cpuinfo/target/CpuInfoSveUtils.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef SRC_COMMON_CPUINFO_TARGET_CPUINFO_SVE_UTILS_H -#define SRC_COMMON_CPUINFO_TARGET_CPUINFO_SVE_UTILS_H - -#include <cstdint> - -namespace arm_compute -{ -namespace cpuinfo -{ -/** Returns the contents of the SVE feature register (ID_AA64ZFR0_EL1) - * - * @return uint64_t The value of the register - */ -uint64_t get_sve_feature_reg(); -} // namespace cpuinfo -} // namespace arm_compute -#endif /* SRC_COMMON_CPUINFO_CPUISAINFO_H */ |