diff options
Diffstat (limited to 'src/common/cpuinfo')
-rw-r--r-- | src/common/cpuinfo/CpuInfo.cpp | 511 | ||||
-rw-r--r-- | src/common/cpuinfo/CpuInfo.h | 139 | ||||
-rw-r--r-- | src/common/cpuinfo/CpuIsaInfo.cpp | 167 | ||||
-rw-r--r-- | src/common/cpuinfo/CpuIsaInfo.h | 84 | ||||
-rw-r--r-- | src/common/cpuinfo/CpuModel.cpp | 203 | ||||
-rw-r--r-- | src/common/cpuinfo/CpuModel.h | 71 |
6 files changed, 1175 insertions, 0 deletions
diff --git a/src/common/cpuinfo/CpuInfo.cpp b/src/common/cpuinfo/CpuInfo.cpp new file mode 100644 index 0000000000..d46d8d7773 --- /dev/null +++ b/src/common/cpuinfo/CpuInfo.cpp @@ -0,0 +1,511 @@ +/* + * Copyright (c) 2021-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "src/common/cpuinfo/CpuInfo.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Log.h" + +#include "support/StringSupport.h" +#include "support/ToolchainSupport.h" + +#include <map> +#include <sstream> + +#if !defined(BARE_METAL) +#include <algorithm> +#include <cstring> +#include <fstream> +#if !defined(_WIN64) +#include <regex.h> /* C++ std::regex takes up a lot of space in the standalone builds */ +#include <sched.h> +#endif /* !defined(_WIN64) */ + +#include <thread> +#include <unordered_map> +#endif /* !defined(BARE_METAL) */ + +#if !defined(_WIN64) +#if !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && (defined(__arm__) || defined(__aarch64__)) +#include <asm/hwcap.h> /* Get HWCAP bits from asm/hwcap.h */ +#include <sys/auxv.h> +#elif defined(__APPLE__) && defined(__aarch64__) +#include <sys/sysctl.h> +#include <sys/types.h> +#endif /* defined(__APPLE__) && defined(__aarch64__)) */ +#endif /* !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && (defined(__arm__) || defined(__aarch64__)) */ + +#define ARM_COMPUTE_CPU_FEATURE_HWCAP_CPUID (1 << 11) +#define ARM_COMPUTE_GET_FEATURE_REG(var, freg) __asm __volatile("MRS %0, " #freg : "=r"(var)) +namespace arm_compute +{ +namespace cpuinfo +{ +namespace +{ +#if !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \ + (defined(__arm__) || defined(__aarch64__)) +/** Extract MIDR using CPUID information that are exposed to user-space + * + * @param[in] max_num_cpus Maximum number of possible CPUs + * + * @return std::vector<uint32_t> A list of the MIDR of each core + */ +std::vector<uint32_t> midr_from_cpuid(uint32_t max_num_cpus) +{ + std::vector<uint32_t> cpus; + for (unsigned int i = 0; i < max_num_cpus; ++i) + { + std::stringstream str; + str << "/sys/devices/system/cpu/cpu" << i << "/regs/identification/midr_el1"; + std::ifstream file(str.str(), std::ios::in); + if (file.is_open()) + { + std::string 
line; + if (bool(getline(file, line))) + { + cpus.emplace_back(support::cpp11::stoul(line, nullptr, support::cpp11::NumericBase::BASE_16)); + } + } + } + return cpus; +} + +/** Extract MIDR by parsing the /proc/cpuinfo meta-data + * + * @param[in] max_num_cpus Maximum number of possible CPUs + * + * @return std::vector<uint32_t> A list of the MIDR of each core + */ +std::vector<uint32_t> midr_from_proc_cpuinfo(int max_num_cpus) +{ + std::vector<uint32_t> cpus; + + regex_t proc_regex; + regex_t imp_regex; + regex_t var_regex; + regex_t part_regex; + regex_t rev_regex; + + memset(&proc_regex, 0, sizeof(regex_t)); + memset(&imp_regex, 0, sizeof(regex_t)); + memset(&var_regex, 0, sizeof(regex_t)); + memset(&part_regex, 0, sizeof(regex_t)); + memset(&rev_regex, 0, sizeof(regex_t)); + + int ret_status = 0; + // If "long-form" cpuinfo is present, parse that to populate models. + ret_status |= regcomp(&proc_regex, R"(^processor.*([[:digit:]]+)$)", REG_EXTENDED); + ret_status |= regcomp(&imp_regex, R"(^CPU implementer.*0x(..)$)", REG_EXTENDED); + ret_status |= regcomp(&var_regex, R"(^CPU variant.*0x(.)$)", REG_EXTENDED); + ret_status |= regcomp(&part_regex, R"(^CPU part.*0x(...)$)", REG_EXTENDED); + ret_status |= regcomp(&rev_regex, R"(^CPU revision.*([[:digit:]]+)$)", REG_EXTENDED); + ARM_COMPUTE_UNUSED(ret_status); + ARM_COMPUTE_ERROR_ON_MSG(ret_status != 0, "Regex compilation failed."); + + std::ifstream file("/proc/cpuinfo", std::ios::in); + if (file.is_open()) + { + std::string line; + int midr = 0; + int curcpu = -1; + + while (bool(getline(file, line))) + { + std::array<regmatch_t, 2> match; + ret_status = regexec(&proc_regex, line.c_str(), 2, match.data(), 0); + if (ret_status == 0) + { + std::string id = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so)); + int newcpu = support::cpp11::stoi(id, nullptr); + + if (curcpu >= 0 && midr == 0) + { + // Matched a new CPU ID without any description of the previous one - looks like old format. 
+ return {}; + } + + if (curcpu >= 0 && curcpu < max_num_cpus) + { + cpus.emplace_back(midr); + } + else + { + ARM_COMPUTE_LOG_INFO_MSG_CORE( + "Trying to populate a core id with id greater than the expected number of cores!"); + } + + midr = 0; + curcpu = newcpu; + + continue; + } + + ret_status = regexec(&imp_regex, line.c_str(), 2, match.data(), 0); + if (ret_status == 0) + { + std::string subexp = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so)); + int impv = support::cpp11::stoi(subexp, nullptr, support::cpp11::NumericBase::BASE_16); + midr |= (impv << 24); + + continue; + } + + ret_status = regexec(&var_regex, line.c_str(), 2, match.data(), 0); + if (ret_status == 0) + { + std::string subexp = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so)); + int varv = support::cpp11::stoi(subexp, nullptr, support::cpp11::NumericBase::BASE_16); + midr |= (varv << 20); + + continue; + } + + ret_status = regexec(&part_regex, line.c_str(), 2, match.data(), 0); + if (ret_status == 0) + { + std::string subexp = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so)); + int partv = support::cpp11::stoi(subexp, nullptr, support::cpp11::NumericBase::BASE_16); + midr |= (partv << 4); + + continue; + } + + ret_status = regexec(&rev_regex, line.c_str(), 2, match.data(), 0); + if (ret_status == 0) + { + std::string subexp = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so)); + int regv = support::cpp11::stoi(subexp, nullptr); + midr |= (regv); + midr |= (0xf << 16); + + continue; + } + } + + if (curcpu >= 0 && curcpu < max_num_cpus) + { + cpus.emplace_back(midr); + } + else + { + ARM_COMPUTE_LOG_INFO_MSG_CORE( + "Trying to populate a core id with id greater than the expected number of cores!"); + } + } + + // Free allocated memory + regfree(&proc_regex); + regfree(&imp_regex); + regfree(&var_regex); + regfree(&part_regex); + regfree(&rev_regex); + + return cpus; +} + +/** Get the maximim number of CPUs in the system by parsing 
/sys/devices/system/cpu/present + * + * @return int Maximum number of CPUs + */ +int get_max_cpus() +{ + int max_cpus = 1; + std::ifstream CPUspresent; + CPUspresent.open("/sys/devices/system/cpu/present", std::ios::in); + bool success = false; + + if (CPUspresent.is_open()) + { + std::string line; + + if (bool(getline(CPUspresent, line))) + { + /* The content of this file is a list of ranges or single values, e.g. + * 0-5, or 1-3,5,7 or similar. As we are interested in the + * max valid ID, we just need to find the last valid + * delimiter ('-' or ',') and parse the integer immediately after that. + */ + auto startfrom = line.begin(); + + for (auto i = line.begin(); i < line.end(); ++i) + { + if (*i == '-' || *i == ',') + { + startfrom = i + 1; + } + } + + line.erase(line.begin(), startfrom); + + max_cpus = support::cpp11::stoi(line, nullptr) + 1; + success = true; + } + } + + // Return std::thread::hardware_concurrency() as a fallback. + if (!success) + { + max_cpus = std::thread::hardware_concurrency(); + } + return max_cpus; +} +#if defined(__ANDROID__) +std::vector<uint32_t> get_cpu_capacities() +{ + std::vector<uint32_t> cpu_capacities; + for (int i = 0; i < get_max_cpus(); ++i) + { + std::stringstream str; + str << "/sys/devices/system/cpu/cpu" << i << "/cpu_capacity"; + std::ifstream file(str.str(), std::ios::in); + if (file.is_open()) + { + std::string line; + if (bool(getline(file, line))) + { + cpu_capacities.emplace_back(support::cpp11::stoul(line)); + } + } + } + + return cpu_capacities; +} + +uint32_t not_little_num_cpus_internal() +{ + std::vector<uint32_t> cpus_all = get_cpu_capacities(); + std::vector<uint32_t> cpus_not_little; + + std::vector<uint32_t>::iterator result = std::max_element(cpus_all.begin(), cpus_all.end()); + uint32_t max_capacity = *result; + uint32_t threshold = max_capacity / 2; + for (unsigned int i = 0; i < cpus_all.size(); i++) + { + if (!(cpus_all[i] < threshold)) + { + cpus_not_little.emplace_back(cpus_all[i]); + } + } + 
return cpus_not_little.size(); +} +#endif /* defined(__ANDROID__) */ +#elif defined(__aarch64__) && \ + defined(__APPLE__) /* !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) */ +/** Query features through sysctlbyname + * + * @return int value queried + */ +int get_hw_capability(const std::string &cap) +{ + int64_t result(0); + size_t size = sizeof(result); + sysctlbyname(cap.c_str(), &result, &size, NULL, 0); + return result; +} +#endif /* !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && (defined(__arm__) || defined(__aarch64__)) */ + +#if defined(BARE_METAL) && defined(__aarch64__) +uint64_t get_sve_feature_reg() +{ + uint64_t svefr0 = 0; + __asm __volatile(".inst 0xd5380483 // mrs x3, ID_AA64ZFR0_EL1\n" + "MOV %0, X3" + : "=r"(svefr0) + : + : "x3"); + return svefr0; +} +#endif /* defined(BARE_METAL) && defined(__aarch64__) */ +} // namespace + +CpuInfo::CpuInfo(CpuIsaInfo isa, std::vector<CpuModel> cpus) : _isa(std::move(isa)), _cpus(std::move(cpus)) +{ +} + +CpuInfo CpuInfo::build() +{ +#if !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \ + (defined(__arm__) || defined(__aarch64__)) + const uint32_t hwcaps = getauxval(AT_HWCAP); + const uint32_t hwcaps2 = getauxval(AT_HWCAP2); + const uint32_t max_cpus = get_max_cpus(); + + // Populate midr values + std::vector<uint32_t> cpus_midr; + if (hwcaps & ARM_COMPUTE_CPU_FEATURE_HWCAP_CPUID) + { + cpus_midr = midr_from_cpuid(max_cpus); + } + if (cpus_midr.empty()) + { + cpus_midr = midr_from_proc_cpuinfo(max_cpus); + } + if (cpus_midr.empty()) + { + cpus_midr.resize(max_cpus, 0); + } + + // Populate isa (Assume homogeneous ISA specification) + CpuIsaInfo isa = init_cpu_isa_from_hwcaps(hwcaps, hwcaps2, cpus_midr.back()); + + // Convert midr to models + std::vector<CpuModel> cpus_model; + std::transform(std::begin(cpus_midr), std::end(cpus_midr), std::back_inserter(cpus_model), + [](uint32_t midr) -> CpuModel { 
return midr_to_model(midr); }); + + CpuInfo info(isa, cpus_model); + return info; + +#elif (BARE_METAL) && \ + defined( \ + __aarch64__) /* !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && (defined(__arm__) || defined(__aarch64__)) */ + + // Assume single CPU in bare metal mode. Just read the ID register and feature bits directly. + uint64_t isar0 = 0, isar1 = 0, pfr0 = 0, pfr1 = 0, svefr0 = 0, midr = 0; + ARM_COMPUTE_GET_FEATURE_REG(isar0, ID_AA64ISAR0_EL1); + ARM_COMPUTE_GET_FEATURE_REG(isar1, ID_AA64ISAR1_EL1); + ARM_COMPUTE_GET_FEATURE_REG(pfr0, ID_AA64PFR0_EL1); + ARM_COMPUTE_GET_FEATURE_REG(pfr1, ID_AA64PFR1_EL1); + ARM_COMPUTE_GET_FEATURE_REG(midr, MIDR_EL1); + if ((pfr0 >> 32) & 0xf) + { + svefr0 = get_sve_feature_reg(); + } + + CpuIsaInfo isa = init_cpu_isa_from_regs(isar0, isar1, pfr0, pfr1, svefr0, midr); + std::vector<CpuModel> cpus_model(1, midr_to_model(midr)); + CpuInfo info(isa, cpus_model); + return info; +#elif defined(__aarch64__) && defined(__APPLE__) /* #elif(BARE_METAL) && defined(__aarch64__) */ + int ncpus = get_hw_capability("hw.perflevel0.logicalcpu"); + CpuIsaInfo isainfo; + std::vector<CpuModel> cpus_model(ncpus); + isainfo.neon = get_hw_capability("hw.optional.neon"); + isainfo.fp16 = get_hw_capability("hw.optional.neon_fp16"); + isainfo.dot = get_hw_capability("hw.optional.arm.FEAT_DotProd"); + isainfo.bf16 = get_hw_capability("hw.optional.arm.FEAT_BF16"); + isainfo.i8mm = get_hw_capability("hw.optional.arm.FEAT_I8MM"); + CpuInfo info(isainfo, cpus_model); + return info; +#elif defined(__aarch64__) && defined(_WIN64) /* #elif defined(__aarch64__) && defined(__APPLE__) */ + CpuIsaInfo isainfo; + isainfo.neon = true; + CpuInfo info(isainfo, {CpuModel::GENERIC}); + return info; +#else /* #elif defined(__aarch64__) && defined(_WIN64) */ + CpuInfo info(CpuIsaInfo(), {CpuModel::GENERIC}); + return info; +#endif /* !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && (defined(__arm__) || 
defined(__aarch64__)) */ +} + +CpuModel CpuInfo::cpu_model(uint32_t cpuid) const +{ + if (cpuid < _cpus.size()) + { + return _cpus[cpuid]; + } + return CpuModel::GENERIC; +} + +CpuModel CpuInfo::cpu_model() const +{ +#if defined(_WIN64) || defined(BARE_METAL) || defined(__APPLE__) || defined(__OpenBSD__) || \ + (!defined(__arm__) && !defined(__aarch64__)) + return cpu_model(0); +#else /* defined(BARE_METAL) || defined(__APPLE__) || defined(__OpenBSD__) || (!defined(__arm__) && !defined(__aarch64__)) */ + return cpu_model(sched_getcpu()); +#endif /* defined(BARE_METAL) || defined(__APPLE__) || defined(__OpenBSD__) || (!defined(__arm__) && !defined(__aarch64__)) */ +} + +uint32_t CpuInfo::num_cpus() const +{ + return _cpus.size(); +} + +uint32_t CpuInfo::not_little_num_cpus() const +{ +#if defined(__ANDROID__) + return not_little_num_cpus_internal(); +#else /* defined(__ANDROID__) */ + return num_cpus(); +#endif /* defined(__ANDROID__) */ +} + +uint32_t num_threads_hint() +{ + unsigned int num_threads_hint = 1; + +#if !defined(BARE_METAL) && !defined(_WIN64) && !defined(ARM_COMPUTE_DISABLE_THREADS_HINT) + std::vector<std::string> cpus; + cpus.reserve(64); + + // CPU part regex + regex_t cpu_part_rgx; + memset(&cpu_part_rgx, 0, sizeof(regex_t)); + int ret_status = regcomp(&cpu_part_rgx, R"(.*CPU part.+/?\:[[:space:]]+([[:alnum:]]+).*)", REG_EXTENDED); + ARM_COMPUTE_UNUSED(ret_status); + ARM_COMPUTE_ERROR_ON_MSG(ret_status != 0, "Regex compilation failed."); + + // Read cpuinfo and get occurrence of each core + std::ifstream cpuinfo_file("/proc/cpuinfo", std::ios::in); + if (cpuinfo_file.is_open()) + { + std::string line; + while (bool(getline(cpuinfo_file, line))) + { + std::array<regmatch_t, 2> match; + if (regexec(&cpu_part_rgx, line.c_str(), 2, match.data(), 0) == 0) + { + cpus.emplace_back(line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so))); + } + } + } + regfree(&cpu_part_rgx); + + // Get min number of threads + std::sort(std::begin(cpus), 
std::end(cpus)); + auto least_frequent_cpu_occurences = [](const std::vector<std::string> &cpus) -> uint32_t + { + std::unordered_map<std::string, uint32_t> cpus_freq; + for (const auto &cpu : cpus) + { + cpus_freq[cpu]++; + } + + uint32_t vmin = cpus.size() + 1; + for (const auto &cpu_freq : cpus_freq) + { + vmin = std::min(vmin, cpu_freq.second); + } + return vmin; + }; + + // Set thread hint + num_threads_hint = cpus.empty() ? std::thread::hardware_concurrency() : least_frequent_cpu_occurences(cpus); +#endif /* !defined(BARE_METAL) && !defined(_WIN64) && !defined(ARM_COMPUTE_DISABLE_THREADS_HINT) */ + + return num_threads_hint; +} +} // namespace cpuinfo +} // namespace arm_compute diff --git a/src/common/cpuinfo/CpuInfo.h b/src/common/cpuinfo/CpuInfo.h new file mode 100644 index 0000000000..78d11e9610 --- /dev/null +++ b/src/common/cpuinfo/CpuInfo.h @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2021-2022, 2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef ACL_SRC_COMMON_CPUINFO_CPUINFO_H
#define ACL_SRC_COMMON_CPUINFO_CPUINFO_H

#include "src/common/cpuinfo/CpuIsaInfo.h"
#include "src/common/cpuinfo/CpuModel.h"

#include <string>
#include <vector>

namespace arm_compute
{
namespace cpuinfo
{
/** Aggregate class that contains CPU related information
 *
 * Contains information about the number of CPUs, the model of each CPU,
 * ISA related information and more
 *
 * @note We can safely assume that the ISA is common between different clusters of cores
 */
class CpuInfo
{
public:
    /** Default constructor: no ISA feature set and an empty list of CPU models */
    CpuInfo() = default;
    /** Construct a new Cpu Info object
     *
     * @param[in] isa  ISA capabilities information
     * @param[in] cpus CPU models information
     */
    CpuInfo(CpuIsaInfo isa, std::vector<CpuModel> cpus);
    /** CpuInfo builder function from system related information
     *
     * @return CpuInfo A populated CpuInfo structure
     */
    static CpuInfo build();

public:
    /** @return Value of the neon flag of the stored ISA information */
    bool has_neon() const
    {
        return _isa.neon;
    }
    /** @return Value of the sve flag of the stored ISA information */
    bool has_sve() const
    {
        return _isa.sve;
    }
    /** @return Value of the sve2 flag of the stored ISA information */
    bool has_sve2() const
    {
        return _isa.sve2;
    }
    /** @return Value of the sme flag of the stored ISA information */
    bool has_sme() const
    {
        return _isa.sme;
    }
    /** @return Value of the sme2 flag of the stored ISA information */
    bool has_sme2() const
    {
        return _isa.sme2;
    }
    /** @return Value of the fp16 flag of the stored ISA information */
    bool has_fp16() const
    {
        return _isa.fp16;
    }
    /** @return Value of the bf16 flag of the stored ISA information */
    bool has_bf16() const
    {
        return _isa.bf16;
    }
    /** @return Value of the svebf16 flag of the stored ISA information */
    bool has_svebf16() const
    {
        return _isa.svebf16;
    }
    /** @return Value of the dot flag of the stored ISA information */
    bool has_dotprod() const
    {
        return _isa.dot;
    }
    /** @return Value of the i8mm flag of the stored ISA information */
    bool has_i8mm() const
    {
        return _isa.i8mm;
    }
    /** @return Value of the svei8mm flag of the stored ISA information */
    bool has_svei8mm() const
    {
        return _isa.svei8mm;
    }
    /** @return Value of the svef32mm flag of the stored ISA information */
    bool has_svef32mm() const
    {
        return _isa.svef32mm;
    }

    /** @return The complete ISA information */
    const CpuIsaInfo &isa() const
    {
        return _isa;
    }
    /** @return The model of every CPU, indexed by CPU id */
    const std::vector<CpuModel> &cpus() const
    {
        return _cpus;
    }

    /** Model of a given CPU
     *
     * @param[in] cpuid Index of the CPU
     *
     * @return The stored model, or CpuModel::GENERIC when @p cpuid is out of range
     */
    CpuModel cpu_model(uint32_t cpuid) const;
    /** Model of the CPU the caller is running on
     *
     * @return The model reported for the current CPU; CPU 0 is used on platforms
     *         where the current CPU is not queried
     */
    CpuModel cpu_model() const;
    /** @return Number of CPUs stored in this object */
    uint32_t num_cpus() const;
    /** Number of CPUs that are not "little" cores
     *
     * @return On Android, the number of cores whose capacity is at least half of the
     *         largest capacity; on other platforms the same value as num_cpus()
     */
    uint32_t not_little_num_cpus() const;

private:
    CpuIsaInfo            _isa{};  /**< ISA capabilities, assumed common to all cores */
    std::vector<CpuModel> _cpus{}; /**< Model of each CPU */
};

/** Some systems have both big and small cores, this function computes the minimum number of cores
 *  that are exactly the same on the system. To maximize performance the library attempts to process
 *  workloads concurrently using as many threads as big cores are available on the system.
 *
 * @return The minimum number of common cores.
 */
uint32_t num_threads_hint();
} // namespace cpuinfo
} // namespace arm_compute
#endif // ACL_SRC_COMMON_CPUINFO_CPUINFO_H
diff --git a/src/common/cpuinfo/CpuIsaInfo.cpp b/src/common/cpuinfo/CpuIsaInfo.cpp
new file mode 100644
index 0000000000..c9e39b9a08
--- /dev/null
+++ b/src/common/cpuinfo/CpuIsaInfo.cpp
@@ -0,0 +1,167 @@
/*
 * Copyright (c) 2021-2022, 2024 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "src/common/cpuinfo/CpuIsaInfo.h" + +#include "arm_compute/core/Error.h" + +#include "src/common/cpuinfo/CpuModel.h" + +/* Arm Feature flags */ +#define ARM_COMPUTE_CPU_FEATURE_HWCAP_HALF (1 << 1) +#define ARM_COMPUTE_CPU_FEATURE_HWCAP_NEON (1 << 12) + +/* Arm64 Feature flags */ +#define ARM_COMPUTE_CPU_FEATURE_HWCAP_ASIMD (1 << 1) +#define ARM_COMPUTE_CPU_FEATURE_HWCAP_FPHP (1 << 9) +#define ARM_COMPUTE_CPU_FEATURE_HWCAP_ASIMDHP (1 << 10) +#define ARM_COMPUTE_CPU_FEATURE_HWCAP_ASIMDDP (1 << 20) +#define ARM_COMPUTE_CPU_FEATURE_HWCAP_SVE (1 << 22) +#define ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVE2 (1 << 1) +#define ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVEI8MM (1 << 9) +#define ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVEF32MM (1 << 10) +#define ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVEBF16 (1 << 12) +#define ARM_COMPUTE_CPU_FEATURE_HWCAP2_I8MM (1 << 13) +#define ARM_COMPUTE_CPU_FEATURE_HWCAP2_BF16 (1 << 14) +#define ARM_COMPUTE_CPU_FEATURE_HWCAP2_SME (1 << 23) + +namespace arm_compute +{ +namespace cpuinfo +{ +namespace +{ +inline bool is_feature_supported(uint64_t features, uint64_t feature_mask) +{ + return (features & feature_mask); +} + +#if defined(__arm__) +void decode_hwcaps(CpuIsaInfo &isa, const uint32_t hwcaps, const uint32_t hwcaps2) +{ + ARM_COMPUTE_UNUSED(hwcaps2); + isa.fp16 = false; + isa.neon = is_feature_supported(hwcaps, ARM_COMPUTE_CPU_FEATURE_HWCAP_NEON); +} +#elif defined(__aarch64__) +void decode_hwcaps(CpuIsaInfo &isa, const uint32_t hwcaps, const uint32_t hwcaps2) +{ + // High-level SIMD support + isa.neon = is_feature_supported(hwcaps, ARM_COMPUTE_CPU_FEATURE_HWCAP_ASIMD); + isa.sve = is_feature_supported(hwcaps, ARM_COMPUTE_CPU_FEATURE_HWCAP_SVE); + isa.sve2 = 
is_feature_supported(hwcaps2, ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVE2); + + // Detection of SME from type HWCAP2 in the auxillary vector + isa.sme = is_feature_supported(hwcaps2, ARM_COMPUTE_CPU_FEATURE_HWCAP2_SME); + isa.sme2 = isa.sme; // Needs to be set properly + + // Data-type support + isa.fp16 = is_feature_supported(hwcaps, ARM_COMPUTE_CPU_FEATURE_HWCAP_FPHP | ARM_COMPUTE_CPU_FEATURE_HWCAP_ASIMDHP); + isa.bf16 = is_feature_supported(hwcaps2, ARM_COMPUTE_CPU_FEATURE_HWCAP2_BF16); + isa.svebf16 = is_feature_supported(hwcaps2, ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVEBF16); + + // Instruction extensions + isa.dot = is_feature_supported(hwcaps, ARM_COMPUTE_CPU_FEATURE_HWCAP_ASIMDDP); + isa.i8mm = is_feature_supported(hwcaps2, ARM_COMPUTE_CPU_FEATURE_HWCAP2_I8MM); + isa.svei8mm = is_feature_supported(hwcaps2, ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVEI8MM); + isa.svef32mm = is_feature_supported(hwcaps2, ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVEF32MM); +} +#else /* defined(__aarch64__) */ +void decode_hwcaps(CpuIsaInfo &isa, const uint32_t hwcaps, const uint32_t hwcaps2) +{ + ARM_COMPUTE_UNUSED(isa, hwcaps, hwcaps2); +} +#endif /* defined(__aarch64__) */ + +void decode_regs(CpuIsaInfo &isa, + const uint64_t isar0, + const uint64_t isar1, + const uint64_t pfr0, + const uint64_t pfr1, + const uint64_t svefr0) +{ + auto is_supported = [](uint64_t feature_reg, uint8_t feature_pos) -> bool + { return ((feature_reg >> feature_pos) & 0xf); }; + + // High-level SIMD support + isa.sve = is_supported(pfr0, 32); + isa.sve2 = is_supported(svefr0, 0); + isa.sme = is_supported(pfr1, 24); + isa.sme2 = (((pfr1 >> 24) & 0xf) > 1); + + // Data-type support + isa.fp16 = is_supported(pfr0, 16); + isa.bf16 = is_supported(isar1, 44); + isa.svebf16 = is_supported(svefr0, 20); + + // Instruction extensions + isa.dot = is_supported(isar0, 44); + isa.i8mm = is_supported(isar1, 48); + isa.svei8mm = is_supported(svefr0, 44); + isa.svef32mm = is_supported(svefr0, 52); +} + +/** Handle features from allow-listed models 
in case of problematic kernels + * + * @param[in, out] isa ISA to update + * @param[in] model CPU model type + */ +void allowlisted_model_features(CpuIsaInfo &isa, CpuModel model) +{ + if (isa.dot == false) + { + isa.dot = model_supports_dot(model); + } + if (isa.fp16 == false) + { + isa.fp16 = model_supports_fp16(model); + } +} +} // namespace + +CpuIsaInfo init_cpu_isa_from_hwcaps(uint32_t hwcaps, uint32_t hwcaps2, uint32_t midr) +{ + CpuIsaInfo isa; + + decode_hwcaps(isa, hwcaps, hwcaps2); + + const CpuModel model = midr_to_model(midr); + allowlisted_model_features(isa, model); + + return isa; +} + +CpuIsaInfo +init_cpu_isa_from_regs(uint64_t isar0, uint64_t isar1, uint64_t pfr0, uint64_t pfr1, uint64_t svefr0, uint64_t midr) +{ + CpuIsaInfo isa; + + decode_regs(isa, isar0, isar1, pfr0, pfr1, svefr0); + + const CpuModel model = midr_to_model(midr); + allowlisted_model_features(isa, model); + + return isa; +} +} // namespace cpuinfo +} // namespace arm_compute diff --git a/src/common/cpuinfo/CpuIsaInfo.h b/src/common/cpuinfo/CpuIsaInfo.h new file mode 100644 index 0000000000..9d6bc07b67 --- /dev/null +++ b/src/common/cpuinfo/CpuIsaInfo.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2021-2022 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef SRC_COMMON_CPUINFO_CPUISAINFO_H
#define SRC_COMMON_CPUINFO_CPUISAINFO_H

#include <cstdint>

namespace arm_compute
{
namespace cpuinfo
{
/** CPU ISA (Instruction Set Architecture) information
 *
 * Contains ISA related information around the Arm architecture.
 * Every flag defaults to false, i.e. "feature not available".
 */
struct CpuIsaInfo
{
    /* SIMD extension support */
    bool neon{false}; /**< Advanced SIMD (Neon) */
    bool sve{false};  /**< SVE */
    bool sve2{false}; /**< SVE2 */
    bool sme{false};  /**< SME */
    bool sme2{false}; /**< SME2 */

    /* Data-type extensions support */
    bool fp16{false};    /**< Half-precision floating point */
    bool bf16{false};    /**< BFloat16 */
    bool svebf16{false}; /**< BFloat16 in SVE */

    /* Instruction support */
    bool dot{false};      /**< Dot-product instructions */
    bool i8mm{false};     /**< Int8 matrix-multiply instructions */
    bool svei8mm{false};  /**< Int8 matrix-multiply instructions in SVE */
    bool svef32mm{false}; /**< FP32 matrix-multiply instructions in SVE */
};

/** Identify ISA related information through system information
 *
 * @param[in] hwcaps  HWCAPS feature information
 * @param[in] hwcaps2 HWCAPS2 feature information
 * @param[in] midr    MIDR value
 *
 * @return CpuIsaInfo A populated ISA feature structure
 */
CpuIsaInfo init_cpu_isa_from_hwcaps(uint32_t hwcaps, uint32_t hwcaps2, uint32_t midr);

/** Identify ISA related information through register information
 *
 * @param[in] isar0  Value of Instruction Set Attribute Register 0 (ID_AA64ISAR0_EL1)
 * @param[in] isar1  Value of Instruction Set Attribute Register 1 (ID_AA64ISAR1_EL1)
 * @param[in] pfr0   Value of Processor Feature Register 0 (ID_AA64PFR0_EL1)
 * @param[in] pfr1   Value of Processor Feature Register 1 (ID_AA64PFR1_EL1)
 * @param[in] svefr0 Value of SVE feature ID register 0 (ID_AA64ZFR0_EL1)
 * @param[in] midr   Value of Main ID Register (MIDR)
 *
 * @return CpuIsaInfo A populated ISA feature structure
 */
CpuIsaInfo
init_cpu_isa_from_regs(uint64_t isar0, uint64_t isar1, uint64_t pfr0, uint64_t pfr1, uint64_t svefr0, uint64_t midr);
} // namespace cpuinfo
} // namespace arm_compute

#endif /* SRC_COMMON_CPUINFO_CPUISAINFO_H */
diff --git a/src/common/cpuinfo/CpuModel.cpp b/src/common/cpuinfo/CpuModel.cpp
new file mode 100644
index 0000000000..8c3f8a8faf
--- /dev/null
+++ b/src/common/cpuinfo/CpuModel.cpp
@@ -0,0 +1,203 @@
/*
 * Copyright (c) 2021-2023 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
+ */ +#include "src/common/cpuinfo/CpuModel.h" + +namespace arm_compute +{ +namespace cpuinfo +{ +std::string cpu_model_to_string(CpuModel model) +{ + switch (model) + { +#define X(MODEL) \ + case CpuModel::MODEL: \ + return #MODEL; + ARM_COMPUTE_CPU_MODEL_LIST +#undef X + default: + { + return std::string("GENERIC"); + } + }; +} + +bool model_supports_fp16(CpuModel model) +{ + switch (model) + { + case CpuModel::GENERIC_FP16: + case CpuModel::GENERIC_FP16_DOT: + case CpuModel::A55r1: + case CpuModel::A510: + case CpuModel::X1: + case CpuModel::V1: + case CpuModel::A64FX: + case CpuModel::N1: + return true; + default: + return false; + } +} + +bool model_supports_dot(CpuModel model) +{ + switch (model) + { + case CpuModel::GENERIC_FP16_DOT: + case CpuModel::A55r1: + case CpuModel::A510: + case CpuModel::X1: + case CpuModel::V1: + case CpuModel::N1: + return true; + default: + return false; + } +} + +CpuModel midr_to_model(uint32_t midr) +{ + CpuModel model = CpuModel::GENERIC; + + // Unpack variant and CPU ID + const int implementer = (midr >> 24) & 0xFF; + const int variant = (midr >> 20) & 0xF; + const int cpunum = (midr >> 4) & 0xFFF; + + // Only CPUs we have code paths for are detected. 
All other CPUs can be safely classed as "GENERIC" + if (implementer == 0x41) // Arm CPUs + { + switch (cpunum) + { + case 0xd03: // A53 + case 0xd04: // A35 + model = CpuModel::A53; + break; + case 0xd05: // A55 + if (variant != 0) + { + model = CpuModel::A55r1; + } + else + { + model = CpuModel::A55r0; + } + break; + case 0xd09: // A73 + model = CpuModel::A73; + break; + case 0xd0a: // A75 + if (variant != 0) + { + model = CpuModel::GENERIC_FP16_DOT; + } + else + { + model = CpuModel::GENERIC_FP16; + } + break; + case 0xd0c: // N1 + model = CpuModel::N1; + break; + case 0xd06: // A65 + case 0xd0b: // A76 + case 0xd0d: // A77 + case 0xd0e: // A76AE + case 0xd41: // A78 + case 0xd42: // A78AE + case 0xd4a: // E1 + model = CpuModel::GENERIC_FP16_DOT; + break; + case 0xd40: // V1 + model = CpuModel::V1; + break; + case 0xd44: // X1 + model = CpuModel::X1; + break; + case 0xd46: // A510 + case 0xd80: // A520 + model = CpuModel::A510; + break; + case 0xd15: // R82 + model = CpuModel::A55r1; + break; + default: + model = CpuModel::GENERIC; + break; + } + } + else if (implementer == 0x46) + { + switch (cpunum) + { + case 0x001: // A64FX + model = CpuModel::A64FX; + break; + default: + model = CpuModel::GENERIC; + break; + } + } + else if (implementer == 0x48) + { + switch (cpunum) + { + case 0xd40: // A76 + model = CpuModel::GENERIC_FP16_DOT; + break; + default: + model = CpuModel::GENERIC; + break; + } + } + else if (implementer == 0x51) + { + switch (cpunum) + { + case 0x800: // A73 + model = CpuModel::A73; + break; + case 0x801: // A53 + model = CpuModel::A53; + break; + case 0x803: // A55r0 + model = CpuModel::A55r0; + break; + case 0x804: // A76 + model = CpuModel::GENERIC_FP16_DOT; + break; + case 0x805: // A55r1 + model = CpuModel::A55r1; + break; + default: + model = CpuModel::GENERIC; + break; + } + } + + return model; +} +} // namespace cpuinfo +} // namespace arm_compute diff --git a/src/common/cpuinfo/CpuModel.h b/src/common/cpuinfo/CpuModel.h new file mode 
100644 index 0000000000..3b9d9e3494 --- /dev/null +++ b/src/common/cpuinfo/CpuModel.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef SRC_COMMON_CPUINFO_CPUMODEL_H +#define SRC_COMMON_CPUINFO_CPUMODEL_H + +#include "arm_compute/core/CPP/CPPTypes.h" + +#include <cstdint> +#include <string> + +namespace arm_compute +{ +namespace cpuinfo +{ +using CpuModel = arm_compute::CPUModel; + +/** Convert a CPU model value to a string + * + * @param model CpuModel value to be converted + * + * @return String representing the corresponding CpuModel + */ +std::string cpu_model_to_string(CpuModel model); + +/** Extract the model type from the MIDR value + * + * @param[in] midr MIDR information + * + * @return CpuModel a mapped CPU model + */ +CpuModel midr_to_model(uint32_t midr); + +/** Check if a model supports half-precision floating point arithmetic + * + * @note This is used in case of old kernel configurations where some capabilities are not exposed. + * + * @param[in] model Model to check for allowlisted capabilities + */ +bool model_supports_fp16(CpuModel model); + +/** Check if a model supports dot product + * + * @note This is used in case of old kernel configurations where some capabilities are not exposed. + * + * @param[in] model Model to check for allowlisted capabilities + */ +bool model_supports_dot(CpuModel model); +} // namespace cpuinfo +} // namespace arm_compute +#endif /* SRC_COMMON_CPUINFO_CPUMODEL_H */ |