From 08302c17cd57356b35d46e17dc8d8f76672da5cf Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Wed, 9 Jun 2021 10:08:27 +0100 Subject: Add CPU discovery capabilities. Resolves: COMPMID-4500 Signed-off-by: Georgios Pinitas Change-Id: I008c51934ef813fb1f489b531288c4419e701955 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5799 Reviewed-by: Michele Di Giorgio Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins --- src/runtime/CPUUtils.cpp | 556 ----------------------------------------------- 1 file changed, 556 deletions(-) delete mode 100644 src/runtime/CPUUtils.cpp (limited to 'src/runtime/CPUUtils.cpp') diff --git a/src/runtime/CPUUtils.cpp b/src/runtime/CPUUtils.cpp deleted file mode 100644 index 2bcba72f77..0000000000 --- a/src/runtime/CPUUtils.cpp +++ /dev/null @@ -1,556 +0,0 @@ -/* - * Copyright (c) 2018-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/runtime/CPUUtils.h" - -#include "arm_compute/core/CPP/CPPTypes.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Log.h" -#include "support/StringSupport.h" - -#include -#include -#include -#include -#include -#include - -#if !defined(BARE_METAL) -/* C++ std::regex takes up a lot of space in the standalone builds */ -#include -#include -#endif /* !defined(BARE_METAL) */ - -#if !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) -#include - -/* Get HWCAP bits from asm/hwcap.h */ -#include -#endif /* !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) */ - -/* Make sure the bits we care about are defined, just in case asm/hwcap.h is - * out of date (or for bare metal mode) */ -#ifndef HWCAP_ASIMDHP -#define HWCAP_ASIMDHP (1 << 10) // NOLINT -#endif /* HWCAP_ASIMDHP */ - -#ifndef HWCAP_CPUID -#define HWCAP_CPUID (1 << 11) // NOLINT -#endif /* HWCAP_CPUID */ - -#ifndef HWCAP_ASIMDDP -#define HWCAP_ASIMDDP (1 << 20) // NOLINT -#endif /* HWCAP_ASIMDDP */ - -#ifndef HWCAP_SVE -#define HWCAP_SVE (1 << 22) // NOLINT -#endif /* HWCAP_SVE */ - -namespace -{ -using namespace arm_compute; - -#if !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) - -bool model_supports_sve(CPUModel model) -{ - switch(model) - { - case CPUModel::KLEIN: - return true; - default: - return false; - } -} - -bool model_supports_dot(CPUModel model) -{ - switch(model) - { - case CPUModel::GENERIC_FP16_DOT: - case CPUModel::A55r1: - case CPUModel::X1: - case CPUModel::KLEIN: - return true; - default: - return false; - } -} - -bool model_supports_fp16(CPUModel model) -{ - switch(model) - { - case CPUModel::GENERIC_FP16: - case CPUModel::GENERIC_FP16_DOT: - case CPUModel::A55r1: - case CPUModel::X1: - case CPUModel::KLEIN: - return true; - default: - return false; - } -} - -/* Convert an MIDR register value to a CPUModel enum value. */ -CPUModel midr_to_model(const unsigned int midr) -{ - CPUModel model = CPUModel::GENERIC; - - // Unpack variant and CPU ID - const int implementer = (midr >> 24) & 0xFF; - const int variant = (midr >> 20) & 0xF; - const int cpunum = (midr >> 4) & 0xFFF; - - if(implementer == 0x41) // Arm CPUs - { - // Only CPUs we have code paths for are detected. All other CPUs can be safely classed as "GENERIC" - switch(cpunum) - { - case 0xd03: // A53 - case 0xd04: // A35 - model = CPUModel::A53; - break; - case 0xd05: // A55 - if(variant != 0) - { - model = CPUModel::A55r1; - } - else - { - model = CPUModel::A55r0; - } - break; - case 0xd09: // A73 - model = CPUModel::A73; - break; - case 0xd0a: // A75 - if(variant != 0) - { - model = CPUModel::GENERIC_FP16_DOT; - } - else - { - model = CPUModel::GENERIC_FP16; - } - break; - case 0xd06: // A65 - case 0xd0b: // A76 - case 0xd0c: // N1 - case 0xd0d: // A77 - case 0xd41: // A78 - model = CPUModel::GENERIC_FP16_DOT; - break; - case 0xd44: // X1 - model = CPUModel::X1; - break; - case 0xd46: - model = CPUModel::KLEIN; - break; - default: - model = CPUModel::GENERIC; - break; - } - } - else if(implementer == 0x48) - { - // Only CPUs we have code paths for are detected. All other CPUs can be safely classed as "GENERIC" - switch(cpunum) - { - case 0xd40: // A76 - model = CPUModel::GENERIC_FP16_DOT; - break; - default: - model = CPUModel::GENERIC; - break; - } - } - else if(implementer == 0x51) - { - // Only CPUs we have code paths for are detected. All other CPUs can be safely classed as "GENERIC" - switch(cpunum) - { - case 0x800: // A73 - model = CPUModel::A73; - break; - case 0x801: // A53 - model = CPUModel::A53; - break; - case 0x803: // A55r0 - model = CPUModel::A55r0; - break; - case 0x804: // A76 - model = CPUModel::GENERIC_FP16_DOT; - break; - case 0x805: // A55r1 - model = CPUModel::A55r1; - break; - default: - model = CPUModel::GENERIC; - break; - } - } - - return model; -} - -void populate_models_cpuid(std::vector &cpusv) -{ - // If the CPUID capability is present, MIDR information is provided in /sys. Use that to populate the CPU model table. - uint32_t i = 0; - for(auto &c : cpusv) - { - std::stringstream str; - str << "/sys/devices/system/cpu/cpu" << i++ << "/regs/identification/midr_el1"; - std::ifstream file; - file.open(str.str(), std::ios::in); - if(file.is_open()) - { - std::string line; - if(bool(getline(file, line))) - { - const uint32_t midr = support::cpp11::stoul(line, nullptr, support::cpp11::NumericBase::BASE_16); - c = midr_to_model(midr & 0xffffffff); - } - } - } -} - -void populate_models_cpuinfo(std::vector &cpusv) -{ - regex_t proc_regex; - regex_t imp_regex; - regex_t var_regex; - regex_t part_regex; - regex_t rev_regex; - - memset(&proc_regex, 0, sizeof(regex_t)); - memset(&imp_regex, 0, sizeof(regex_t)); - memset(&var_regex, 0, sizeof(regex_t)); - memset(&part_regex, 0, sizeof(regex_t)); - memset(&rev_regex, 0, sizeof(regex_t)); - - int ret_status = 0; - // If "long-form" cpuinfo is present, parse that to populate models. - ret_status |= regcomp(&proc_regex, R"(^processor.*([[:digit:]]+)$)", REG_EXTENDED); - ret_status |= regcomp(&imp_regex, R"(^CPU implementer.*0x(..)$)", REG_EXTENDED); - ret_status |= regcomp(&var_regex, R"(^CPU variant.*0x(.)$)", REG_EXTENDED); - ret_status |= regcomp(&part_regex, R"(^CPU part.*0x(...)$)", REG_EXTENDED); - ret_status |= regcomp(&rev_regex, R"(^CPU revision.*([[:digit:]]+)$)", REG_EXTENDED); - ARM_COMPUTE_UNUSED(ret_status); - ARM_COMPUTE_ERROR_ON_MSG(ret_status != 0, "Regex compilation failed."); - - std::ifstream file; - file.open("/proc/cpuinfo", std::ios::in); - - if(file.is_open()) - { - std::string line; - int midr = 0; - int curcpu = -1; - const int num_cpus = static_cast(cpusv.size()); - - while(bool(getline(file, line))) - { - std::array match; - ret_status = regexec(&proc_regex, line.c_str(), 2, match.data(), 0); - if(ret_status == 0) - { - std::string id = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so)); - int newcpu = support::cpp11::stoi(id, nullptr); - - if(curcpu >= 0 && midr == 0) - { - // Matched a new CPU ID without any description of the previous one - looks like old format. - return; - } - - if(curcpu >= 0 && curcpu < num_cpus) - { - cpusv[curcpu] = midr_to_model(midr); - } - else - { - ARM_COMPUTE_LOG_INFO_MSG_CORE("Trying to populate a core id with id greater than the expected number of cores!"); - } - - midr = 0; - curcpu = newcpu; - - continue; - } - - ret_status = regexec(&imp_regex, line.c_str(), 2, match.data(), 0); - if(ret_status == 0) - { - std::string subexp = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so)); - int impv = support::cpp11::stoi(subexp, nullptr, support::cpp11::NumericBase::BASE_16); - midr |= (impv << 24); - - continue; - } - - ret_status = regexec(&var_regex, line.c_str(), 2, match.data(), 0); - if(ret_status == 0) - { - std::string subexp = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so)); - int varv = support::cpp11::stoi(subexp, nullptr, support::cpp11::NumericBase::BASE_16); - midr |= (varv << 20); - - continue; - } - - ret_status = regexec(&part_regex, line.c_str(), 2, match.data(), 0); - if(ret_status == 0) - { - std::string subexp = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so)); - int partv = support::cpp11::stoi(subexp, nullptr, support::cpp11::NumericBase::BASE_16); - midr |= (partv << 4); - - continue; - } - - ret_status = regexec(&rev_regex, line.c_str(), 2, match.data(), 0); - if(ret_status == 0) - { - std::string subexp = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so)); - int regv = support::cpp11::stoi(subexp, nullptr); - midr |= (regv); - midr |= (0xf << 16); - - continue; - } - } - - if(curcpu >= 0 && curcpu < num_cpus) - { - cpusv[curcpu] = midr_to_model(midr); - } - else - { - ARM_COMPUTE_LOG_INFO_MSG_CORE("Trying to populate a core id with id greater than the expected number of cores!"); - } - } - - // Free allocated memory - regfree(&proc_regex); - regfree(&imp_regex); - regfree(&var_regex); - regfree(&part_regex); - regfree(&rev_regex); -} - -int get_max_cpus() -{ - int max_cpus = 1; - std::ifstream CPUspresent; - CPUspresent.open("/sys/devices/system/cpu/present", std::ios::in); - bool success = false; - - if(CPUspresent.is_open()) - { - std::string line; - - if(bool(getline(CPUspresent, line))) - { - /* The content of this file is a list of ranges or single values, e.g. - * 0-5, or 1-3,5,7 or similar. As we are interested in the - * max valid ID, we just need to find the last valid - * delimiter ('-' or ',') and parse the integer immediately after that. - */ - auto startfrom = line.begin(); - - for(auto i = line.begin(); i < line.end(); ++i) - { - if(*i == '-' || *i == ',') - { - startfrom = i + 1; - } - } - - line.erase(line.begin(), startfrom); - - max_cpus = support::cpp11::stoi(line, nullptr) + 1; - success = true; - } - } - - // Return std::thread::hardware_concurrency() as a fallback. - if(!success) - { - max_cpus = std::thread::hardware_concurrency(); - } - return max_cpus; -} -#endif /* !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) */ - -} // namespace - -namespace arm_compute -{ -namespace utils -{ -namespace cpu -{ -void get_cpu_configuration(CPUInfo &cpuinfo) -{ -#if !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) - bool cpuid = false; - bool hwcaps_fp16_support = false; - bool hwcaps_dot_support = false; - bool hwcaps_sve = false; - const uint32_t hwcaps = getauxval(AT_HWCAP); - - if((hwcaps & HWCAP_CPUID) != 0) - { - cpuid = true; - } - - if((hwcaps & HWCAP_ASIMDHP) != 0) - { - hwcaps_fp16_support = true; - } - -#if defined(__aarch64__) - if((hwcaps & HWCAP_ASIMDDP) != 0) - { - hwcaps_dot_support = true; - } - - if((hwcaps & HWCAP_SVE) != 0) - { - hwcaps_sve = true; - } -#endif /* defined(__aarch64__) */ - - const unsigned int max_cpus = get_max_cpus(); - cpuinfo.set_cpu_num(max_cpus); - std::vector percpu(max_cpus, CPUModel::GENERIC); - if(cpuid) - { - populate_models_cpuid(percpu); - } - else - { - populate_models_cpuinfo(percpu); - } - int j(0); - // Update dot product and FP16 support if one of the CPUs support these features - // We assume that the system does not have mixed architectures - bool one_supports_dot = false; - bool one_supports_fp16 = false; - bool one_supports_sve = false; - for(const auto &v : percpu) - { - one_supports_dot = one_supports_dot || model_supports_dot(v); - one_supports_fp16 = one_supports_fp16 || model_supports_fp16(v); - one_supports_sve = one_supports_sve || model_supports_sve(v); - cpuinfo.set_cpu_model(j++, v); - } - cpuinfo.set_dotprod(one_supports_dot || hwcaps_dot_support); - cpuinfo.set_fp16(one_supports_fp16 || hwcaps_fp16_support); - cpuinfo.set_sve(one_supports_sve || hwcaps_sve); -#elif(BARE_METAL) && defined(__aarch64__) /* !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) */ - cpuinfo.set_cpu_num(1); - const CPUModel cpumodel{ CPUModel::GENERIC }; - cpuinfo.set_cpu_model(0, cpumodel); - // Assume single CPU in bare metal mode. Just read the ID register and feature bits directly. - uint64_t fr0, pfr0, midr; - __asm __volatile( - "MRS %0, ID_AA64ISAR0_EL1\n" - "MRS %1, ID_AA64PFR0_EL1\n" - "MRS %2, midr_el1" - : "=r"(fr0), "=r"(pfr0), "=r"(midr)); - if((fr0 >> 44) & 0xf) - { - cpuinfo.set_dotprod(true); - } - if((pfr0 >> 16) & 0xf) - { - cpuinfo.set_fp16(true); - } - if((pfr0 >> 32) & 0xf) - { - cpuinfo.set_sve(true); - } -#else /* #elif(BARE_METAL) && defined(__aarch64__) */ - ARM_COMPUTE_UNUSED(cpuinfo); -#endif /* !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) */ -} - -unsigned int get_threads_hint() -{ - unsigned int num_threads_hint = 1; - -#if !defined(BARE_METAL) - std::map cpu_part_occurrence_map; - - // CPU part regex - regex_t cpu_part_rgx; - memset(&cpu_part_rgx, 0, sizeof(regex_t)); - int ret_status = regcomp(&cpu_part_rgx, R"(.*CPU part.+/?\:[[:space:]]+([[:alnum:]]+).*)", REG_EXTENDED); - ARM_COMPUTE_UNUSED(ret_status); - ARM_COMPUTE_ERROR_ON_MSG(ret_status != 0, "Regex compilation failed."); - - // Read cpuinfo and get occurrence of each core - std::ifstream cpuinfo; - cpuinfo.open("/proc/cpuinfo", std::ios::in); - if(cpuinfo.is_open()) - { - std::string line; - while(bool(getline(cpuinfo, line))) - { - std::array match; - ret_status = regexec(&cpu_part_rgx, line.c_str(), 2, match.data(), 0); - if(ret_status == 0) - { - std::string cpu_part = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so)); - if(cpu_part_occurrence_map.find(cpu_part) != cpu_part_occurrence_map.end()) - { - cpu_part_occurrence_map[cpu_part]++; - } - else - { - cpu_part_occurrence_map[cpu_part] = 1; - } - } - } - } - regfree(&cpu_part_rgx); - - // Get min number of threads - auto min_common_cores = std::min_element(cpu_part_occurrence_map.begin(), cpu_part_occurrence_map.end(), - [](const std::pair &p1, const std::pair &p2) - { - return p1.second < p2.second; - }); - - // Set thread hint - num_threads_hint = cpu_part_occurrence_map.empty() ? std::thread::hardware_concurrency() : min_common_cores->second; -#endif /* !defined(BARE_METAL) */ - - return num_threads_hint; -} -} // namespace cpu -} // namespace utils -} // namespace arm_compute -- cgit v1.2.1