From a50f19346c5b79e2743f882ce0c691c07076f207 Mon Sep 17 00:00:00 2001 From: Pablo Marquez Tello Date: Mon, 8 Mar 2021 17:27:05 +0000 Subject: Updated cpu detection * Added the case in the cpu detection code for Klein cores * Added has_sve() and set_sve() methods in CpuInfo * Detection code checks for presence of SVE via HWCAP_SVE * Updated the heuristic in sve kernels to check for the absence of Klein * Resolves: COMPMID-4085 Change-Id: I0b8c72ff19dc5a3a81628d121a1afa836e724b4f Signed-off-by: Pablo Marquez Tello Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5257 Reviewed-by: Georgios Pinitas Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins --- src/runtime/CPUUtils.cpp | 65 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 58 insertions(+), 7 deletions(-) (limited to 'src/runtime/CPUUtils.cpp') diff --git a/src/runtime/CPUUtils.cpp b/src/runtime/CPUUtils.cpp index 63c9a8639c..82b42336e6 100644 --- a/src/runtime/CPUUtils.cpp +++ b/src/runtime/CPUUtils.cpp @@ -62,12 +62,27 @@ #define HWCAP_ASIMDDP (1 << 20) // NOLINT #endif /* HWCAP_ASIMDDP */ +#ifndef HWCAP_SVE +#define HWCAP_SVE (1 << 22) // NOLINT +#endif /* HWCAP_SVE */ + namespace { using namespace arm_compute; #if !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) +bool model_supports_sve(CPUModel model) +{ + switch(model) + { + case CPUModel::KLEIN: + return true; + default: + return false; + } +} + bool model_supports_dot(CPUModel model) { switch(model) @@ -75,6 +90,7 @@ bool model_supports_dot(CPUModel model) case CPUModel::GENERIC_FP16_DOT: case CPUModel::A55r1: case CPUModel::X1: + case CPUModel::KLEIN: return true; default: return false; @@ -89,6 +105,7 @@ bool model_supports_fp16(CPUModel model) case CPUModel::GENERIC_FP16_DOT: case CPUModel::A55r1: case CPUModel::X1: + case CPUModel::KLEIN: return true; default: return false; @@ -146,6 +163,9 @@ CPUModel midr_to_model(const unsigned int midr) case 0xd0d: model = CPUModel::GENERIC_FP16_DOT; break; + case 0xd46: + model = CPUModel::KLEIN; + break; default: model = CPUModel::GENERIC; break; @@ -369,11 +389,11 @@ namespace cpu void get_cpu_configuration(CPUInfo &cpuinfo) { #if !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) - bool cpuid = false; - bool hwcaps_fp16_support = false; - bool hwcaps_dot_support = false; - - const uint32_t hwcaps = getauxval(AT_HWCAP); + bool cpuid = false; + bool hwcaps_fp16_support = false; + bool hwcaps_dot_support = false; + bool hwcaps_sve = false; + const uint32_t hwcaps = getauxval(AT_HWCAP); if((hwcaps & HWCAP_CPUID) != 0) { @@ -390,6 +410,11 @@ void get_cpu_configuration(CPUInfo &cpuinfo) { hwcaps_dot_support = true; } + + if((hwcaps & HWCAP_SVE) != 0) + { + hwcaps_sve = true; + } #endif /* defined(__aarch64__) */ const unsigned int max_cpus = get_max_cpus(); @@ -408,17 +433,43 @@ void get_cpu_configuration(CPUInfo &cpuinfo) // We assume that the system does not have mixed architectures bool one_supports_dot = false; bool one_supports_fp16 = false; + bool one_supports_sve = false; for(const auto &v : percpu) { one_supports_dot = one_supports_dot || model_supports_dot(v); one_supports_fp16 = one_supports_fp16 || model_supports_fp16(v); + one_supports_sve = one_supports_sve || model_supports_sve(v); cpuinfo.set_cpu_model(j++, v); } cpuinfo.set_dotprod(one_supports_dot || hwcaps_dot_support); cpuinfo.set_fp16(one_supports_fp16 || hwcaps_fp16_support); -#else /* !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) */ + cpuinfo.set_sve(one_supports_sve || hwcaps_sve); +#elif(BARE_METAL) && defined(__aarch64__) /* !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) */ + cpuinfo.set_cpu_num(1); + const CPUModel cpumodel{ CPUModel::GENERIC }; + cpuinfo.set_cpu_model(0, cpumodel); + // Assume single CPU in bare metal mode. Just read the ID register and feature bits directly. + uint64_t fr0, pfr0, midr; + __asm __volatile( + "MRS %0, ID_AA64ISAR0_EL1\n" + "MRS %1, ID_AA64PFR0_EL1\n" + "MRS %2, midr_el1" + : "=r"(fr0), "=r"(pfr0), "=r"(midr)); + if((fr0 >> 44) & 0xf) + { + cpuinfo.set_dotprod(true); + } + if((pfr0 >> 16) & 0xf) + { + cpuinfo.set_fp16(true); + } + if((pfr0 >> 32) & 0xf) + { + cpuinfo.set_sve(true); + } +#else /* #elif(BARE_METAL) && defined(__aarch64__) */ ARM_COMPUTE_UNUSED(cpuinfo); -#endif /* !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) */ +#endif /* !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) */ } unsigned int get_threads_hint() -- cgit v1.2.1