diff options
author | Pablo Marquez Tello <pablo.tello@arm.com> | 2021-03-08 17:27:05 +0000 |
---|---|---|
committer | Pablo Marquez Tello <pablo.tello@arm.com> | 2021-03-17 12:45:26 +0000 |
commit | a50f19346c5b79e2743f882ce0c691c07076f207 (patch) | |
tree | 40141711eae786bc65738f04baa4e17cd6a20d97 /src/runtime/CPUUtils.cpp | |
parent | d0c9cb808f674ce8bbfbdf0e66c5b8451f6af0f2 (diff) | |
download | ComputeLibrary-a50f19346c5b79e2743f882ce0c691c07076f207.tar.gz |
Updated CPU detection
* Added the case in the CPU detection code for Klein cores
* Added has_sve() and set_sve() methods in CpuInfo
* Detection code checks for presence of SVE via HWCAP_SVE
* Updated the heuristic in SVE kernels to check for the absence of Klein
* Resolves: COMPMID-4085
Change-Id: I0b8c72ff19dc5a3a81628d121a1afa836e724b4f
Signed-off-by: Pablo Marquez Tello <pablo.tello@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5257
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/runtime/CPUUtils.cpp')
-rw-r--r-- | src/runtime/CPUUtils.cpp | 65 |
1 files changed, 58 insertions, 7 deletions
diff --git a/src/runtime/CPUUtils.cpp b/src/runtime/CPUUtils.cpp index 63c9a8639c..82b42336e6 100644 --- a/src/runtime/CPUUtils.cpp +++ b/src/runtime/CPUUtils.cpp @@ -62,12 +62,27 @@ #define HWCAP_ASIMDDP (1 << 20) // NOLINT #endif /* HWCAP_ASIMDDP */ +#ifndef HWCAP_SVE +#define HWCAP_SVE (1 << 22) // NOLINT +#endif /* HWCAP_SVE */ + namespace { using namespace arm_compute; #if !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) +bool model_supports_sve(CPUModel model) +{ + switch(model) + { + case CPUModel::KLEIN: + return true; + default: + return false; + } +} + bool model_supports_dot(CPUModel model) { switch(model) @@ -75,6 +90,7 @@ bool model_supports_dot(CPUModel model) case CPUModel::GENERIC_FP16_DOT: case CPUModel::A55r1: case CPUModel::X1: + case CPUModel::KLEIN: return true; default: return false; @@ -89,6 +105,7 @@ bool model_supports_fp16(CPUModel model) case CPUModel::GENERIC_FP16_DOT: case CPUModel::A55r1: case CPUModel::X1: + case CPUModel::KLEIN: return true; default: return false; @@ -146,6 +163,9 @@ CPUModel midr_to_model(const unsigned int midr) case 0xd0d: model = CPUModel::GENERIC_FP16_DOT; break; + case 0xd46: + model = CPUModel::KLEIN; + break; default: model = CPUModel::GENERIC; break; @@ -369,11 +389,11 @@ namespace cpu void get_cpu_configuration(CPUInfo &cpuinfo) { #if !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) - bool cpuid = false; - bool hwcaps_fp16_support = false; - bool hwcaps_dot_support = false; - - const uint32_t hwcaps = getauxval(AT_HWCAP); + bool cpuid = false; + bool hwcaps_fp16_support = false; + bool hwcaps_dot_support = false; + bool hwcaps_sve = false; + const uint32_t hwcaps = getauxval(AT_HWCAP); if((hwcaps & HWCAP_CPUID) != 0) { @@ -390,6 +410,11 @@ void get_cpu_configuration(CPUInfo &cpuinfo) { hwcaps_dot_support = true; } + + if((hwcaps & HWCAP_SVE) != 0) + { + hwcaps_sve = true; + } #endif /* defined(__aarch64__) */ const unsigned 
int max_cpus = get_max_cpus(); @@ -408,17 +433,43 @@ void get_cpu_configuration(CPUInfo &cpuinfo) // We assume that the system does not have mixed architectures bool one_supports_dot = false; bool one_supports_fp16 = false; + bool one_supports_sve = false; for(const auto &v : percpu) { one_supports_dot = one_supports_dot || model_supports_dot(v); one_supports_fp16 = one_supports_fp16 || model_supports_fp16(v); + one_supports_sve = one_supports_sve || model_supports_sve(v); cpuinfo.set_cpu_model(j++, v); } cpuinfo.set_dotprod(one_supports_dot || hwcaps_dot_support); cpuinfo.set_fp16(one_supports_fp16 || hwcaps_fp16_support); -#else /* !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) */ + cpuinfo.set_sve(one_supports_sve || hwcaps_sve); +#elif(BARE_METAL) && defined(__aarch64__) /* !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) */ + cpuinfo.set_cpu_num(1); + const CPUModel cpumodel{ CPUModel::GENERIC }; + cpuinfo.set_cpu_model(0, cpumodel); + // Assume single CPU in bare metal mode. Just read the ID register and feature bits directly. + uint64_t fr0, pfr0, midr; + __asm __volatile( + "MRS %0, ID_AA64ISAR0_EL1\n" + "MRS %1, ID_AA64PFR0_EL1\n" + "MRS %2, midr_el1" + : "=r"(fr0), "=r"(pfr0), "=r"(midr)); + if((fr0 >> 44) & 0xf) + { + cpuinfo.set_dotprod(true); + } + if((pfr0 >> 16) & 0xf) + { + cpuinfo.set_fp16(true); + } + if((pfr0 >> 32) & 0xf) + { + cpuinfo.set_sve(true); + } +#else /* #elif(BARE_METAL) && defined(__aarch64__) */ ARM_COMPUTE_UNUSED(cpuinfo); -#endif /* !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) */ +#endif /* !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) */ } unsigned int get_threads_hint() |