aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/CPUUtils.cpp
diff options
context:
space:
mode:
author: Pablo Marquez Tello <pablo.tello@arm.com> 2021-03-08 17:27:05 +0000
committer: Pablo Marquez Tello <pablo.tello@arm.com> 2021-03-17 12:45:26 +0000
commit: a50f19346c5b79e2743f882ce0c691c07076f207 (patch)
tree: 40141711eae786bc65738f04baa4e17cd6a20d97 /src/runtime/CPUUtils.cpp
parent: d0c9cb808f674ce8bbfbdf0e66c5b8451f6af0f2 (diff)
download: ComputeLibrary-a50f19346c5b79e2743f882ce0c691c07076f207.tar.gz
Updated cpu detection

* Added the case in the cpu detection code for Klein cores
* Added has_sve() and set_sve() methods in CPUInfo
* Detection code checks for presence of SVE via HWCAP_SVE
* Updated the heuristic in SVE kernels to check for the absence of Klein
* Resolves: COMPMID-4085

Change-Id: I0b8c72ff19dc5a3a81628d121a1afa836e724b4f
Signed-off-by: Pablo Marquez Tello <pablo.tello@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5257
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/runtime/CPUUtils.cpp')
-rw-r--r-- src/runtime/CPUUtils.cpp 65
1 files changed, 58 insertions, 7 deletions
diff --git a/src/runtime/CPUUtils.cpp b/src/runtime/CPUUtils.cpp
index 63c9a8639c..82b42336e6 100644
--- a/src/runtime/CPUUtils.cpp
+++ b/src/runtime/CPUUtils.cpp
@@ -62,12 +62,27 @@
#define HWCAP_ASIMDDP (1 << 20) // NOLINT
#endif /* HWCAP_ASIMDDP */
+#ifndef HWCAP_SVE
+#define HWCAP_SVE (1 << 22) // NOLINT
+#endif /* HWCAP_SVE */
+
namespace
{
using namespace arm_compute;
#if !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__))
+/** Tell whether the given CPU model is treated as SVE-capable by this lookup.
+ *
+ * @param[in] model CPU model to check.
+ *
+ * @return true if @p model is CPUModel::KLEIN, false for any other model.
+ */
+bool model_supports_sve(CPUModel model)
+{
+ return model == CPUModel::KLEIN;
+}
+
bool model_supports_dot(CPUModel model)
{
switch(model)
@@ -75,6 +90,7 @@ bool model_supports_dot(CPUModel model)
case CPUModel::GENERIC_FP16_DOT:
case CPUModel::A55r1:
case CPUModel::X1:
+ case CPUModel::KLEIN:
return true;
default:
return false;
@@ -89,6 +105,7 @@ bool model_supports_fp16(CPUModel model)
case CPUModel::GENERIC_FP16_DOT:
case CPUModel::A55r1:
case CPUModel::X1:
+ case CPUModel::KLEIN:
return true;
default:
return false;
@@ -146,6 +163,9 @@ CPUModel midr_to_model(const unsigned int midr)
case 0xd0d:
model = CPUModel::GENERIC_FP16_DOT;
break;
+ case 0xd46:
+ model = CPUModel::KLEIN;
+ break;
default:
model = CPUModel::GENERIC;
break;
@@ -369,11 +389,11 @@ namespace cpu
void get_cpu_configuration(CPUInfo &cpuinfo)
{
#if !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__))
- bool cpuid = false;
- bool hwcaps_fp16_support = false;
- bool hwcaps_dot_support = false;
-
- const uint32_t hwcaps = getauxval(AT_HWCAP);
+ bool cpuid = false;
+ bool hwcaps_fp16_support = false;
+ bool hwcaps_dot_support = false;
+ bool hwcaps_sve = false;
+ const uint32_t hwcaps = getauxval(AT_HWCAP);
if((hwcaps & HWCAP_CPUID) != 0)
{
@@ -390,6 +410,11 @@ void get_cpu_configuration(CPUInfo &cpuinfo)
{
hwcaps_dot_support = true;
}
+
+ if((hwcaps & HWCAP_SVE) != 0)
+ {
+ hwcaps_sve = true;
+ }
#endif /* defined(__aarch64__) */
const unsigned int max_cpus = get_max_cpus();
@@ -408,17 +433,43 @@ void get_cpu_configuration(CPUInfo &cpuinfo)
// We assume that the system does not have mixed architectures
bool one_supports_dot = false;
bool one_supports_fp16 = false;
+ bool one_supports_sve = false;
for(const auto &v : percpu)
{
one_supports_dot = one_supports_dot || model_supports_dot(v);
one_supports_fp16 = one_supports_fp16 || model_supports_fp16(v);
+ one_supports_sve = one_supports_sve || model_supports_sve(v);
cpuinfo.set_cpu_model(j++, v);
}
cpuinfo.set_dotprod(one_supports_dot || hwcaps_dot_support);
cpuinfo.set_fp16(one_supports_fp16 || hwcaps_fp16_support);
-#else /* !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) */
+ cpuinfo.set_sve(one_supports_sve || hwcaps_sve);
+#elif(BARE_METAL) && defined(__aarch64__) /* !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) */
+ cpuinfo.set_cpu_num(1);
+ const CPUModel cpumodel{ CPUModel::GENERIC };
+ cpuinfo.set_cpu_model(0, cpumodel);
+ // Assume single CPU in bare metal mode. Just read the ID register and feature bits directly.
+ uint64_t fr0, pfr0, midr;
+ __asm __volatile(
+ "MRS %0, ID_AA64ISAR0_EL1\n"
+ "MRS %1, ID_AA64PFR0_EL1\n"
+ "MRS %2, midr_el1"
+ : "=r"(fr0), "=r"(pfr0), "=r"(midr));
+ if((fr0 >> 44) & 0xf)
+ {
+ cpuinfo.set_dotprod(true);
+ }
+ if((pfr0 >> 16) & 0xf)
+ {
+ cpuinfo.set_fp16(true);
+ }
+ if((pfr0 >> 32) & 0xf)
+ {
+ cpuinfo.set_sve(true);
+ }
+#else /* #elif(BARE_METAL) && defined(__aarch64__) */
ARM_COMPUTE_UNUSED(cpuinfo);
-#endif /* !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) */
+#endif /* !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) */
}
unsigned int get_threads_hint()