aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPablo Marquez Tello <pablo.tello@arm.com>2021-03-08 17:27:05 +0000
committerPablo Marquez Tello <pablo.tello@arm.com>2021-03-17 12:45:26 +0000
commita50f19346c5b79e2743f882ce0c691c07076f207 (patch)
tree40141711eae786bc65738f04baa4e17cd6a20d97
parentd0c9cb808f674ce8bbfbdf0e66c5b8451f6af0f2 (diff)
downloadComputeLibrary-a50f19346c5b79e2743f882ce0c691c07076f207.tar.gz
Updated cpu detection
* Added the case in the cpu detection code for Klein cores * Added has_sve() and set_sve() methods in CpuInfo * Detection code checks for presence of SVE via HWCAP_SVE * Updated the heuristic in sve kernels to check for the absence of Klein * Resolves: COMPMID-4085 Change-Id: I0b8c72ff19dc5a3a81628d121a1afa836e724b4f Signed-off-by: Pablo Marquez Tello <pablo.tello@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5257 Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r--arm_compute/core/CPP/CPPTypes.h18
-rwxr-xr-xscripts/clang_tidy_rules.py1
-rw-r--r--src/core/CPP/CPPTypes.cpp11
-rw-r--r--src/core/NEON/kernels/arm_gemm/gemm_bf16.cpp12
-rw-r--r--src/core/NEON/kernels/arm_gemm/gemm_fp16.cpp8
-rw-r--r--src/core/NEON/kernels/arm_gemm/gemm_fp32.cpp24
-rw-r--r--src/core/NEON/kernels/arm_gemm/gemm_int8.cpp16
-rw-r--r--src/core/NEON/kernels/arm_gemm/gemm_qint8.cpp24
-rw-r--r--src/core/NEON/kernels/arm_gemm/gemm_quint8.cpp22
-rw-r--r--src/core/NEON/kernels/arm_gemm/gemm_uint8.cpp16
-rw-r--r--src/runtime/CPUUtils.cpp65
11 files changed, 148 insertions, 69 deletions
diff --git a/arm_compute/core/CPP/CPPTypes.h b/arm_compute/core/CPP/CPPTypes.h
index fd6bfc3907..2de73acaa2 100644
--- a/arm_compute/core/CPP/CPPTypes.h
+++ b/arm_compute/core/CPP/CPPTypes.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -45,6 +45,7 @@ enum class CPUModel
A53,
A55r0,
A55r1,
+ KLEIN,
X1,
A73
};
@@ -76,6 +77,10 @@ inline std::string cpu_model_to_string(CPUModel val)
{
return std::string("GENERIC");
}
+ case CPUModel::KLEIN:
+ {
+ return std::string("KLEIN");
+ }
case CPUModel::GENERIC_FP16:
{
return std::string("GENERIC_FP16");
@@ -136,6 +141,11 @@ public:
* @return true of the cpu supports dot product, false otherwise
*/
bool has_dotprod() const;
+ /** Checks if the cpu model supports sve.
+ *
+ * @return true if the cpu supports sve, false otherwise
+ */
+ bool has_sve() const;
/** Gets the cpu model for a given cpuid.
*
* @param[in] cpuid the id of the cpu core to be retrieved,
@@ -178,6 +188,11 @@ public:
* @param[in] dotprod whether the cpu supports dot product.
*/
void set_dotprod(const bool dotprod);
+ /** Set sve support
+ *
+ * @param[in] sve whether the cpu supports sve.
+ */
+ void set_sve(const bool sve);
/** Set the cpumodel for a given cpu core
*
* @param[in] cpuid the id of the core to be set.
@@ -200,6 +215,7 @@ private:
std::vector<CPUModel> _percpu = {};
bool _fp16 = false;
bool _dotprod = false;
+ bool _sve = false;
unsigned int _L1_cache_size = 32768;
unsigned int _L2_cache_size = 262144;
};
diff --git a/scripts/clang_tidy_rules.py b/scripts/clang_tidy_rules.py
index 0ccf0b2910..2e72c824f0 100755
--- a/scripts/clang_tidy_rules.py
+++ b/scripts/clang_tidy_rules.py
@@ -66,6 +66,7 @@ def filter_clang_tidy_lines( lines ):
("Utils.h" in line and "no member named 'unmap' in 'arm_compute::Tensor'" in line) or
("Utils.h" in line and "no member named 'map' in 'arm_compute::Tensor'" in line) or
("CPUUtils.cpp" in line and "'asm/hwcap.h' file not found" in line) or
+ ("CPUUtils.cpp" in line and "use of undeclared identifier 'HWCAP_SVE'" in line) or
("'arm_compute_version.embed' file not found" in line) ):
print_context=False
continue
diff --git a/src/core/CPP/CPPTypes.cpp b/src/core/CPP/CPPTypes.cpp
index 139e106ca6..0850df29fd 100644
--- a/src/core/CPP/CPPTypes.cpp
+++ b/src/core/CPP/CPPTypes.cpp
@@ -42,6 +42,11 @@ void CPUInfo::set_dotprod(const bool dotprod)
_dotprod = dotprod;
}
+void CPUInfo::set_sve(const bool sve)
+{
+ _sve = sve;
+}
+
void CPUInfo::set_cpu_model(unsigned int cpuid, CPUModel model)
{
ARM_COMPUTE_ERROR_ON(cpuid >= _percpu.size());
@@ -55,6 +60,12 @@ unsigned int CPUInfo::get_cpu_num() const
{
return _percpu.size();
}
+
+bool CPUInfo::has_sve() const
+{
+ return _sve;
+}
+
bool CPUInfo::has_fp16() const
{
return _fp16;
diff --git a/src/core/NEON/kernels/arm_gemm/gemm_bf16.cpp b/src/core/NEON/kernels/arm_gemm/gemm_bf16.cpp
index 96b9734221..d8134c4bb5 100644
--- a/src/core/NEON/kernels/arm_gemm/gemm_bf16.cpp
+++ b/src/core/NEON/kernels/arm_gemm/gemm_bf16.cpp
@@ -49,22 +49,22 @@ static const GemmImplementation<bfloat16, float> gemm_bf16_methods[] =
{ // gemm_bf16_interleaved
GemmMethod::GEMM_INTERLEAVED,
"sve_interleaved_bf16fp32_mmla_8x3VL",
- [](const GemmArgs &args) { return (args._Ksize>4); },
- nullptr,
+ [](const GemmArgs &args) { return args._ci->has_sve() && (args._Ksize>4); },
+ [](const GemmArgs &args) { return args._ci->get_cpu_model() != CPUModel::KLEIN; },
[](const GemmArgs &args) { return new GemmInterleaved<cls_sve_interleaved_bf16fp32_mmla_8x3VL, bfloat16, float>(args); }
},
{
GemmMethod::GEMM_HYBRID,
"sve_hybrid_bf16fp32_dot_6x4VL",
- nullptr,
- [](const GemmArgs &args) { return ((args._Ksize <= 128) && (args._Nsize <= 128)); },
+ [](const GemmArgs &args) { return args._ci->has_sve(); },
+ [](const GemmArgs &args) { return args._ci->get_cpu_model() != CPUModel::KLEIN && ((args._Ksize <= 128) && (args._Nsize <= 128)); },
[](const GemmArgs &args) { return new GemmHybridIndirect<cls_sve_hybrid_bf16fp32_dot_6x4VL, bfloat16, float>(args); }
},
{ // gemm_bf16_interleaved
GemmMethod::GEMM_INTERLEAVED,
"sve_interleaved_bf16fp32_dot_8x3VL",
- [](const GemmArgs &args) { return (args._Ksize>2); },
- nullptr,
+ [](const GemmArgs &args) { return args._ci->has_sve() && (args._Ksize>2); },
+ [](const GemmArgs &args) { return args._ci->get_cpu_model() != CPUModel::KLEIN; },
[](const GemmArgs &args) { return new GemmInterleaved<cls_sve_interleaved_bf16fp32_dot_8x3VL, bfloat16, float>(args); }
},
# endif // SVE
diff --git a/src/core/NEON/kernels/arm_gemm/gemm_fp16.cpp b/src/core/NEON/kernels/arm_gemm/gemm_fp16.cpp
index 93563a63d0..8e355c8f2c 100644
--- a/src/core/NEON/kernels/arm_gemm/gemm_fp16.cpp
+++ b/src/core/NEON/kernels/arm_gemm/gemm_fp16.cpp
@@ -47,15 +47,15 @@ static const GemmImplementation<__fp16, __fp16> gemm_fp16_methods[] = {
{
GemmMethod::GEMM_HYBRID,
"sve_hybrid_fp16_mla_6x4VL",
- nullptr,
- [](const GemmArgs &args) { return ((args._Ksize <= 256) && (args._Nsize <= 256)) || ((args._nmulti > 1) && ((args._Msize / args._maxthreads) < 8)); },
+ [](const GemmArgs &args) { return args._ci->has_sve(); },
+ [](const GemmArgs &args) { return args._ci->get_cpu_model() != CPUModel::KLEIN && (((args._Ksize <= 256) && (args._Nsize <= 256)) || ((args._nmulti > 1) && ((args._Msize / args._maxthreads) < 8))); },
[](const GemmArgs &args) { return new GemmHybridIndirect<cls_a64_hybrid_fp16_mla_6x32, __fp16, __fp16>(args); }
},
{
GemmMethod::GEMM_INTERLEAVED,
"sve_interleaved_fp16_mla_8x3VL",
- [](const GemmArgs &args) { return (args._Ksize > 4); },
- nullptr,
+ [](const GemmArgs &args) { return args._ci->has_sve() && (args._Ksize > 4); },
+ [](const GemmArgs &args) { return args._ci->get_cpu_model() != CPUModel::KLEIN; },
[](const GemmArgs &args) { return new GemmInterleaved<cls_sve_interleaved_fp16_mla_8x3VL, __fp16, __fp16>(args); }
},
#endif
diff --git a/src/core/NEON/kernels/arm_gemm/gemm_fp32.cpp b/src/core/NEON/kernels/arm_gemm/gemm_fp32.cpp
index b0e912d188..5c894c01c8 100644
--- a/src/core/NEON/kernels/arm_gemm/gemm_fp32.cpp
+++ b/src/core/NEON/kernels/arm_gemm/gemm_fp32.cpp
@@ -62,8 +62,8 @@ static const GemmImplementation<float, float> gemm_fp32_methods[] =
{
GemmMethod::GEMM_HYBRID,
"sve_gemv_fp32_mla_8VL",
- [](const GemmArgs &args) { return args._Msize==1 && args._nbatches==1 && !args._indirect_input; },
- nullptr,
+ [](const GemmArgs &args) { return args._ci->has_sve() && args._Msize==1 && args._nbatches==1 && !args._indirect_input; },
+ [](const GemmArgs &args) { return args._ci->get_cpu_model() != CPUModel::KLEIN; },
[](const GemmArgs &args) { return new GemvPretransposed<cls_sve_gemv_fp32_mla_8VL, float, float>(args); }
},
#endif
@@ -80,8 +80,8 @@ static const GemmImplementation<float, float> gemm_fp32_methods[] =
{
GemmMethod::GEMM_INTERLEAVED,
"sve_interleaved_fp32_mmla_8x3VL",
- [](const GemmArgs &args) { return (args._Ksize>4); },
- nullptr,
+ [](const GemmArgs &args) { return args._ci->has_sve() && (args._Ksize>4); },
+ [](const GemmArgs &args) { return args._ci->get_cpu_model() != CPUModel::KLEIN; },
[](const GemmArgs &args) { return new GemmInterleaved<cls_sve_interleaved_fp32_mmla_8x3VL, float, float>(args); }
},
#endif // __ARM_FEATURE_SVE && MMLA_FP32
@@ -91,22 +91,22 @@ static const GemmImplementation<float, float> gemm_fp32_methods[] =
{
GemmMethod::GEMM_HYBRID,
"sve_smallK_hybrid_fp32_mla_8x1VL",
- [](const GemmArgs &args) { return args._Ksize <= 24 && !args._indirect_input; },
- nullptr,
+ [](const GemmArgs &args) { return args._ci->has_sve() && args._Ksize <= 24 && !args._indirect_input; },
+ [](const GemmArgs &args) { return args._ci->get_cpu_model() != CPUModel::KLEIN; },
[](const GemmArgs &args) { return new GemmHybrid<cls_sve_smallK_hybrid_fp32_mla_8x1VL, float, float>(args); }
},
{
GemmMethod::GEMM_HYBRID,
"sve_hybrid_fp32_mla_8x1VL",
- nullptr,
- [](const GemmArgs &args) { return (args._Nsize < 12); },
+ [](const GemmArgs &args) { return args._ci->has_sve(); },
+ [](const GemmArgs &args) { return args._ci->get_cpu_model() != CPUModel::KLEIN && (args._Nsize < 12); },
[](const GemmArgs &args) { return new GemmHybridIndirect<cls_sve_hybrid_fp32_mla_8x1VL, float, float>(args); }
},
{
GemmMethod::GEMM_HYBRID,
"sve_hybrid_fp32_mla_6x4VL",
- nullptr,
- [](const GemmArgs &args) { return ((args._Ksize <= 256) && (args._Nsize <= 256)) || ((args._nmulti > 1) && ((args._Msize / args._maxthreads) < 8)); },
+ [](const GemmArgs &args) { return args._ci->has_sve(); },
+ [](const GemmArgs &args) { return args._ci->get_cpu_model() != CPUModel::KLEIN && (((args._Ksize <= 256) && (args._Nsize <= 256)) || ((args._nmulti > 1) && ((args._Msize / args._maxthreads) < 8))); },
[](const GemmArgs &args) { return new GemmHybridIndirect<cls_sve_hybrid_fp32_mla_6x4VL, float, float>(args); }
},
#endif // __ARM_FEATURE_SVE
@@ -144,8 +144,8 @@ GemmImplementation<float, float>::with_estimate(
{
GemmMethod::GEMM_INTERLEAVED,
"sve_interleaved_fp32_mla_8x3VL",
- [](const GemmArgs &args) { return (args._Ksize>4); },
- nullptr,
+ [](const GemmArgs &args) { return args._ci->has_sve() && (args._Ksize>4); },
+ [](const GemmArgs &args) { return args._ci->get_cpu_model() != CPUModel::KLEIN; },
[](const GemmArgs &args) { return new GemmInterleaved<cls_sve_interleaved_fp32_mla_8x3VL, float, float>(args); }
},
#endif // __ARM_FEATURE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/gemm_int8.cpp b/src/core/NEON/kernels/arm_gemm/gemm_int8.cpp
index f081558c40..60cf82f9c6 100644
--- a/src/core/NEON/kernels/arm_gemm/gemm_int8.cpp
+++ b/src/core/NEON/kernels/arm_gemm/gemm_int8.cpp
@@ -51,30 +51,30 @@ static const GemmImplementation<int8_t, int32_t> gemm_s8_methods[] = {
{
GemmMethod::GEMM_INTERLEAVED,
"sve_interleaved_s8s32_mmla_8x3VL",
- [](const GemmArgs &args) { return (args._Ksize>8); },
- nullptr,
+ [](const GemmArgs &args) { return args._ci->has_sve() && (args._Ksize>8); },
+ [](const GemmArgs &args) { return args._ci->get_cpu_model() != CPUModel::KLEIN; },
[](const GemmArgs &args) { return new GemmInterleaved<cls_sve_interleaved_s8s32_mmla_8x3VL, int8_t, int32_t>(args); }
},
#endif
{
GemmMethod::GEMM_HYBRID,
"sve_smallK_hybrid_s8s32_dot_8x1VL",
- [](const GemmArgs &args) { return args._Ksize<=64 && !args._indirect_input; },
- nullptr,
+ [](const GemmArgs &args) { return args._ci->has_sve() && args._Ksize<=64 && !args._indirect_input; },
+ [](const GemmArgs &args) { return args._ci->get_cpu_model() != CPUModel::KLEIN; },
[](const GemmArgs &args) { return new GemmHybrid<cls_sve_smallK_hybrid_s8s32_dot_8x1VL, int8_t, int32_t>(args); }
},
{
GemmMethod::GEMM_HYBRID,
"sve_hybrid_s8s32_dot_6x4VL",
- [](const GemmArgs &args) { return args._Ksize>=16; },
- [](const GemmArgs &args) { return ((args._Ksize <= 128) && (args._Nsize <= 128)) || ((args._nmulti > 1) && ((args._Msize / args._maxthreads) < 8)); },
+ [](const GemmArgs &args) { return args._ci->has_sve() && args._Ksize>=16; },
+ [](const GemmArgs &args) { return args._ci->get_cpu_model() != CPUModel::KLEIN && (((args._Ksize <= 128) && (args._Nsize <= 128)) || ((args._nmulti > 1) && ((args._Msize / args._maxthreads) < 8))); },
[](const GemmArgs &args) { return new GemmHybridIndirect<cls_sve_hybrid_s8s32_dot_6x4VL, int8_t, int32_t>(args); }
},
{
GemmMethod::GEMM_INTERLEAVED,
"sve_interleaved_s8s32_dot_8x3VL",
- [](const GemmArgs &args) { return (args._Ksize>4); },
- nullptr,
+ [](const GemmArgs &args) { return args._ci->has_sve() && (args._Ksize>4); },
+ [](const GemmArgs &args) { return args._ci->get_cpu_model() != CPUModel::KLEIN; },
[](const GemmArgs &args) { return new GemmInterleaved<cls_sve_interleaved_s8s32_dot_8x3VL, int8_t, int32_t>(args); }
},
#endif // SVE
diff --git a/src/core/NEON/kernels/arm_gemm/gemm_qint8.cpp b/src/core/NEON/kernels/arm_gemm/gemm_qint8.cpp
index d3a55eba6b..094b6fdff4 100644
--- a/src/core/NEON/kernels/arm_gemm/gemm_qint8.cpp
+++ b/src/core/NEON/kernels/arm_gemm/gemm_qint8.cpp
@@ -58,46 +58,46 @@ static const GemmImplementation<int8_t, int8_t, Requantize32> gemm_qint8_methods
{
GemmMethod::GEMM_INTERLEAVED,
"sve_interleaved_s8s32_mmla_8x3VL",
- [](const GemmArgs &args, const Requantize32 &) { return (args._Ksize>8); },
- nullptr,
+ [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_sve() && (args._Ksize>8); },
+ [](const GemmArgs &args, const Requantize32 &) { return args._ci->get_cpu_model() != CPUModel::KLEIN; },
[](const GemmArgs &args, const Requantize32 &qp) { return new GemmInterleavedQuantized<cls_sve_interleaved_s8s32_mmla_8x3VL, int8_t, int8_t>(args, qp); }
},
#endif
{
GemmMethod::GEMM_HYBRID_QUANTIZED,
"sve_smallK_hybrid_s8s32_dot_8x1VL",
- [](const GemmArgs &args, const Requantize32 &) { return args._Ksize<=64 && !args._indirect_input; },
- nullptr,
+ [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_sve() && args._Ksize<=64 && !args._indirect_input; },
+ [](const GemmArgs &args, const Requantize32 &) { return args._ci->get_cpu_model() != CPUModel::KLEIN; },
[](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridQuantized<cls_sve_smallK_hybrid_s8s32_dot_8x1VL, int8_t, int8_t>(args, qp); }
},
#ifdef SVE2
{
GemmMethod::GEMM_HYBRID,
"sve_hybrid_s8qs_dot_6x4VL",
- [](const GemmArgs &, const Requantize32 &qp) { return quant_hybrid_symmetric(qp); },
- nullptr,
+ [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_sve() && quant_hybrid_symmetric(qp); },
+ [](const GemmArgs &args, const Requantize32 &) { return args._ci->get_cpu_model() != CPUModel::KLEIN; },
[](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridIndirect<cls_sve_hybrid_s8qs_dot_6x4VL, int8_t, int8_t, Requantize32>(args, qp); }
},
{
GemmMethod::GEMM_HYBRID,
"sve_hybrid_s8qa_dot_4x4VL",
- [](const GemmArgs &, const Requantize32 &qp) { return quant_hybrid_asymmetric(qp); },
- nullptr,
+ [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_sve() && quant_hybrid_asymmetric(qp); },
+ [](const GemmArgs &args, const Requantize32 &) { return args._ci->get_cpu_model() != CPUModel::KLEIN; },
[](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridIndirect<cls_sve_hybrid_s8qa_dot_4x4VL, int8_t, int8_t, Requantize32>(args, qp); }
},
#endif
{
GemmMethod::GEMM_HYBRID,
"sve_hybrid_s8s32_dot_6x4VL",
- nullptr,
- nullptr,
+ [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_sve(); },
+ [](const GemmArgs &args, const Requantize32 &) { return args._ci->get_cpu_model() != CPUModel::KLEIN; },
[](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridIndirect<cls_sve_hybrid_s8s32_dot_6x4VL, int8_t, int8_t, Requantize32, true>(args, qp); }
},
{
GemmMethod::GEMM_INTERLEAVED,
"sve_interleaved_s8s32_dot_8x3VL",
- [](const GemmArgs &args, const Requantize32 &) { return (args._Ksize>4); },
- nullptr,
+ [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_sve() && (args._Ksize>4); },
+ [](const GemmArgs &args, const Requantize32 &) { return args._ci->get_cpu_model() != CPUModel::KLEIN; },
[](const GemmArgs &args, const Requantize32 &qp) { return new GemmInterleavedQuantized<cls_sve_interleaved_s8s32_dot_8x3VL, int8_t, int8_t>(args, qp); }
},
#endif // SVE
diff --git a/src/core/NEON/kernels/arm_gemm/gemm_quint8.cpp b/src/core/NEON/kernels/arm_gemm/gemm_quint8.cpp
index 9720c7d06e..be27b3a117 100644
--- a/src/core/NEON/kernels/arm_gemm/gemm_quint8.cpp
+++ b/src/core/NEON/kernels/arm_gemm/gemm_quint8.cpp
@@ -55,39 +55,39 @@ static const GemmImplementation<uint8_t, uint8_t, Requantize32> gemm_quint8_meth
{
GemmMethod::GEMM_INTERLEAVED,
"sve_interleaved_u8u32_mmla_8x3VL",
- [](const GemmArgs &args, const Requantize32 &) { return (args._Ksize>8); },
- nullptr,
+ [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_sve() && (args._Ksize>8); },
+ [](const GemmArgs &args, const Requantize32 &) { return args._ci->get_cpu_model() != CPUModel::KLEIN; },
[](const GemmArgs &args, const Requantize32 &qp) { return new GemmInterleavedQuantized<cls_sve_interleaved_u8u32_mmla_8x3VL, uint8_t, uint8_t>(args, qp); }
},
#endif
{
GemmMethod::GEMM_HYBRID_QUANTIZED,
"sve_smallK_hybrid_u8u32_dot_8x1VL",
- [](const GemmArgs &args, const Requantize32 &) { return args._Ksize<=64 && !args._indirect_input; },
- nullptr,
+ [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_sve() && args._Ksize<=64 && !args._indirect_input; },
+ [](const GemmArgs &args, const Requantize32 &) { return args._ci->get_cpu_model() != CPUModel::KLEIN; },
[](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridQuantized<cls_sve_smallK_hybrid_u8u32_dot_8x1VL, uint8_t, uint8_t>(args, qp); }
},
#ifdef SVE2 // Requantizing kernels include some SVE2 only instructions (SQRDMULH, SRSHL)
{
GemmMethod::GEMM_HYBRID,
"sve_hybrid_u8qa_dot_4x4VL",
- [](const GemmArgs &, const Requantize32 &qp) { return quant_hybrid_asymmetric(qp); },
- nullptr,
+ [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_sve() && quant_hybrid_asymmetric(qp); },
+ [](const GemmArgs &args, const Requantize32 &) { return args._ci->get_cpu_model() != CPUModel::KLEIN; },
[](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridIndirect<cls_sve_hybrid_u8qa_dot_4x4VL, uint8_t, uint8_t, Requantize32>(args, qp); }
},
#endif
{
GemmMethod::GEMM_HYBRID,
"sve_hybrid_u8u32_dot_6x4VL",
- nullptr,
- nullptr,
+ [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_sve(); },
+ [](const GemmArgs &args, const Requantize32 &) { return args._ci->get_cpu_model() != CPUModel::KLEIN; },
[](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridIndirect<cls_sve_hybrid_u8u32_dot_6x4VL, uint8_t, uint8_t, Requantize32, true>(args, qp); }
},
{
GemmMethod::GEMM_INTERLEAVED,
"sve_interleaved_u8u32_dot_8x3VL",
- [](const GemmArgs &args, const Requantize32 &) { return (args._Ksize>4); },
- nullptr,
+ [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_sve() && (args._Ksize>4); },
+ [](const GemmArgs &args, const Requantize32 &) { return args._ci->get_cpu_model() != CPUModel::KLEIN; },
[](const GemmArgs &args, const Requantize32 &qp) { return new GemmInterleavedQuantized<cls_sve_interleaved_u8u32_dot_8x3VL, uint8_t, uint8_t>(args, qp); }
},
#endif
@@ -96,7 +96,7 @@ static const GemmImplementation<uint8_t, uint8_t, Requantize32> gemm_quint8_meth
GemmMethod::GEMM_INTERLEAVED,
"a64_interleaved_u8u32_mmla_8x12",
[](const GemmArgs &args, const Requantize32 &) { return (args._Ksize>8); },
- nullptr,
+ [](const GemmArgs &args, const Requantize32 &) { return args._ci->get_cpu_model() != CPUModel::KLEIN; },
[](const GemmArgs &args, const Requantize32 &qp) { return new GemmInterleavedQuantized<cls_a64_interleaved_u8u32_mmla_8x12, uint8_t, uint8_t>(args, qp); }
},
#endif
diff --git a/src/core/NEON/kernels/arm_gemm/gemm_uint8.cpp b/src/core/NEON/kernels/arm_gemm/gemm_uint8.cpp
index d2ebe00f3b..fb41a9fc09 100644
--- a/src/core/NEON/kernels/arm_gemm/gemm_uint8.cpp
+++ b/src/core/NEON/kernels/arm_gemm/gemm_uint8.cpp
@@ -51,30 +51,30 @@ static const GemmImplementation<uint8_t, uint32_t> gemm_u8_methods[] = {
{
GemmMethod::GEMM_INTERLEAVED,
"sve_interleaved_u8u32_mmla_8x3VL",
- [](const GemmArgs &args) { return (args._Ksize>8); },
- nullptr,
+ [](const GemmArgs &args) { return args._ci->has_sve() && (args._Ksize>8); },
+ [](const GemmArgs &args) { return args._ci->get_cpu_model() != CPUModel::KLEIN; },
[](const GemmArgs &args) { return new GemmInterleaved<cls_sve_interleaved_u8u32_mmla_8x3VL, uint8_t, uint32_t>(args); }
},
#endif
{
GemmMethod::GEMM_HYBRID,
"smallK_hybrid_u8u32_dot_8x1VL",
- [](const GemmArgs &args) { return args._Ksize<=64 && !args._indirect_input; },
- nullptr,
+ [](const GemmArgs &args) { return args._ci->has_sve() && args._Ksize<=64 && !args._indirect_input; },
+ [](const GemmArgs &args) { return args._ci->get_cpu_model() != CPUModel::KLEIN; },
[](const GemmArgs &args) { return new GemmHybrid<cls_sve_smallK_hybrid_u8u32_dot_8x1VL, uint8_t, uint32_t>(args); }
},
{
GemmMethod::GEMM_HYBRID,
"sve_hybrid_u8u32_dot_6x4VL",
- nullptr,
- [](const GemmArgs &args) { return ((args._Ksize <= 128) && (args._Nsize <= 128)) || ((args._nmulti > 1) && ((args._Msize / args._maxthreads) < 8)); },
+ [](const GemmArgs &args) { return args._ci->has_sve(); },
+ [](const GemmArgs &args) { return args._ci->get_cpu_model() != CPUModel::KLEIN && (((args._Ksize <= 128) && (args._Nsize <= 128)) || ((args._nmulti > 1) && ((args._Msize / args._maxthreads) < 8))); },
[](const GemmArgs &args) { return new GemmHybridIndirect<cls_sve_hybrid_u8u32_dot_6x4VL, uint8_t, uint32_t>(args); }
},
{
GemmMethod::GEMM_INTERLEAVED,
"sve_interleaved_u8u32_dot_8x3VL",
- [](const GemmArgs &args) { return (args._Ksize>4); },
- nullptr,
+ [](const GemmArgs &args) { return args._ci->has_sve() && (args._Ksize>4); },
+ [](const GemmArgs &args) { return args._ci->get_cpu_model() != CPUModel::KLEIN; },
[](const GemmArgs &args) { return new GemmInterleaved<cls_sve_interleaved_u8u32_dot_8x3VL, uint8_t, uint32_t>(args); }
},
#endif
diff --git a/src/runtime/CPUUtils.cpp b/src/runtime/CPUUtils.cpp
index 63c9a8639c..82b42336e6 100644
--- a/src/runtime/CPUUtils.cpp
+++ b/src/runtime/CPUUtils.cpp
@@ -62,12 +62,27 @@
#define HWCAP_ASIMDDP (1 << 20) // NOLINT
#endif /* HWCAP_ASIMDDP */
+#ifndef HWCAP_SVE
+#define HWCAP_SVE (1 << 22) // NOLINT
+#endif /* HWCAP_SVE */
+
namespace
{
using namespace arm_compute;
#if !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__))
+bool model_supports_sve(CPUModel model)
+{
+ switch(model)
+ {
+ case CPUModel::KLEIN:
+ return true;
+ default:
+ return false;
+ }
+}
+
bool model_supports_dot(CPUModel model)
{
switch(model)
@@ -75,6 +90,7 @@ bool model_supports_dot(CPUModel model)
case CPUModel::GENERIC_FP16_DOT:
case CPUModel::A55r1:
case CPUModel::X1:
+ case CPUModel::KLEIN:
return true;
default:
return false;
@@ -89,6 +105,7 @@ bool model_supports_fp16(CPUModel model)
case CPUModel::GENERIC_FP16_DOT:
case CPUModel::A55r1:
case CPUModel::X1:
+ case CPUModel::KLEIN:
return true;
default:
return false;
@@ -146,6 +163,9 @@ CPUModel midr_to_model(const unsigned int midr)
case 0xd0d:
model = CPUModel::GENERIC_FP16_DOT;
break;
+ case 0xd46:
+ model = CPUModel::KLEIN;
+ break;
default:
model = CPUModel::GENERIC;
break;
@@ -369,11 +389,11 @@ namespace cpu
void get_cpu_configuration(CPUInfo &cpuinfo)
{
#if !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__))
- bool cpuid = false;
- bool hwcaps_fp16_support = false;
- bool hwcaps_dot_support = false;
-
- const uint32_t hwcaps = getauxval(AT_HWCAP);
+ bool cpuid = false;
+ bool hwcaps_fp16_support = false;
+ bool hwcaps_dot_support = false;
+ bool hwcaps_sve = false;
+ const uint32_t hwcaps = getauxval(AT_HWCAP);
if((hwcaps & HWCAP_CPUID) != 0)
{
@@ -390,6 +410,11 @@ void get_cpu_configuration(CPUInfo &cpuinfo)
{
hwcaps_dot_support = true;
}
+
+ if((hwcaps & HWCAP_SVE) != 0)
+ {
+ hwcaps_sve = true;
+ }
#endif /* defined(__aarch64__) */
const unsigned int max_cpus = get_max_cpus();
@@ -408,17 +433,43 @@ void get_cpu_configuration(CPUInfo &cpuinfo)
// We assume that the system does not have mixed architectures
bool one_supports_dot = false;
bool one_supports_fp16 = false;
+ bool one_supports_sve = false;
for(const auto &v : percpu)
{
one_supports_dot = one_supports_dot || model_supports_dot(v);
one_supports_fp16 = one_supports_fp16 || model_supports_fp16(v);
+ one_supports_sve = one_supports_sve || model_supports_sve(v);
cpuinfo.set_cpu_model(j++, v);
}
cpuinfo.set_dotprod(one_supports_dot || hwcaps_dot_support);
cpuinfo.set_fp16(one_supports_fp16 || hwcaps_fp16_support);
-#else /* !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) */
+ cpuinfo.set_sve(one_supports_sve || hwcaps_sve);
+#elif(BARE_METAL) && defined(__aarch64__) /* !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) */
+ cpuinfo.set_cpu_num(1);
+ const CPUModel cpumodel{ CPUModel::GENERIC };
+ cpuinfo.set_cpu_model(0, cpumodel);
+ // Assume single CPU in bare metal mode. Just read the ID register and feature bits directly.
+ uint64_t fr0, pfr0, midr;
+ __asm __volatile(
+ "MRS %0, ID_AA64ISAR0_EL1\n"
+ "MRS %1, ID_AA64PFR0_EL1\n"
+ "MRS %2, midr_el1"
+ : "=r"(fr0), "=r"(pfr0), "=r"(midr));
+ if((fr0 >> 44) & 0xf)
+ {
+ cpuinfo.set_dotprod(true);
+ }
+ if((pfr0 >> 16) & 0xf)
+ {
+ cpuinfo.set_fp16(true);
+ }
+ if((pfr0 >> 32) & 0xf)
+ {
+ cpuinfo.set_sve(true);
+ }
+#else /* #elif(BARE_METAL) && defined(__aarch64__) */
ARM_COMPUTE_UNUSED(cpuinfo);
-#endif /* !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) */
+#endif /* !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) */
}
unsigned int get_threads_hint()