about | summary | refs | log | tree | commit | diff
path: root/src/core/NEON/kernels/arm_gemm/utils.hpp
diff options
context:
space:
mode:
author Michalis Spyrou <michalis.spyrou@arm.com> 2021-06-07 14:23:57 +0100
committer Georgios Pinitas <georgios.pinitas@arm.com> 2021-06-23 12:25:50 +0000
commit 20fca524baf99402f742ce38c538f2fd07d5fff9 (patch)
tree b63d98383d1ba22bb3ca59d393e4ab9d47a9c762 /src/core/NEON/kernels/arm_gemm/utils.hpp
parent 1d359279e22874121def2ce4bfdb633d94ea5ade (diff)
download ComputeLibrary-20fca524baf99402f742ce38c538f2fd07d5fff9.tar.gz
Create core library using high priority operators
A smaller core library is created using a subset of the operators. Changed the structure of filelist.json in order to include more information about the kernels and make the selection easier.

Resolves: COMPMID-4514
Change-Id: I079ca7d8e64346174eebdd13b834e1dd4dc36ca2
Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5786
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/arm_gemm/utils.hpp')
-rw-r--r-- src/core/NEON/kernels/arm_gemm/utils.hpp 50
1 files changed, 17 insertions, 33 deletions
diff --git a/src/core/NEON/kernels/arm_gemm/utils.hpp b/src/core/NEON/kernels/arm_gemm/utils.hpp
index 1269ef62a6..e648ce2fb5 100644
--- a/src/core/NEON/kernels/arm_gemm/utils.hpp
+++ b/src/core/NEON/kernels/arm_gemm/utils.hpp
@@ -141,52 +141,36 @@ struct IndirectInputArg {
};
namespace utils {
-namespace {
-
-#ifdef __ARM_FEATURE_SVE
-template<size_t sz>
-inline unsigned long get_vector_length_sz() {
- unsigned long v;
-
- __asm (
- "cntb %0"
- : "=r" (v)
- );
-
- return v / sz;
-}
-
-#define VEC_LEN_SPEC(sz, opcode) template <> inline unsigned long get_vector_length_sz<sz>() { unsigned long v; __asm ( opcode " %0" : "=r" (v)); return v; }
-
-VEC_LEN_SPEC(8, "cntd")
-VEC_LEN_SPEC(4, "cntw")
-VEC_LEN_SPEC(2, "cnth")
-VEC_LEN_SPEC(1, "cntb")
-#endif
-
-} // anonymous namespace
-
template <typename T>
inline unsigned long get_vector_length() {
-#ifdef __ARM_FEATURE_SVE
- return get_vector_length_sz<sizeof(T)>();
-#else
+#if defined(ARM_COMPUTE_ENABLE_SVE)
+ uint64_t vl;
+
+ __asm __volatile (
+ ".inst 0x0420e3e0\n" // CNTB X0, ALL, MUL #1
+ "mov %0, X0\n"
+ : "=r" (vl)
+ :
+ : "x0"
+ );
+
+ return vl / sizeof(T);
+#else // !defined(ARM_COMPUTE_ENABLE_SVE)
return 16 / sizeof(T);
-#endif
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
}
template <typename T>
inline unsigned long get_vector_length(VLType vl_type) {
switch (vl_type) {
-#ifdef __ARM_FEATURE_SVE
+#if defined(ARM_COMPUTE_ENABLE_SVE)
case VLType::SVE:
- return get_vector_length_sz<sizeof(T)>();
-#endif
+ return get_vector_length<T>();
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
default:
return 16 / sizeof(T);
}
}
-
} // utils namespace
} // arm_gemm namespace