From 8bd25568aac3f6e6ef5e878666daf7b5cc174510 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Thu, 22 Jul 2021 11:56:32 +0100 Subject: Fix vector_length identification mechanism for SVE Signed-off-by: Georgios Pinitas Change-Id: I1196a5e3357a616f76b0ab1e92b15544b3c01247 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5976 Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins --- src/core/NEON/kernels/arm_gemm/utils.hpp | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/core/NEON/kernels/arm_gemm/utils.hpp b/src/core/NEON/kernels/arm_gemm/utils.hpp index 4ba03da6e7..82464d2eff 100644 --- a/src/core/NEON/kernels/arm_gemm/utils.hpp +++ b/src/core/NEON/kernels/arm_gemm/utils.hpp @@ -164,9 +164,15 @@ struct IndirectInputArg { }; namespace utils { + +// get_vector_length(): Returns SVE vector length for type "T". +// +// It is required that this can be compiled by a compiler in non-SVE mode, but it must be prevented from running (at +// runtime) if SVE is not enabled. Typically this is used by switchyard/driver code which is built in normal mode +// which then calls SVE kernels (compiled accordingly) iff SVE is detected at runtime. template <typename T> inline unsigned long get_vector_length() { -#if defined(ARM_COMPUTE_ENABLE_SVE) +#if defined(__aarch64__) uint64_t vl; __asm __volatile ( @@ -178,18 +184,20 @@ inline unsigned long get_vector_length() { ); return vl / sizeof(T); -#else // !defined(ARM_COMPUTE_ENABLE_SVE) +#else // !defined(__aarch64__) return 16 / sizeof(T); -#endif // defined(ARM_COMPUTE_ENABLE_SVE) +#endif // defined(__aarch64__) } +// get_vector_length(VLType): Returns vector length for type "T". +// +// This has the same requirements and constraints as the SVE-only form above, so we call into that code for SVE.
+ template <typename T> inline unsigned long get_vector_length(VLType vl_type) { switch (vl_type) { -#if defined(ARM_COMPUTE_ENABLE_SVE) case VLType::SVE: return get_vector_length<T>(); -#endif // defined(ARM_COMPUTE_ENABLE_SVE) default: return 16 / sizeof(T); } -- cgit v1.2.1