about summary refs log tree commit diff
path: root/src/core/NEON/kernels/arm_gemm/utils.hpp
diff options
context:
space:
mode:
author Georgios Pinitas <georgios.pinitas@arm.com> 2019-01-09 18:35:17 +0000
committer Georgios Pinitas <georgios.pinitas@arm.com> 2019-01-18 13:41:40 +0000
commit 7cd26d4a1b14bc4bf7c61496803416ab3d84791f (patch)
tree 12cc4a27d7ecebc69a43e96b1f46c7eb05437978 /src/core/NEON/kernels/arm_gemm/utils.hpp
parent 3ac2f3a1d9297220d1b0ce920dd13fdd4edcc187 (diff)
download ComputeLibrary-7cd26d4a1b14bc4bf7c61496803416ab3d84791f.tar.gz
COMPMID-1867: Add NEON/SVE GEMM Hybrid kernels.
Change-Id: Ib40a9921e7f9a6a8be6c38872d6b3a0f24ed0cd3
Reviewed-on: https://review.mlplatform.org/515
Reviewed-by: Anthony Barbier <Anthony.barbier@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/arm_gemm/utils.hpp')
-rw-r--r-- src/core/NEON/kernels/arm_gemm/utils.hpp 44
1 files changed, 36 insertions, 8 deletions
diff --git a/src/core/NEON/kernels/arm_gemm/utils.hpp b/src/core/NEON/kernels/arm_gemm/utils.hpp
index a1fc00ea89..8b96c328a6 100644
--- a/src/core/NEON/kernels/arm_gemm/utils.hpp
+++ b/src/core/NEON/kernels/arm_gemm/utils.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,9 +24,7 @@
#pragma once
-#ifdef __ARM_FEATURE_SVE
-#include <arm_sve.h>
-#endif
+#include <cstddef>
// Macro for unreachable code (e.g. impossible default cases on switch)
#define UNREACHABLE(why) __builtin_unreachable()
@@ -49,13 +47,43 @@ inline T roundup(const T a, const T b) {
}
}
+namespace arm_gemm {
+namespace utils {
+namespace {
+
+#ifdef __ARM_FEATURE_SVE
+template<size_t sz>
+inline unsigned long get_vector_length_sz() {
+ unsigned long v;
+
+ __asm (
+ "cntb %0"
+ : "=r" (v)
+ );
+
+ return v / sz;
+}
+
+#define VEC_LEN_SPEC(sz, opcode) template <> inline unsigned long get_vector_length_sz<sz>() { unsigned long v; __asm ( opcode " %0" : "=r" (v)); return v; }
+
+VEC_LEN_SPEC(8, "cntd")
+VEC_LEN_SPEC(4, "cntw")
+VEC_LEN_SPEC(2, "cnth")
+VEC_LEN_SPEC(1, "cntb")
+#endif
+
+} // anonymous namespace
+
template <typename T>
inline unsigned long get_vector_length() {
#ifdef __ARM_FEATURE_SVE
- const unsigned long length = svcntb();
+ return get_vector_length_sz<sizeof(T)>();
#else
- const unsigned long length = 16;
+ return 16 / sizeof(T);
#endif
+}
+
+} // utils namespace
+} // arm_gemm namespace
- return length / sizeof(T);
-}
\ No newline at end of file
+using namespace arm_gemm::utils;
\ No newline at end of file