aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/kernels/arm_gemm/asmlib.hpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/NEON/kernels/arm_gemm/asmlib.hpp')
-rw-r--r-- src/core/NEON/kernels/arm_gemm/asmlib.hpp 85
1 files changed, 43 insertions, 42 deletions
diff --git a/src/core/NEON/kernels/arm_gemm/asmlib.hpp b/src/core/NEON/kernels/arm_gemm/asmlib.hpp
index b3fcb33bfb..38f51ae72c 100644
--- a/src/core/NEON/kernels/arm_gemm/asmlib.hpp
+++ b/src/core/NEON/kernels/arm_gemm/asmlib.hpp
@@ -31,21 +31,21 @@
// used by the workaround.
// "Correct" version
-#define ASM_PREFETCH(address) "PRFM PLDL1KEEP, " address "\n"
-#define ASM_PREFETCHL2(address) "PRFM PLDL2KEEP, " address "\n"
-#define ASM_PREFETCHW(address) "PRFM PSTL1KEEP, " address "\n"
+#define ASM_PREFETCH(address) "PRFM PLDL1KEEP, " address "\n"
+#define ASM_PREFETCHL2(address) "PRFM PLDL2KEEP, " address "\n"
+#define ASM_PREFETCHW(address) "PRFM PSTL1KEEP, " address "\n"
#define ASM_PREFETCHWL2(address) "PRFM PSTL2KEEP, " address "\n"
// Lee's uarchsim hack
-//#define ASM_PREFETCH(address) "LDNP x20, x21, " address "\n"
+//#define ASM_PREFETCH(address) "LDNP x20, x21, " address "\n"
// No preload at all
//#define ASM_PREFETCH(address) ""
#else
// "Correct" versions for AArch32
-#define ASM_PREFETCH(address) "PLD " address "\n"
-#define ASM_PREFETCHW(address) "PLDW " address "\n"
+#define ASM_PREFETCH(address) "PLD " address "\n"
+#define ASM_PREFETCHW(address) "PLDW " address "\n"
#endif
@@ -53,76 +53,77 @@
* Do some prefetches.
*/
template <typename T>
-static inline void prefetch_6x(const T *pfp)
-{
- __asm __volatile(
+static inline void prefetch_6x(const T *pfp) {
+ __asm __volatile (
ASM_PREFETCH("[%[pfp]]")
ASM_PREFETCH("[%[pfp], #64]")
ASM_PREFETCH("[%[pfp], #128]")
ASM_PREFETCH("[%[pfp], #192]")
ASM_PREFETCH("[%[pfp], #256]")
ASM_PREFETCH("[%[pfp], #320]")
- :
- : [pfp] "r"(pfp)
- : "memory");
+ :
+ : [pfp] "r" (pfp)
+ : "memory"
+ );
}
template <typename T>
-static inline void prefetch_5x(const T *pfp)
-{
- __asm __volatile(
+static inline void prefetch_5x(const T *pfp) {
+ __asm __volatile (
ASM_PREFETCH("[%[pfp]]")
ASM_PREFETCH("[%[pfp], #64]")
ASM_PREFETCH("[%[pfp], #128]")
ASM_PREFETCH("[%[pfp], #192]")
ASM_PREFETCH("[%[pfp], #256]")
- :
- : [pfp] "r"(pfp)
- : "memory");
+ :
+ : [pfp] "r" (pfp)
+ : "memory"
+ );
}
template <typename T>
-static inline void prefetch_4x(const T *pfp)
-{
- __asm __volatile(
+static inline void prefetch_4x(const T *pfp) {
+ __asm __volatile (
ASM_PREFETCH("[%[pfp]]")
ASM_PREFETCH("[%[pfp], #64]")
ASM_PREFETCH("[%[pfp], #128]")
ASM_PREFETCH("[%[pfp], #192]")
- :
- : [pfp] "r"(pfp)
- : "memory");
+ :
+ : [pfp] "r" (pfp)
+ : "memory"
+ );
}
template <typename T>
-static inline void prefetch_3x(const T *pfp)
-{
- __asm __volatile(
+static inline void prefetch_3x(const T *pfp) {
+ __asm __volatile (
ASM_PREFETCH("[%[pfp]]")
ASM_PREFETCH("[%[pfp], #64]")
ASM_PREFETCH("[%[pfp], #128]")
- :
- : [pfp] "r"(pfp)
- : "memory");
+ :
+ : [pfp] "r" (pfp)
+ : "memory"
+ );
}
template <typename T>
-static inline void prefetch_2x(const T *pfp)
-{
- __asm __volatile(
+static inline void prefetch_2x(const T *pfp) {
+ __asm __volatile (
ASM_PREFETCH("[%[pfp]]")
ASM_PREFETCH("[%[pfp], #64]")
- :
- : [pfp] "r"(pfp)
- : "memory");
+ :
+ : [pfp] "r" (pfp)
+ : "memory"
+ );
}
template <typename T>
-static inline void prefetch_1x(const T *pfp)
-{
- __asm __volatile(
+static inline void prefetch_1x(const T *pfp) {
+ __asm __volatile (
ASM_PREFETCH("[%[pfp]]")
- :
- : [pfp] "r"(pfp)
- : "memory");
+ :
+ : [pfp] "r" (pfp)
+ : "memory"
+ );
}
+