about summary refs log tree commit diff
path: root/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave4_block16_s8_s8.hpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave4_block16_s8_s8.hpp')
-rw-r--r-- src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave4_block16_s8_s8.hpp 8
1 files changed, 4 insertions, 4 deletions
diff --git a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave4_block16_s8_s8.hpp b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave4_block16_s8_s8.hpp
index e4bfc0f6e4..4d065300ae 100644
--- a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave4_block16_s8_s8.hpp
+++ b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave4_block16_s8_s8.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021, 2023 Arm Limited.
+ * Copyright (c) 2019-2021, 2023-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -34,9 +34,9 @@ void interleave_block<4, 16, VLType::None, false>(
"ldr x23, [%x[in], #0x0]\n"
"ldr x22, [%x[in], #0x8]\n"
"cmp %x[height], #0x4\n"
- "add x23, x23, %x[row_offset]\n"
"ldr x21, [%x[in], #0x10]\n"
"ldr x20, [%x[in], #0x18]\n"
+ "add x23, x23, %x[row_offset]\n"
"add x22, x22, %x[row_offset]\n"
"add x21, x21, %x[row_offset]\n"
"add x20, x20, %x[row_offset]\n"
@@ -60,12 +60,12 @@ void interleave_block<4, 16, VLType::None, false>(
"ldr q19, [x23], #0x10\n"
"ldr q18, [x22], #0x10\n"
"subs %x[width], %x[width], #0x10\n"
- "cmp %x[width], #0x10\n"
"ldr q17, [x21], #0x10\n"
"ldr q16, [x20], #0x10\n"
- "str q19, [%x[out_ptr], #0x0]\n"
+ "cmp %x[width], #0x10\n"
"prfm pldl1keep, [x23, #0x70]\n"
"prfm pldl1keep, [x22, #0x70]\n"
+ "str q19, [%x[out_ptr], #0x0]\n"
"str q18, [%x[out_ptr], #0x10]\n"
"prfm pldl1keep, [x21, #0x70]\n"
"prfm pldl1keep, [x20, #0x70]\n"