diff options
Diffstat (limited to 'src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave4_block16_s8_s8.hpp')
-rw-r--r-- | src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave4_block16_s8_s8.hpp | 6 |
1 files changed, 3 insertions, 3 deletions
diff --git a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave4_block16_s8_s8.hpp b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave4_block16_s8_s8.hpp index 4d065300ae..d5a41a332d 100644 --- a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave4_block16_s8_s8.hpp +++ b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave4_block16_s8_s8.hpp @@ -34,9 +34,9 @@ void interleave_block<4, 16, VLType::None, false>( "ldr x23, [%x[in], #0x0]\n" "ldr x22, [%x[in], #0x8]\n" "cmp %x[height], #0x4\n" + "add x23, x23, %x[row_offset]\n" "ldr x21, [%x[in], #0x10]\n" "ldr x20, [%x[in], #0x18]\n" - "add x23, x23, %x[row_offset]\n" "add x22, x22, %x[row_offset]\n" "add x21, x21, %x[row_offset]\n" "add x20, x20, %x[row_offset]\n" @@ -60,12 +60,12 @@ void interleave_block<4, 16, VLType::None, false>( "ldr q19, [x23], #0x10\n" "ldr q18, [x22], #0x10\n" "subs %x[width], %x[width], #0x10\n" + "cmp %x[width], #0x10\n" "ldr q17, [x21], #0x10\n" "ldr q16, [x20], #0x10\n" - "cmp %x[width], #0x10\n" + "str q19, [%x[out_ptr], #0x0]\n" "prfm pldl1keep, [x23, #0x70]\n" "prfm pldl1keep, [x22, #0x70]\n" - "str q19, [%x[out_ptr], #0x0]\n" "str q18, [%x[out_ptr], #0x10]\n" "prfm pldl1keep, [x21, #0x70]\n" "prfm pldl1keep, [x20, #0x70]\n" |