diff options
Diffstat (limited to 'src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave4_block16_s8_s8.hpp')
-rw-r--r-- | src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave4_block16_s8_s8.hpp | 8 |
1 files changed, 4 insertions, 4 deletions
diff --git a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave4_block16_s8_s8.hpp b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave4_block16_s8_s8.hpp
index e4bfc0f6e4..4d065300ae 100644
--- a/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave4_block16_s8_s8.hpp
+++ b/src/core/NEON/kernels/arm_gemm/indirect-interleaves/a64_interleave4_block16_s8_s8.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021, 2023 Arm Limited.
+ * Copyright (c) 2019-2021, 2023-2024 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
@@ -34,9 +34,9 @@ void interleave_block<4, 16, VLType::None, false>(
 "ldr x23, [%x[in], #0x0]\n"
 "ldr x22, [%x[in], #0x8]\n"
 "cmp %x[height], #0x4\n"
- "add x23, x23, %x[row_offset]\n"
 "ldr x21, [%x[in], #0x10]\n"
 "ldr x20, [%x[in], #0x18]\n"
+ "add x23, x23, %x[row_offset]\n"
 "add x22, x22, %x[row_offset]\n"
 "add x21, x21, %x[row_offset]\n"
 "add x20, x20, %x[row_offset]\n"
@@ -60,12 +60,12 @@ void interleave_block<4, 16, VLType::None, false>(
 "ldr q19, [x23], #0x10\n"
 "ldr q18, [x22], #0x10\n"
 "subs %x[width], %x[width], #0x10\n"
- "cmp %x[width], #0x10\n"
 "ldr q17, [x21], #0x10\n"
 "ldr q16, [x20], #0x10\n"
- "str q19, [%x[out_ptr], #0x0]\n"
+ "cmp %x[width], #0x10\n"
 "prfm pldl1keep, [x23, #0x70]\n"
 "prfm pldl1keep, [x22, #0x70]\n"
+ "str q19, [%x[out_ptr], #0x0]\n"
 "str q18, [%x[out_ptr], #0x10]\n"
 "prfm pldl1keep, [x21, #0x70]\n"
 "prfm pldl1keep, [x20, #0x70]\n"