diff options
author | Pablo Tello <pablo.tello@arm.com> | 2017-12-05 14:28:28 +0000 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:42:17 +0000 |
commit | 3bcf15db673fa927eba34356228865678a979844 (patch) | |
tree | a97c7653b48fa456aa0873bfaa6f38cd6dbbd8ae /arm_compute/core/NEON | |
parent | 0a878ae1bbb13002e50f8287721750d2e4b22680 (diff) | |
download | ComputeLibrary-3bcf15db673fa927eba34356228865678a979844.tar.gz |
COMPMID-675: NEGEMMLowp Assembly, fixed ananke's dot product kernel mismatches
Change-Id: Ie9e9be0b17930164ea7f90a34fa89219f08d31f2
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/111935
Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'arm_compute/core/NEON')
-rw-r--r-- | arm_compute/core/NEON/kernels/assembly/kernels/a64_gemm_u8_12x8/a55r1.hpp | 5 |
1 file changed, 3 insertions, 2 deletions
diff --git a/arm_compute/core/NEON/kernels/assembly/kernels/a64_gemm_u8_12x8/a55r1.hpp b/arm_compute/core/NEON/kernels/assembly/kernels/a64_gemm_u8_12x8/a55r1.hpp
index 3ede256f40..c7c2acbb49 100644
--- a/arm_compute/core/NEON/kernels/assembly/kernels/a64_gemm_u8_12x8/a55r1.hpp
+++ b/arm_compute/core/NEON/kernels/assembly/kernels/a64_gemm_u8_12x8/a55r1.hpp
@@ -206,6 +206,7 @@ inline void a64_gemm_u8_12x8_a55r1(const uint8_t *Apanel, const uint8_t *Bpanel,
  // Branch to alternative tail for odd K
  "cbnz %w[oddk], 2f\n"
+ "ldr %d[b2], [%[b_ptr], #32]\n"
  // Detached final iteration (even K)
  "udot v8.4s , %[b0].16b, %[a0].4b[0]\n"
@@ -216,14 +217,14 @@ inline void a64_gemm_u8_12x8_a55r1(const uint8_t *Apanel, const uint8_t *Bpanel,
  "ldr %d[a0a], [%[a_ptr], #32]\n"
  "udot v12.4s, %[b0].16b, %[a1].4b[0]\n"
- "ldr %d[b2], [%[b_ptr], #32]\n"
+ "ins %[b2].d[1], x20\n"
+ "udot v13.4s, %[b0].16b, %[a1].4b[1]\n"
  "ldr x20, [%[a_ptr], #40]\n"
  "udot v14.4s, %[b0].16b, %[a1].4b[2]\n"
  "udot v15.4s, %[b0].16b, %[a1].4b[3]\n"
  "ldr %d[a1a], [%[a_ptr], #48]\n"
- "udot v16.4s, %[b1].16b, %[a0].4b[0]\n"
  "ins %[a0a].d[1], x20\n"
  "udot v17.4s, %[b1].16b, %[a0].4b[1]\n"