about | summary | refs | log | tree | commit | diff
path: root/arm_compute/core/NEON/kernels/assembly
diff options
context:
space:
mode:
author: Pablo Tello <pablo.tello@arm.com> 2017-12-05 14:28:28 +0000
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:42:17 +0000
commit: 3bcf15db673fa927eba34356228865678a979844 (patch)
tree: a97c7653b48fa456aa0873bfaa6f38cd6dbbd8ae /arm_compute/core/NEON/kernels/assembly
parent: 0a878ae1bbb13002e50f8287721750d2e4b22680 (diff)
download: ComputeLibrary-3bcf15db673fa927eba34356228865678a979844.tar.gz
COMPMID-675: NEGEMMLowp Assembly, fixed ananke's dot product kernel mismatches
Change-Id: Ie9e9be0b17930164ea7f90a34fa89219f08d31f2
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/111935
Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'arm_compute/core/NEON/kernels/assembly')
-rw-r--r-- arm_compute/core/NEON/kernels/assembly/kernels/a64_gemm_u8_12x8/a55r1.hpp | 5
1 files changed, 3 insertions, 2 deletions
diff --git a/arm_compute/core/NEON/kernels/assembly/kernels/a64_gemm_u8_12x8/a55r1.hpp b/arm_compute/core/NEON/kernels/assembly/kernels/a64_gemm_u8_12x8/a55r1.hpp
index 3ede256f40..c7c2acbb49 100644
--- a/arm_compute/core/NEON/kernels/assembly/kernels/a64_gemm_u8_12x8/a55r1.hpp
+++ b/arm_compute/core/NEON/kernels/assembly/kernels/a64_gemm_u8_12x8/a55r1.hpp
@@ -206,6 +206,7 @@ inline void a64_gemm_u8_12x8_a55r1(const uint8_t *Apanel, const uint8_t *Bpanel,
// Branch to alternative tail for odd K
"cbnz %w[oddk], 2f\n"
+ "ldr %d[b2], [%[b_ptr], #32]\n"
// Detached final iteration (even K)
"udot v8.4s , %[b0].16b, %[a0].4b[0]\n"
@@ -216,14 +217,14 @@ inline void a64_gemm_u8_12x8_a55r1(const uint8_t *Apanel, const uint8_t *Bpanel,
"ldr %d[a0a], [%[a_ptr], #32]\n"
"udot v12.4s, %[b0].16b, %[a1].4b[0]\n"
- "ldr %d[b2], [%[b_ptr], #32]\n"
+ "ins %[b2].d[1], x20\n"
+
"udot v13.4s, %[b0].16b, %[a1].4b[1]\n"
"ldr x20, [%[a_ptr], #40]\n"
"udot v14.4s, %[b0].16b, %[a1].4b[2]\n"
"udot v15.4s, %[b0].16b, %[a1].4b[3]\n"
"ldr %d[a1a], [%[a_ptr], #48]\n"
-
"udot v16.4s, %[b1].16b, %[a0].4b[0]\n"
"ins %[a0a].d[1], x20\n"
"udot v17.4s, %[b1].16b, %[a0].4b[1]\n"