From 3bcf15db673fa927eba34356228865678a979844 Mon Sep 17 00:00:00 2001 From: Pablo Tello Date: Tue, 5 Dec 2017 14:28:28 +0000 Subject: COMPMID-675: NEGEMMLowp Assembly, fixed ananke's dot product kernel mismatches Change-Id: Ie9e9be0b17930164ea7f90a34fa89219f08d31f2 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/111935 Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com Reviewed-by: Anthony Barbier --- .../core/NEON/kernels/assembly/kernels/a64_gemm_u8_12x8/a55r1.hpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arm_compute/core/NEON/kernels/assembly/kernels/a64_gemm_u8_12x8/a55r1.hpp b/arm_compute/core/NEON/kernels/assembly/kernels/a64_gemm_u8_12x8/a55r1.hpp index 3ede256f40..c7c2acbb49 100644 --- a/arm_compute/core/NEON/kernels/assembly/kernels/a64_gemm_u8_12x8/a55r1.hpp +++ b/arm_compute/core/NEON/kernels/assembly/kernels/a64_gemm_u8_12x8/a55r1.hpp @@ -206,6 +206,7 @@ inline void a64_gemm_u8_12x8_a55r1(const uint8_t *Apanel, const uint8_t *Bpanel, // Branch to alternative tail for odd K "cbnz %w[oddk], 2f\n" + "ldr %d[b2], [%[b_ptr], #32]\n" // Detached final iteration (even K) "udot v8.4s , %[b0].16b, %[a0].4b[0]\n" @@ -216,14 +217,14 @@ inline void a64_gemm_u8_12x8_a55r1(const uint8_t *Apanel, const uint8_t *Bpanel, "ldr %d[a0a], [%[a_ptr], #32]\n" "udot v12.4s, %[b0].16b, %[a1].4b[0]\n" - "ldr %d[b2], [%[b_ptr], #32]\n" + "ins %[b2].d[1], x20\n" + "udot v13.4s, %[b0].16b, %[a1].4b[1]\n" "ldr x20, [%[a_ptr], #40]\n" "udot v14.4s, %[b0].16b, %[a1].4b[2]\n" "udot v15.4s, %[b0].16b, %[a1].4b[3]\n" "ldr %d[a1a], [%[a_ptr], #48]\n" - "udot v16.4s, %[b1].16b, %[a0].4b[0]\n" "ins %[a0a].d[1], x20\n" "udot v17.4s, %[b1].16b, %[a0].4b[1]\n" -- cgit v1.2.1