From 101de503901f503d8a6f741c3aaea359bc1aafb7 Mon Sep 17 00:00:00 2001
From: David Mansell
Date: Tue, 6 Feb 2018 17:11:21 +0000
Subject: COMPMID-896: Replace legacy 4x4 u8 GEMM kernel with safe version.

It's not safe to accumulate two u8xu8 results into a u16 accumulator.
This changes the kernel to use uadalp after every single multiply.
Correct the test fixture as well.

Change-Id: I011b90033c4673e55b843d079e3f7d185b1df330
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/119096
Tested-by: Jenkins
Reviewed-by: Georgios Pinitas
---
 .../assembly/kernels/a64_gemm_u8_4x4/generic.hpp | 394 ++++++---------------
 1 file changed, 105 insertions(+), 289 deletions(-)

(limited to 'arm_compute/core/NEON')

diff --git a/arm_compute/core/NEON/kernels/assembly/kernels/a64_gemm_u8_4x4/generic.hpp b/arm_compute/core/NEON/kernels/assembly/kernels/a64_gemm_u8_4x4/generic.hpp
index e48c373f21..aff3faf666 100644
--- a/arm_compute/core/NEON/kernels/assembly/kernels/a64_gemm_u8_4x4/generic.hpp
+++ b/arm_compute/core/NEON/kernels/assembly/kernels/a64_gemm_u8_4x4/generic.hpp
@@ -31,7 +31,6 @@ inline void a64_gemm_u8_4x4(const uint8_t *Apanel, const uint8_t *Bpanel, uint32
 const uint8_t *a_ptr = Apanel;
 uint32_t *c_ptr = Cpanel;
 K /= 16;
- int oddk = (K & 1);
 for (int yb=0; yb