From cfa2bba98169cb5ab1945462514be1b6badf7d98 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Thu, 27 Jun 2019 17:00:52 +0100 Subject: COMPMID-2178: Update GEMM assembly code. Perform offset reduction and requantization within the assembly wrapper. Change-Id: I5d5b3e1f6f9ef4c71805362c57f88ff199c027a3 Signed-off-by: Georgios Pinitas Reviewed-on: https://review.mlplatform.org/c/1541 Comments-Addressed: Pablo Marquez Reviewed-by: Gian Marco Iodice Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins --- .../NEON/kernels/arm_gemm/kernels/a64_gemm_u16_12x8/generic.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u16_12x8/generic.cpp') diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u16_12x8/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u16_12x8/generic.cpp index 4c21620218..83f9028065 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u16_12x8/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u16_12x8/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -100,7 +100,7 @@ void a64_gemm_u16_asimd_12x8(const uint16_t *Apanel, const uint16_t *Bpanel, uin "1:\n" // Main loop // First unroll - "smlal v5.4s, %[b0].4h, %[aa].h[0]\n" + "umlal v5.4s, %[b0].4h, %[aa].h[0]\n" "ldr x20, [%x[b_ptr]]\n" // Load B[1].upper "umlal v6.4s, %[b0].4h, %[aa].h[1]\n" "umlal v7.4s, %[b0].4h, %[aa].h[2]\n" @@ -312,7 +312,7 @@ void a64_gemm_u16_asimd_12x8(const uint16_t *Apanel, const uint16_t *Bpanel, uin : [a_ptr] "+r" (a_ptr), [b_ptr] "+r" (b_ptr), [c_ptr] "+r" (c_ptr), [k] "+r" (k), [aa] "+w" (aa), [ab] "+w" (ab), [b0] "+w" (b0), [b1] "+w" (b1), [b2] "+w" (b2) : [odd_k] "r" (odd_k) - : "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "x20", "cc" + : "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "x20", "cc", "memory" ); } } -- cgit v1.2.1