From 5aa1a0b7ca5eed010e4b297a95b1c4851f741328 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Thu, 2 Jul 2020 20:02:20 +0100 Subject: COMPID-3324: Clean GEMM kernels Signed-off-by: Georgios Pinitas Change-Id: I170de1671e061a78740caee31fb4a1b8642c1369 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3505 Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins Reviewed-by: Michele Di Giorgio --- .../kernels/a64_hybrid_fp32_mla_16x4/generic.cpp | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_16x4/generic.cpp') diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_16x4/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_16x4/generic.cpp index 03f65889ea..43ff3a98dc 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_16x4/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_16x4/generic.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -61,12 +61,23 @@ void a64_hybrid_fp32_mla_16x4(const float *A, int lda, const float *B, float *C, break; } - for (int y=0; y 4) { + if (rows_to_compute % 4) { + rows_to_compute = 4 - 1; + } else { + rows_to_compute = 4; + } + } + for (int x0=0; x0