From 63d4dbddf9d7b6cdd0e682ae838dc3ef37eb95ce Mon Sep 17 00:00:00 2001
From: Georgios Pinitas <georgios.pinitas@arm.com>
Date: Fri, 8 Nov 2019 11:51:56 +0000
Subject: COMPMID-2452: Fix 32-bit per-channel convolution for NEON.

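Rearrange the kernels in run() so that the QASYMM8 -> QASYMM8_SIGNED
type conversion is scheduled before the matrix reshape transformations.
Also point matrix A at the signed tensor in configure() and validate()
after the output stage bounds have been offset-corrected.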

Change-Id: Ibf47788fe71a84fd7549f8667549552e15ca8aab
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-on: https://review.mlplatform.org/c/2251
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
---
 .../NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp    | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

(limited to 'src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp')

diff --git a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp
index 01a99f7aca..5b9d0551e2 100644
--- a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp
+++ b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp
@@ -97,6 +97,9 @@ void NEGEMMLowpMatrixMultiplyCore::configure(const ITensor *a, const ITensor *b,
         output_stage_corr.gemmlowp_min_bound -= offset_correction;
         output_stage_corr.gemmlowp_max_bound -= offset_correction;
         info.set_gemmlowp_output_stage(output_stage_corr);
+
+        // Update matrix a
+        matrix_a = &_signed_a;
     }
 
     // If GEMMLowpOutputStage != NONE, fuse the offset contribution with the output stage
@@ -329,6 +332,9 @@ Status NEGEMMLowpMatrixMultiplyCore::validate(const ITensorInfo *a, const ITenso
         output_stage_corr.gemmlowp_min_bound -= offset_correction;
         output_stage_corr.gemmlowp_max_bound -= offset_correction;
         info.set_gemmlowp_output_stage(output_stage_corr);
+
+        // Update matrix a
+        matrix_a_info = &signed_a;
     }
 
     // Check if we need to run the optimized assembly kernel
@@ -463,6 +469,12 @@ void NEGEMMLowpMatrixMultiplyCore::run()
 
     MemoryGroupResourceScope scope_mg(_memory_group);
 
+    // Convert QASYMM8->QASYMM8_SIGNED
+    if(_flip_signedness)
+    {
+        NEScheduler::get().schedule(&_convert_to_signed_asymm, Window::DimY);
+    }
+
     // Reshape inputs
     if(_mtx_a_reshape_kernel)
     {
@@ -473,12 +485,6 @@ void NEGEMMLowpMatrixMultiplyCore::run()
         NEScheduler::get().schedule(_mtx_b_reshape_kernel.get(), Window::DimY);
     }
 
-    // Convert QASYMM8->QASYMM8_SIGNED
-    if(_flip_signedness)
-    {
-        NEScheduler::get().schedule(&_convert_to_signed_asymm, Window::DimY);
-    }
-
     // Run GEMM
     if(_asm_glue.is_configured())
     {
-- 
cgit v1.2.1