From 63d4dbddf9d7b6cdd0e682ae838dc3ef37eb95ce Mon Sep 17 00:00:00 2001
From: Georgios Pinitas
Date: Fri, 8 Nov 2019 11:51:56 +0000
Subject: COMPMID-2452: Fix 32-bit per-channel convolution for NEON.

Rearrange the kernels in run to ensure type conversion takes place before the matrix transformations.

Change-Id: Ibf47788fe71a84fd7549f8667549552e15ca8aab
Signed-off-by: Georgios Pinitas
Reviewed-on: https://review.mlplatform.org/c/2251
Reviewed-by: Michele Di Giorgio
Comments-Addressed: Arm Jenkins
Tested-by: Arm Jenkins
---
 .../NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

(limited to 'src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp')

diff --git a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp
index 01a99f7aca..5b9d0551e2 100644
--- a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp
+++ b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp
@@ -97,6 +97,9 @@ void NEGEMMLowpMatrixMultiplyCore::configure(const ITensor *a, const ITensor *b,
         output_stage_corr.gemmlowp_min_bound -= offset_correction;
         output_stage_corr.gemmlowp_max_bound -= offset_correction;
         info.set_gemmlowp_output_stage(output_stage_corr);
+
+        // Update matrix a
+        matrix_a = &_signed_a;
     }
 
     // If GEMMLowpOutputStage != NONE, fuse the offset contribution with the output stage
@@ -329,6 +332,9 @@ Status NEGEMMLowpMatrixMultiplyCore::validate(const ITensorInfo *a, const ITenso
         output_stage_corr.gemmlowp_min_bound -= offset_correction;
         output_stage_corr.gemmlowp_max_bound -= offset_correction;
         info.set_gemmlowp_output_stage(output_stage_corr);
+
+        // Update matrix a
+        matrix_a_info = &signed_a;
     }
 
     // Check if we need to run the optimized assembly kernel
@@ -463,6 +469,12 @@ void NEGEMMLowpMatrixMultiplyCore::run()
 
     MemoryGroupResourceScope scope_mg(_memory_group);
 
+    // Convert QASYMM8->QASYMM8_SIGNED
+    if(_flip_signedness)
+    {
+        NEScheduler::get().schedule(&_convert_to_signed_asymm, Window::DimY);
+    }
+
     // Reshape inputs
     if(_mtx_a_reshape_kernel)
     {
@@ -473,12 +485,6 @@ void NEGEMMLowpMatrixMultiplyCore::run()
         NEScheduler::get().schedule(_mtx_b_reshape_kernel.get(), Window::DimY);
     }
 
-    // Convert QASYMM8->QASYMM8_SIGNED
-    if(_flip_signedness)
-    {
-        NEScheduler::get().schedule(&_convert_to_signed_asymm, Window::DimY);
-    }
-
     // Run GEMM
     if(_asm_glue.is_configured())
     {
-- 
cgit v1.2.1