diff options
author | Georgios Pinitas <georgios.pinitas@arm.com> | 2019-11-08 11:51:56 +0000 |
---|---|---|
committer | Georgios Pinitas <georgios.pinitas@arm.com> | 2019-11-08 15:21:44 +0000 |
commit | 63d4dbddf9d7b6cdd0e682ae838dc3ef37eb95ce (patch) | |
tree | d72936e2dcd267cdc241e75597a23492612d3189 /src/runtime | |
parent | 15396ff07911fb806b468962f746c38d206bac7e (diff) | |
download | ComputeLibrary-63d4dbddf9d7b6cdd0e682ae838dc3ef37eb95ce.tar.gz |
COMPMID-2452: Fix 32-bit per-channel convolution for NEON.
Rearrange the kernels in run to ensure type conversion takes place
before the matrix transformations.
Change-Id: Ibf47788fe71a84fd7549f8667549552e15ca8aab
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-on: https://review.mlplatform.org/c/2251
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/runtime')
-rw-r--r-- | src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp | 18 |
1 file changed, 12 insertions, 6 deletions
```diff
diff --git a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp
index 01a99f7aca..5b9d0551e2 100644
--- a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp
+++ b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp
@@ -97,6 +97,9 @@ void NEGEMMLowpMatrixMultiplyCore::configure(const ITensor *a, const ITensor *b,
         output_stage_corr.gemmlowp_min_bound -= offset_correction;
         output_stage_corr.gemmlowp_max_bound -= offset_correction;
         info.set_gemmlowp_output_stage(output_stage_corr);
+
+        // Update matrix a
+        matrix_a = &_signed_a;
     }
 
     // If GEMMLowpOutputStage != NONE, fuse the offset contribution with the output stage
@@ -329,6 +332,9 @@ Status NEGEMMLowpMatrixMultiplyCore::validate(const ITensorInfo *a, const ITenso
         output_stage_corr.gemmlowp_min_bound -= offset_correction;
         output_stage_corr.gemmlowp_max_bound -= offset_correction;
         info.set_gemmlowp_output_stage(output_stage_corr);
+
+        // Update matrix a
+        matrix_a_info = &signed_a;
     }
 
     // Check if we need to run the optimized assembly kernel
@@ -463,6 +469,12 @@ void NEGEMMLowpMatrixMultiplyCore::run()
 
     MemoryGroupResourceScope scope_mg(_memory_group);
 
+    // Convert QASYMM8->QASYMM8_SIGNED
+    if(_flip_signedness)
+    {
+        NEScheduler::get().schedule(&_convert_to_signed_asymm, Window::DimY);
+    }
+
     // Reshape inputs
     if(_mtx_a_reshape_kernel)
     {
@@ -473,12 +485,6 @@ void NEGEMMLowpMatrixMultiplyCore::run()
         NEScheduler::get().schedule(_mtx_b_reshape_kernel.get(), Window::DimY);
     }
 
-    // Convert QASYMM8->QASYMM8_SIGNED
-    if(_flip_signedness)
-    {
-        NEScheduler::get().schedule(&_convert_to_signed_asymm, Window::DimY);
-    }
-
     // Run GEMM
     if(_asm_glue.is_configured())
     {
```