From 63d4dbddf9d7b6cdd0e682ae838dc3ef37eb95ce Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Fri, 8 Nov 2019 11:51:56 +0000 Subject: COMPMID-2452: Fix 32-bit per-channel convolution for NEON. Rearrange the kernels in run to ensure type conversion takes place before the matrix transformations. Change-Id: Ibf47788fe71a84fd7549f8667549552e15ca8aab Signed-off-by: Georgios Pinitas Reviewed-on: https://review.mlplatform.org/c/2251 Reviewed-by: Michele Di Giorgio Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins --- .../NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp | 1 + .../NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp | 18 ++++++++++++------ tests/validation/NEON/ConvolutionLayer.cpp | 2 +- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp index 8f5a208cbb..3082ff25d7 100644 --- a/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp @@ -870,6 +870,7 @@ void NEGEMMLowpMatrixMultiplyKernel::run(const Window &window, const ThreadInfo switch(_input0->info()->data_type()) { case DataType::S8: + case DataType::QASYMM8_SIGNED: { vector_matrix_multiply_s8(ina, inb, out, width_matrix_a, width_matrix_b, in_b_stride, window); break; diff --git a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp index 01a99f7aca..5b9d0551e2 100644 --- a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp +++ b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp @@ -97,6 +97,9 @@ void NEGEMMLowpMatrixMultiplyCore::configure(const ITensor *a, const ITensor *b, output_stage_corr.gemmlowp_min_bound -= offset_correction; output_stage_corr.gemmlowp_max_bound -= offset_correction; info.set_gemmlowp_output_stage(output_stage_corr); + + // Update matrix a + matrix_a = &_signed_a; } // If GEMMLowpOutputStage != NONE, fuse the offset contribution with the output stage @@ -329,6 +332,9 @@ Status NEGEMMLowpMatrixMultiplyCore::validate(const ITensorInfo *a, const ITenso output_stage_corr.gemmlowp_min_bound -= offset_correction; output_stage_corr.gemmlowp_max_bound -= offset_correction; info.set_gemmlowp_output_stage(output_stage_corr); + + // Update matrix a + matrix_a_info = &signed_a; } // Check if we need to run the optimized assembly kernel @@ -463,6 +469,12 @@ void NEGEMMLowpMatrixMultiplyCore::run() MemoryGroupResourceScope scope_mg(_memory_group); + // Convert QASYMM8->QASYMM8_SIGNED + if(_flip_signedness) + { + NEScheduler::get().schedule(&_convert_to_signed_asymm, Window::DimY); + } + // Reshape inputs if(_mtx_a_reshape_kernel) { @@ -473,12 +485,6 @@ void NEGEMMLowpMatrixMultiplyCore::run() NEScheduler::get().schedule(_mtx_b_reshape_kernel.get(), Window::DimY); } - // Convert QASYMM8->QASYMM8_SIGNED - if(_flip_signedness) - { - NEScheduler::get().schedule(&_convert_to_signed_asymm, Window::DimY); - } - // Run GEMM if(_asm_glue.is_configured()) { diff --git a/tests/validation/NEON/ConvolutionLayer.cpp b/tests/validation/NEON/ConvolutionLayer.cpp index df52d8065b..c2a0cb56a2 100644 --- a/tests/validation/NEON/ConvolutionLayer.cpp +++ b/tests/validation/NEON/ConvolutionLayer.cpp @@ -476,7 +476,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerQuantizedPerChannelFixtur validate(Accessor(_target), _reference, tolerance_qasymm8); } FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMConvolutionLayerQuantizedPerChannelFixture, framework::DatasetMode::NIGHTLY, - combine(combine(combine(combine(combine(combine(framework::dataset::concat(datasets::SmallConvolutionLayerDataset(), datasets::LargeConvolutionLayerDataset()), + combine(combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(), framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", { DataType::QASYMM8 })), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), -- cgit v1.2.1