aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Georgios Pinitas <georgios.pinitas@arm.com> 2019-11-08 11:51:56 +0000
committer Georgios Pinitas <georgios.pinitas@arm.com> 2019-11-08 15:21:44 +0000
commit 63d4dbddf9d7b6cdd0e682ae838dc3ef37eb95ce (patch)
tree d72936e2dcd267cdc241e75597a23492612d3189
parent 15396ff07911fb806b468962f746c38d206bac7e (diff)
download ComputeLibrary-63d4dbddf9d7b6cdd0e682ae838dc3ef37eb95ce.tar.gz
COMPMID-2452: Fix 32-bit per-channel convolution for NEON.
Rearrange the kernels in run to ensure type conversion takes place before the matrix transformations.

Change-Id: Ibf47788fe71a84fd7549f8667549552e15ca8aab
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-on: https://review.mlplatform.org/c/2251
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r-- src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp 1
-rw-r--r-- src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp 18
-rw-r--r-- tests/validation/NEON/ConvolutionLayer.cpp 2
3 files changed, 14 insertions, 7 deletions
diff --git a/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp
index 8f5a208cb..3082ff25d 100644
--- a/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp
@@ -870,6 +870,7 @@ void NEGEMMLowpMatrixMultiplyKernel::run(const Window &window, const ThreadInfo
switch(_input0->info()->data_type())
{
case DataType::S8:
+ case DataType::QASYMM8_SIGNED:
{
vector_matrix_multiply_s8(ina, inb, out, width_matrix_a, width_matrix_b, in_b_stride, window);
break;
diff --git a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp
index 01a99f7ac..5b9d0551e 100644
--- a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp
+++ b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp
@@ -97,6 +97,9 @@ void NEGEMMLowpMatrixMultiplyCore::configure(const ITensor *a, const ITensor *b,
output_stage_corr.gemmlowp_min_bound -= offset_correction;
output_stage_corr.gemmlowp_max_bound -= offset_correction;
info.set_gemmlowp_output_stage(output_stage_corr);
+
+ // Update matrix a
+ matrix_a = &_signed_a;
}
// If GEMMLowpOutputStage != NONE, fuse the offset contribution with the output stage
@@ -329,6 +332,9 @@ Status NEGEMMLowpMatrixMultiplyCore::validate(const ITensorInfo *a, const ITenso
output_stage_corr.gemmlowp_min_bound -= offset_correction;
output_stage_corr.gemmlowp_max_bound -= offset_correction;
info.set_gemmlowp_output_stage(output_stage_corr);
+
+ // Update matrix a
+ matrix_a_info = &signed_a;
}
// Check if we need to run the optimized assembly kernel
@@ -463,6 +469,12 @@ void NEGEMMLowpMatrixMultiplyCore::run()
MemoryGroupResourceScope scope_mg(_memory_group);
+ // Convert QASYMM8->QASYMM8_SIGNED
+ if(_flip_signedness)
+ {
+ NEScheduler::get().schedule(&_convert_to_signed_asymm, Window::DimY);
+ }
+
// Reshape inputs
if(_mtx_a_reshape_kernel)
{
@@ -473,12 +485,6 @@ void NEGEMMLowpMatrixMultiplyCore::run()
NEScheduler::get().schedule(_mtx_b_reshape_kernel.get(), Window::DimY);
}
- // Convert QASYMM8->QASYMM8_SIGNED
- if(_flip_signedness)
- {
- NEScheduler::get().schedule(&_convert_to_signed_asymm, Window::DimY);
- }
-
// Run GEMM
if(_asm_glue.is_configured())
{
diff --git a/tests/validation/NEON/ConvolutionLayer.cpp b/tests/validation/NEON/ConvolutionLayer.cpp
index df52d8065..c2a0cb56a 100644
--- a/tests/validation/NEON/ConvolutionLayer.cpp
+++ b/tests/validation/NEON/ConvolutionLayer.cpp
@@ -476,7 +476,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerQuantizedPerChannelFixtur
validate(Accessor(_target), _reference, tolerance_qasymm8);
}
FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMConvolutionLayerQuantizedPerChannelFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(combine(combine(combine(framework::dataset::concat(datasets::SmallConvolutionLayerDataset(), datasets::LargeConvolutionLayerDataset()),
+ combine(combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
framework::dataset::make("ReshapeWeights", { true })),
framework::dataset::make("DataType", { DataType::QASYMM8 })),
framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),