about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp 1
-rw-r--r-- src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp 18
-rw-r--r-- tests/validation/NEON/ConvolutionLayer.cpp 2
3 files changed, 14 insertions, 7 deletions
diff --git a/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp
index 8f5a208cbb..3082ff25d7 100644
--- a/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp
@@ -870,6 +870,7 @@ void NEGEMMLowpMatrixMultiplyKernel::run(const Window &window, const ThreadInfo
switch(_input0->info()->data_type())
{
case DataType::S8:
+ case DataType::QASYMM8_SIGNED:
{
vector_matrix_multiply_s8(ina, inb, out, width_matrix_a, width_matrix_b, in_b_stride, window);
break;
diff --git a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp
index 01a99f7aca..5b9d0551e2 100644
--- a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp
+++ b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp
@@ -97,6 +97,9 @@ void NEGEMMLowpMatrixMultiplyCore::configure(const ITensor *a, const ITensor *b,
output_stage_corr.gemmlowp_min_bound -= offset_correction;
output_stage_corr.gemmlowp_max_bound -= offset_correction;
info.set_gemmlowp_output_stage(output_stage_corr);
+
+ // Update matrix a
+ matrix_a = &_signed_a;
}
// If GEMMLowpOutputStage != NONE, fuse the offset contribution with the output stage
@@ -329,6 +332,9 @@ Status NEGEMMLowpMatrixMultiplyCore::validate(const ITensorInfo *a, const ITenso
output_stage_corr.gemmlowp_min_bound -= offset_correction;
output_stage_corr.gemmlowp_max_bound -= offset_correction;
info.set_gemmlowp_output_stage(output_stage_corr);
+
+ // Update matrix a
+ matrix_a_info = &signed_a;
}
// Check if we need to run the optimized assembly kernel
@@ -463,6 +469,12 @@ void NEGEMMLowpMatrixMultiplyCore::run()
MemoryGroupResourceScope scope_mg(_memory_group);
+ // Convert QASYMM8->QASYMM8_SIGNED
+ if(_flip_signedness)
+ {
+ NEScheduler::get().schedule(&_convert_to_signed_asymm, Window::DimY);
+ }
+
// Reshape inputs
if(_mtx_a_reshape_kernel)
{
@@ -473,12 +485,6 @@ void NEGEMMLowpMatrixMultiplyCore::run()
NEScheduler::get().schedule(_mtx_b_reshape_kernel.get(), Window::DimY);
}
- // Convert QASYMM8->QASYMM8_SIGNED
- if(_flip_signedness)
- {
- NEScheduler::get().schedule(&_convert_to_signed_asymm, Window::DimY);
- }
-
// Run GEMM
if(_asm_glue.is_configured())
{
diff --git a/tests/validation/NEON/ConvolutionLayer.cpp b/tests/validation/NEON/ConvolutionLayer.cpp
index df52d8065b..c2a0cb56a2 100644
--- a/tests/validation/NEON/ConvolutionLayer.cpp
+++ b/tests/validation/NEON/ConvolutionLayer.cpp
@@ -476,7 +476,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerQuantizedPerChannelFixtur
validate(Accessor(_target), _reference, tolerance_qasymm8);
}
FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMConvolutionLayerQuantizedPerChannelFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(combine(combine(combine(framework::dataset::concat(datasets::SmallConvolutionLayerDataset(), datasets::LargeConvolutionLayerDataset()),
+ combine(combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
framework::dataset::make("ReshapeWeights", { true })),
framework::dataset::make("DataType", { DataType::QASYMM8 })),
framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),