aboutsummaryrefslogtreecommitdiff
path: root/src/cpu/operators/CpuGemmLowpMatrixMultiplyCore.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/cpu/operators/CpuGemmLowpMatrixMultiplyCore.cpp')
-rw-r--r--src/cpu/operators/CpuGemmLowpMatrixMultiplyCore.cpp75
1 files changed, 28 insertions, 47 deletions
diff --git a/src/cpu/operators/CpuGemmLowpMatrixMultiplyCore.cpp b/src/cpu/operators/CpuGemmLowpMatrixMultiplyCore.cpp
index 1dbe3d8a31..f3396fbb5c 100644
--- a/src/cpu/operators/CpuGemmLowpMatrixMultiplyCore.cpp
+++ b/src/cpu/operators/CpuGemmLowpMatrixMultiplyCore.cpp
@@ -128,31 +128,24 @@ void CpuGemmLowpMatrixMultiplyCore::configure(
_reshape_b_only_on_first_run;
_gemm_info = gemm_info;
- const ITensorInfo *a_to_use = a;
-
- // Initialize assembly kernel meta-data
- const cpu::AsmGemmInfo asm_info = init_assembly_metadata(gemm_info);
-
- const int32_t offset_correction = 128;
- const DataType dt = DataType::QASYMM8_SIGNED;
- const UniformQuantizationInfo iqinfo = a_to_use->quantization_info().uniform();
-
- _signed_a = a_to_use->clone()->set_data_type(dt).set_quantization_info(
- QuantizationInfo(iqinfo.scale, iqinfo.offset + offset_correction));
-
- // If inputs are mixed-sign but this machine does not support mixed sign kernels,
- // flip the sign so matched-sign kernels can be used.
- if (!_flip_signedness && a->data_type() == DataType::QASYMM8 && b->data_type() == DataType::QASYMM8_SIGNED &&
- !bool(CpuGemmAssemblyDispatch::validate(a_to_use, b, c, dst, asm_info)))
- {
- _flip_signedness = true;
- }
+ // Offset kernel is needed if the offset is non-zero or it may change (i.e. dynamic).
+ // It is not needed if the datatype is symmetric, because there is no offset.
+ bool a_offset_kernel_needed = _a_offset != 0 || a->quantization_info().is_dynamic();
+ bool b_offset_kernel_needed = _b_offset != 0 || b->quantization_info().is_dynamic();
_asm_glue = std::make_unique<cpu::CpuGemmAssemblyDispatch>();
+ const ITensorInfo *a_to_use = a;
+
// Convert to QASYMM8 -> QASYMM8_SIGNED and back
if (_flip_signedness)
{
+ const int32_t offset_correction = 128;
+ const DataType dt = DataType::QASYMM8_SIGNED;
+ const UniformQuantizationInfo iqinfo = a_to_use->quantization_info().uniform();
+
+ _signed_a = a_to_use->clone()->set_data_type(dt).set_quantization_info(
+ QuantizationInfo(iqinfo.scale, iqinfo.offset + offset_correction));
_convert_to_signed_asymm = std::make_unique<kernels::CpuConvertQuantizedSignednessKernel>();
_convert_to_signed_asymm->configure(a_to_use, &_signed_a);
a_to_use = &_signed_a;
@@ -173,11 +166,6 @@ void CpuGemmLowpMatrixMultiplyCore::configure(
matrix_a = &_signed_a;
}
- // Offset kernel is need if offset is non-zero or it may change (i.e. dynamic).
- // It is not needed if the datatype is symmetric, because there is no offset
- bool a_offset_kernel_needed = _a_offset != 0 || a->quantization_info().is_dynamic();
- bool b_offset_kernel_needed = _b_offset != 0 || b->quantization_info().is_dynamic();
-
// If GEMMLowpOutputStage != NONE, fuse the offset contribution with the output stage
if (info.gemmlowp_output_stage().type != GEMMLowpOutputStageType::NONE)
{
@@ -185,6 +173,8 @@ void CpuGemmLowpMatrixMultiplyCore::configure(
_mm_result_s32 = TensorInfo(dst->tensor_shape(), 1, DataType::S32);
}
+ // Initialize assembly kernel meta-data
+ const cpu::AsmGemmInfo asm_info = init_assembly_metadata(gemm_info);
#ifdef __aarch64__
if (!(!b->are_values_constant() &&
b->tensor_shape().z() > 1)) // Disable batch matmul as optimized GeMM handles batching differently.
@@ -385,6 +375,10 @@ Status CpuGemmLowpMatrixMultiplyCore::validate(const ITensorInfo *a,
int32_t a_offset = a->quantization_info().uniform().offset;
int32_t b_offset = b->quantization_info().uniform().offset;
+ // Offset kernel is needed if the offset is non-zero or it may change (i.e. dynamic).
+ bool a_offset_kernel_needed = a_offset != 0 || a->quantization_info().is_dynamic();
+ bool b_offset_kernel_needed = b_offset != 0 || b->quantization_info().is_dynamic();
+
bool fuse_output_stage = info.gemmlowp_output_stage().type != GEMMLowpOutputStageType::NONE;
if (fuse_output_stage)
{
@@ -392,31 +386,19 @@ Status CpuGemmLowpMatrixMultiplyCore::validate(const ITensorInfo *a,
a->clone()->set_tensor_shape(output->tensor_shape()).set_data_type(DataType::S32));
}
- // Initialize assembly kernel meta-data
- const AsmGemmInfo asm_info = init_assembly_metadata(info);
-
// Convert QASYMM8->QASYMM8_SIGNED
- const int32_t offset_correction = 128;
- const DataType dt = DataType::QASYMM8_SIGNED;
- const UniformQuantizationInfo iqinfo = a_to_use->quantization_info().uniform();
-
- TensorInfo signed_a = a_to_use->clone()->set_data_type(dt).set_quantization_info(
- QuantizationInfo(iqinfo.scale, iqinfo.offset + offset_correction));
+ TensorInfo signed_a{};
TensorInfo signed_output{};
-
- bool flip_signedness = is_data_type_quantized_per_channel(b->data_type()) &&
+ bool flip_signedness = is_data_type_quantized_per_channel(b->data_type()) &&
(a->data_type() == DataType::QASYMM8) && info.reshape_b_only_on_first_run();
-
- // If inputs are mixed-sign but this machine does not support mixed sign kernels,
- // flip the sign so matched-sign kernels can be used.
- if (!flip_signedness && a->data_type() == DataType::QASYMM8 && b->data_type() == DataType::QASYMM8_SIGNED &&
- !bool(CpuGemmAssemblyDispatch::validate(a_to_use, b, c, output, asm_info)))
- {
- flip_signedness = true;
- }
-
if (flip_signedness)
{
+ const int32_t offset_correction = 128;
+ const DataType dt = DataType::QASYMM8_SIGNED;
+ const UniformQuantizationInfo iqinfo = a_to_use->quantization_info().uniform();
+
+ signed_a = a_to_use->clone()->set_data_type(dt).set_quantization_info(
+ QuantizationInfo(iqinfo.scale, iqinfo.offset + offset_correction));
ARM_COMPUTE_RETURN_ON_ERROR(kernels::CpuConvertQuantizedSignednessKernel::validate(a_to_use, &signed_a));
a_to_use = &signed_a;
a_offset = signed_a.quantization_info().uniform().offset;
@@ -436,9 +418,8 @@ Status CpuGemmLowpMatrixMultiplyCore::validate(const ITensorInfo *a,
matrix_a_info = &signed_a;
}
- // Offset kernel is need if offset is non-zero or it may change (i.e. dynamic).
- bool a_offset_kernel_needed = a_offset != 0 || a->quantization_info().is_dynamic();
- bool b_offset_kernel_needed = b_offset != 0 || b->quantization_info().is_dynamic();
+ // Initialize assembly kernel meta-data
+ const AsmGemmInfo asm_info = init_assembly_metadata(info);
// Check if we need to run the optimized assembly kernel
bool run_optimised = false;