diff options
author | Manuel Bottini <manuel.bottini@arm.com> | 2019-10-17 18:37:26 +0100 |
---|---|---|
committer | Pablo Marquez <pablo.tello@arm.com> | 2019-10-24 09:11:40 +0000 |
commit | 07263980e66059a91ce57612e4ca8f4b2a2a206a (patch) | |
tree | 138dc3ecf835df9f38a60959379a52eca08f8b0f /src | |
parent | 05069f07bcf95676597698a79926327555276362 (diff) | |
download | ComputeLibrary-07263980e66059a91ce57612e4ca8f4b2a2a206a.tar.gz |
COMPMID-2501: Support multiplier > 1 during QASYMM8 requantization for Quantized LSTM
Change-Id: I7eddbdf77881f313b707b9e59428245f1330a2cf
Signed-off-by: Manuel Bottini <manuel.bottini@arm.com>
Reviewed-on: https://review.mlplatform.org/c/2119
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Pablo Marquez <pablo.tello@arm.com>
Diffstat (limited to 'src')
-rw-r--r-- | src/core/CL/cl_kernels/gemmlowp.cl | 4 | ||||
-rw-r--r-- | src/core/utils/quantization/AsymmHelpers.cpp | 14 | ||||
-rw-r--r-- | src/runtime/CL/functions/CLLSTMLayerQuantized.cpp | 12 | ||||
-rw-r--r-- | src/runtime/NEON/functions/NELSTMLayerQuantized.cpp | 12 |
4 files changed, 30 insertions(+), 12 deletions(-)
diff --git a/src/core/CL/cl_kernels/gemmlowp.cl b/src/core/CL/cl_kernels/gemmlowp.cl
index fc90dbd16c..214c7a4825 100644
--- a/src/core/CL/cl_kernels/gemmlowp.cl
+++ b/src/core/CL/cl_kernels/gemmlowp.cl
@@ -1888,7 +1888,11 @@ __kernel void gemmlowp_output_stage_quantize_down_fixedpoint_qsymm16(TENSOR3D_DE
 #endif // defined(ADD_BIAS)
 
     // Multiply by result_mult_int and shift
+#if RESULT_SHIFT < 0
+    input_values = ASYMM_MULT(input_values * (1 << (-RESULT_SHIFT)), RESULT_FIXEDPOINT_MULTIPLIER, 4);
+#else // RESULT_SHIFT >= 0
     input_values = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(input_values, RESULT_FIXEDPOINT_MULTIPLIER, RESULT_SHIFT, 4);
+#endif // RESULT_SHIFT < 0
 
     short4 res = convert_short4_sat(input_values);
diff --git a/src/core/utils/quantization/AsymmHelpers.cpp b/src/core/utils/quantization/AsymmHelpers.cpp
index 59052449af..42bd84db47 100644
--- a/src/core/utils/quantization/AsymmHelpers.cpp
+++ b/src/core/utils/quantization/AsymmHelpers.cpp
@@ -34,6 +34,20 @@ namespace quantization
 constexpr int64_t fixed_point_one_Q0 = (1LL << 31);
 constexpr float epsilon = 0.00001f;
 
+Status calculate_quantized_multiplier(float multiplier, int *quant_multiplier, int *shift)
+{
+    if(multiplier > 1.f)
+    {
+        Status status = calculate_quantized_multiplier_greater_than_one(multiplier, quant_multiplier, shift);
+        *shift *= -1;
+        return status;
+    }
+    else
+    {
+        return calculate_quantized_multiplier_less_than_one(multiplier, quant_multiplier, shift);
+    }
+}
+
 Status calculate_quantized_multiplier_less_than_one(float multiplier, int *quant_multiplier, int *right_shift)
diff --git a/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp b/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp
index 4e6df1d1cb..e5f127825b 100644
--- a/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp
+++ b/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp
@@ -159,8 +159,7 @@ void CLLSTMLayerQuantized::configure(const ICLTensor *input,
     const float multiplier = 4096.f * qasymm.uniform().scale * qweights.uniform().scale;
     int output_multiplier = 0;
     int output_shift = 0;
-
-    quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
+    quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift);
 
     _memory_group.manage(&_output_lowp);
     _output_stage.configure(&_output_highp, &_bias, &_output_lowp, output_multiplier, output_shift);
@@ -361,12 +360,13 @@ Status CLLSTMLayerQuantized::validate(const ITensorInfo *input,
     input_concatenated.set_quantization_info(QuantizationInfo(qasymm.uniform().scale, qasymm.uniform().offset));
     weights_transposed.set_quantization_info(QuantizationInfo(qweights.uniform().scale, qweights.uniform().offset));
 
-    // multiplier = (input_scale * weights_scale) / output_scale (2 ^ (-12))
     const TensorInfo output_lowp(output_highp.tensor_shape(), 1, DataType::QSYMM16, qsymm_3);
 
-    const float multiplier = 4096.f * qasymm.uniform().scale * qweights.uniform().scale;
-    ARM_COMPUTE_UNUSED(multiplier);
-    ARM_COMPUTE_RETURN_ERROR_ON(multiplier > 1.0f);
+    const float multiplier = 4096.f * qasymm.uniform().scale * qweights.uniform().scale;
+    int output_multiplier = 0;
+    int output_shift = 0;
+    ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift));
+
     // _output_stage
     ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint::validate(&output_highp, &bias_concatenated, &output_lowp));
diff --git a/src/runtime/NEON/functions/NELSTMLayerQuantized.cpp b/src/runtime/NEON/functions/NELSTMLayerQuantized.cpp
index e325619ae4..cfd996b538 100644
--- a/src/runtime/NEON/functions/NELSTMLayerQuantized.cpp
+++ b/src/runtime/NEON/functions/NELSTMLayerQuantized.cpp
@@ -138,8 +138,7 @@ void NELSTMLayerQuantized::configure(const ITensor *input,
     const float multiplier = 4096.f * qasymm.uniform().scale * qweights.uniform().scale;
     int output_multiplier = 0;
     int output_shift = 0;
-
-    quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
+    quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift);
 
     _memory_group.manage(&_output_lowp);
     _output_stage.configure(&_output_highp, &_bias, &_output_lowp, output_multiplier, output_shift);
@@ -340,12 +339,13 @@ Status NELSTMLayerQuantized::validate(const ITensorInfo *input,
     input_concatenated.set_quantization_info(QuantizationInfo(qasymm.uniform().scale, qasymm.uniform().offset));
     weights_transposed.set_quantization_info(QuantizationInfo(qweights.uniform().scale, qweights.uniform().offset));
 
-    // multiplier = (input_scale * weights_scale) / output_scale (2 ^ (-12))
     const TensorInfo output_lowp(output_highp.tensor_shape(), 1, DataType::QSYMM16, qsymm_3);
 
-    const float multiplier = 4096.f * qasymm.uniform().scale * qweights.uniform().scale;
-    ARM_COMPUTE_UNUSED(multiplier);
-    ARM_COMPUTE_RETURN_ERROR_ON(multiplier > 1.0f);
+    const float multiplier = 4096.f * qasymm.uniform().scale * qweights.uniform().scale;
+    int output_multiplier = 0;
+    int output_shift = 0;
+    ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift));
+
     // _output_stage
     ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint::validate(&output_highp, &bias_concatenated, &output_lowp));