aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/core/CL/cl_kernels/gemmlowp.cl4
-rw-r--r--src/core/utils/quantization/AsymmHelpers.cpp14
-rw-r--r--src/runtime/CL/functions/CLLSTMLayerQuantized.cpp12
-rw-r--r--src/runtime/NEON/functions/NELSTMLayerQuantized.cpp12
4 files changed, 30 insertions, 12 deletions
diff --git a/src/core/CL/cl_kernels/gemmlowp.cl b/src/core/CL/cl_kernels/gemmlowp.cl
index fc90dbd16c..214c7a4825 100644
--- a/src/core/CL/cl_kernels/gemmlowp.cl
+++ b/src/core/CL/cl_kernels/gemmlowp.cl
@@ -1888,7 +1888,11 @@ __kernel void gemmlowp_output_stage_quantize_down_fixedpoint_qsymm16(TENSOR3D_DE
#endif // defined(ADD_BIAS)
// Multiply by result_mult_int and shift
+#if RESULT_SHIFT < 0
+ input_values = ASYMM_MULT(input_values * (1 << (-RESULT_SHIFT)), RESULT_FIXEDPOINT_MULTIPLIER, 4);
+#else // RESULT_SHIFT >= 0
input_values = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(input_values, RESULT_FIXEDPOINT_MULTIPLIER, RESULT_SHIFT, 4);
+#endif // RESULT_SHIFT < 0
short4 res = convert_short4_sat(input_values);
diff --git a/src/core/utils/quantization/AsymmHelpers.cpp b/src/core/utils/quantization/AsymmHelpers.cpp
index 59052449af..42bd84db47 100644
--- a/src/core/utils/quantization/AsymmHelpers.cpp
+++ b/src/core/utils/quantization/AsymmHelpers.cpp
@@ -34,6 +34,20 @@ namespace quantization
constexpr int64_t fixed_point_one_Q0 = (1LL << 31);
constexpr float epsilon = 0.00001f;
+// Unified entry point for computing a fixed-point (multiplier, shift) pair
+// from a real multiplier, dispatching on whether it is above or below one.
+Status calculate_quantized_multiplier(float multiplier, int *quant_multiplier, int *shift)
+{
+    if(multiplier > 1.f)
+    {
+        Status status = calculate_quantized_multiplier_greater_than_one(multiplier, quant_multiplier, shift);
+        // The greater-than-one helper reports its shift with the opposite sign
+        // convention; negate it so every caller receives one uniform
+        // convention (a negative value then denotes the other shift direction).
+        *shift *= -1;
+        return status;
+    }
+    else
+    {
+        return calculate_quantized_multiplier_less_than_one(multiplier, quant_multiplier, shift);
+    }
+}
+
Status calculate_quantized_multiplier_less_than_one(float multiplier,
int *quant_multiplier,
int *right_shift)
diff --git a/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp b/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp
index 4e6df1d1cb..e5f127825b 100644
--- a/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp
+++ b/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp
@@ -159,8 +159,7 @@ void CLLSTMLayerQuantized::configure(const ICLTensor *input,
const float multiplier = 4096.f * qasymm.uniform().scale * qweights.uniform().scale;
int output_multiplier = 0;
int output_shift = 0;
-
- quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
+ quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift);
_memory_group.manage(&_output_lowp);
_output_stage.configure(&_output_highp, &_bias, &_output_lowp, output_multiplier, output_shift);
@@ -361,12 +360,13 @@ Status CLLSTMLayerQuantized::validate(const ITensorInfo *input,
input_concatenated.set_quantization_info(QuantizationInfo(qasymm.uniform().scale, qasymm.uniform().offset));
weights_transposed.set_quantization_info(QuantizationInfo(qweights.uniform().scale, qweights.uniform().offset));
- // multiplier = (input_scale * weights_scale) / output_scale (2 ^ (-12))
const TensorInfo output_lowp(output_highp.tensor_shape(), 1, DataType::QSYMM16, qsymm_3);
- const float multiplier = 4096.f * qasymm.uniform().scale * qweights.uniform().scale;
- ARM_COMPUTE_UNUSED(multiplier);
- ARM_COMPUTE_RETURN_ERROR_ON(multiplier > 1.0f);
+ const float multiplier = 4096.f * qasymm.uniform().scale * qweights.uniform().scale;
+ int output_multiplier = 0;
+ int output_shift = 0;
+ ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift));
+
// _output_stage
ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint::validate(&output_highp, &bias_concatenated, &output_lowp));
diff --git a/src/runtime/NEON/functions/NELSTMLayerQuantized.cpp b/src/runtime/NEON/functions/NELSTMLayerQuantized.cpp
index e325619ae4..cfd996b538 100644
--- a/src/runtime/NEON/functions/NELSTMLayerQuantized.cpp
+++ b/src/runtime/NEON/functions/NELSTMLayerQuantized.cpp
@@ -138,8 +138,7 @@ void NELSTMLayerQuantized::configure(const ITensor *input,
const float multiplier = 4096.f * qasymm.uniform().scale * qweights.uniform().scale;
int output_multiplier = 0;
int output_shift = 0;
-
- quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
+ quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift);
_memory_group.manage(&_output_lowp);
_output_stage.configure(&_output_highp, &_bias, &_output_lowp, output_multiplier, output_shift);
@@ -340,12 +339,13 @@ Status NELSTMLayerQuantized::validate(const ITensorInfo *input,
input_concatenated.set_quantization_info(QuantizationInfo(qasymm.uniform().scale, qasymm.uniform().offset));
weights_transposed.set_quantization_info(QuantizationInfo(qweights.uniform().scale, qweights.uniform().offset));
- // multiplier = (input_scale * weights_scale) / output_scale (2 ^ (-12))
const TensorInfo output_lowp(output_highp.tensor_shape(), 1, DataType::QSYMM16, qsymm_3);
- const float multiplier = 4096.f * qasymm.uniform().scale * qweights.uniform().scale;
- ARM_COMPUTE_UNUSED(multiplier);
- ARM_COMPUTE_RETURN_ERROR_ON(multiplier > 1.0f);
+ const float multiplier = 4096.f * qasymm.uniform().scale * qweights.uniform().scale;
+ int output_multiplier = 0;
+ int output_shift = 0;
+ ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift));
+
// _output_stage
ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint::validate(&output_highp, &bias_concatenated, &output_lowp));