From f450caa7d2ac9a2a90407fb81203228dc82ef4a1 Mon Sep 17 00:00:00 2001 From: Chunosov Date: Wed, 8 Nov 2017 16:09:35 +0700 Subject: COMPMID-661: softmax-uint8 implementation (#16) Change-Id: Iad11ce70a8a0878a48e445a092035c49c926cece Reviewed-on: http://mpd-gerrit.cambridge.arm.com/94855 Tested-by: Kaizen Reviewed-by: Anthony Barbier --- src/core/utils/quantization/AsymmHelpers.cpp | 34 ++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 5 deletions(-) (limited to 'src/core/utils') diff --git a/src/core/utils/quantization/AsymmHelpers.cpp b/src/core/utils/quantization/AsymmHelpers.cpp index 4ba5f44efa..848ee566f0 100644 --- a/src/core/utils/quantization/AsymmHelpers.cpp +++ b/src/core/utils/quantization/AsymmHelpers.cpp @@ -29,6 +29,8 @@ using namespace arm_compute::quantization; +constexpr int64_t fixed_point_one_Q0 = (1ll << 31); + arm_compute::Error arm_compute::quantization::calculate_quantized_multiplier_less_than_one(double multiplier, int *quant_multiplier, int *right_shift) @@ -45,16 +47,38 @@ arm_compute::Error arm_compute::quantization::calculate_quantized_multiplier_les } const double q = std::frexp(multiplier, right_shift); *right_shift *= -1; - auto q_fixed = static_cast(round(q * (1ll << 31))); - ARM_COMPUTE_RETURN_ERROR_ON(q_fixed > (1ll << 31)); - if(q_fixed == (1ll << 31)) + auto q_fixed = static_cast(round(q * fixed_point_one_Q0)); + ARM_COMPUTE_RETURN_ERROR_ON(q_fixed > fixed_point_one_Q0); + if(q_fixed == fixed_point_one_Q0) { q_fixed /= 2; --*right_shift; } ARM_COMPUTE_RETURN_ERROR_ON(*right_shift < 0); ARM_COMPUTE_RETURN_ERROR_ON(q_fixed > std::numeric_limits::max()); - *quant_multiplier = static_cast(q_fixed); + *quant_multiplier = static_cast(q_fixed); + + return arm_compute::Error{}; +} + +arm_compute::Error arm_compute::quantization::calculate_quantized_multiplier_greater_than_one(double multiplier, + int *quantized_multiplier, + int *left_shift) +{ + ARM_COMPUTE_RETURN_ERROR_ON(quantized_multiplier == nullptr); + ARM_COMPUTE_RETURN_ERROR_ON(left_shift == nullptr); + ARM_COMPUTE_RETURN_ERROR_ON(multiplier < 1.f); + const double q = std::frexp(multiplier, left_shift); + auto q_fixed = static_cast(round(q * fixed_point_one_Q0)); + ARM_COMPUTE_RETURN_ERROR_ON(q_fixed > fixed_point_one_Q0); + if(q_fixed == fixed_point_one_Q0) + { + q_fixed /= 2; + ++*left_shift; + } + ARM_COMPUTE_RETURN_ERROR_ON(*left_shift < 0); + ARM_COMPUTE_RETURN_ERROR_ON(q_fixed > std::numeric_limits::max()); + *quantized_multiplier = static_cast(q_fixed); return arm_compute::Error{}; -} \ No newline at end of file +} -- cgit v1.2.1