diff options
author | Chunosov <N.Chunosov@yandex.ru> | 2017-11-08 16:09:35 +0700 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:35:24 +0000 |
commit | f450caa7d2ac9a2a90407fb81203228dc82ef4a1 (patch) | |
tree | ed34d43943cd36cbd6776ddc6ac87e92d6f7dcc0 /src/core/utils | |
parent | 7068f9900d136312318ff430aef588b14e0c87ad (diff) | |
download | ComputeLibrary-f450caa7d2ac9a2a90407fb81203228dc82ef4a1.tar.gz |
COMPMID-661: softmax-uint8 implementation (#16)
Change-Id: Iad11ce70a8a0878a48e445a092035c49c926cece
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/94855
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src/core/utils')
-rw-r--r-- | src/core/utils/quantization/AsymmHelpers.cpp | 34 |
1 files changed, 29 insertions, 5 deletions
diff --git a/src/core/utils/quantization/AsymmHelpers.cpp b/src/core/utils/quantization/AsymmHelpers.cpp
index 4ba5f44efa..848ee566f0 100644
--- a/src/core/utils/quantization/AsymmHelpers.cpp
+++ b/src/core/utils/quantization/AsymmHelpers.cpp
@@ -29,6 +29,8 @@
 
 using namespace arm_compute::quantization;
 
+constexpr int64_t fixed_point_one_Q0 = (1ll << 31);
+
 arm_compute::Error arm_compute::quantization::calculate_quantized_multiplier_less_than_one(double multiplier,
                                                                                            int *quant_multiplier,
                                                                                            int *right_shift)
@@ -45,16 +47,38 @@ arm_compute::Error arm_compute::quantization::calculate_quantized_multiplier_les
     }
     const double q = std::frexp(multiplier, right_shift);
     *right_shift *= -1;
-    auto q_fixed = static_cast<int64_t>(round(q * (1ll << 31)));
-    ARM_COMPUTE_RETURN_ERROR_ON(q_fixed > (1ll << 31));
-    if(q_fixed == (1ll << 31))
+    auto q_fixed = static_cast<int64_t>(round(q * fixed_point_one_Q0));
+    ARM_COMPUTE_RETURN_ERROR_ON(q_fixed > fixed_point_one_Q0);
+    if(q_fixed == fixed_point_one_Q0)
     {
         q_fixed /= 2;
         --*right_shift;
     }
     ARM_COMPUTE_RETURN_ERROR_ON(*right_shift < 0);
     ARM_COMPUTE_RETURN_ERROR_ON(q_fixed > std::numeric_limits<int32_t>::max());
-    *quant_multiplier = static_cast<int>(q_fixed);
+    *quant_multiplier = static_cast<int32_t>(q_fixed);
+
+    return arm_compute::Error{};
+}
+
+arm_compute::Error arm_compute::quantization::calculate_quantized_multiplier_greater_than_one(double multiplier,
+                                                                                              int *quantized_multiplier,
+                                                                                              int *left_shift)
+{
+    ARM_COMPUTE_RETURN_ERROR_ON(quantized_multiplier == nullptr);
+    ARM_COMPUTE_RETURN_ERROR_ON(left_shift == nullptr);
+    ARM_COMPUTE_RETURN_ERROR_ON(multiplier < 1.f);
+    const double q = std::frexp(multiplier, left_shift);
+    auto q_fixed = static_cast<int64_t>(round(q * fixed_point_one_Q0));
+    ARM_COMPUTE_RETURN_ERROR_ON(q_fixed > fixed_point_one_Q0);
+    if(q_fixed == fixed_point_one_Q0)
+    {
+        q_fixed /= 2;
+        ++*left_shift;
+    }
+    ARM_COMPUTE_RETURN_ERROR_ON(*left_shift < 0);
+    ARM_COMPUTE_RETURN_ERROR_ON(q_fixed > std::numeric_limits<int32_t>::max());
+    *quantized_multiplier = static_cast<int32_t>(q_fixed);
 
     return arm_compute::Error{};
-}
\ No newline at end of file
+}