From 396cb95774bd7627254e3befec5e34844de701c9 Mon Sep 17 00:00:00 2001
From: Sang-Hoon Park
Date: Thu, 26 Mar 2020 14:02:37 +0000
Subject: COMPMID-3284 add utilities for layer normalization of NEON QLSTM

Change-Id: Ie98a8c4c30ac7859a989a29cbe7602c1c6fec26b
Signed-off-by: Sang-Hoon Park
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/2934
Tested-by: Arm Jenkins
Comments-Addressed: Arm Jenkins
Reviewed-by: Georgios Pinitas
---
 arm_compute/core/NEON/NESymm.h                     | 24 +++++++++-
 arm_compute/core/utils/quantization/AsymmHelpers.h | 42 +++++++++++++++++-
 src/core/utils/quantization/AsymmHelpers.cpp       | 51 ++++++++++++++++++++++
 3 files changed, 115 insertions(+), 2 deletions(-)

diff --git a/arm_compute/core/NEON/NESymm.h b/arm_compute/core/NEON/NESymm.h
index 924840930a..0cc2a963cf 100644
--- a/arm_compute/core/NEON/NESymm.h
+++ b/arm_compute/core/NEON/NESymm.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -25,6 +25,7 @@
 #define ARM_COMPUTE_NESYMM_H
 
 #include "arm_compute/core/NEON/NEMath.h"
+#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
 #include <arm_neon.h>
 
 namespace arm_compute
@@ -230,5 +231,26 @@ inline qsymm16x8x2_t vquantize_qsymm16(const float32x4x4_t &qv, const UniformQua
     return res;
 }
 
+/** Multiply a neon vector using quantized multiplier and shift
+ *
+ * @param[in] input Input vector holding the values to multiply.
+ * @param[in] qmul  Quantized multiplier
+ * @param[in] shift Bit shift: a positive value shifts left before the multiplication, a negative value applies a rounding right shift after it
+ *
+ * @return A neon vector holding the multiplied value
+ */
+inline int32x4x2_t multiply_by_quantized_multipler_2row(int32x4x2_t input, int32_t qmul, int32_t shift)
+{
+    const auto left_shift  = shift > 0 ? shift : 0;
+    const auto right_shift = shift > 0 ? 0 : -shift;
+    const auto one_shifted = 1 << left_shift;
+
+    int32x4x2_t result;
+    result.val[0] = rounding_divide_by_pow2(vqrdmulhq_n_s32(vmulq_n_s32(input.val[0], one_shifted), qmul), right_shift);
+    result.val[1] = rounding_divide_by_pow2(vqrdmulhq_n_s32(vmulq_n_s32(input.val[1], one_shifted), qmul), right_shift);
+
+    return result;
+}
+
 } // namespace arm_compute
 #endif // ARM_COMPUTE_NESYMM_H
diff --git a/arm_compute/core/utils/quantization/AsymmHelpers.h b/arm_compute/core/utils/quantization/AsymmHelpers.h
index 94876fb02f..0f0ec72b60 100644
--- a/arm_compute/core/utils/quantization/AsymmHelpers.h
+++ b/arm_compute/core/utils/quantization/AsymmHelpers.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -99,6 +99,46 @@ void compute_quantized_multipliers_and_shifts(const ITensorInfo *input,
                                               unsigned int       idx_ofms,
                                               int32_t           *output_multipliers_ptr,
                                               int32_t           *output_shifts_ptr);
+
+/** Round to the nearest division by a power-of-two using exponent, copied from NEMath
+ *
+ * @note This function computes x / 2^n rounded to the nearest integer (ties away from zero), where n = exponent
+ *
+ * @param[in] x        Element to divide.
+ * @param[in] exponent Integer value used to round to nearest division by a power-of-two
+ *
+ * @return the nearest division by a power-of-two using exponent
+ */
+int32_t rounding_divide_by_pow2(int32_t x, int exponent);
+
+/** Compute the rounded, saturated high 32 bits of the doubled product of two integers
+ *
+ * @param[in] a One integer to multiply
+ * @param[in] b Another integer to multiply
+ *
+ * @return The high 32 bits of 2 * a * b rounded to nearest; the maximum int32_t value when both inputs are the minimum int32_t value
+ */
+int32_t saturating_rounding_doubling_highmul(int32_t a, int32_t b);
+
+/** Compute the value multiplied by given quantized multiplier and shift
+ *
+ * @param[in] input Target value to multiply.
+ * @param[in] qmul  Quantized multiplier
+ * @param[in] shift Bit shift: a positive value shifts left before the multiplication, a negative value applies a rounding right shift after it
+ *
+ * @return The multiplied value
+ */
+int32_t multiply_by_quantized_multipler(int32_t input, int32_t qmul, int32_t shift);
+
+/** Compute the value multiplied by a power of two, saturating on overflow
+ *
+ * @param[in] exponent Exponent used to calculate power-of-two; a negative value performs a rounding division instead
+ * @param[in] v        Target value to multiply
+ *
+ * @return The multiplied value
+ */
+int32_t saturating_rounding_multiply_by_pow2(int exponent, int32_t v);
+
 } // namespace quantization
 } // namespace arm_compute
 #endif /* ARM_COMPUTE_IO_FILE_HANDLER_H */
diff --git a/src/core/utils/quantization/AsymmHelpers.cpp b/src/core/utils/quantization/AsymmHelpers.cpp
index e1ba6413b4..c5eef9dd77 100644
--- a/src/core/utils/quantization/AsymmHelpers.cpp
+++ b/src/core/utils/quantization/AsymmHelpers.cpp
@@ -196,5 +196,56 @@ void compute_quantized_multipliers_and_shifts(const ITensorInfo *input,
         output_shifts_ptr[i]      = output_shift;
     }
 }
+
+int32_t saturating_rounding_doubling_highmul(int32_t a, int32_t b)
+{
+    bool    overflow = a == b && a == std::numeric_limits<int32_t>::min();
+    int64_t a_64(a);
+    int64_t b_64(b);
+    int64_t ab_64        = a_64 * b_64;
+    int32_t nudge        = ab_64 >= 0 ? (1 << 30) : (1 - (1 << 30));
+    int32_t ab_x2_high32 = static_cast<int32_t>((ab_64 + nudge) / (1ll << 31));
+    return overflow ? std::numeric_limits<int32_t>::max() : ab_x2_high32;
+}
+
+int32_t rounding_divide_by_pow2(int32_t x, int exponent)
+{
+    const int32_t mask      = (1 << exponent) - 1;
+    const int32_t threshold = (mask >> 1) + (x < 0 ? 1 : 0);
+    return (x >> exponent) + ((x & mask) > threshold ? 1 : 0);
+}
+
+int32_t multiply_by_quantized_multipler(int32_t input, int32_t qmul, int32_t shift)
+{
+    const auto left_shift  = shift > 0 ? shift : 0;
+    const auto right_shift = shift > 0 ? 0 : -shift;
+    return rounding_divide_by_pow2(saturating_rounding_doubling_highmul(input * (1 << left_shift), qmul), right_shift);
+}
+
+int32_t saturating_rounding_multiply_by_pow2(int exponent, int32_t v)
+{
+    if(exponent == 0)
+    {
+        return v;
+    }
+    else if(exponent < 0)
+    {
+        return rounding_divide_by_pow2(v, -exponent);
+    }
+    else
+    {
+        constexpr auto min   = std::numeric_limits<int32_t>::min();
+        constexpr auto max   = std::numeric_limits<int32_t>::max();
+        const auto     width = sizeof(int32_t) * 8;
+
+        const int32_t threshold = ((1 << (width - 1 - exponent)) - 1);
+        bool          pos_mask  = v > threshold;
+        bool          neg_mask  = v < -threshold;
+        int32_t       result    = static_cast<int32_t>(static_cast<uint32_t>(v) << exponent); // shift the unsigned representation: left-shifting a negative signed value is undefined
+        result                  = pos_mask ? max : result;
+        result                  = neg_mask ? min : result;
+        return result;
+    }
+}
 } // quantization
 } // arm_compute
-- 
cgit v1.2.1