From dbdea0d1c025b18d4d82c278c87454427918f5b4 Mon Sep 17 00:00:00 2001
From: Georgios Pinitas
Date: Wed, 16 Oct 2019 19:21:40 +0100
Subject: COMPMID-2308: NEConvolutionLayer: support QUANT8_SYMM_PER_CHANNEL filters

Change-Id: Ic1bf5f0d21ccd525f84213a360f7e199d7f50577
Signed-off-by: Georgios Pinitas
Reviewed-on: https://review.mlplatform.org/c/2177
Tested-by: Arm Jenkins
Reviewed-by: Michele Di Giorgio
Comments-Addressed: Arm Jenkins
---
 arm_compute/core/NEON/NEAsymm.h | 99 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 99 insertions(+)

(limited to 'arm_compute/core/NEON/NEAsymm.h')

diff --git a/arm_compute/core/NEON/NEAsymm.h b/arm_compute/core/NEON/NEAsymm.h
index 56d4c09f92..a3bd7e28f0 100644
--- a/arm_compute/core/NEON/NEAsymm.h
+++ b/arm_compute/core/NEON/NEAsymm.h
@@ -115,6 +115,66 @@ uint8x16_t finalize_quantization(int32x4x4_t &in_s32,
     return out_u8;
 }
 
+/** Performs final quantization step on 16 elements for symmetric quantization
+ *
+ * @tparam is_bounded_relu Specifies whether a fused bounded relu should be applied
+ *
+ * @param[in,out] in_s32                        Input to be quantized. Overwritten with the intermediate S32 results.
+ * @param[in]     result_fixedpoint_multiplier  Result multiplier parameter (one multiplier per element)
+ * @param[in]     result_shift                  Result shift parameter (one shift per element)
+ * @param[in]     result_offset_after_shift_s32 Result offset parameter (added to each group of four elements)
+ * @param[in]     min_s8                        Relu lower bound (used only when is_bounded_relu is true)
+ * @param[in]     max_s8                        Relu upper bound (used only when is_bounded_relu is true)
+ *
+ * @return Quantized values
+ */
+template <bool is_bounded_relu>
+inline int8x16_t finalize_quantization_symm(int32x4x4_t       &in_s32,
+                                            const int32x4x4_t &result_fixedpoint_multiplier,
+                                            const int32x4x4_t &result_shift,
+                                            const int32x4_t   &result_offset_after_shift_s32,
+                                            const int8x16_t   &min_s8,
+                                            const int8x16_t   &max_s8)
+{
+    // Fixed point multiplication with vector saturating rounding doubling multiply high (per-element multipliers)
+    in_s32.val[0] = vqrdmulhq_s32(in_s32.val[0], result_fixedpoint_multiplier.val[0]);
+    in_s32.val[1] = vqrdmulhq_s32(in_s32.val[1], result_fixedpoint_multiplier.val[1]);
+    in_s32.val[2] = vqrdmulhq_s32(in_s32.val[2], result_fixedpoint_multiplier.val[2]);
+    in_s32.val[3] = vqrdmulhq_s32(in_s32.val[3], result_fixedpoint_multiplier.val[3]);
+
+    // Round to the nearest division by a power-of-two using result_shift
+    in_s32.val[0] = rounding_divide_by_pow2(in_s32.val[0], result_shift.val[0]);
+    in_s32.val[1] = rounding_divide_by_pow2(in_s32.val[1], result_shift.val[1]);
+    in_s32.val[2] = rounding_divide_by_pow2(in_s32.val[2], result_shift.val[2]);
+    in_s32.val[3] = rounding_divide_by_pow2(in_s32.val[3], result_shift.val[3]);
+
+    // Add the offset terms
+    in_s32.val[0] = vaddq_s32(in_s32.val[0], result_offset_after_shift_s32);
+    in_s32.val[1] = vaddq_s32(in_s32.val[1], result_offset_after_shift_s32);
+    in_s32.val[2] = vaddq_s32(in_s32.val[2], result_offset_after_shift_s32);
+    in_s32.val[3] = vaddq_s32(in_s32.val[3], result_offset_after_shift_s32);
+
+    // Convert S32 to S16 (saturating narrow)
+    const int16x8x2_t in_s16 =
+    {
+        {
+            vcombine_s16(vqmovn_s32(in_s32.val[0]), vqmovn_s32(in_s32.val[1])),
+            vcombine_s16(vqmovn_s32(in_s32.val[2]), vqmovn_s32(in_s32.val[3]))
+        }
+    };
+
+    // Convert S16 to S8 (saturating narrow)
+    int8x16_t out_s8 = vcombine_s8(vqmovn_s16(in_s16.val[0]), vqmovn_s16(in_s16.val[1]));
+
+    if(is_bounded_relu)
+    {
+        out_s8 = vmaxq_s8(out_s8, min_s8);
+        out_s8 = vminq_s8(out_s8, max_s8);
+    }
+
+    return out_s8;
+}
+
 /** Performs final quantization step on single element
  *
  * @tparam is_bounded_relu Specified if a fused bounded relu should be applied
@@ -154,6 +214,45 @@ inline uint8_t finalize_quantization(int32_t in_value, int result_fixedpoint_mul
     return out_u8;
 }
 
+/** Performs final quantization step on single element (signed 8-bit output)
+ *
+ * @tparam is_bounded_relu Specifies whether a fused bounded relu should be applied
+ *
+ * @param[in] in_value                      Input to be quantized.
+ * @param[in] result_fixedpoint_multiplier  Result multiplier parameter
+ * @param[in] result_shift                  Result shift parameter
+ * @param[in] result_offset_after_shift_s32 Result offset parameter
+ * @param[in] min_s8                        Relu lower bound (used only when is_bounded_relu is true)
+ * @param[in] max_s8                        Relu upper bound (used only when is_bounded_relu is true)
+ *
+ * @return Quantized value
+ */
+template <bool is_bounded_relu>
+inline int8_t finalize_quantization(int32_t in_value, int result_fixedpoint_multiplier,
+                                    int32_t result_shift, int32_t result_offset_after_shift_s32,
+                                    int8_t min_s8, int8_t max_s8)
+{
+    int32x4_t in_s32 = vdupq_n_s32(in_value);
+
+    // Fixed point multiplication with vector saturating rounding doubling multiply high with scalar
+    in_value = vgetq_lane_s32(vqrdmulhq_n_s32(in_s32, result_fixedpoint_multiplier), 0);
+
+    // Shift value by result_shift
+    in_value = rounding_divide_by_pow2(in_value, result_shift);
+
+    // Add the offset term
+    in_value += result_offset_after_shift_s32;
+
+    // Saturate to the int8 range [-128, 127], then apply the optional relu bounds
+    int8_t out_s8 = static_cast<int8_t>(std::max<int32_t>(-128, std::min<int32_t>(127, in_value)));
+    if(is_bounded_relu)
+    {
+        out_s8 = static_cast<int8_t>(std::max(min_s8, std::min(max_s8, out_s8)));
+    }
+
+    return out_s8;
+}
+
 /** Dequantize a neon vector holding 8 quantized values.
  *
  * @param[in] qv     Input values to be dequantized.
-- 
cgit v1.2.1