From d66094e37ecd747e85f30130e1a678bdbaf30788 Mon Sep 17 00:00:00 2001
From: Georgios Pinitas
Date: Mon, 15 Apr 2019 15:44:17 +0100
Subject: COMPMID-1995: Fix NEPoolingLayer for quantized 3x3

Quantized 3x3 pooling layer on NEON did not support different
quantization information for the input and output.

Change-Id: I38f8da6ec91c91ba37a21d9d0e1a14fd5bb99f86
Signed-off-by: Georgios Pinitas
Reviewed-on: https://review.mlplatform.org/c/992
Reviewed-by: Isabella Gottardi
Comments-Addressed: Arm Jenkins
Tested-by: Arm Jenkins
---
 arm_compute/core/NEON/NEAsymm.h | 60 +++++++++++++++++++++++++++++++++++++----
 1 file changed, 55 insertions(+), 5 deletions(-)

diff --git a/arm_compute/core/NEON/NEAsymm.h b/arm_compute/core/NEON/NEAsymm.h
index f71626182c..253d0fdff7 100644
--- a/arm_compute/core/NEON/NEAsymm.h
+++ b/arm_compute/core/NEON/NEAsymm.h
@@ -175,10 +175,33 @@ inline uint8_t finalize_quantization(int32_t in_value, int result_fixedpoint_mul
     return out_u8;
 }
 
+/** Dequantize a neon vector holding 8 quantized values.
+ *
+ * @param[in] qv Input values to be dequantized.
+ * @param[in] qi Quantization information to be used in the computation.
+ *
+ * @return Dequantized values in a neon vector
+ */
+inline float32x4x2_t vdequantize(const uint8x8_t &qv, const QuantizationInfo &qi)
+{
+    const float         scale   = qi.scale;
+    const int           offset  = qi.offset;
+    const int32x4_t     voffset = vdupq_n_s32(offset);
+    const float32x4_t   vscale  = vdupq_n_f32(scale);
+    const float32x4x2_t vdequantized_input =
+    {
+        {
+            vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(vmovl_u8(qv)))), voffset)), vscale),
+            vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(vmovl_u8(qv)))), voffset)), vscale),
+        }
+    };
+    return vdequantized_input;
+}
+
 /** Dequantize a neon vector holding 16 quantized values.
  *
- * @param qv Input values to be dequantized.
- * @param qi Quantization information to be used in the computation.
+ * @param[in] qv Input values to be dequantized.
+ * @param[in] qi Quantization information to be used in the computation.
  *
  * @return Dequantized values in a neon vector
  */
@@ -200,10 +223,38 @@ inline float32x4x4_t vdequantize(const uint8x16_t &qv, const QuantizationInfo &q
     return vdequantized_input;
 }
 
+/** Quantize a neon vector holding 8 floating point values.
+ *
+ * @param[in] qv Input values to be quantized.
+ * @param[in] qi Quantization information to be used in the computation.
+ *
+ * @return A neon vector holding the quantized values
+ */
+inline uint8x8_t vquantize(const float32x4x2_t &qv, const QuantizationInfo &qi)
+{
+    const float       scale     = qi.scale;
+    const int         offset    = qi.offset;
+    const float32x4_t voffset   = vdupq_n_f32(offset);
+    const float32x4_t vinvscale = vdupq_n_f32(1.f / scale);
+    const int32x4x4_t rf =
+    {
+        {
+#ifdef __aarch64__
+            vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[0], vinvscale)),
+            vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[1], vinvscale)),
+#else  //__aarch64__
+            vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[0], vinvscale)),
+            vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[1], vinvscale)),
+#endif //__aarch64__
+        }
+    };
+    return vqmovun_s16(vcombine_s16(vqmovn_s32(rf.val[0]), vqmovn_s32(rf.val[1])));
+}
+
 /** Quantize a neon vector holding 16 floating point values.
  *
- * @param qv Input values to be quantized.
- * @param qi Quantization information to be used in the computation.
+ * @param[in] qv Input values to be quantized.
+ * @param[in] qi Quantization information to be used in the computation.
  *
  * @return A neon vector holding the quantized values
  */
@@ -233,7 +284,6 @@ inline uint8x16_t vquantize(const float32x4x4_t &qv, const QuantizationInfo &qi)
     const uint8x8_t pb = vqmovun_s16(vcombine_s16(vqmovn_s32(rf.val[2]), vqmovn_s32(rf.val[3])));
     return vcombine_u8(pa, pb);
 }
-
 } // namespace arm_compute
 #include "arm_compute/core/NEON/NEAsymm.inl"
 #endif // __ARM_COMPUTE_NEASYMM_H__
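
For context, the following is a minimal usage sketch and is not part of the patch nor the actual
NEPoolingLayerKernel code. It illustrates the pattern the new 8-lane helpers enable: dequantize
the window with the input's QuantizationInfo, reduce in float, then requantize with the output's
QuantizationInfo, which may use a different scale/offset. The function name average_rows_q8 and
the element-wise 3-row average are hypothetical; the sketch assumes the library's include paths
and the QuantizationInfo type with public scale/offset members used by this header at this revision.

// Illustrative sketch only -- not part of the patch.
#include <arm_neon.h>

#include "arm_compute/core/NEON/NEAsymm.h"
#include "arm_compute/core/Types.h" // assumed: QuantizationInfo (scale, offset) lives here at this revision

using namespace arm_compute;

// Hypothetical helper: element-wise average of three rows of 8 quantized values,
// with separate input and output quantization information.
inline uint8x8_t average_rows_q8(uint8x8_t r0, uint8x8_t r1, uint8x8_t r2,
                                 const QuantizationInfo &in_qinfo,
                                 const QuantizationInfo &out_qinfo)
{
    // Dequantize each row using the *input* quantization info.
    const float32x4x2_t f0 = vdequantize(r0, in_qinfo);
    const float32x4x2_t f1 = vdequantize(r1, in_qinfo);
    const float32x4x2_t f2 = vdequantize(r2, in_qinfo);

    // Accumulate and average in float.
    const float32x4_t   third = vdupq_n_f32(1.f / 3.f);
    const float32x4x2_t avg =
    {
        {
            vmulq_f32(vaddq_f32(vaddq_f32(f0.val[0], f1.val[0]), f2.val[0]), third),
            vmulq_f32(vaddq_f32(vaddq_f32(f0.val[1], f1.val[1]), f2.val[1]), third),
        }
    };

    // Requantize using the *output* quantization info, which may differ from the input's.
    return vquantize(avg, out_qinfo);
}

Routing the result through the output quantization information instead of reusing the input's is
what allows input and output tensors with different scale/offset, which is the limitation the
commit message describes for the quantized 3x3 pooling path.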