From 48f120c64c21d983318c6e65f6d5609a8f8e92e6 Mon Sep 17 00:00:00 2001 From: Jonathan Deakin Date: Wed, 8 May 2024 09:20:50 +0000 Subject: Make quantization rounding consistent In NEQuantizeLayer for QASYMM8_SIGNED, the rounding was inconsistent between the unrolled loop and the leftover loop, which meant identical values (e.g. 0.5) at different indices of a Tensor could round to different values (0 or 1 in this case). We have changed vcvtaq to vcvtnq to round to the nearest, with ties to even. This matches the default fegetround setting, so it is a sensible default. Relates-to: COMPMID-6994 Signed-off-by: Jonathan Deakin Change-Id: I8e7ecb1b8dbdd3e887697a92046af99ed33fc78f Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/11532 Reviewed-by: Gunes Bayir Comments-Addressed: Arm Jenkins Benchmark: Arm Jenkins Tested-by: Arm Jenkins --- src/core/NEON/NEAsymm.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/core/NEON/NEAsymm.h b/src/core/NEON/NEAsymm.h index 5f4d08d0f6..b93e64a0ef 100644 --- a/src/core/NEON/NEAsymm.h +++ b/src/core/NEON/NEAsymm.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020, 2023 Arm Limited. + * Copyright (c) 2017-2020, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_NEASYMM_H -#define ARM_COMPUTE_NEASYMM_H +#ifndef ACL_SRC_CORE_NEON_NEASYMM_H +#define ACL_SRC_CORE_NEON_NEASYMM_H #include "src/core/NEON/NEMath.h" #include "src/core/NEON/wrapper/intrinsics/intrinsics.h" @@ -637,10 +637,10 @@ inline int32x4x4_t vquantize_internal(const float32x4x4_t &qv, float scale, int3 const float32x4_t vinvscale = vdupq_n_f32(1.f / scale); const int32x4x4_t rf = {{ #ifdef __aarch64__ - vaddq_s32(vcvtaq_s32_f32(vmulq_f32(qv.val[0], vinvscale)), voffset), - vaddq_s32(vcvtaq_s32_f32(vmulq_f32(qv.val[1], vinvscale)), voffset), - vaddq_s32(vcvtaq_s32_f32(vmulq_f32(qv.val[2], vinvscale)), voffset), - vaddq_s32(vcvtaq_s32_f32(vmulq_f32(qv.val[3], vinvscale)), voffset), + vaddq_s32(vcvtnq_s32_f32(vmulq_f32(qv.val[0], vinvscale)), voffset), + vaddq_s32(vcvtnq_s32_f32(vmulq_f32(qv.val[1], vinvscale)), voffset), + vaddq_s32(vcvtnq_s32_f32(vmulq_f32(qv.val[2], vinvscale)), voffset), + vaddq_s32(vcvtnq_s32_f32(vmulq_f32(qv.val[3], vinvscale)), voffset), #else //__aarch64__ vaddq_s32(vcvtq_s32_f32(vmulq_f32(qv.val[0], vinvscale)), voffset), vaddq_s32(vcvtq_s32_f32(vmulq_f32(qv.val[1], vinvscale)), voffset), @@ -698,4 +698,4 @@ inline uint16x8x2_t vquantize_qasymm16(const float32x4x4_t &qv, const UniformQua } // namespace arm_compute #include "src/core/NEON/NEAsymm.inl" -#endif // ARM_COMPUTE_NEASYMM_H +#endif // ACL_SRC_CORE_NEON_NEASYMM_H -- cgit v1.2.1