From ecaa10a5594b58f14f5962cdda71d9313d4f3aa8 Mon Sep 17 00:00:00 2001 From: Mohammed Suhail Munshi Date: Thu, 9 Feb 2023 11:52:06 +0000 Subject: =?UTF-8?q?Fix=20Intermittent=20Neon=E2=84=A2=20ReduceMean=20QASYM?= =?UTF-8?q?M8=20Mismatch?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Dividing scale by number of elements causes accuracy loss due to limitations in float datatype and truncation to int - Adds rounding after division on aarch64 to negate this. Resolves: [COMPMID-5839] Signed-off-by: Mohammed Suhail Munshi Change-Id: I54ef0f7e56f39da1fa5f30378f551b5ca419a61d Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/c/VisualCompute/ComputeLibrary/+/492456 Tested-by: bsgcomp Comments-Addressed: bsgcomp Reviewed-by: Viet-Hoa Do Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9110 Reviewed-by: Gunes Bayir Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins Benchmark: Arm Jenkins --- .../NEON/kernels/NEReductionOperationKernel.cpp | 28 +++++++++++++++------- 1 file changed, 20 insertions(+), 8 deletions(-) (limited to 'src/core/NEON/kernels/NEReductionOperationKernel.cpp') diff --git a/src/core/NEON/kernels/NEReductionOperationKernel.cpp b/src/core/NEON/kernels/NEReductionOperationKernel.cpp index e0f43ab176..19955af493 100644 --- a/src/core/NEON/kernels/NEReductionOperationKernel.cpp +++ b/src/core/NEON/kernels/NEReductionOperationKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2022 Arm Limited. + * Copyright (c) 2017-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -625,13 +625,13 @@ struct RedOpX_quantized Iterator input(in, in_win_no_pad); Iterator output(out, out_window); - const float in_offset = static_cast(iq_info.offset); + const auto in_offset = static_cast(iq_info.offset); const float in_scale = iq_info.scale; - const float out_offset = static_cast(oq_info.offset); + const auto out_offset = static_cast(oq_info.offset); const float out_scale = oq_info.scale; - const float num_elements = static_cast(in_info.dimension(0)); + const auto num_elements = static_cast(in_info.dimension(0)); const float A = in_scale / (out_scale * num_elements); const float B = out_offset - (in_scale * in_offset) / (out_scale); @@ -1382,10 +1382,17 @@ struct RedOpYZW_quantized vec_res_value3_f = wrapper::vmla(vec_B, wrapper::vcvt(vec_res_value3), vec_A); vec_res_value4_f = wrapper::vmla(vec_B, wrapper::vcvt(vec_res_value4), vec_A); - vec_res_value1 = wrapper::vcvt(vec_res_value1_f); - vec_res_value2 = wrapper::vcvt(vec_res_value2_f); - vec_res_value3 = wrapper::vcvt(vec_res_value3_f); - vec_res_value4 = wrapper::vcvt(vec_res_value4_f); +#ifdef __aarch64__ + vec_res_value1 = wrapper::vcvta(vec_res_value1_f); + vec_res_value2 = wrapper::vcvta(vec_res_value2_f); + vec_res_value3 = wrapper::vcvta(vec_res_value3_f); + vec_res_value4 = wrapper::vcvta(vec_res_value4_f); +#else // defined(__aarch64__) + vec_res_value1 = wrapper::vcvt(vec_res_value1_f); + vec_res_value2 = wrapper::vcvt(vec_res_value2_f); + vec_res_value3 = wrapper::vcvt(vec_res_value3_f); + vec_res_value4 = wrapper::vcvt(vec_res_value4_f); +#endif // __aarch64__ const auto temp16x8t_1 = wrapper::vcombine(wrapper::vqmovn(vec_res_value1), wrapper::vqmovn(vec_res_value2)); const auto temp16x8t_2 = wrapper::vcombine(wrapper::vqmovn(vec_res_value3), wrapper::vqmovn(vec_res_value4)); @@ -1521,7 +1528,12 @@ struct RedOpYZW_quantized { case ReductionOperation::MEAN_SUM: { + // Apply previously calculated coefficients (with rounding on aarch64) +#ifdef __aarch64__ + const int32_t res = arm_compute::support::cpp11::round(A * (static_cast(res_value_q)) + B); +#else // defined(__aarch64__) const int32_t res = A * (static_cast(res_value_q)) + B; +#endif // __aarch64__ *reinterpret_cast(output.ptr() + x) = utils::cast::saturate_cast(res); break; } -- cgit v1.2.1