From 96fb194c610d06cff97cef3fa4a55572ea83580e Mon Sep 17 00:00:00 2001
From: Pablo Marquez Tello
Date: Tue, 1 Nov 2022 09:32:20 +0000
Subject: =?UTF-8?q?Optimize=20T=5FQUANTIZE8=5FASYMMETRIC=20for=20Mali?=
 =?UTF-8?q?=E2=84=A2=20G52?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Resolves MLCE-842

Signed-off-by: Pablo Marquez Tello
Change-Id: Iae0521b25a5e6c9cc8046830f397d523dfbcc66e
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8542
Benchmark: Arm Jenkins
Tested-by: Arm Jenkins
Reviewed-by: Gunes Bayir
Comments-Addressed: Arm Jenkins
---
 src/core/CL/cl_kernels/tile_helpers.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/core/CL/cl_kernels/tile_helpers.h b/src/core/CL/cl_kernels/tile_helpers.h
index 6279fb4fb6..01d49b5032 100644
--- a/src/core/CL/cl_kernels/tile_helpers.h
+++ b/src/core/CL/cl_kernels/tile_helpers.h
@@ -855,6 +855,7 @@
             LOOP_UNROLLING(int, _n0, 0, 1, N0, \
             { \
                 SRC_DATA_TYPE _tmp = 0; \
+                SRC_DATA_TYPE _tmp2 = 0; \
                 SRC_DATA_TYPE _src = src[_m0].s[_n0]; \
                 SRC_DATA_TYPE _dst_multiplier = dst_multipliers[0].s[_n0]; \
                 SRC_DATA_TYPE _dst_shift = dst_shifts[0].s[_n0]; \
@@ -869,12 +870,11 @@
                 long nudge = select(mask2, mask1, is_positive_or_zero); \
                 SRC_DATA_TYPE ab_x2_high32 = CONVERT((ab_64 + nudge) / (long)(1ll << 31), SRC_DATA_TYPE); \
                 _tmp = select(ab_x2_high32, (SRC_DATA_TYPE)INT_MAX, overflow); \
-                if(_dst_shift >= 0) \
-                { \
-                    long mask = ((((int)1) << _dst_shift) - (int)1); \
-                    long threshold = _tmp < (int)0 ? (mask >> 1) + (long)1 : (mask >> 1) + 0; \
-                    _tmp = (_tmp & mask) > threshold ? (_tmp >> _dst_shift) + (int)1 : (_tmp >> _dst_shift); \
-                } \
+                long mask = ((((int)1) << _dst_shift) - (int)1); \
+                long threshold = (mask >> 1) + any(_tmp); \
+                _tmp2 = _tmp >> _dst_shift; \
+                _tmp2 += select(0, 1, (_tmp & mask) > threshold); \
+                _tmp = select(_tmp, _tmp2, _dst_shift >= 0); \
                 _tmp += DST_OFFSET; \
                 dst[_m0].s[_n0] = CONVERT_SAT(_tmp, DST_DATA_TYPE); \
             }) \
@@ -1179,4 +1179,4 @@
         }) \
     })
 
-#endif // ARM_COMPUTE_TILE_HELPERS_H
\ No newline at end of file
+#endif // ARM_COMPUTE_TILE_HELPERS_H
-- 
cgit v1.2.1