diff options
author | Pablo Marquez Tello <pablo.tello@arm.com> | 2022-11-01 09:32:20 +0000 |
---|---|---|
committer | Pablo Marquez Tello <pablo.tello@arm.com> | 2022-11-14 08:19:09 +0000 |
commit | 96fb194c610d06cff97cef3fa4a55572ea83580e (patch) | |
tree | 892371f5fbf7bec9728bbd058a5112e729b62298 /src/core/CL/cl_kernels/tile_helpers.h | |
parent | fd76611f50b0abe5effbe4b7f7129a745115fdf9 (diff) | |
download | ComputeLibrary-96fb194c610d06cff97cef3fa4a55572ea83580e.tar.gz |
Optimize T_QUANTIZE8_ASYMMETRIC for Mali™ G52
* Resolves MLCE-842
Signed-off-by: Pablo Marquez Tello <pablo.tello@arm.com>
Change-Id: Iae0521b25a5e6c9cc8046830f397d523dfbcc66e
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8542
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels/tile_helpers.h')
-rw-r--r-- | src/core/CL/cl_kernels/tile_helpers.h | 14 |
1 files changed, 7 insertions, 7 deletions
diff --git a/src/core/CL/cl_kernels/tile_helpers.h b/src/core/CL/cl_kernels/tile_helpers.h index 6279fb4fb6..01d49b5032 100644 --- a/src/core/CL/cl_kernels/tile_helpers.h +++ b/src/core/CL/cl_kernels/tile_helpers.h @@ -855,6 +855,7 @@ LOOP_UNROLLING(int, _n0, 0, 1, N0, \ { \ SRC_DATA_TYPE _tmp = 0; \ + SRC_DATA_TYPE _tmp2 = 0; \ SRC_DATA_TYPE _src = src[_m0].s[_n0]; \ SRC_DATA_TYPE _dst_multiplier = dst_multipliers[0].s[_n0]; \ SRC_DATA_TYPE _dst_shift = dst_shifts[0].s[_n0]; \ @@ -869,12 +870,11 @@ long nudge = select(mask2, mask1, is_positive_or_zero); \ SRC_DATA_TYPE ab_x2_high32 = CONVERT((ab_64 + nudge) / (long)(1ll << 31), SRC_DATA_TYPE); \ _tmp = select(ab_x2_high32, (SRC_DATA_TYPE)INT_MAX, overflow); \ - if(_dst_shift >= 0) \ - { \ - long mask = ((((int)1) << _dst_shift) - (int)1); \ - long threshold = _tmp < (int)0 ? (mask >> 1) + (long)1 : (mask >> 1) + 0; \ - _tmp = (_tmp & mask) > threshold ? (_tmp >> _dst_shift) + (int)1 : (_tmp >> _dst_shift); \ - } \ + long mask = ((((int)1) << _dst_shift) - (int)1); \ + long threshold = (mask >> 1) + any(_tmp); \ + _tmp2 = _tmp >> _dst_shift; \ + _tmp2 += select(0, 1, (_tmp & mask) > threshold); \ + _tmp = select(_tmp, _tmp2, _dst_shift >= 0); \ _tmp += DST_OFFSET; \ dst[_m0].s[_n0] = CONVERT_SAT(_tmp, DST_DATA_TYPE); \ }) \ @@ -1179,4 +1179,4 @@ }) \ }) -#endif // ARM_COMPUTE_TILE_HELPERS_H
\ No newline at end of file +#endif // ARM_COMPUTE_TILE_HELPERS_H |