aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Pablo Marquez Tello <pablo.tello@arm.com> 2022-11-01 09:32:20 +0000
committer Pablo Marquez Tello <pablo.tello@arm.com> 2022-11-14 08:19:09 +0000
commit 96fb194c610d06cff97cef3fa4a55572ea83580e (patch)
tree 892371f5fbf7bec9728bbd058a5112e729b62298
parent fd76611f50b0abe5effbe4b7f7129a745115fdf9 (diff)
download ComputeLibrary-96fb194c610d06cff97cef3fa4a55572ea83580e.tar.gz
Optimize T_QUANTIZE8_ASYMMETRIC for Mali™ G52
* Resolves MLCE-842

Signed-off-by: Pablo Marquez Tello <pablo.tello@arm.com>
Change-Id: Iae0521b25a5e6c9cc8046830f397d523dfbcc66e
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8542
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r-- src/core/CL/cl_kernels/tile_helpers.h 14
1 files changed, 7 insertions, 7 deletions
diff --git a/src/core/CL/cl_kernels/tile_helpers.h b/src/core/CL/cl_kernels/tile_helpers.h
index 6279fb4fb6..01d49b5032 100644
--- a/src/core/CL/cl_kernels/tile_helpers.h
+++ b/src/core/CL/cl_kernels/tile_helpers.h
@@ -855,6 +855,7 @@
LOOP_UNROLLING(int, _n0, 0, 1, N0, \
{ \
SRC_DATA_TYPE _tmp = 0; \
+ SRC_DATA_TYPE _tmp2 = 0; \
SRC_DATA_TYPE _src = src[_m0].s[_n0]; \
SRC_DATA_TYPE _dst_multiplier = dst_multipliers[0].s[_n0]; \
SRC_DATA_TYPE _dst_shift = dst_shifts[0].s[_n0]; \
@@ -869,12 +870,11 @@
long nudge = select(mask2, mask1, is_positive_or_zero); \
SRC_DATA_TYPE ab_x2_high32 = CONVERT((ab_64 + nudge) / (long)(1ll << 31), SRC_DATA_TYPE); \
_tmp = select(ab_x2_high32, (SRC_DATA_TYPE)INT_MAX, overflow); \
- if(_dst_shift >= 0) \
- { \
- long mask = ((((int)1) << _dst_shift) - (int)1); \
- long threshold = _tmp < (int)0 ? (mask >> 1) + (long)1 : (mask >> 1) + 0; \
- _tmp = (_tmp & mask) > threshold ? (_tmp >> _dst_shift) + (int)1 : (_tmp >> _dst_shift); \
- } \
+ long mask = ((((int)1) << _dst_shift) - (int)1); \
+ long threshold = (mask >> 1) + any(_tmp); \
+ _tmp2 = _tmp >> _dst_shift; \
+ _tmp2 += select(0, 1, (_tmp & mask) > threshold); \
+ _tmp = select(_tmp, _tmp2, _dst_shift >= 0); \
_tmp += DST_OFFSET; \
dst[_m0].s[_n0] = CONVERT_SAT(_tmp, DST_DATA_TYPE); \
}) \
@@ -1179,4 +1179,4 @@
}) \
})
-#endif // ARM_COMPUTE_TILE_HELPERS_H
\ No newline at end of file
+#endif // ARM_COMPUTE_TILE_HELPERS_H