about | summary | refs | log | tree | commit | diff
path: root/src/core/CL/cl_kernels/helpers_asymm.h
diff options
context:
space:
mode:
author: Giorgio Arena <giorgio.arena@arm.com> 2018-02-12 14:46:00 +0000
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:47:18 +0000
commit: 6232d04ff2afafba1171f61c164fa891471894a9 (patch)
tree: 15cf81aadee216220e4316852ee6f4f961adf599 /src/core/CL/cl_kernels/helpers_asymm.h
parent: c67bb3d23ca7aa0e36f8c7c3c4eacbc0e2dbb36a (diff)
download: ComputeLibrary-6232d04ff2afafba1171f61c164fa891471894a9.tar.gz
COMPMID-907 Optimizing FixedPoint calculation in the output stage of GEMMLowp
Change-Id: Ic26fed30f9a54e6adef7861c05c9d55d23ca52ca
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/119913
Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels/helpers_asymm.h')
-rw-r--r-- src/core/CL/cl_kernels/helpers_asymm.h 15
1 files changed, 5 insertions, 10 deletions
diff --git a/src/core/CL/cl_kernels/helpers_asymm.h b/src/core/CL/cl_kernels/helpers_asymm.h
index b44d0f1fd2..f07e7c950d 100644
--- a/src/core/CL/cl_kernels/helpers_asymm.h
+++ b/src/core/CL/cl_kernels/helpers_asymm.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -45,6 +45,7 @@
}
ASYMM_ROUNDING_DIVIDE_BY_POW2_IMPL(2)
+ASYMM_ROUNDING_DIVIDE_BY_POW2_IMPL(4)
ASYMM_ROUNDING_DIVIDE_BY_POW2_IMPL(8)
ASYMM_ROUNDING_DIVIDE_BY_POW2_IMPL(16)
@@ -68,20 +69,14 @@ ASYMM_ROUNDING_DIVIDE_BY_POW2_IMPL(16)
b_64 = convert_long##size(b); \
VEC_DATA_TYPE(long, size) \
ab_64 = a_64 * b_64; \
- VEC_DATA_TYPE(long, size) \
- mask1 = 1 << 30; \
- VEC_DATA_TYPE(long, size) \
- mask2 = 1 - (1 << 30); \
- VEC_DATA_TYPE(long, size) \
- nudge = select(mask2, mask1, ab_64 >= 0); \
- VEC_DATA_TYPE(long, size) \
- mask = 1ll << 31; \
VEC_DATA_TYPE(int, size) \
- ab_x2_high32 = convert_int##size((ab_64 + nudge) / mask); \
+ /* COMPMID-907 */ \
+ ab_x2_high32 = convert_int##size(((ab_64 + (1 << 30)) >> 31)); \
return select(ab_x2_high32, INT_MAX, overflow); \
}
ASYMM_MULT_IMP(2)
+ASYMM_MULT_IMP(4)
ASYMM_MULT_IMP(8)
ASYMM_MULT_IMP(16)