diff options
author | Giorgio Arena <giorgio.arena@arm.com> | 2018-02-12 14:46:00 +0000 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:47:18 +0000 |
commit | 6232d04ff2afafba1171f61c164fa891471894a9 (patch) | |
tree | 15cf81aadee216220e4316852ee6f4f961adf599 /src/core/CL/cl_kernels | |
parent | c67bb3d23ca7aa0e36f8c7c3c4eacbc0e2dbb36a (diff) | |
download | ComputeLibrary-6232d04ff2afafba1171f61c164fa891471894a9.tar.gz |
COMPMID-907 Optimizing FixedPoint calculation in the output stage of GEMMLowp
Change-Id: Ic26fed30f9a54e6adef7861c05c9d55d23ca52ca
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/119913
Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels')
-rw-r--r-- | src/core/CL/cl_kernels/helpers_asymm.h | 15 |
1 file changed, 5 insertions, 10 deletions
diff --git a/src/core/CL/cl_kernels/helpers_asymm.h b/src/core/CL/cl_kernels/helpers_asymm.h index b44d0f1fd2..f07e7c950d 100644 --- a/src/core/CL/cl_kernels/helpers_asymm.h +++ b/src/core/CL/cl_kernels/helpers_asymm.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -45,6 +45,7 @@ } ASYMM_ROUNDING_DIVIDE_BY_POW2_IMPL(2) +ASYMM_ROUNDING_DIVIDE_BY_POW2_IMPL(4) ASYMM_ROUNDING_DIVIDE_BY_POW2_IMPL(8) ASYMM_ROUNDING_DIVIDE_BY_POW2_IMPL(16) @@ -68,20 +69,14 @@ ASYMM_ROUNDING_DIVIDE_BY_POW2_IMPL(16) b_64 = convert_long##size(b); \ VEC_DATA_TYPE(long, size) \ ab_64 = a_64 * b_64; \ - VEC_DATA_TYPE(long, size) \ - mask1 = 1 << 30; \ - VEC_DATA_TYPE(long, size) \ - mask2 = 1 - (1 << 30); \ - VEC_DATA_TYPE(long, size) \ - nudge = select(mask2, mask1, ab_64 >= 0); \ - VEC_DATA_TYPE(long, size) \ - mask = 1ll << 31; \ VEC_DATA_TYPE(int, size) \ - ab_x2_high32 = convert_int##size((ab_64 + nudge) / mask); \ + /* COMPMID-907 */ \ + ab_x2_high32 = convert_int##size(((ab_64 + (1 << 30)) >> 31)); \ return select(ab_x2_high32, INT_MAX, overflow); \ } ASYMM_MULT_IMP(2) +ASYMM_MULT_IMP(4) ASYMM_MULT_IMP(8) ASYMM_MULT_IMP(16) |