diff options
author | Pablo Marquez Tello <pablo.tello@arm.com> | 2023-03-20 16:29:21 +0000 |
---|---|---|
committer | Pablo Marquez Tello <pablo.tello@arm.com> | 2023-03-23 12:01:05 +0000 |
commit | 20cfa45faefbf56f62c8b1aa95dfd0b4f52e5641 (patch) | |
tree | 7c8b7d17ce04714a8214b89adac5e25cafd2c730 /src/core/NEON/NEAsymm.h | |
parent | 251b5145309e941a32c6c7691062544f2fd243fb (diff) | |
download | ComputeLibrary-20cfa45faefbf56f62c8b1aa95dfd0b4f52e5641.tar.gz |
Round to nearest with ties to away from zero in Relu
* This patch adds support for rounding modes in vmlaq_qasymm8_signed
which is used to compute Relu for quantized types
* Partially resolves MLCE-1018
Change-Id: I2a267b84745430e1ffe92b8bc79828a39332db18
Signed-off-by: Pablo Marquez Tello <pablo.tello@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9354
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON/NEAsymm.h')
-rw-r--r-- | src/core/NEON/NEAsymm.h | 8 |
1 files changed, 7 insertions, 1 deletions
```diff
diff --git a/src/core/NEON/NEAsymm.h b/src/core/NEON/NEAsymm.h
index 9b92a865d0..5b8d2be04b 100644
--- a/src/core/NEON/NEAsymm.h
+++ b/src/core/NEON/NEAsymm.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2020, 2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -715,6 +715,12 @@ inline uint16x8x2_t vquantize_qasymm16(const float32x4x4_t &qv, const UniformQua
     const uint16x8_t pb = vcombine_u16(vqmovun_s32(rf.val[2]), vqmovun_s32(rf.val[3]));
     return { pa, pb };
 }
+
+template <RoundingPolicy round_policy = RoundingPolicy::TO_ZERO>
+qasymm8x16_signed_t vmlaq_qasymm8(qasymm8x16_signed_t vd, float32x4_t vs, float32x4_t vo);
+
+template <RoundingPolicy round_policy = RoundingPolicy::TO_ZERO>
+qasymm8x16_signed_t vmlaq_qasymm8_signed(qasymm8x16_signed_t vd, float32x4_t vs, float32x4_t vo);
 } // namespace arm_compute
 #include "src/core/NEON/NEAsymm.inl"
 #endif // ARM_COMPUTE_NEASYMM_H
```