From dbdea0d1c025b18d4d82c278c87454427918f5b4 Mon Sep 17 00:00:00 2001
From: Georgios Pinitas <georgios.pinitas@arm.com>
Date: Wed, 16 Oct 2019 19:21:40 +0100
Subject: COMPMID-2308: NEConvolutionLayer: support QUANT8_SYMM_PER_CHANNEL
 filters

Change-Id: Ic1bf5f0d21ccd525f84213a360f7e199d7f50577
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-on: https://review.mlplatform.org/c/2177
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
---
 arm_compute/core/NEON/NEMath.inl | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'arm_compute/core/NEON/NEMath.inl')

diff --git a/arm_compute/core/NEON/NEMath.inl b/arm_compute/core/NEON/NEMath.inl
index 61315e8dbc..f1c9c2024b 100644
--- a/arm_compute/core/NEON/NEMath.inl
+++ b/arm_compute/core/NEON/NEMath.inl
@@ -294,6 +294,14 @@ inline float32x2_t vsin_f32(float32x2_t val)
 
 #endif /* DOXYGEN_SKIP_THIS */
 
+inline int32x4_t rounding_divide_by_pow2(int32x4_t x, int32x4_t exponent) // Per-lane variant: lane i of x is divided by 2^(lane i of exponent), with rounding.
+{
+    const int32x4_t shift_vec  = vnegq_s32(exponent); // Negate each lane: vrshlq_s32 below shifts right when its per-lane shift amount is negative.
+    const int32x4_t fixup      = vshrq_n_s32(vandq_s32(x, shift_vec), 31); // Arithmetic >> 31 of (x & shift_vec): -1 in lanes where both x and shift_vec are negative, 0 elsewhere.
+    const int32x4_t fixed_up_x = vqaddq_s32(x, fixup); // Saturating add of the 0/-1 correction, so halfway cases on negative x round away from zero instead of toward +infinity.
+    return vrshlq_s32(fixed_up_x, shift_vec); // Rounding shift by the negated exponent carries out the division, each lane using its own shift amount.
+}
+
 inline int32x4_t rounding_divide_by_pow2(int32x4_t x, int exponent)
 {
     const int32x4_t shift_vec  = vdupq_n_s32(-exponent);
-- 
cgit v1.2.1