From f87cc7f6fef95f9b022725304118796a6a764a7c Mon Sep 17 00:00:00 2001 From: Pablo Tello Date: Wed, 26 Jul 2017 10:28:40 +0100 Subject: COMPMID-417: Port NEDirectConvolution 1x1 to QS16. Change-Id: Icae6a5091e836d0aca24375f43cca9e6d3a2090f Reviewed-on: http://mpd-gerrit.cambridge.arm.com/81662 Reviewed-by: Moritz Pflanzer Tested-by: Kaizen Reviewed-by: Anthony Barbier --- arm_compute/core/NEON/NEFixedPoint.inl | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'arm_compute/core/NEON/NEFixedPoint.inl') diff --git a/arm_compute/core/NEON/NEFixedPoint.inl b/arm_compute/core/NEON/NEFixedPoint.inl index c879d3e275..dd1066d6bc 100644 --- a/arm_compute/core/NEON/NEFixedPoint.inl +++ b/arm_compute/core/NEON/NEFixedPoint.inl @@ -624,6 +624,20 @@ inline qint16x8_t vmull_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position) return vqrshlq_s16(res, fixed_point_position_s16); } +inline qint32x4_t vmull_qs16(qint16x4_t a, qint16x4_t b, int fixed_point_position) +{ + const int32x4_t fixed_point_position_s32 = vdupq_n_s32(-fixed_point_position); + + // Initialize the temporary results with a constant used to round up the result + qint32x4_t tmp = vdupq_n_s32(1 << (fixed_point_position - 1)); + + // Vector multiply-accumulate long + tmp = vmull_s16(a, b); + + // Shift right by fixed_point_position + return vqshlq_s32(tmp, fixed_point_position_s32); +} + inline qint8x8_t vmla_qs8(qint8x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position) { const int16x8_t fixed_point_position_s16 = vdupq_n_s16(-fixed_point_position); -- cgit v1.2.1