about | summary | refs | log | tree | commit | diff
path: root/arm_compute/core/NEON/NEFixedPoint.inl
diff options
context:
space:
mode:
author: Michele Di Giorgio <michele.digiorgio@arm.com> 2017-07-26 17:09:17 +0100
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-09-17 14:16:42 +0100
commit: d5e65c71261fd42d3e69478507fbfcc8cf36befc (patch)
tree: 4892d179782b61f4198b45741d84b7d7fb30a011 /arm_compute/core/NEON/NEFixedPoint.inl
parent: baa656d41a9ef9027fca866c890a07b15747feda (diff)
download: ComputeLibrary-d5e65c71261fd42d3e69478507fbfcc8cf36befc.tar.gz
COMPMID-456: Add support for QS16 NEON Normalization Layer.
Change-Id: I1e542808cfd7774c67cc4e9a58e42449e4fb29aa
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/81735
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'arm_compute/core/NEON/NEFixedPoint.inl')
-rw-r--r-- arm_compute/core/NEON/NEFixedPoint.inl 17
1 files changed, 17 insertions, 0 deletions
diff --git a/arm_compute/core/NEON/NEFixedPoint.inl b/arm_compute/core/NEON/NEFixedPoint.inl
index 7cebfad924..c879d3e275 100644
--- a/arm_compute/core/NEON/NEFixedPoint.inl
+++ b/arm_compute/core/NEON/NEFixedPoint.inl
@@ -250,6 +250,18 @@ inline qint8x16_t vdupq_n_qs8_f32(float a, int fixed_point_position)
return vqcvtq_qs8_f32(res, fixed_point_position);
}
+inline qint16x8_t vdupq_n_qs16_f32(float a, int fixed_point_position)
+{
+ float32x4x2_t res =
+ {
+ {
+ vdupq_n_f32(a),
+ vdupq_n_f32(a),
+ }
+ };
+ return vqcvtq_qs16_f32(res, fixed_point_position);
+}
+
inline qint16x8_t vdupq_n_qs16(qint16_t a)
{
return vdupq_n_s16(a);
@@ -1941,6 +1953,11 @@ inline qint8x16_t vqpowq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_positio
return vqexpq_qs8(vqmulq_qs8(b, vlogq_qs8(a, fixed_point_position), fixed_point_position), fixed_point_position);
}
+inline qint16x8_t vqpowq_qs16(qint16x8_t a, qint16x8_t b, int fixed_point_position)
+{
+ return vqexpq_qs16(vqmulq_qs16(b, vlogq_qs16(a, fixed_point_position), fixed_point_position), fixed_point_position);
+}
+
inline float32x4x2_t vmax2q_f32(float32x4x2_t a, float32x4x2_t b)
{
float32x4x2_t res =