about summary refs log tree commit diff
path: root/arm_compute/core
diff options
context:
space:
mode:
authorMichele Di Giorgio <michele.digiorgio@arm.com>2017-07-26 17:09:17 +0100
committerAnthony Barbier <anthony.barbier@arm.com>2018-09-17 14:16:42 +0100
commitd5e65c71261fd42d3e69478507fbfcc8cf36befc (patch)
tree4892d179782b61f4198b45741d84b7d7fb30a011 /arm_compute/core
parentbaa656d41a9ef9027fca866c890a07b15747feda (diff)
downloadComputeLibrary-d5e65c71261fd42d3e69478507fbfcc8cf36befc.tar.gz
COMPMID-456: Add support for QS16 NEON Normalization Layer.
Change-Id: I1e542808cfd7774c67cc4e9a58e42449e4fb29aa Reviewed-on: http://mpd-gerrit.cambridge.arm.com/81735 Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com> Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'arm_compute/core')
-rw-r--r--  arm_compute/core/NEON/NEFixedPoint.h                          | 25
-rw-r--r--  arm_compute/core/NEON/NEFixedPoint.inl                        | 17
-rw-r--r--  arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h    |  4
3 files changed, 42 insertions, 4 deletions
diff --git a/arm_compute/core/NEON/NEFixedPoint.h b/arm_compute/core/NEON/NEFixedPoint.h
index 50463b5efe..08f680801d 100644
--- a/arm_compute/core/NEON/NEFixedPoint.h
+++ b/arm_compute/core/NEON/NEFixedPoint.h
@@ -235,13 +235,22 @@ qint8x16_t vdupq_n_qs8(qint8_t a);
/** Duplicate a float and convert it to 8 bit fixed point vector (16 elements)
*
- * @param[in] a 8 bit fixed point to duplicate
+ * @param[in] a floating point value to convert and duplicate
* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
*
* @return The result of the vector duplication
*/
qint8x16_t vdupq_n_qs8_f32(float a, int fixed_point_position);
+/** Duplicate a float and convert it to 16 bit fixed point vector (8 elements)
+ *
+ * @param[in] a floating point value to convert and duplicate
+ * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
+ *
+ * @return The result of the vector duplication
+ */
+qint16x8_t vdupq_n_qs16_f32(float a, int fixed_point_position);
+
/** 16 bit fixed point vector duplicate (8 elements)
*
* @param[in] a 16 bit fixed point to duplicate
@@ -1178,7 +1187,19 @@ qint16x8_t vqtanhq_qs16(qint16x8_t a, int fixed_point_position);
*
* @return The result of the 8bit power.
*/
-qint8x8_t vqpowq_qs8(qint8x8_t a, qint8x16_t b, int fixed_point_position);
+qint8x16_t vqpowq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position);
+
+/** Calculate saturating n power for fixed point 16bit (8 elements).
+ *
+ * pow(a,b) = e^(b*log(a))
+ *
+ * @param[in] a 16bit fixed point input vector
+ * @param[in] b 16bit fixed point power vector
+ * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
+ *
+ * @return The result of the 16bit power.
+ */
+qint16x8_t vqpowq_qs16(qint16x8_t a, qint16x8_t b, int fixed_point_position);
/** Compute lane-by-lane maximum between elements of a float vector with 4x2 elements
*
diff --git a/arm_compute/core/NEON/NEFixedPoint.inl b/arm_compute/core/NEON/NEFixedPoint.inl
index 7cebfad924..c879d3e275 100644
--- a/arm_compute/core/NEON/NEFixedPoint.inl
+++ b/arm_compute/core/NEON/NEFixedPoint.inl
@@ -250,6 +250,18 @@ inline qint8x16_t vdupq_n_qs8_f32(float a, int fixed_point_position)
return vqcvtq_qs8_f32(res, fixed_point_position);
}
+inline qint16x8_t vdupq_n_qs16_f32(float a, int fixed_point_position)
+{
+ float32x4x2_t res =
+ {
+ {
+ vdupq_n_f32(a),
+ vdupq_n_f32(a),
+ }
+ };
+ return vqcvtq_qs16_f32(res, fixed_point_position);
+}
+
inline qint16x8_t vdupq_n_qs16(qint16_t a)
{
return vdupq_n_s16(a);
@@ -1941,6 +1953,11 @@ inline qint8x16_t vqpowq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_positio
return vqexpq_qs8(vqmulq_qs8(b, vlogq_qs8(a, fixed_point_position), fixed_point_position), fixed_point_position);
}
+inline qint16x8_t vqpowq_qs16(qint16x8_t a, qint16x8_t b, int fixed_point_position)
+{
+ return vqexpq_qs16(vqmulq_qs16(b, vlogq_qs16(a, fixed_point_position), fixed_point_position), fixed_point_position);
+}
+
inline float32x4x2_t vmax2q_f32(float32x4x2_t a, float32x4x2_t b)
{
float32x4x2_t res =
diff --git a/arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h b/arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h
index b1bc594e4c..e24e481f46 100644
--- a/arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h
@@ -50,7 +50,7 @@ public:
/** Set the input and output tensors.
*
* @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM],
- * and an optional 4th dimension for batch of inputs. Data types supported: QS8/F32.
+ * and an optional 4th dimension for batch of inputs. Data types supported: QS8/QS16/FP16/F32.
* @param[in] input_squared Source with each element has been squared. 3 lower dims represent a single input with dimensions [width, height, IFM],
* Data type supported: same as @p input
* @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input
@@ -86,7 +86,7 @@ private:
*
* @param[in] window Region on which to execute the kernel.
*/
- template <unsigned int dim, bool do_2D_norm>
+ template <DataType dt, unsigned int dim, bool do_2D_norm>
void normalize_fixed_point(const Window &window);
/** Common signature for all the specialised normalization functions
*