From c9573f35c4267aa55648df4a134ebec82c5af93b Mon Sep 17 00:00:00 2001
From: giuros01
Date: Thu, 20 Jun 2019 10:30:17 +0100
Subject: COMPMID-2407: Add (logistic and tanh) activation support for QSYMM16
 for NEON

Change-Id: Ib89c9cfe12975e51d1710af736c73ce79e667363
Signed-off-by: giuros01
Reviewed-on: https://review.mlplatform.org/c/1412
Comments-Addressed: Arm Jenkins
Reviewed-by: Manuel Bottini
Tested-by: Arm Jenkins
Reviewed-by: Georgios Pinitas
---
 arm_compute/core/NEON/NESymm.h                      | 47 +++++++++++++++++++++++
 .../core/NEON/kernels/NEActivationLayerKernel.h     | 10 ++++-
 arm_compute/core/QuantizationInfo.h                 |  4 +-
 .../runtime/NEON/functions/NEActivationLayer.h      |  6 +--
 4 files changed, 60 insertions(+), 7 deletions(-)

(limited to 'arm_compute')

diff --git a/arm_compute/core/NEON/NESymm.h b/arm_compute/core/NEON/NESymm.h
index 0479753426..364a317bc7 100644
--- a/arm_compute/core/NEON/NESymm.h
+++ b/arm_compute/core/NEON/NESymm.h
@@ -102,5 +102,52 @@ inline int16_t finalize_quantization_int16(int32_t in_value, int result_fixedpoi
 
     return out_s16;
 }
+
+/** Dequantize a neon vector holding 8 16-bit quantized values.
+ *
+ * @param[in] qv    Input values to be dequantized.
+ * @param[in] scale Quantization scale
+ *
+ * @return Dequantized values in a neon vector
+ */
+inline float32x4x2_t vdequantize_int16(const int16x8_t &qv, float scale)
+{
+    const float32x4_t vscale = vdupq_n_f32(scale);
+    const float32x4x2_t vdequantized_input =
+    {
+        {
+            vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(qv))), vscale),
+            vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(qv))), vscale)
+        }
+    };
+    return vdequantized_input;
+}
+
+/** Quantize a neon vector holding 8 floating point values.
+ *
+ * @param[in] qv    Input values to be quantized.
+ * @param[in] scale Quantization scale
+ *
+ * @return A neon vector holding the quantized values
+ */
+inline int16x8_t vquantize_int16(const float32x4x2_t &qv, float scale)
+{
+    const float32x4_t vinvscale = vdupq_n_f32(1.f / scale);
+
+    const int32x4x2_t rf =
+    {
+        {
+#ifdef __aarch64__
+            vcvtnq_s32_f32(vmulq_f32(qv.val[0], vinvscale)),
+            vcvtnq_s32_f32(vmulq_f32(qv.val[1], vinvscale))
+#else  //__aarch64__
+            vcvtq_s32_f32(vmulq_f32(qv.val[0], vinvscale)),
+            vcvtq_s32_f32(vmulq_f32(qv.val[1], vinvscale))
+#endif //__aarch64__
+        }
+    };
+    return vcombine_s16(vqmovn_s32(rf.val[0]), vqmovn_s32(rf.val[1]));
+}
+
 } // namespace arm_compute
 #endif // __ARM_COMPUTE_NESYMM_H__
diff --git a/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h b/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h
index 9381beaded..5e87bd76a5 100644
--- a/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h
@@ -58,7 +58,7 @@ public:
      * @note If the output tensor is a nullptr, the activation function will be performed in-place
      *
      * @param[in, out] input           Source tensor. In case of @p output tensor = nullptr, this tensor will store the result
-     *                                 of the activation function. Data types supported: QASYMM8/F16/F32.
+     *                                 of the activation function. Data types supported: QASYMM8/QSYMM16/F16/F32.
      * @param[out]     output          Destination tensor. Data type supported: same as @p input
      * @param[in]      activation_info Activation layer information.
      */
@@ -66,7 +66,7 @@ public:
     /** Static function to check if given info will lead to a valid configuration of @ref NEActivationLayerKernel
      *
      * @param[in] input    Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result
-     *                     of the activation function. Data types supported: QASYMM8/F16/F32.
+     *                     of the activation function. Data types supported: QASYMM8/QSYMM16/F16/F32.
      * @param[in] output   Destination tensor info. Data type supported: same as @p input
      * @param[in] act_info Activation layer information.
      *
@@ -97,6 +97,12 @@ private:
      */
     template <ActivationLayerInfo::ActivationFunction F, typename T>
     typename std::enable_if<std::is_same<T, qasymm8_t>::value, void>::type activation(const Window &window);
+    /** Function to apply an activation function on a tensor.
+     *
+     * @param[in] window Region on which to execute the kernel
+     */
+    template <ActivationLayerInfo::ActivationFunction F, typename T>
+    typename std::enable_if<std::is_same<T, qsymm16_t>::value, void>::type activation(const Window &window);
 
 private:
     ITensor   *_input;
diff --git a/arm_compute/core/QuantizationInfo.h b/arm_compute/core/QuantizationInfo.h
index dcfdd6ba16..1c49cd29ed 100644
--- a/arm_compute/core/QuantizationInfo.h
+++ b/arm_compute/core/QuantizationInfo.h
@@ -34,6 +34,7 @@ namespace arm_compute
 {
 using qasymm8_t = uint8_t; /**< 8 bit quantized asymmetric scalar value */
 using qsymm8_t  = int8_t;  /**< 8 bit quantized symmetric scalar value */
+using qsymm16_t = int16_t; /**< 16 bit quantized symmetric scalar value */
 
 /** Quantization info when assuming per layer quantization */
 struct UniformQuantizationInfo
@@ -350,6 +351,5 @@ inline float dequantize_qsymm16(int16_t value, const QuantizationInfo &qinfo)
 {
     return dequantize_qsymm16(value, qinfo.uniform());
 }
-
 } // namespace arm_compute
-#endif /*__ARM_COMPUTE_QUANTIZATION_INFO_H__ */
\ No newline at end of file
+#endif /*__ARM_COMPUTE_QUANTIZATION_INFO_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEActivationLayer.h b/arm_compute/runtime/NEON/functions/NEActivationLayer.h
index 588de04332..c0b5f7ab37 100644
--- a/arm_compute/runtime/NEON/functions/NEActivationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEActivationLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -44,7 +44,7 @@ public:
      * @note If the output tensor is a nullptr or is equal to the input, the activation function will be performed in-place
      *
      * @param[in, out] input           Source tensor. In case of @p output tensor = nullptr, this tensor will store the result
-     *                                 of the activation function. Data types supported: QASYMM8/F16/F32.
+     *                                 of the activation function. Data types supported: QASYMM8/QSYMM16/F16/F32.
      * @param[out]     output          Destination tensor. Data type supported: same as @p input
      * @param[in]      activation_info Activation layer parameters.
      */
@@ -52,7 +52,7 @@ public:
     /** Static function to check if given info will lead to a valid configuration of @ref NEActivationLayer
      *
      * @param[in] input    Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result
-     *                     of the activation function. Data types supported: QASYMM8/F16/F32.
+     *                     of the activation function. Data types supported: QASYMM8/QSYMM16/F16/F32.
      * @param[in] output   Destination tensor info. Data type supported: same as @p input
      * @param[in] act_info Activation layer information.
      *
--
cgit v1.2.1
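The two NESymm.h helpers added by this patch are the building blocks of the QSYMM16 activation path: vdequantize_int16 widens eight int16 lanes into two float32x4 registers and scales them back to real values, the activation is evaluated in float, and vquantize_int16 converts the result back with saturation (vqmovn_s32) and, on AArch64, round-to-nearest (vcvtnq_s32_f32). A minimal sketch of that round trip for tanh, not the kernel's actual implementation; it assumes the vtanhq_f32 helper from NEMath.h is usable here and picks an illustrative output scale of 1/32768 so tanh's [-1, 1] codomain maps onto the full int16 range:

    #include <arm_neon.h>

    #include "arm_compute/core/NEON/NEMath.h" // vtanhq_f32 (assumed available)
    #include "arm_compute/core/NEON/NESymm.h" // vdequantize_int16 / vquantize_int16

    // Hypothetical inner step of a QSYMM16 tanh kernel: processes 8 lanes per call.
    inline int16x8_t tanh_qsymm16(int16x8_t in, float in_scale, float out_scale)
    {
        using namespace arm_compute;

        // int16 -> two float32x4 registers holding the dequantized real values
        const float32x4x2_t deq = vdequantize_int16(in, in_scale);

        // Evaluate the activation in float, four lanes at a time
        const float32x4x2_t act = { { vtanhq_f32(deq.val[0]), vtanhq_f32(deq.val[1]) } };

        // Real values -> saturated int16 under the output scale
        return vquantize_int16(act, out_scale);
    }

Because the scheme is symmetric there is no zero point, which is why both helpers take only a scale; note that with scale 1/32768 an exact 1.0 saturates to 32767 (about 0.99997).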
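At the function level, the change means NEActivationLayer can be configured directly on QSYMM16 tensors for these activations. A hedged usage sketch: the shape, scales, and fill/run boilerplate are illustrative rather than taken from the patch, and the 1/32768 output scale for logistic is an assumption modeled on the library's quantized-activation conventions:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // QSYMM16 tensors carry a scale only (no zero point); values here are illustrative.
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::QSYMM16, QuantizationInfo(1.f / 4096.f)));
        dst.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::QSYMM16, QuantizationInfo(1.f / 32768.f)));

        NEActivationLayer logistic;
        logistic.configure(&src, &dst, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));

        src.allocator()->allocate();
        dst.allocator()->allocate();
        // ... fill src with quantized data ...
        logistic.run();
        return 0;
    }

Calling the static NEActivationLayer::validate() with the tensor infos and activation info beforehand reports whether a given data type, activation, and quantization combination is supported, which is the easiest way to confirm the exact output-scale requirements.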