From d64a46c6dfa81ce4607fc3de57bc9d9ac7e01e4a Mon Sep 17 00:00:00 2001
From: Michele Di Giorgio
Date: Tue, 1 Oct 2019 12:25:49 +0100
Subject: COMPMID-2699: Add support for QASYMM16 in NEQuantizationLayer

Change-Id: Icb968e37551a9048040e9aaff5329e874c53a2ee
Signed-off-by: Michele Di Giorgio
Reviewed-on: https://review.mlplatform.org/c/2016
Comments-Addressed: Arm Jenkins
Tested-by: Arm Jenkins
Reviewed-by: Georgios Pinitas
---
 arm_compute/core/NEON/NEAsymm.h                    | 34 ++++++++++++++++++++++
 .../core/NEON/kernels/NEQuantizationLayerKernel.h  | 25 ++++++++++++++--
 .../runtime/NEON/functions/NEQuantizationLayer.h   |  4 +--
 3 files changed, 58 insertions(+), 5 deletions(-)

(limited to 'arm_compute')

diff --git a/arm_compute/core/NEON/NEAsymm.h b/arm_compute/core/NEON/NEAsymm.h
index f2d20d373a..56d4c09f92 100644
--- a/arm_compute/core/NEON/NEAsymm.h
+++ b/arm_compute/core/NEON/NEAsymm.h
@@ -331,6 +331,40 @@ inline uint8x16_t vquantize(const float32x4x4_t &qv, const UniformQuantizationIn
     const uint8x8_t pb = vqmovun_s16(vcombine_s16(vqmovn_s32(rf.val[2]), vqmovn_s32(rf.val[3])));
     return vcombine_u8(pa, pb);
 }
+
+/** Quantize to QASYMM16 a neon vector holding 16 floating point values.
+ *
+ * @param[in] qv Input values to be quantized.
+ * @param[in] qi Quantization information to be used in the computation.
+ *
+ * @return A neon vector holding the quantized values
+ */
+inline uint16x8x2_t vquantize_qasymm16(const float32x4x4_t &qv, const UniformQuantizationInfo &qi)
+{
+    const float       scale     = qi.scale;
+    const int         offset    = qi.offset;
+    const float32x4_t voffset   = vdupq_n_f32(offset);
+    const float32x4_t vinvscale = vdupq_n_f32(1.f / scale);
+    const int32x4x4_t rf =
+    {
+        {
+#ifdef __aarch64__
+            vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[0], vinvscale)),
+            vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[1], vinvscale)),
+            vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[2], vinvscale)),
+            vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[3], vinvscale)),
+#else  //__aarch64__
+            vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[0], vinvscale)),
+            vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[1], vinvscale)),
+            vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[2], vinvscale)),
+            vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[3], vinvscale)),
+#endif //__aarch64__
+        }
+    };
+    const uint16x8_t pa = vcombine_u16(vqmovun_s32(rf.val[0]), vqmovun_s32(rf.val[1]));
+    const uint16x8_t pb = vcombine_u16(vqmovun_s32(rf.val[2]), vqmovun_s32(rf.val[3]));
+    return { pa, pb };
+}
 } // namespace arm_compute
 #include "arm_compute/core/NEON/NEAsymm.inl"
 #endif // __ARM_COMPUTE_NEASYMM_H__
diff --git a/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h
index 391a72c6db..e1aaad5094 100644
--- a/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h
@@ -57,13 +57,15 @@ public:
     /** Set the input, output.
      *
      * @param[in]  input  Source tensor. The dimensions over the third will be interpreted as batches. Data types supported: F32/F16.
-     * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8.
+     * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM16.
+     *
+     * @note Output auto initialization is not supported by this kernel
      */
     void configure(const ITensor *input, ITensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref NEQuantizationLayerKernel
      *
      * @param[in] input  Input tensor info. Data types supported: F32/F16.
-     * @param[in] output Output tensor info. Data types supported: QASYMM8.
+     * @param[in] output Output tensor info. Data types supported: QASYMM8/QASYMM16.
      *
      * @return a status
      */
@@ -73,11 +75,28 @@ public:
     void run(const Window &window, const ThreadInfo &info) override;
 
 private:
+    /** Common signature for all the specialised @ref NEQuantizationLayerKernel functions
+     *
+     * @param[in] window Region on which to execute the kernel.
+     */
+    using QuantizationFunctionExecutorPtr = void (NEQuantizationLayerKernel::*)(const Window &window);
+    /** Function to apply QASYMM8 quantization on a tensor.
+     *
+     * @param[in] window Region on which to execute the kernel.
+     */
     template <typename T>
-    void quantize(const Window &window, const QuantizationInfo &qinfo);
+    void run_quantize_qasymm8(const Window &window);
+    /** Function to apply QASYMM16 quantization on a tensor.
+     *
+     * @param[in] window Region on which to execute the kernel.
+     */
+    template <typename T>
+    void run_quantize_qasymm16(const Window &window);
 
     const ITensor *_input;
     ITensor       *_output;
+
+    QuantizationFunctionExecutorPtr _func;
 };
 } // namespace arm_compute
 #endif /*__ARM_COMPUTE_NEQUANTIZATIONLAYERKERNEL_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h b/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h
index 46a62bd903..25609324a0 100644
--- a/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h
@@ -49,13 +49,13 @@ public:
     /** Set the input and output tensors.
      *
      * @param[in]  input  Source tensor. The dimensions over the third will be interpreted as batches. Data types supported: F32/F16.
-     * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QSYMM16
+     * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM16
      */
     void configure(const ITensor *input, ITensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref NEQuantizationLayer
      *
      * @param[in] input  Input tensor info. The dimensions over the third will be interpreted as batches. Data types supported: F32/F16.
-     * @param[in] output Output tensor info. Data types supported: QASYMM8/QSYMM16
+     * @param[in] output Output tensor info. Data types supported: QASYMM8/QASYMM16
      *
      * @return a status
      */
-- 
cgit v1.2.1
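
Usage sketch (not part of the change above): the snippet below shows how the extended NEQuantizationLayer might be driven from user code to quantize an F32 tensor into a QASYMM16 tensor. It is a minimal illustration; the tensor shape and the QuantizationInfo scale/offset values are assumptions chosen for the example, not values taken from the patch.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEQuantizationLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // Source tensor holding 32-bit floats; the shape is arbitrary for this example.
    Tensor src;
    src.allocator()->init(TensorInfo(TensorShape(16U, 16U, 3U), 1, DataType::F32));

    // Destination tensor: same shape, QASYMM16, with an assumed scale/offset pair.
    // The kernel does not auto-initialize its output, so the full TensorInfo
    // (including the QuantizationInfo) must be set before configure() is called.
    Tensor dst;
    dst.allocator()->init(TensorInfo(TensorShape(16U, 16U, 3U), 1, DataType::QASYMM16, QuantizationInfo(0.5f, 10)));

    // Configure the function, allocate backing memory, then run the quantization.
    NEQuantizationLayer quantization;
    quantization.configure(&src, &dst);

    src.allocator()->allocate();
    dst.allocator()->allocate();

    // ... fill src with float data here ...

    quantization.run();
    return 0;
}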