author    Georgios Pinitas <georgios.pinitas@arm.com>  2019-06-04 13:04:16 +0100
committer Georgios Pinitas <georgios.pinitas@arm.com>  2019-06-24 14:56:23 +0000
commit    3d13af8a39f408318328a95d5329bc17fd923438 (patch)
tree      b0d9c82062e229f8938d2c9f762ee67758196bf3 /arm_compute/core/NEON
parent    db09b3783ff9af67c6d373b12aa9a6aff3c5d0f1 (diff)
download  ComputeLibrary-3d13af8a39f408318328a95d5329bc17fd923438.tar.gz
COMPMID-2235: Extend type support for CL/NEON DequantizationLayer.
Adds support for:
- QSYMM8

Change-Id: Ia0b839fc844ce0f968dad1b69a001f9a660dbcd5
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-on: https://review.mlplatform.org/c/1378
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Manuel Bottini <manuel.bottini@arm.com>
Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
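For orientation, the two quantization schemes involved reduce to simple per-value formulas: QASYMM8 stores unsigned bytes with a zero-point, QSYMM8 stores signed bytes with a scale only. A minimal scalar reference sketch (the helper names below are hypothetical, not library API):

// Scalar reference for the two dequantization schemes (hypothetical helpers).
// QASYMM8 (asymmetric): real = (q - offset) * scale
// QSYMM8  (symmetric):  real = q * scale
inline float dequantize_qasymm8_ref(uint8_t q, float scale, int32_t offset)
{
    return static_cast<float>(static_cast<int32_t>(q) - offset) * scale;
}

inline float dequantize_qsymm8_ref(int8_t q, float scale)
{
    return static_cast<float>(q) * scale;
}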
Diffstat (limited to 'arm_compute/core/NEON')
-rw-r--r--  arm_compute/core/NEON/NEAsymm.h                               | 46
-rw-r--r--  arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h   |  4
2 files changed, 48 insertions(+), 2 deletions(-)
diff --git a/arm_compute/core/NEON/NEAsymm.h b/arm_compute/core/NEON/NEAsymm.h
index 2347c468ab..4c8f797360 100644
--- a/arm_compute/core/NEON/NEAsymm.h
+++ b/arm_compute/core/NEON/NEAsymm.h
@@ -223,6 +223,52 @@ inline float32x4x4_t vdequantize(const uint8x16_t &qv, const UniformQuantization
return vdequantized_input;
}
+/** Dequantize a neon vector holding 16 quantized values, following an asymmetric quantization scheme.
+ *
+ * @param[in] qv     Input values to be dequantized.
+ * @param[in] scale  Quantization scaling factor.
+ * @param[in] offset Quantization offset (zero-point).
+ *
+ * @return Dequantized values in a neon vector.
+ */
+inline float32x4x4_t vdequantize(const uint8x16_t &qv, float scale, int32_t offset)
+{
+ const int32x4_t voffset = vdupq_n_s32(offset);
+ const float32x4_t vscale = vdupq_n_f32(scale);
+ const float32x4x4_t vdequantized_input =
+ {
+ {
+ vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_low_u8(qv))))), voffset)), vscale),
+ vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(vmovl_u8(vget_low_u8(qv))))), voffset)), vscale),
+ vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_high_u8(qv))))), voffset)), vscale),
+ vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(vmovl_u8(vget_high_u8(qv))))), voffset)), vscale),
+ }
+ };
+ return vdequantized_input;
+}
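Per lane, the overload above computes (q - offset) * scale: each byte is widened in two steps (u8 -> u16 -> u32), reinterpreted as signed, shifted by the zero-point, converted to float and scaled. A usage sketch, assuming src_ptr and dst_ptr point at 16 QASYMM8 bytes and 16 floats respectively (the names and the scale/offset values are illustrative):

const uint8x16_t    qv = vld1q_u8(src_ptr);           // load 16 quantized bytes
const float32x4x4_t f  = vdequantize(qv, 0.05f, 128); // example scale and offset
vst1q_f32(dst_ptr + 0,  f.val[0]);                    // store the 16 results
vst1q_f32(dst_ptr + 4,  f.val[1]);
vst1q_f32(dst_ptr + 8,  f.val[2]);
vst1q_f32(dst_ptr + 12, f.val[3]);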
+
+/** Dequantize a neon vector holding 16 quantized values, following a symmetric quantization scheme.
+ *
+ * @param[in] qv    Input values to be dequantized.
+ * @param[in] scale Quantization scaling factor.
+ *
+ * @return Dequantized values in a neon vector.
+ */
+inline float32x4x4_t vdequantize(const int8x16_t &qv, float scale)
+{
+ const float32x4_t vscale = vdupq_n_f32(scale);
+ const float32x4x4_t vdequantized_input =
+ {
+ {
+ vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(vget_low_s8(qv))))), vscale),
+ vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(vmovl_s8(vget_low_s8(qv))))), vscale),
+ vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(vget_high_s8(qv))))), vscale),
+ vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(vmovl_s8(vget_high_s8(qv))))), vscale),
+ }
+ };
+ return vdequantized_input;
+}
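The symmetric overload is the same widening chain on signed input with the offset subtraction dropped, i.e. q * scale per lane. A matching sketch under the same illustrative assumptions:

const int8x16_t     qv = vld1q_s8(src_ptr);      // load 16 QSYMM8 bytes
const float32x4x4_t f  = vdequantize(qv, 0.05f); // example scale
vst1q_f32(dst_ptr + 0,  f.val[0]);               // store the 16 results
vst1q_f32(dst_ptr + 4,  f.val[1]);
vst1q_f32(dst_ptr + 8,  f.val[2]);
vst1q_f32(dst_ptr + 12, f.val[3]);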
+
/** Quantize a neon vector holding 8 floating point values.
*
* @param[in] qv Input values to be quantized.
diff --git a/arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h
index 7d215f5f7b..3320ba6889 100644
--- a/arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h
@@ -52,13 +52,13 @@ public:
~NEDequantizationLayerKernel() = default;
/** Set input, output tensors.
*
- * @param[in] input Source tensor. Data type supported: QASYMM8.
+ * @param[in] input Source tensor. Data type supported: QASYMM8/QSYMM8.
* @param[out] output Destination tensor with the same dimensions of input. Data type supported: F16/F32.
*/
void configure(const ITensor *input, ITensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref NEDequantizationLayerKernel
*
- * @param[in] input Input tensor info. Data types supported: QASYMM8.
+ * @param[in] input Input tensor info. Data types supported: QASYMM8/QSYMM8.
* @param[in] output Output tensor info. Data types supported: F16/F32.
*
* @return a status
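Tying the two files together, a minimal configuration sketch for the kernel with the newly supported QSYMM8 input; the shape, scale value and scheduler call are illustrative assumptions, not taken from the patch:

// Declare a QSYMM8 source and an F32 destination (illustrative 1D shape and scale).
Tensor src, dst;
src.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::QSYMM8, QuantizationInfo(0.0627f)));
dst.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));

// Validate the tensor infos first, then configure the kernel.
NEDequantizationLayerKernel kernel;
ARM_COMPUTE_ERROR_THROW_ON(NEDequantizationLayerKernel::validate(src.info(), dst.info()));
kernel.configure(&src, &dst);

// Allocate backing memory and run the kernel on the NEON scheduler.
src.allocator()->allocate();
dst.allocator()->allocate();
NEScheduler::get().schedule(&kernel, Window::DimY);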