Author:    Michalis Spyrou <michalis.spyrou@arm.com>   2019-08-22 16:52:00 +0100
Committer: Georgios Pinitas <georgios.pinitas@arm.com> 2019-08-30 13:39:54 +0000
Commit:    3f632f3f16e29ebeb7065b30008060fd4bfd09f1
Tree:      ce15e27cf559bd1828b302e1257c187fece0b477
Parent:    351bd137e48c5276963274ac741b172483e98d21
COMPMID-2418: CLDequantizationLayer support for QASYMM8_PER_CHANNEL
Add support for QASYMM8_PER_CHANNEL in CLDequantizationLayer. Added tests for NHWC and also updated the NEON code to work with the NHWC data layout. Cleaned up the reference implementation.

Change-Id: Ic1d51f16f7f625503fffdbbb66f6487aa588f08c
Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com>
Reviewed-on: https://review.mlplatform.org/c/1828
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
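For reference, asymmetric per-channel dequantization maps each quantized value q in channel c to (q - offset[c]) * scale[c], which is exactly what the kernel and NEON helper in this patch compute. A minimal scalar sketch of that formula (dequantize_per_channel is a hypothetical helper written for illustration, not part of this patch), assuming an NHWC buffer with channels innermost:

#include <cstddef>
#include <cstdint>
#include <vector>

// Hypothetical scalar reference for QASYMM8_PER_CHANNEL dequantization:
// each channel c carries its own scale[c]/offset[c] pair, and the result
// is (q - offset[c]) * scale[c]. Assumes NHWC layout, channels innermost.
std::vector<float> dequantize_per_channel(const std::vector<uint8_t> &q,
                                          const std::vector<float>   &scale,
                                          const std::vector<int32_t> &offset)
{
    const std::size_t channels = scale.size();
    std::vector<float> out(q.size());
    for(std::size_t i = 0; i < q.size(); ++i)
    {
        const std::size_t c = i % channels; // channel index in NHWC
        out[i] = static_cast<float>(static_cast<int32_t>(q[i]) - offset[c]) * scale[c];
    }
    return out;
}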
Diffstat (limited to 'arm_compute/core')
-rw-r--r--  arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h |  4 ++--
-rw-r--r--  arm_compute/core/NEON/NEAsymm.h                           | 22 ++++++++++++++++++++++
2 files changed, 24 insertions(+), 2 deletions(-)
diff --git a/arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h b/arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h
index 0ee5a13638..830d7518ce 100644
--- a/arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h
@@ -48,13 +48,13 @@ public:
~CLDequantizationLayerKernel() = default;
/** Set the input, output, min and max.
*
- * @param[in] input Source tensor. Data types supported: QASYMM8/QSYMM8/QSYMM16.
+ * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
* @param[out] output Destination tensor. Data types supported: F16/F32.
*/
void configure(const ICLTensor *input, ICLTensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref CLDequantizationLayerKernel
*
- * @param[in] input Input tensor info. Data types supported: QASYMM8/QSYMM8/QSYMM16.
+ * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
* @param[in] output Output tensor info. Data types supported: F16/F32.
*
* @return a status
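The declarations above suggest a straightforward call pattern for per-channel inputs. A hedged sketch of validating such a configuration (the shapes, scales, and offsets are invented for illustration, and a QuantizationInfo constructor taking per-channel scale/offset vectors is assumed):

#include "arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h"
#include "arm_compute/core/TensorInfo.h"
#include <vector>

using namespace arm_compute;

// Illustrative only: a 4x4 tensor with 3 channels, one (scale, offset)
// pair per channel. Assumes QuantizationInfo accepts per-channel
// scale/offset vectors for QASYMM8_PER_CHANNEL.
Status validate_example()
{
    const QuantizationInfo qinfo(std::vector<float>{ 0.5f, 0.25f, 0.125f },
                                 std::vector<int32_t>{ 10, 0, -5 });
    const TensorInfo input(TensorShape(4U, 4U, 3U), 1, DataType::QASYMM8_PER_CHANNEL, qinfo);
    const TensorInfo output(TensorShape(4U, 4U, 3U), 1, DataType::F32);
    return CLDequantizationLayerKernel::validate(&input, &output);
}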
diff --git a/arm_compute/core/NEON/NEAsymm.h b/arm_compute/core/NEON/NEAsymm.h
index 981c7b075c..f2d20d373a 100644
--- a/arm_compute/core/NEON/NEAsymm.h
+++ b/arm_compute/core/NEON/NEAsymm.h
@@ -226,6 +226,28 @@ inline float32x4x4_t vdequantize(const uint8x16_t &qv, float scale, int32_t offs
return vdequantized_input;
}
+/** Dequantize a neon vector holding 16 quantized values, following an asymmetric quantization scheme with per-value scales and offsets.
+ *
+ * @param[in] qv Input values to be dequantized.
+ * @param[in] vscale Vector containing quantization scaling factors.
+ * @param[in] voffset Vector containing quantization offsets.
+ *
+ * @return Dequantized values in a neon vector
+ */
+inline float32x4x4_t vdequantize(const uint8x16_t &qv, const float32x4x4_t vscale, const int32x4x4_t voffset)
+{
+ const float32x4x4_t vdequantized_input =
+ {
+ {
+ vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_low_u8(qv))))), voffset.val[0])), vscale.val[0]),
+ vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(vmovl_u8(vget_low_u8(qv))))), voffset.val[1])), vscale.val[1]),
+ vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_high_u8(qv))))), voffset.val[2])), vscale.val[2]),
+ vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(vmovl_u8(vget_high_u8(qv))))), voffset.val[3])), vscale.val[3]),
+ }
+ };
+ return vdequantized_input;
+}
+
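A hedged usage sketch for the overload added above (dequantize16 is a hypothetical caller, not part of this patch; it assumes the per-channel scales and offsets have already been expanded into 16-element arrays matching the 16 input values):

#include <arm_neon.h>

// Hypothetical caller: dequantize 16 consecutive QASYMM8_PER_CHANNEL
// values whose per-channel scales and offsets have already been gathered
// into contiguous 16-element arrays.
inline float32x4x4_t dequantize16(const uint8_t *src, const float *scale, const int32_t *offset)
{
    const uint8x16_t    qv      = vld1q_u8(src);
    const float32x4x4_t vscale  = { { vld1q_f32(scale), vld1q_f32(scale + 4), vld1q_f32(scale + 8), vld1q_f32(scale + 12) } };
    const int32x4x4_t   voffset = { { vld1q_s32(offset), vld1q_s32(offset + 4), vld1q_s32(offset + 8), vld1q_s32(offset + 12) } };
    return vdequantize(qv, vscale, voffset); // the new overload
}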
/** Dequantize a neon vector holding 16 quantized values, following a symmetric quantization scheme.
*
* @param[in] qv Input values to be dequantized.