diff options
author | Michalis Spyrou <michalis.spyrou@arm.com> | 2019-08-22 16:52:00 +0100 |
---|---|---|
committer | Georgios Pinitas <georgios.pinitas@arm.com> | 2019-08-30 13:39:54 +0000 |
commit | 3f632f3f16e29ebeb7065b30008060fd4bfd09f1 (patch) | |
tree | ce15e27cf559bd1828b302e1257c187fece0b477 /arm_compute/core/NEON/NEAsymm.h | |
parent | 351bd137e48c5276963274ac741b172483e98d21 (diff) | |
download | ComputeLibrary-3f632f3f16e29ebeb7065b30008060fd4bfd09f1.tar.gz |
COMPMID-2418: CLDequantizationLayer support for QASYMM8_PER_CHANNEL
Add support for QASYMM8_PER_CHANNEL in CLDequantizationLayer.
Added tests for NHWC and also updated NEON code to work with NHWC
data layout.
Cleaned up the reference implementation.
Change-Id: Ic1d51f16f7f625503fffdbbb66f6487aa588f08c
Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com>
Reviewed-on: https://review.mlplatform.org/c/1828
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Diffstat (limited to 'arm_compute/core/NEON/NEAsymm.h')
-rw-r--r-- | arm_compute/core/NEON/NEAsymm.h | 22 |
1 files changed, 22 insertions, 0 deletions
diff --git a/arm_compute/core/NEON/NEAsymm.h b/arm_compute/core/NEON/NEAsymm.h index 981c7b075c..f2d20d373a 100644 --- a/arm_compute/core/NEON/NEAsymm.h +++ b/arm_compute/core/NEON/NEAsymm.h @@ -226,6 +226,28 @@ inline float32x4x4_t vdequantize(const uint8x16_t &qv, float scale, int32_t offs return vdequantized_input; } +/** Dequantize a neon vector holding 16 quantized values following an asymmetric quantization scheme, with a separate scale and offset per lane. + * + * @param[in] qv Input values to be dequantized. + * @param[in] vscale Vectors containing the quantization scaling factors; vscale.val[i] multiplies the i-th group of four input values. + * @param[in] voffset Vectors containing the quantization offsets; voffset.val[i] is subtracted from the i-th group of four input values before scaling. + * + * @return Dequantized values in a neon vector, computed per lane as (qv - voffset) * vscale. + */ +inline float32x4x4_t vdequantize(const uint8x16_t &qv, const float32x4x4_t vscale, const int32x4x4_t voffset) +{ + const float32x4x4_t vdequantized_input = + { + { + vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_low_u8(qv))))), voffset.val[0])), vscale.val[0]), + vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(vmovl_u8(vget_low_u8(qv))))), voffset.val[1])), vscale.val[1]), + vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_high_u8(qv))))), voffset.val[2])), vscale.val[2]), + vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(vmovl_u8(vget_high_u8(qv))))), voffset.val[3])), vscale.val[3]), + } + }; + return vdequantized_input; +} + /** Dequantize following a symmetric quantization scheme a neon vector holding 16 quantized values. * * @param[in] qv Input values to be dequantized. |