diff options
author | Michele Di Giorgio <michele.digiorgio@arm.com> | 2019-10-01 12:25:49 +0100 |
---|---|---|
committer | Michele Di Giorgio <michele.digiorgio@arm.com> | 2019-10-01 17:26:16 +0000 |
commit | d64a46c6dfa81ce4607fc3de57bc9d9ac7e01e4a (patch) | |
tree | e4b2a1e670a6002cd70e920ad7043c090b5d25f1 /arm_compute/core/NEON/NEAsymm.h | |
parent | 79f88e6d825402388bb79fc123ee2dfe01985bda (diff) | |
download | ComputeLibrary-d64a46c6dfa81ce4607fc3de57bc9d9ac7e01e4a.tar.gz |
COMPMID-2699: Add support for QASYMM16 in NEQuantizationLayer
Change-Id: Icb968e37551a9048040e9aaff5329e874c53a2ee
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-on: https://review.mlplatform.org/c/2016
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Diffstat (limited to 'arm_compute/core/NEON/NEAsymm.h')
-rw-r--r-- | arm_compute/core/NEON/NEAsymm.h | 34 |
1 files changed, 34 insertions, 0 deletions
diff --git a/arm_compute/core/NEON/NEAsymm.h b/arm_compute/core/NEON/NEAsymm.h index f2d20d373a..56d4c09f92 100644 --- a/arm_compute/core/NEON/NEAsymm.h +++ b/arm_compute/core/NEON/NEAsymm.h @@ -331,6 +331,40 @@ inline uint8x16_t vquantize(const float32x4x4_t &qv, const UniformQuantizationIn const uint8x8_t pb = vqmovun_s16(vcombine_s16(vqmovn_s32(rf.val[2]), vqmovn_s32(rf.val[3]))); return vcombine_u8(pa, pb); } + +/** Quantize to QASYMM16 a neon vector holding 16 floating point values. + * + * @param[in] qv Input values to be quantized. + * @param[in] qi Quantization information to be used in the computation. + * + * @return A neon vector holding the quantized values + */ +inline uint16x8x2_t vquantize_qasymm16(const float32x4x4_t &qv, const UniformQuantizationInfo &qi) +{ + const float scale = qi.scale; + const int offset = qi.offset; + const float32x4_t voffset = vdupq_n_f32(offset); + const float32x4_t vinvscale = vdupq_n_f32(1.f / scale); + const int32x4x4_t rf = + { + { +#ifdef __aarch64__ + vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[0], vinvscale)), + vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[1], vinvscale)), + vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[2], vinvscale)), + vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[3], vinvscale)), +#else //__aarch64__ + vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[0], vinvscale)), + vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[1], vinvscale)), + vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[2], vinvscale)), + vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[3], vinvscale)), +#endif //__aarch64__ + } + }; + const uint16x8_t pa = vcombine_u16(vqmovun_s32(rf.val[0]), vqmovun_s32(rf.val[1])); + const uint16x8_t pb = vcombine_u16(vqmovun_s32(rf.val[2]), vqmovun_s32(rf.val[3])); + return { pa, pb }; +} } // namespace arm_compute #include "arm_compute/core/NEON/NEAsymm.inl" #endif // __ARM_COMPUTE_NEASYMM_H__ |