From d64a46c6dfa81ce4607fc3de57bc9d9ac7e01e4a Mon Sep 17 00:00:00 2001
From: Michele Di Giorgio
Date: Tue, 1 Oct 2019 12:25:49 +0100
Subject: COMPMID-2699: Add support for QASYMM16 in NEQuantizationLayer

Change-Id: Icb968e37551a9048040e9aaff5329e874c53a2ee
Signed-off-by: Michele Di Giorgio
Reviewed-on: https://review.mlplatform.org/c/2016
Comments-Addressed: Arm Jenkins
Tested-by: Arm Jenkins
Reviewed-by: Georgios Pinitas
---
 arm_compute/core/NEON/NEAsymm.h                    | 34 ++++++++++++++++++++++
 .../core/NEON/kernels/NEQuantizationLayerKernel.h  | 25 ++++++++++++++--
 .../runtime/NEON/functions/NEQuantizationLayer.h   |  4 +--
 3 files changed, 58 insertions(+), 5 deletions(-)

(limited to 'arm_compute')

diff --git a/arm_compute/core/NEON/NEAsymm.h b/arm_compute/core/NEON/NEAsymm.h
index f2d20d373a..56d4c09f92 100644
--- a/arm_compute/core/NEON/NEAsymm.h
+++ b/arm_compute/core/NEON/NEAsymm.h
@@ -331,6 +331,40 @@ inline uint8x16_t vquantize(const float32x4x4_t &qv, const UniformQuantizationIn
     const uint8x8_t pb = vqmovun_s16(vcombine_s16(vqmovn_s32(rf.val[2]), vqmovn_s32(rf.val[3])));
     return vcombine_u8(pa, pb);
 }
+
+/** Quantize to QASYMM16 a neon vector holding 16 floating point values.
+ *
+ * @param[in] qv Input values to be quantized.
+ * @param[in] qi Quantization information to be used in the computation.
+ *
+ * @return A neon vector holding the quantized values
+ */
+inline uint16x8x2_t vquantize_qasymm16(const float32x4x4_t &qv, const UniformQuantizationInfo &qi)
+{
+    const float       scale     = qi.scale;
+    const int         offset    = qi.offset;
+    const float32x4_t voffset   = vdupq_n_f32(offset);
+    const float32x4_t vinvscale = vdupq_n_f32(1.f / scale);
+    const int32x4x4_t rf =
+    {
+        {
+#ifdef __aarch64__
+            vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[0], vinvscale)),
+            vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[1], vinvscale)),
+            vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[2], vinvscale)),
+            vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[3], vinvscale)),
+#else  //__aarch64__
+            vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[0], vinvscale)),
+            vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[1], vinvscale)),
+            vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[2], vinvscale)),
+            vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[3], vinvscale)),
+#endif //__aarch64__
+        }
+    };
+    const uint16x8_t pa = vcombine_u16(vqmovun_s32(rf.val[0]), vqmovun_s32(rf.val[1]));
+    const uint16x8_t pb = vcombine_u16(vqmovun_s32(rf.val[2]), vqmovun_s32(rf.val[3]));
+    return { pa, pb };
+}
 } // namespace arm_compute
 #include "arm_compute/core/NEON/NEAsymm.inl"
 #endif // __ARM_COMPUTE_NEASYMM_H__
diff --git a/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h
index 391a72c6db..e1aaad5094 100644
--- a/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h
@@ -57,13 +57,15 @@ public:
     /** Set the input, output.
      *
      * @param[in]  input  Source tensor. The dimensions over the third will be interpreted as batches. Data types supported: F32/F16.
-     * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8.
+     * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM16.
+     *
+     * @note Output auto initialization is not supported by this kernel
      */
     void configure(const ITensor *input, ITensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref NEQuantizationLayerKernel
      *
      * @param[in] input  Input tensor info. Data types supported: F32/F16.
-     * @param[in] output Output tensor info. Data types supported: QASYMM8.
+     * @param[in] output Output tensor info. Data types supported: QASYMM8/QASYMM16.
      *
      * @return a status
      */
@@ -73,11 +75,28 @@ public:
     void run(const Window &window, const ThreadInfo &info) override;
 
 private:
+    /** Common signature for all the specialised @ref NEQuantizationLayerKernel functions
+     *
+     * @param[in] window Region on which to execute the kernel.
+     */
+    using QuantizationFunctionExecutorPtr = void (NEQuantizationLayerKernel::*)(const Window &window);
+    /** Function to apply QASYMM8 quantization on a tensor.
+     *
+     * @param[in] window Region on which to execute the kernel.
+     */
     template <typename T>
-    void quantize(const Window &window, const QuantizationInfo &qinfo);
+    void run_quantize_qasymm8(const Window &window);
+    /** Function to apply QASYMM16 quantization on a tensor.
+     *
+     * @param[in] window Region on which to execute the kernel.
+     */
+    template <typename T>
+    void run_quantize_qasymm16(const Window &window);
 
     const ITensor *_input;
     ITensor       *_output;
+
+    QuantizationFunctionExecutorPtr _func;
 };
 } // namespace arm_compute
 #endif /*__ARM_COMPUTE_NEQUANTIZATIONLAYERKERNEL_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h b/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h
index 46a62bd903..25609324a0 100644
--- a/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h
@@ -49,13 +49,13 @@ public:
     /** Set the input and output tensors.
      *
      * @param[in]  input  Source tensor. The dimensions over the third will be interpreted as batches. Data types supported: F32/F16.
-     * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QSYMM16
+     * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM16
      */
     void configure(const ITensor *input, ITensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref NEQuantizationLayer
      *
      * @param[in] input  Input tensor info. The dimensions over the third will be interpreted as batches. Data types supported: F32/F16.
-     * @param[in] output Output tensor info. Data types supported: QASYMM8/QSYMM16
+     * @param[in] output Output tensor info. Data types supported: QASYMM8/QASYMM16
      *
      * @return a status
      */
-- 
cgit v1.2.1
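
Usage sketch (not part of the change above): the snippet below shows how the extended NEQuantizationLayer might be driven from user code to quantize an F32 tensor into a QASYMM16 tensor. It is a minimal illustration; the tensor shape and the QuantizationInfo scale/offset values are assumptions chosen for the example, not values taken from the patch.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEQuantizationLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // Source tensor holding 32-bit floats; the shape is arbitrary for this example.
    Tensor src;
    src.allocator()->init(TensorInfo(TensorShape(16U, 16U, 3U), 1, DataType::F32));

    // Destination tensor: same shape, QASYMM16, with an assumed scale/offset pair.
    // The kernel does not auto-initialize its output, so the full TensorInfo
    // (including the QuantizationInfo) must be set before configure() is called.
    Tensor dst;
    dst.allocator()->init(TensorInfo(TensorShape(16U, 16U, 3U), 1, DataType::QASYMM16, QuantizationInfo(0.5f, 10)));

    // Configure the function, allocate backing memory, then run the quantization.
    NEQuantizationLayer quantization;
    quantization.configure(&src, &dst);

    src.allocator()->allocate();
    dst.allocator()->allocate();

    // ... fill src with float data here ...

    quantization.run();
    return 0;
}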