From c9573f35c4267aa55648df4a134ebec82c5af93b Mon Sep 17 00:00:00 2001
From: giuros01
Date: Thu, 20 Jun 2019 10:30:17 +0100
Subject: COMPMID-2407: Add (logistic and tanh) activation support for QSYMM16
 for NEON

Change-Id: Ib89c9cfe12975e51d1710af736c73ce79e667363
Signed-off-by: giuros01
Reviewed-on: https://review.mlplatform.org/c/1412
Comments-Addressed: Arm Jenkins
Reviewed-by: Manuel Bottini
Tested-by: Arm Jenkins
Reviewed-by: Georgios Pinitas
---
 arm_compute/core/NEON/NESymm.h                      | 47 +++++++++++++++++++++++
 .../core/NEON/kernels/NEActivationLayerKernel.h     | 10 ++++-
 arm_compute/core/QuantizationInfo.h                 |  4 +-
 .../runtime/NEON/functions/NEActivationLayer.h      |  6 +--
 4 files changed, 60 insertions(+), 7 deletions(-)

(limited to 'arm_compute')

diff --git a/arm_compute/core/NEON/NESymm.h b/arm_compute/core/NEON/NESymm.h
index 0479753426..364a317bc7 100644
--- a/arm_compute/core/NEON/NESymm.h
+++ b/arm_compute/core/NEON/NESymm.h
@@ -102,5 +102,52 @@ inline int16_t finalize_quantization_int16(int32_t in_value, int result_fixedpoi
 
     return out_s16;
 }
+
+/** Dequantize a neon vector holding 8 16-bit quantized values.
+ *
+ * @param[in] qv    Input values to be dequantized.
+ * @param[in] scale Quantization scale
+ *
+ * @return Dequantized values in a neon vector
+ */
+inline float32x4x2_t vdequantize_int16(const int16x8_t &qv, float scale)
+{
+    const float32x4_t vscale = vdupq_n_f32(scale);
+    const float32x4x2_t vdequantized_input =
+    {
+        {
+            vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(qv))), vscale),
+            vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(qv))), vscale)
+        }
+    };
+    return vdequantized_input;
+}
+
+/** Quantize a neon vector holding 8 floating point values.
+ *
+ * @param[in] qv    Input values to be quantized.
+ * @param[in] scale Quantization scale
+ *
+ * @return A neon vector holding the quantized values
+ */
+inline int16x8_t vquantize_int16(const float32x4x2_t &qv, float scale)
+{
+    const float32x4_t vinvscale = vdupq_n_f32(1.f / scale);
+
+    const int32x4x2_t rf =
+    {
+        {
+#ifdef __aarch64__
+            vcvtnq_s32_f32(vmulq_f32(qv.val[0], vinvscale)),
+            vcvtnq_s32_f32(vmulq_f32(qv.val[1], vinvscale))
+#else  //__aarch64__
+            vcvtq_s32_f32(vmulq_f32(qv.val[0], vinvscale)),
+            vcvtq_s32_f32(vmulq_f32(qv.val[1], vinvscale))
+#endif //__aarch64__
+        }
+    };
+    return vcombine_s16(vqmovn_s32(rf.val[0]), vqmovn_s32(rf.val[1]));
+}
+
 } // namespace arm_compute
 #endif // __ARM_COMPUTE_NESYMM_H__
diff --git a/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h b/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h
index 9381beaded..5e87bd76a5 100644
--- a/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h
@@ -58,7 +58,7 @@ public:
      * @note If the output tensor is a nullptr, the activation function will be performed in-place
      *
      * @param[in, out] input           Source tensor. In case of @p output tensor = nullptr, this tensor will store the result
-     *                                 of the activation function. Data types supported: QASYMM8/F16/F32.
+     *                                 of the activation function. Data types supported: QASYMM8/QSYMM16/F16/F32.
      * @param[out]     output          Destination tensor. Data type supported: same as @p input
      * @param[in]      activation_info Activation layer information.
      */
@@ -66,7 +66,7 @@ public:
     /** Static function to check if given info will lead to a valid configuration of @ref NEActivationLayerKernel
      *
      * @param[in] input    Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result
-     *                     of the activation function. Data types supported: QASYMM8/F16/F32.
+     *                     of the activation function. Data types supported: QASYMM8/QSYMM16/F16/F32.
      * @param[in] output   Destination tensor info. Data type supported: same as @p input
      * @param[in] act_info Activation layer information.
      *
@@ -97,6 +97,12 @@ private:
      */
     template <ActivationLayerInfo::ActivationFunction F, typename T>
     typename std::enable_if<std::is_same<T, qasymm8_t>::value, void>::type activation(const Window &window);
+    /** Function to apply an activation function on a tensor.
+     *
+     * @param[in] window Region on which to execute the kernel
+     */
+    template <ActivationLayerInfo::ActivationFunction F, typename T>
+    typename std::enable_if<std::is_same<T, qsymm16_t>::value, void>::type activation(const Window &window);
 
 private:
     ITensor   *_input;
diff --git a/arm_compute/core/QuantizationInfo.h b/arm_compute/core/QuantizationInfo.h
index dcfdd6ba16..1c49cd29ed 100644
--- a/arm_compute/core/QuantizationInfo.h
+++ b/arm_compute/core/QuantizationInfo.h
@@ -34,6 +34,7 @@ namespace arm_compute
 {
 using qasymm8_t = uint8_t; /**< 8 bit quantized asymmetric scalar value */
 using qsymm8_t  = int8_t;  /**< 8 bit quantized symmetric scalar value */
+using qsymm16_t = int16_t; /**< 16 bit quantized symmetric scalar value */
 
 /** Quantization info when assuming per layer quantization */
 struct UniformQuantizationInfo
@@ -350,6 +351,5 @@ inline float dequantize_qsymm16(int16_t value, const QuantizationInfo &qinfo)
 {
     return dequantize_qsymm16(value, qinfo.uniform());
 }
-
 } // namespace arm_compute
-#endif /*__ARM_COMPUTE_QUANTIZATION_INFO_H__ */
\ No newline at end of file
+#endif /*__ARM_COMPUTE_QUANTIZATION_INFO_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEActivationLayer.h b/arm_compute/runtime/NEON/functions/NEActivationLayer.h
index 588de04332..c0b5f7ab37 100644
--- a/arm_compute/runtime/NEON/functions/NEActivationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEActivationLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -44,7 +44,7 @@ public:
      * @note If the output tensor is a nullptr or is equal to the input, the activation function will be performed in-place
      *
      * @param[in, out] input           Source tensor. In case of @p output tensor = nullptr, this tensor will store the result
-     *                                 of the activation function. Data types supported: QASYMM8/F16/F32.
+     *                                 of the activation function. Data types supported: QASYMM8/QSYMM16/F16/F32.
      * @param[out]     output          Destination tensor. Data type supported: same as @p input
      * @param[in]      activation_info Activation layer parameters.
      */
@@ -52,7 +52,7 @@ public:
     /** Static function to check if given info will lead to a valid configuration of @ref NEActivationLayer
      *
      * @param[in] input    Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result
-     *                     of the activation function. Data types supported: QASYMM8/F16/F32.
+     *                     of the activation function. Data types supported: QASYMM8/QSYMM16/F16/F32.
      * @param[in] output   Destination tensor info. Data type supported: same as @p input
      * @param[in] act_info Activation layer information.
      *
--
cgit v1.2.1
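The two NESymm.h helpers added by this patch are the building blocks of the QSYMM16 activation path: vdequantize_int16 widens eight int16 lanes into two float32x4 registers and scales them back to real values, the activation is evaluated in float, and vquantize_int16 converts the result back with saturation (vqmovn_s32) and, on AArch64, round-to-nearest (vcvtnq_s32_f32). A minimal sketch of that round trip for tanh, not the kernel's actual implementation; it assumes the vtanhq_f32 helper from NEMath.h is usable here and picks an illustrative output scale of 1/32768 so tanh's [-1, 1] codomain maps onto the full int16 range:

    #include <arm_neon.h>

    #include "arm_compute/core/NEON/NEMath.h" // vtanhq_f32 (assumed available)
    #include "arm_compute/core/NEON/NESymm.h" // vdequantize_int16 / vquantize_int16

    // Hypothetical inner step of a QSYMM16 tanh kernel: processes 8 lanes per call.
    inline int16x8_t tanh_qsymm16(int16x8_t in, float in_scale, float out_scale)
    {
        using namespace arm_compute;

        // int16 -> two float32x4 registers holding the dequantized real values
        const float32x4x2_t deq = vdequantize_int16(in, in_scale);

        // Evaluate the activation in float, four lanes at a time
        const float32x4x2_t act = { { vtanhq_f32(deq.val[0]), vtanhq_f32(deq.val[1]) } };

        // Real values -> saturated int16 under the output scale
        return vquantize_int16(act, out_scale);
    }

Because the scheme is symmetric there is no zero point, which is why both helpers take only a scale; note that with scale 1/32768 an exact 1.0 saturates to 32767 (about 0.99997).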
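At the function level, the change means NEActivationLayer can be configured directly on QSYMM16 tensors for these activations. A hedged usage sketch: the shape, scales, and fill/run boilerplate are illustrative rather than taken from the patch, and the 1/32768 output scale for logistic is an assumption modeled on the library's quantized-activation conventions:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // QSYMM16 tensors carry a scale only (no zero point); values here are illustrative.
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::QSYMM16, QuantizationInfo(1.f / 4096.f)));
        dst.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::QSYMM16, QuantizationInfo(1.f / 32768.f)));

        NEActivationLayer logistic;
        logistic.configure(&src, &dst, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));

        src.allocator()->allocate();
        dst.allocator()->allocate();
        // ... fill src with quantized data ...
        logistic.run();
        return 0;
    }

Calling the static NEActivationLayer::validate() with the tensor infos and activation info beforehand reports whether a given data type, activation, and quantization combination is supported, which is the easiest way to confirm the exact output-scale requirements.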