From 903f8cca78502a9e3835e6ec42caa1f816274600 Mon Sep 17 00:00:00 2001 From: SiCong Li Date: Thu, 27 Aug 2020 10:17:10 +0100 Subject: COMPMID-3580 Add S32 support to NEArithmeticSubtraction * Fix convert policy validate logics and add missing validate test * Add S32 support to NEArithmeticSubtraction and NEArithmeticSubtractionKernel * Add S32 validation tests Change-Id: I1b6cb15b024613c202fe9f17747a83da43a5ddcf Signed-off-by: SiCong Li Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3908 Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins Reviewed-by: Michele Di Giorgio --- .../NEON/kernels/NEArithmeticSubtractionKernel.h | 22 +++++++++++----------- arm_compute/core/NEON/wrapper/scalar/sub.h | 7 +++++++ 2 files changed, 18 insertions(+), 11 deletions(-) (limited to 'arm_compute/core/NEON') diff --git a/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h b/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h index e3a41a2b1c..7d00d1f7d0 100644 --- a/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h +++ b/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h @@ -63,12 +63,13 @@ public: * - (S16,U8) -> S16 * - (U8,S16) -> S16 * - (S16,S16) -> S16 + * - (S32,S32) -> S32 * - (F16,F16) -> F16 * - (F32,F32) -> F32 * - * @param[in] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32 - * @param[in] input2 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32 - * @param[out] output The output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32. + * @param[in] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32 + * @param[in] input2 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32 + * @param[out] output The output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32. * @param[in] policy Overflow policy. Convert policy cannot be WRAP if datatype is quantized. */ void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, ConvertPolicy policy); @@ -83,14 +84,13 @@ public: * - (S16,U8) -> S16 * - (U8,S16) -> S16 * - (S16,S16) -> S16 + * - (S32,S32) -> S32 * - (F16,F16) -> F16 * - (F32,F32) -> F32 * - * @note Convert policy cannot be WRAP if datatype is QASYMM8 - * - * @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32 - * @param[in] input2 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32 - * @param[in] output The output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32. + * @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32 + * @param[in] input2 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32 + * @param[in] output The output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32. * @param[in] policy Policy to use to handle overflow. Convert policy cannot be WRAP if datatype is quantized. * * @return a status @@ -103,9 +103,9 @@ public: private: /** Common signature for all the specialised sub functions * - * @param[in] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32 - * @param[in] input2 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32 - * @param[out] output The output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32. + * @param[in] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32 + * @param[in] input2 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32 + * @param[out] output The output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32. * @param[in] window Region on which to execute the kernel. * @param[in] is_sat Flag to indicate if the policy is SATURATE. */ diff --git a/arm_compute/core/NEON/wrapper/scalar/sub.h b/arm_compute/core/NEON/wrapper/scalar/sub.h index 9abda26224..1fe51d75fc 100644 --- a/arm_compute/core/NEON/wrapper/scalar/sub.h +++ b/arm_compute/core/NEON/wrapper/scalar/sub.h @@ -44,6 +44,13 @@ inline int16_t sub_sat(const int16_t &a, const int16_t &b) return vget_lane_s16(vqsub_s16(va, vb), 0); } +inline int32_t sub_sat(const int32_t &a, const int32_t &b) +{ + const int32x2_t va = { a, 0 }; + const int32x2_t vb = { b, 0 }; + return vget_lane_s32(vqsub_s32(va, vb), 0); +} + inline float sub_sat(const float &a, const float &b) { // No notion of saturation exists in floating point -- cgit v1.2.1