aboutsummaryrefslogtreecommitdiff
path: root/arm_compute/core/NEON
diff options
context:
space:
mode:
authorSiCong Li <sicong.li@arm.com>2020-08-27 10:17:10 +0100
committerSiCong Li <sicong.li@arm.com>2020-09-07 10:57:52 +0000
commit903f8cca78502a9e3835e6ec42caa1f816274600 (patch)
treee8e104990b0b718550797bfe7c7c67c2a722e849 /arm_compute/core/NEON
parent2d2213920ba5ab95052a557dd20594a6ccb7d562 (diff)
downloadComputeLibrary-903f8cca78502a9e3835e6ec42caa1f816274600.tar.gz
COMPMID-3580 Add S32 support to NEArithmeticSubtraction
* Fix convert policy validation logic and add missing validation test * Add S32 support to NEArithmeticSubtraction and NEArithmeticSubtractionKernel * Add S32 validation tests Change-Id: I1b6cb15b024613c202fe9f17747a83da43a5ddcf Signed-off-by: SiCong Li <sicong.li@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3908 Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Diffstat (limited to 'arm_compute/core/NEON')
-rw-r--r--arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h22
-rw-r--r--arm_compute/core/NEON/wrapper/scalar/sub.h7
2 files changed, 18 insertions, 11 deletions
diff --git a/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h b/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h
index e3a41a2b1c..7d00d1f7d0 100644
--- a/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h
+++ b/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h
@@ -63,12 +63,13 @@ public:
* - (S16,U8) -> S16
* - (U8,S16) -> S16
* - (S16,S16) -> S16
+ * - (S32,S32) -> S32
* - (F16,F16) -> F16
* - (F32,F32) -> F32
*
- * @param[in] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32
- * @param[in] input2 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32
- * @param[out] output The output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32.
+ * @param[in] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32
+ * @param[in] input2 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32
+ * @param[out] output The output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32.
* @param[in] policy Overflow policy. Convert policy cannot be WRAP if datatype is quantized.
*/
void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, ConvertPolicy policy);
@@ -83,14 +84,13 @@ public:
* - (S16,U8) -> S16
* - (U8,S16) -> S16
* - (S16,S16) -> S16
+ * - (S32,S32) -> S32
* - (F16,F16) -> F16
* - (F32,F32) -> F32
*
- * @note Convert policy cannot be WRAP if datatype is QASYMM8
- *
- * @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32
- * @param[in] input2 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32
- * @param[in] output The output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32.
+ * @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32
+ * @param[in] input2 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32
+ * @param[in] output The output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32.
* @param[in] policy Policy to use to handle overflow. Convert policy cannot be WRAP if datatype is quantized.
*
* @return a status
@@ -103,9 +103,9 @@ public:
private:
/** Common signature for all the specialised sub functions
*
- * @param[in] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32
- * @param[in] input2 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32
- * @param[out] output The output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32.
+ * @param[in] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32
+ * @param[in] input2 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32
+ * @param[out] output The output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32.
* @param[in] window Region on which to execute the kernel.
* @param[in] is_sat Flag to indicate if the policy is SATURATE.
*/
diff --git a/arm_compute/core/NEON/wrapper/scalar/sub.h b/arm_compute/core/NEON/wrapper/scalar/sub.h
index 9abda26224..1fe51d75fc 100644
--- a/arm_compute/core/NEON/wrapper/scalar/sub.h
+++ b/arm_compute/core/NEON/wrapper/scalar/sub.h
@@ -44,6 +44,13 @@ inline int16_t sub_sat(const int16_t &a, const int16_t &b)
return vget_lane_s16(vqsub_s16(va, vb), 0);
}
+inline int32_t sub_sat(const int32_t &a, const int32_t &b) // 32-bit signed overload of sub_sat: returns a - b computed through the vqsub_s32 intrinsic
+{
+ const int32x2_t va = { a, 0 }; // lane 0 carries a; lane 1 is zero filler and is never read back
+ const int32x2_t vb = { b, 0 }; // same layout for b
+ return vget_lane_s32(vqsub_s32(va, vb), 0); // vqsub_s32 is NEON's saturating subtract; only lane 0 of the result is returned
+}
+
inline float sub_sat(const float &a, const float &b)
{
// No notion of saturation exists in floating point