aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/wrapper/intrinsics/sub.h
diff options
context:
space:
mode:
author Omar Al Khatib <omar.alkhatib@arm.com> 2022-11-01 17:01:24 +0000
committer Omar Al Khatib <omar.alkhatib@arm.com> 2022-11-07 15:45:21 +0000
commit 605a928960c0c36384925a9064d12addc8f43a41 (patch)
tree 71896f7d4683751c7c87c5606b95e0c9c421384a /src/core/NEON/wrapper/intrinsics/sub.h
parent a2b131bf4680f83b10a7e3544b6183279d8c2691 (diff)
download ComputeLibrary-605a928960c0c36384925a9064d12addc8f43a41.tar.gz
Optimize CPU mul layer on quantized data
Resolves : [COMPMID-5461] Signed-off-by: Omar Al Khatib <omar.alkhatib@arm.com> Change-Id: I89b99d267c32b00ef44f9bb6e7c714dfe4a0d29d Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8420 Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Gunes Bayir <gunes.bayir@arm.com> Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON/wrapper/intrinsics/sub.h')
-rw-r--r-- src/core/NEON/wrapper/intrinsics/sub.h 17
1 files changed, 16 insertions, 1 deletions
diff --git a/src/core/NEON/wrapper/intrinsics/sub.h b/src/core/NEON/wrapper/intrinsics/sub.h
index 475986d0f6..20436714ef 100644
--- a/src/core/NEON/wrapper/intrinsics/sub.h
+++ b/src/core/NEON/wrapper/intrinsics/sub.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2020, 2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -98,6 +98,21 @@ VQSUB_IMPL(float16x8_t, float16x8_t, vsubq, f16)
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
#undef VQSUB_IMPL
+#define VSUBL_IMPL(rtype, vtype, prefix, postfix) \
+ inline rtype vsubl(const vtype &a, const vtype &b) \
+ { \
+ return prefix##_##postfix(a, b); /* token-pasted call, e.g. vsubl_s8(a, b) */ \
+ }
+
+VSUBL_IMPL(int16x8_t, int8x8_t, vsubl, s8)
+VSUBL_IMPL(int32x4_t, int16x4_t, vsubl, s16)
+VSUBL_IMPL(int64x2_t, int32x2_t, vsubl, s32)
+VSUBL_IMPL(uint16x8_t, uint8x8_t, vsubl, u8)
+VSUBL_IMPL(uint32x4_t, uint16x4_t, vsubl, u16)
+VSUBL_IMPL(uint64x2_t, uint32x2_t, vsubl, u32)
+
+#undef VSUBL_IMPL // Drop the generator macro so it does not leak to includers.
+
} // namespace wrapper
} // namespace arm_compute
#endif /* ARM_COMPUTE_WRAPPER_SUB_H */