aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/wrapper/intrinsics/shr.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/NEON/wrapper/intrinsics/shr.h')
-rw-r--r-- src/core/NEON/wrapper/intrinsics/shr.h | 28
1 files changed, 22 insertions, 6 deletions
diff --git a/src/core/NEON/wrapper/intrinsics/shr.h b/src/core/NEON/wrapper/intrinsics/shr.h
index d740091464..e41e9b8b31 100644
--- a/src/core/NEON/wrapper/intrinsics/shr.h
+++ b/src/core/NEON/wrapper/intrinsics/shr.h
@@ -25,21 +25,19 @@
#ifndef ARM_COMPUTE_WRAPPER_SHR_H
#define ARM_COMPUTE_WRAPPER_SHR_H
-#include <type_traits>
#include <arm_neon.h>
+#include <type_traits>
namespace arm_compute
{
namespace wrapper
{
-
// Generates a function template vqrshrn<b>(a) for one vector type pair.
//   half_vtype : return type of the generated function
//   vtype      : type of the argument `a` (taken by const reference)
//   prefix, postfix : pasted together as prefix_postfix to name the function
//                     that is called, e.g. (vqrshrn_n, s16) -> vqrshrn_n_s16
// The shift amount `b` is a template parameter rather than a function argument,
// so it is forwarded to prefix_postfix as a compile-time constant.
// NOTE(review): per Arm's intrinsic naming, vqrshrn_n_* should be a saturating
// rounding shift right with narrowing - confirm against the NEON reference.
#define VQRSHRN_IMPL(half_vtype, vtype, prefix, postfix) \
template <int b> \
inline half_vtype vqrshrn(const vtype &a) \
{ \
return prefix##_##postfix(a, b); \
}
-
VQRSHRN_IMPL(int8x8_t, int16x8_t, vqrshrn_n, s16)
VQRSHRN_IMPL(uint8x8_t, uint16x8_t, vqrshrn_n, u16)
VQRSHRN_IMPL(int16x4_t, int32x4_t, vqrshrn_n, s32)
@@ -77,20 +75,38 @@ VQRSHRN_SCALAR_IMPL(uint32_t, uint64_t, vqrshrnd_n, u64)
{ \
return prefix_signed##_##postfix(a, b); \
} \
- \
+ \
template <int b, typename T> \
inline typename std::enable_if<std::is_integral<T>::value && !std::is_signed<T>::value, u##half_vtype>::type \
vqrshrn_ex(const vtype &a) \
{ \
return prefix_unsigned##_##postfix(a, b); \
}
-
VQRSHRN_EX_IMPL(int8x8_t, int16x8_t, vqrshrn_n, vqrshrun_n, s16)
VQRSHRN_EX_IMPL(int16x4_t, int32x4_t, vqrshrn_n, vqrshrun_n, s32)
VQRSHRN_EX_IMPL(int32x2_t, int64x2_t, vqrshrn_n, vqrshrun_n, s64)
-
#undef VQRSHRN_EX_IMPL
+#define VSHR_IMPL(vtype, prefix, postfix) /* emits vshr_n<b>(a), which forwards to prefix_postfix(a, b) */ \
+ template <int b> /* shift amount is a compile-time template argument */ \
+ inline vtype vshr_n(const vtype &a) /* argument and return share the same vector type */ \
+ { \
+ return prefix##_##postfix(a, b); \
+ }
+VSHR_IMPL(uint8x8_t, vshr_n, u8) // vshr_n<b>(const uint8x8_t &) -> vshr_n_u8(a, b)
+VSHR_IMPL(int8x8_t, vshr_n, s8) // vshr_n<b>(const int8x8_t &) -> vshr_n_s8(a, b)
+#undef VSHR_IMPL // generator macro is removed once the instantiations above exist
+
+#define VSHRQ_IMPL(vtype, prefix, postfix) /* emits vshrq_n<b>(a), which forwards to prefix_postfix(a, b) */ \
+ template <int b> /* shift amount is a compile-time template argument */ \
+ inline vtype vshrq_n(const vtype &a) /* argument and return share the same vector type */ \
+ { \
+ return prefix##_##postfix(a, b); \
+ }
+VSHRQ_IMPL(uint32x4_t, vshrq_n, u32) // vshrq_n<b>(const uint32x4_t &) -> vshrq_n_u32(a, b)
+VSHRQ_IMPL(int32x4_t, vshrq_n, s32) // vshrq_n<b>(const int32x4_t &) -> vshrq_n_s32(a, b)
+#undef VSHRQ_IMPL // generator macro is removed once the instantiations above exist
+
#ifdef __aarch64__
#define VQRSHRN_EX_SCALAR_IMPL(half_vtype, vtype, prefix_signed, prefix_unsigned, postfix) \
template <int b, typename T> \