From aaba4c626bcc6365e0108130633ce43fafe9da45 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Wed, 22 Aug 2018 16:20:21 +0100 Subject: COMPMID-1188: Add support for activation in NEBatchNormalization. Change-Id: I1e206574dac6433218db6e138adb7bf5f66a536d Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/145222 Tested-by: Jenkins Reviewed-by: Anthony Barbier --- arm_compute/core/NEON/wrapper/intrinsics/dup_n.h | 6 ++++++ arm_compute/core/NEON/wrapper/intrinsics/max.h | 6 ++++++ arm_compute/core/NEON/wrapper/intrinsics/min.h | 6 ++++++ arm_compute/core/NEON/wrapper/traits.h | 4 ++++ 4 files changed, 22 insertions(+) (limited to 'arm_compute/core/NEON/wrapper') diff --git a/arm_compute/core/NEON/wrapper/intrinsics/dup_n.h b/arm_compute/core/NEON/wrapper/intrinsics/dup_n.h index 1c07b4f3ff..4d9a7952c0 100644 --- a/arm_compute/core/NEON/wrapper/intrinsics/dup_n.h +++ b/arm_compute/core/NEON/wrapper/intrinsics/dup_n.h @@ -45,6 +45,9 @@ VDUP_N_IMPL(int16_t, int16x4_t, vdup_n, s16, traits::vector_64_tag) VDUP_N_IMPL(uint32_t, uint32x2_t, vdup_n, u32, traits::vector_64_tag) VDUP_N_IMPL(int32_t, int32x2_t, vdup_n, s32, traits::vector_64_tag) VDUP_N_IMPL(float, float32x2_t, vdup_n, f32, traits::vector_64_tag) +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +VDUP_N_IMPL(float16_t, float16x4_t, vdup_n, f16, traits::vector_64_tag) +#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC VDUP_N_IMPL(uint8_t, uint8x16_t, vdupq_n, u8, traits::vector_128_tag) VDUP_N_IMPL(int8_t, int8x16_t, vdupq_n, s8, traits::vector_128_tag) @@ -53,6 +56,9 @@ VDUP_N_IMPL(int16_t, int16x8_t, vdupq_n, s16, traits::vector_128_tag) VDUP_N_IMPL(uint32_t, uint32x4_t, vdupq_n, u32, traits::vector_128_tag) VDUP_N_IMPL(int32_t, int32x4_t, vdupq_n, s32, traits::vector_128_tag) VDUP_N_IMPL(float, float32x4_t, vdupq_n, f32, traits::vector_128_tag) +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +VDUP_N_IMPL(float16_t, float16x8_t, vdupq_n, f16, traits::vector_128_tag) +#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC #undef 
VDUP_N_IMPL } // namespace wrapper diff --git a/arm_compute/core/NEON/wrapper/intrinsics/max.h b/arm_compute/core/NEON/wrapper/intrinsics/max.h index 1a8e95de87..05ed051c62 100644 --- a/arm_compute/core/NEON/wrapper/intrinsics/max.h +++ b/arm_compute/core/NEON/wrapper/intrinsics/max.h @@ -43,6 +43,9 @@ VMAX_IMPL(int16_t, int16x4_t, vmax, s16) VMAX_IMPL(uint32_t, uint32x2_t, vmax, u32) VMAX_IMPL(int32_t, int32x2_t, vmax, s32) VMAX_IMPL(float, float32x2_t, vmax, f32) +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +VMAX_IMPL(float16_t, float16x4_t, vmax, f16) +#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC VMAX_IMPL(uint8_t, uint8x16_t, vmaxq, u8) VMAX_IMPL(int8_t, int8x16_t, vmaxq, s8) @@ -51,6 +54,9 @@ VMAX_IMPL(int16_t, int16x8_t, vmaxq, s16) VMAX_IMPL(uint32_t, uint32x4_t, vmaxq, u32) VMAX_IMPL(int32_t, int32x4_t, vmaxq, s32) VMAX_IMPL(float, float32x4_t, vmaxq, f32) +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +VMAX_IMPL(float16_t, float16x8_t, vmaxq, f16) +#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC #undef VMAX_IMPL } // namespace wrapper diff --git a/arm_compute/core/NEON/wrapper/intrinsics/min.h b/arm_compute/core/NEON/wrapper/intrinsics/min.h index ae79631190..5ea2068f24 100644 --- a/arm_compute/core/NEON/wrapper/intrinsics/min.h +++ b/arm_compute/core/NEON/wrapper/intrinsics/min.h @@ -43,6 +43,9 @@ VMIN_IMPL(int16_t, int16x4_t, vmin, s16) VMIN_IMPL(uint32_t, uint32x2_t, vmin, u32) VMIN_IMPL(int32_t, int32x2_t, vmin, s32) VMIN_IMPL(float, float32x2_t, vmin, f32) +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +VMIN_IMPL(float16_t, float16x4_t, vmin, f16) +#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC VMIN_IMPL(uint8_t, uint8x16_t, vminq, u8) VMIN_IMPL(int8_t, int8x16_t, vminq, s8) @@ -51,6 +54,9 @@ VMIN_IMPL(int16_t, int16x8_t, vminq, s16) VMIN_IMPL(uint32_t, uint32x4_t, vminq, u32) VMIN_IMPL(int32_t, int32x4_t, vminq, s32) VMIN_IMPL(float, float32x4_t, vminq, f32) +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +VMIN_IMPL(float16_t, float16x8_t, vminq, f16) 
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC #undef VMIN_IMPL } // namespace wrapper diff --git a/arm_compute/core/NEON/wrapper/traits.h b/arm_compute/core/NEON/wrapper/traits.h index 495ddbb1af..5cd6086c0c 100644 --- a/arm_compute/core/NEON/wrapper/traits.h +++ b/arm_compute/core/NEON/wrapper/traits.h @@ -62,6 +62,10 @@ template <> struct neon_vector<uint64_t, 2>{ using type = uint64x2_t; using tag_ template <> struct neon_vector<int64_t, 2>{ using type = int64x2_t; using tag_type = vector_128_tag; }; template <> struct neon_vector<float, 2>{ using type = float32x2_t; using tag_type = vector_64_tag; }; template <> struct neon_vector<float, 4>{ using type = float32x4_t; using tag_type = vector_128_tag; }; +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +template <> struct neon_vector<float16_t, 4>{ using type = float16x4_t; using tag_type = vector_64_tag; }; +template <> struct neon_vector<float16_t, 8>{ using type = float16x8_t; using tag_type = vector_128_tag; }; +#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC #endif /* DOXYGEN_SKIP_THIS */ /** Helper type template to get the type of a neon vector */ -- cgit v1.2.1