diff options
author | Michalis Spyrou <michalis.spyrou@arm.com> | 2018-11-22 11:22:18 +0000 |
---|---|---|
committer | Georgios Pinitas <georgios.pinitas@arm.com> | 2018-11-23 17:02:27 +0000 |
commit | 0c71d0ba75a11720e39e2a7163e993d51350683d (patch) | |
tree | 089f7b293802944a7672c85f637141aad0b55c75 /arm_compute/core/NEON/wrapper/intrinsics/mla.h | |
parent | aaa27189e0e75c3ebad57854ac8901d0140677ac (diff) | |
download | ComputeLibrary-0c71d0ba75a11720e39e2a7163e993d51350683d.tar.gz |
COMPMID-1647 NENormalizationLayer IN_MAP_2D support for NHWC for FP32/FP16
Change-Id: Id74cc7ba8e5cabee6acd3798d4779f88b1f00a9b
Diffstat (limited to 'arm_compute/core/NEON/wrapper/intrinsics/mla.h')
-rw-r--r-- | arm_compute/core/NEON/wrapper/intrinsics/mla.h | 13 |
1 file changed, 13 insertions, 0 deletions
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/mla.h b/arm_compute/core/NEON/wrapper/intrinsics/mla.h
index 32a650b57f..db6d7b957a 100644
--- a/arm_compute/core/NEON/wrapper/intrinsics/mla.h
+++ b/arm_compute/core/NEON/wrapper/intrinsics/mla.h
@@ -35,6 +35,13 @@ namespace wrapper
     { \
         return prefix##_##postfix(a, b, c); \
     }
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+#define VMLA_IMPL2(stype, vtype, prefix1, prefix2, postfix) \
+    inline vtype vmla(const vtype &a, const vtype &b, const vtype &c) \
+    { \
+        return prefix1##_##postfix(a, prefix2##_##postfix(b, c)); \
+    }
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 
 VMLA_IMPL(uint8x8_t, uint8x8_t, vmla, u8)
 VMLA_IMPL(int8x8_t, int8x8_t, vmla, s8)
@@ -43,6 +50,9 @@ VMLA_IMPL(int16x4_t, int16x4_t, vmla, s16)
 VMLA_IMPL(uint32x2_t, uint32x2_t, vmla, u32)
 VMLA_IMPL(int32x2_t, int32x2_t, vmla, s32)
 VMLA_IMPL(float32x2_t, float32x2_t, vmla, f32)
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+VMLA_IMPL2(float16x4_t, float16x4_t, vadd, vmul, f16)
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 
 VMLA_IMPL(uint8x16_t, uint8x16_t, vmlaq, u8)
 VMLA_IMPL(int8x16_t, int8x16_t, vmlaq, s8)
@@ -51,6 +61,9 @@ VMLA_IMPL(int16x8_t, int16x8_t, vmlaq, s16)
 VMLA_IMPL(uint32x4_t, uint32x4_t, vmlaq, u32)
 VMLA_IMPL(int32x4_t, int32x4_t, vmlaq, s32)
 VMLA_IMPL(float32x4_t, float32x4_t, vmlaq, f32)
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+VMLA_IMPL2(float16x8_t, float16x8_t, vaddq, vmulq, f16)
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 
 #undef VMLA_IMPL
 } // namespace wrapper