path: root/arm_compute/core/NEON/wrapper/intrinsics/mla.h
author    Michalis Spyrou <michalis.spyrou@arm.com>    2018-11-22 11:22:18 +0000
committer Georgios Pinitas <georgios.pinitas@arm.com>  2018-11-23 17:02:27 +0000
commit    0c71d0ba75a11720e39e2a7163e993d51350683d (patch)
tree      089f7b293802944a7672c85f637141aad0b55c75 /arm_compute/core/NEON/wrapper/intrinsics/mla.h
parent    aaa27189e0e75c3ebad57854ac8901d0140677ac (diff)
COMPMID-1647 NENormalizationLayer IN_MAP_2D support for NHWC for FP32/FP16
Change-Id: Id74cc7ba8e5cabee6acd3798d4779f88b1f00a9b
Diffstat (limited to 'arm_compute/core/NEON/wrapper/intrinsics/mla.h')
-rw-r--r--  arm_compute/core/NEON/wrapper/intrinsics/mla.h  13
1 file changed, 13 insertions(+), 0 deletions(-)
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/mla.h b/arm_compute/core/NEON/wrapper/intrinsics/mla.h
index 32a650b57f..db6d7b957a 100644
--- a/arm_compute/core/NEON/wrapper/intrinsics/mla.h
+++ b/arm_compute/core/NEON/wrapper/intrinsics/mla.h
@@ -35,6 +35,13 @@ namespace wrapper
{ \
return prefix##_##postfix(a, b, c); \
}
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+#define VMLA_IMPL2(stype, vtype, prefix1, prefix2, postfix) \
+ inline vtype vmla(const vtype &a, const vtype &b, const vtype &c) \
+ { \
+ return prefix1##_##postfix(a, prefix2##_##postfix(b, c)); \
+ }
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
VMLA_IMPL(uint8x8_t, uint8x8_t, vmla, u8)
VMLA_IMPL(int8x8_t, int8x8_t, vmla, s8)
@@ -43,6 +50,9 @@ VMLA_IMPL(int16x4_t, int16x4_t, vmla, s16)
VMLA_IMPL(uint32x2_t, uint32x2_t, vmla, u32)
VMLA_IMPL(int32x2_t, int32x2_t, vmla, s32)
VMLA_IMPL(float32x2_t, float32x2_t, vmla, f32)
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+VMLA_IMPL2(float16x4_t, float16x4_t, vadd, vmul, f16)
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
VMLA_IMPL(uint8x16_t, uint8x16_t, vmlaq, u8)
VMLA_IMPL(int8x16_t, int8x16_t, vmlaq, s8)
@@ -51,6 +61,9 @@ VMLA_IMPL(int16x8_t, int16x8_t, vmlaq, s16)
VMLA_IMPL(uint32x4_t, uint32x4_t, vmlaq, u32)
VMLA_IMPL(int32x4_t, int32x4_t, vmlaq, s32)
VMLA_IMPL(float32x4_t, float32x4_t, vmlaq, f32)
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+VMLA_IMPL2(float16x8_t, float16x8_t, vaddq, vmulq, f16)
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
#undef VMLA_IMPL
} // namespace wrapper
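
For reference, below is a minimal standalone sketch (not part of the patch) of what the new q-register overload boils down to, together with a hypothetical usage. vaddq_f16, vmulq_f16, vdupq_n_f16 and vgetq_lane_f16 are standard Arm NEON intrinsics available when __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is defined; the names vmla_f16_sketch and vmla_first_lane are illustrative only and do not appear in the library.

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
#include <arm_neon.h>

// Roughly what VMLA_IMPL2(float16x8_t, float16x8_t, vaddq, vmulq, f16) expands to:
// a multiply-accumulate a + (b * c) composed from separate multiply and add
// intrinsics instead of a single fused instruction.
inline float16x8_t vmla_f16_sketch(const float16x8_t &a, const float16x8_t &b, const float16x8_t &c)
{
    return vaddq_f16(a, vmulq_f16(b, c));
}

// Hypothetical usage: broadcast three fp16 scalars, accumulate, read back lane 0.
inline float16_t vmla_first_lane(float16_t a0, float16_t b0, float16_t c0)
{
    const float16x8_t a = vdupq_n_f16(a0);
    const float16x8_t b = vdupq_n_f16(b0);
    const float16x8_t c = vdupq_n_f16(c0);
    return vgetq_lane_f16(vmla_f16_sketch(a, b, c), 0);
}
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC

The d-register overload generated from VMLA_IMPL2(float16x4_t, float16x4_t, vadd, vmul, f16) is identical in structure, operating on float16x4_t via vadd_f16 and vmul_f16.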