about | summary | refs | log | tree | commit | diff
path: root/src/core/NEON/wrapper
diff options
context:
space:
mode:
author: Giorgio Arena <giorgio.arena@arm.com> 2020-11-23 17:47:23 +0000
committer: Giorgio Arena <giorgio.arena@arm.com> 2020-12-14 13:58:26 +0000
commit: 3737c7934da929003bda446291489cf352e43751 (patch)
tree: c13c5bf2e5b3d3610d3b2b7aba74b6d68ec76c99 /src/core/NEON/wrapper
parent: ea7de7babc319e2fa31c5e1c986e48d6c5370689 (diff)
download: ComputeLibrary-3737c7934da929003bda446291489cf352e43751.tar.gz
COMPMID-3968 30% regression on FSSD v1 25 Grayscale
Signed-off-by: Giorgio Arena <giorgio.arena@arm.com>
Change-Id: Ib1ecd7aa10fec0b7e2b3d929e212c1af34c0f58d
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4533
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON/wrapper')
-rw-r--r-- src/core/NEON/wrapper/intrinsics/mla.h 16
-rw-r--r-- src/core/NEON/wrapper/intrinsics/reinterpret.h 2
2 files changed, 17 insertions, 1 deletions
diff --git a/src/core/NEON/wrapper/intrinsics/mla.h b/src/core/NEON/wrapper/intrinsics/mla.h
index 2b38b34137..9fb5a08f9b 100644
--- a/src/core/NEON/wrapper/intrinsics/mla.h
+++ b/src/core/NEON/wrapper/intrinsics/mla.h
@@ -66,6 +66,22 @@ VMLA_IMPL2(float16x8_t, float16x8_t, vaddq, vmulq, f16)
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
#undef VMLA_IMPL
+
+#define VMLAL_IMPL(vtype_in, vtype_out, postfix) \
+ inline vtype_out vmlal(const vtype_out &a, const vtype_in &b, const vtype_in &c) \
+ { \
+ return vmlal_##postfix(a, b, c); \
+ }
+
+VMLAL_IMPL(uint8x8_t, uint16x8_t, u8)
+VMLAL_IMPL(int8x8_t, int16x8_t, s8)
+VMLAL_IMPL(uint16x4_t, uint32x4_t, u16)
+VMLAL_IMPL(int16x4_t, int32x4_t, s16)
+VMLAL_IMPL(uint32x2_t, uint64x2_t, u32)
+VMLAL_IMPL(int32x2_t, int64x2_t, s32)
+
+#undef VMLAL_IMPL
+
} // namespace wrapper
} // namespace arm_compute
#endif /* ARM_COMPUTE_WRAPPER_MLA_H */
diff --git a/src/core/NEON/wrapper/intrinsics/reinterpret.h b/src/core/NEON/wrapper/intrinsics/reinterpret.h
index 0c26cd9008..cf00a4aceb 100644
--- a/src/core/NEON/wrapper/intrinsics/reinterpret.h
+++ b/src/core/NEON/wrapper/intrinsics/reinterpret.h
@@ -42,7 +42,7 @@ namespace wrapper
}
VREINTERPRET_IMPL(int16x4_t, uint16x4_t, vreinterpret, s16, u16)
-
+VREINTERPRET_IMPL(int16x8_t, uint16x8_t, vreinterpretq, s16, u16)
VREINTERPRET_IMPL(int32x4_t, uint32x4_t, vreinterpretq, s32, u32)
} // namespace wrapper
} // namespace arm_compute