From 3737c7934da929003bda446291489cf352e43751 Mon Sep 17 00:00:00 2001
From: Giorgio Arena
Date: Mon, 23 Nov 2020 17:47:23 +0000
Subject: COMPMID-3968 30% regression on FSSD v1 25 Grayscale

Signed-off-by: Giorgio Arena
Change-Id: Ib1ecd7aa10fec0b7e2b3d929e212c1af34c0f58d
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4533
Tested-by: Arm Jenkins
Reviewed-by: Georgios Pinitas
Reviewed-by: Gian Marco Iodice
Comments-Addressed: Arm Jenkins
---
 src/core/NEON/wrapper/intrinsics/mla.h         | 16 ++++++++++++++++
 src/core/NEON/wrapper/intrinsics/reinterpret.h |  2 +-
 2 files changed, 17 insertions(+), 1 deletion(-)

(limited to 'src/core/NEON/wrapper')

diff --git a/src/core/NEON/wrapper/intrinsics/mla.h b/src/core/NEON/wrapper/intrinsics/mla.h
index 2b38b34137..9fb5a08f9b 100644
--- a/src/core/NEON/wrapper/intrinsics/mla.h
+++ b/src/core/NEON/wrapper/intrinsics/mla.h
@@ -66,6 +66,22 @@ VMLA_IMPL2(float16x8_t, float16x8_t, vaddq, vmulq, f16)
 #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 
 #undef VMLA_IMPL
+
+#define VMLAL_IMPL(vtype_in, vtype_out, postfix)                                     \
+    inline vtype_out vmlal(const vtype_out &a, const vtype_in &b, const vtype_in &c) \
+    {                                                                                \
+        return vmlal_##postfix(a, b, c);                                             \
+    }
+
+VMLAL_IMPL(uint8x8_t, uint16x8_t, u8)
+VMLAL_IMPL(int8x8_t, int16x8_t, s8)
+VMLAL_IMPL(uint16x4_t, uint32x4_t, u16)
+VMLAL_IMPL(int16x4_t, int32x4_t, s16)
+VMLAL_IMPL(uint32x2_t, uint64x2_t, u32)
+VMLAL_IMPL(int32x2_t, int64x2_t, s32)
+
+#undef VMLAL_IMPL
+
 } // namespace wrapper
 } // namespace arm_compute
 #endif /* ARM_COMPUTE_WRAPPER_MLA_H */
diff --git a/src/core/NEON/wrapper/intrinsics/reinterpret.h b/src/core/NEON/wrapper/intrinsics/reinterpret.h
index 0c26cd9008..cf00a4aceb 100644
--- a/src/core/NEON/wrapper/intrinsics/reinterpret.h
+++ b/src/core/NEON/wrapper/intrinsics/reinterpret.h
@@ -42,7 +42,7 @@ namespace wrapper
 }
 
 VREINTERPRET_IMPL(int16x4_t, uint16x4_t, vreinterpret, s16, u16)
-
+VREINTERPRET_IMPL(int16x8_t, uint16x8_t, vreinterpretq, s16, u16)
 VREINTERPRET_IMPL(int32x4_t, uint32x4_t, vreinterpretq, s32, u32)
 } // namespace wrapper
 } // namespace arm_compute
-- 
cgit v1.2.1