From dcf3c7e1591cfac19ee2b800141df3b3fe45062d Mon Sep 17 00:00:00 2001 From: Sang-Hoon Park Date: Thu, 4 Mar 2021 17:03:46 +0000 Subject: Move utility functions to NE/SVEMath To avoid unused function warnings when only partial data types are selected, the definition of functions are moved. Partially Resolves: COMPMID-4282 Change-Id: Ic30ddd3f2c88cac5978d27e5f4ada3639b5a04e5 Signed-off-by: Sang-Hoon Park Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5215 Reviewed-by: Georgios Pinitas Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins --- src/core/NEON/NEMath.h | 20 ++++++- src/core/NEON/NEMath.inl | 30 ++++++++++- src/core/NEON/SVEMath.h | 12 +++++ src/core/NEON/SVEMath.inl | 68 ++++++++++++++++++++++++ src/core/cpu/kernels/softmax/impl/NEON/list.h | 37 ------------- src/core/cpu/kernels/softmax/impl/SVE/list.h | 76 --------------------------- 6 files changed, 128 insertions(+), 115 deletions(-) diff --git a/src/core/NEON/NEMath.h b/src/core/NEON/NEMath.h index 877ffb2827..13484c9c15 100644 --- a/src/core/NEON/NEMath.h +++ b/src/core/NEON/NEMath.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 Arm Limited. + * Copyright (c) 2016-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -205,6 +205,24 @@ void convert_float32x4x4_to_uint8x16(const float32x4x4_t &in, uint8x16_t &out); */ void convert_float32x4x4_to_int8x16(const float32x4x4_t &in, int8x16_t &out); +/** Converts from float vector to integer vector + * + * @param[in] in Float vector to converted + * + * @return The converted integer vector + */ +template +int_vec_type convert_float_to_int(const float_vec_type &in); + +/** Converts from integer vector to float vector + * + * @param[in] in Integer vector to converted + * + * @return The converted float vector + */ +template +float_vec_type convert_int_to_float(const int_vec_type &in); + /** Calculate sine. * * @param[in] val Input vector value in radians, F32 format. diff --git a/src/core/NEON/NEMath.inl b/src/core/NEON/NEMath.inl index 1f5cb56dfc..da9d038139 100644 --- a/src/core/NEON/NEMath.inl +++ b/src/core/NEON/NEMath.inl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 Arm Limited. + * Copyright (c) 2016-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -387,6 +387,34 @@ inline void convert_float32x4x4_to_int8x16(const float32x4x4_t &in, int8x16_t &o out = vcombine_s8(vqmovn_s16(low), vqmovn_s16(high)); } +template <> +inline uint8x16_t convert_float_to_int(const float32x4x4_t &in) +{ + uint8x16_t out; + convert_float32x4x4_to_uint8x16(in, out); + return out; +} + +template <> +inline float32x4x4_t convert_int_to_float(const uint8x16_t &in) +{ + return convert_uint8x16_to_float32x4x4(in); +} + +template <> +inline int8x16_t convert_float_to_int(const float32x4x4_t &in) +{ + int8x16_t out; + convert_float32x4x4_to_int8x16(in, out); + return out; +} + +template <> +inline float32x4x4_t convert_int_to_float(const int8x16_t &in) +{ + return convert_int8x16_to_float32x4x4(in); +} + #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC /** Exponent polynomial coefficients */ /** Logarithm polynomial coefficients */ diff --git a/src/core/NEON/SVEMath.h b/src/core/NEON/SVEMath.h index 075cb457e3..b73043a435 100644 --- a/src/core/NEON/SVEMath.h +++ b/src/core/NEON/SVEMath.h @@ -171,6 +171,18 @@ svfloat32_t svpow_f32_z(svbool_t pg, svfloat32_t a, svfloat32_t b); */ svfloat16_t svpow_f16_z(svbool_t pg, svfloat16_t a, svfloat16_t b); +/** Convert and pack four 32-bit float vectors into an 8-bit integer vector + * + * @param[in] in_0 The first float vector + * @param[in] in_1 The second float vector + * @param[in] in_2 The third float vector + * @param[in] in_3 The fourth float vector + * + * @return The converted integer vector + */ +template +int_vec_type convert_float_to_int(const svfloat32_t &in_0, const svfloat32_t &in_1, const svfloat32_t &in_2, const svfloat32_t &in_3); + } // namespace arm_compute #include "src/core/NEON/SVEMath.inl" #endif /* defined(__ARM_FEATURE_SVE) */ diff --git a/src/core/NEON/SVEMath.inl b/src/core/NEON/SVEMath.inl index cf7f9f5a95..a851b8a07b 100644 --- a/src/core/NEON/SVEMath.inl +++ b/src/core/NEON/SVEMath.inl @@ -325,5 +325,73 @@ inline svfloat16_t svpow_f16_z(svbool_t pg, svfloat16_t a, svfloat16_t b) #endif /* defined(__ARM_FEATURE_SVE2) */ } +template <> +inline svuint8_t convert_float_to_int(const svfloat32_t &in_0, const svfloat32_t &in_1, const svfloat32_t &in_2, const svfloat32_t &in_3) +{ + svuint8_t out; + const auto all_true_pg = svptrue_b32(); + auto tmp_0 = svcvt_u32_f32_z(all_true_pg, in_0); + auto tmp_1 = svcvt_u32_f32_z(all_true_pg, in_1); + auto tmp_2 = svcvt_u32_f32_z(all_true_pg, in_2); + auto tmp_3 = svcvt_u32_f32_z(all_true_pg, in_3); + + auto tmp_16_0 = svqxtnt_u32(svqxtnb_u32(tmp_0), tmp_1); + auto tmp_16_1 = svqxtnt_u32(svqxtnb_u32(tmp_2), tmp_3); + + auto tmp_16_uzp_0 = svuzp1(tmp_16_0, tmp_16_0); + auto tmp_16_uzp_1 = svuzp2(tmp_16_0, tmp_16_0); + auto tmp_16_uzp_2 = svuzp1(tmp_16_1, tmp_16_1); + auto tmp_16_uzp_3 = svuzp2(tmp_16_1, tmp_16_1); + + auto pg = svwhilelt_b16_s32(0, svcnth() / 2); + + tmp_16_0 = svsplice(pg, tmp_16_uzp_0, tmp_16_uzp_1); + tmp_16_1 = svsplice(pg, tmp_16_uzp_2, tmp_16_uzp_3); + + out = svqxtnt_u16(svqxtnb_u16(tmp_16_0), tmp_16_1); + + auto out_uzp_0 = svuzp1(out, out); + auto out_uzp_1 = svuzp2(out, out); + + pg = svwhilelt_b8_s32(0, svcntb() / 2); + out = svsplice(pg, out_uzp_0, out_uzp_1); + + return out; +} + +template <> +inline svint8_t convert_float_to_int(const svfloat32_t &in_0, const svfloat32_t &in_1, const svfloat32_t &in_2, const svfloat32_t &in_3) +{ + svint8_t out; + const auto all_true_pg = svptrue_b32(); + auto tmp_0 = svcvt_s32_f32_z(all_true_pg, in_0); + auto tmp_1 = svcvt_s32_f32_z(all_true_pg, in_1); + auto tmp_2 = svcvt_s32_f32_z(all_true_pg, in_2); + auto tmp_3 = svcvt_s32_f32_z(all_true_pg, in_3); + + auto tmp_16_0 = svqxtnt_s32(svqxtnb_s32(tmp_0), tmp_1); + auto tmp_16_1 = svqxtnt_s32(svqxtnb_s32(tmp_2), tmp_3); + + auto tmp_16_uzp_0 = svuzp1(tmp_16_0, tmp_16_0); + auto tmp_16_uzp_1 = svuzp2(tmp_16_0, tmp_16_0); + auto tmp_16_uzp_2 = svuzp1(tmp_16_1, tmp_16_1); + auto tmp_16_uzp_3 = svuzp2(tmp_16_1, tmp_16_1); + + auto pg = svwhilelt_b16_s32(0, svcnth() / 2); + + tmp_16_0 = svsplice(pg, tmp_16_uzp_0, tmp_16_uzp_1); + tmp_16_1 = svsplice(pg, tmp_16_uzp_2, tmp_16_uzp_3); + + out = svqxtnt_s16(svqxtnb_s16(tmp_16_0), tmp_16_1); + + auto out_uzp_0 = svuzp1(out, out); + auto out_uzp_1 = svuzp2(out, out); + + pg = svwhilelt_b8_s32(0, svcntb() / 2); + out = svsplice(pg, out_uzp_0, out_uzp_1); + + return out; +} + } // namespace arm_compute #endif /* defined(__ARM_FEATURE_SVE) */ diff --git a/src/core/cpu/kernels/softmax/impl/NEON/list.h b/src/core/cpu/kernels/softmax/impl/NEON/list.h index 3f9438e0c7..740e6ea9bc 100644 --- a/src/core/cpu/kernels/softmax/impl/NEON/list.h +++ b/src/core/cpu/kernels/softmax/impl/NEON/list.h @@ -33,43 +33,6 @@ namespace arm_compute { namespace cpu { -namespace -{ -template -int_vec_type convert_float_to_int(const float_vec_type &in); - -template -float_vec_type convert_int_to_float(const int_vec_type &in); - -template <> -uint8x16_t convert_float_to_int(const float32x4x4_t &in) -{ - uint8x16_t out; - convert_float32x4x4_to_uint8x16(in, out); - return out; -} - -template <> -int8x16_t convert_float_to_int(const float32x4x4_t &in) -{ - int8x16_t out; - convert_float32x4x4_to_int8x16(in, out); - return out; -} - -template <> -float32x4x4_t convert_int_to_float(const uint8x16_t &in) -{ - return convert_uint8x16_to_float32x4x4(in); -} - -template <> -float32x4x4_t convert_int_to_float(const int8x16_t &in) -{ - return convert_int8x16_to_float32x4x4(in); -} -} // namespace - template void neon_logits_1d_max(const ITensor *in, ITensor *out, const Window &window) { diff --git a/src/core/cpu/kernels/softmax/impl/SVE/list.h b/src/core/cpu/kernels/softmax/impl/SVE/list.h index 0936bd5a56..d558d7d193 100644 --- a/src/core/cpu/kernels/softmax/impl/SVE/list.h +++ b/src/core/cpu/kernels/softmax/impl/SVE/list.h @@ -35,82 +35,6 @@ namespace arm_compute { namespace cpu { -namespace -{ -#if defined(__ARM_FEATURE_SVE2) -template -int_vec_type convert_float_to_int(const svfloat32_t &in_0, const svfloat32_t &in_1, const svfloat32_t &in_2, const svfloat32_t &in_3); - -template <> -svuint8_t convert_float_to_int(const svfloat32_t &in_0, const svfloat32_t &in_1, const svfloat32_t &in_2, const svfloat32_t &in_3) -{ - svuint8_t out; - const auto all_true_pg = svptrue_b32(); - auto tmp_0 = svcvt_u32_f32_z(all_true_pg, in_0); - auto tmp_1 = svcvt_u32_f32_z(all_true_pg, in_1); - auto tmp_2 = svcvt_u32_f32_z(all_true_pg, in_2); - auto tmp_3 = svcvt_u32_f32_z(all_true_pg, in_3); - - auto tmp_16_0 = svqxtnt_u32(svqxtnb_u32(tmp_0), tmp_1); - auto tmp_16_1 = svqxtnt_u32(svqxtnb_u32(tmp_2), tmp_3); - - auto tmp_16_uzp_0 = svuzp1(tmp_16_0, tmp_16_0); - auto tmp_16_uzp_1 = svuzp2(tmp_16_0, tmp_16_0); - auto tmp_16_uzp_2 = svuzp1(tmp_16_1, tmp_16_1); - auto tmp_16_uzp_3 = svuzp2(tmp_16_1, tmp_16_1); - - auto pg = svwhilelt_b16_s32(0, svcnth() / 2); - - tmp_16_0 = svsplice(pg, tmp_16_uzp_0, tmp_16_uzp_1); - tmp_16_1 = svsplice(pg, tmp_16_uzp_2, tmp_16_uzp_3); - - out = svqxtnt_u16(svqxtnb_u16(tmp_16_0), tmp_16_1); - - auto out_uzp_0 = svuzp1(out, out); - auto out_uzp_1 = svuzp2(out, out); - - pg = svwhilelt_b8_s32(0, svcntb() / 2); - out = svsplice(pg, out_uzp_0, out_uzp_1); - - return out; -} - -template <> -svint8_t convert_float_to_int(const svfloat32_t &in_0, const svfloat32_t &in_1, const svfloat32_t &in_2, const svfloat32_t &in_3) -{ - svint8_t out; - const auto all_true_pg = svptrue_b32(); - auto tmp_0 = svcvt_s32_f32_z(all_true_pg, in_0); - auto tmp_1 = svcvt_s32_f32_z(all_true_pg, in_1); - auto tmp_2 = svcvt_s32_f32_z(all_true_pg, in_2); - auto tmp_3 = svcvt_s32_f32_z(all_true_pg, in_3); - - auto tmp_16_0 = svqxtnt_s32(svqxtnb_s32(tmp_0), tmp_1); - auto tmp_16_1 = svqxtnt_s32(svqxtnb_s32(tmp_2), tmp_3); - - auto tmp_16_uzp_0 = svuzp1(tmp_16_0, tmp_16_0); - auto tmp_16_uzp_1 = svuzp2(tmp_16_0, tmp_16_0); - auto tmp_16_uzp_2 = svuzp1(tmp_16_1, tmp_16_1); - auto tmp_16_uzp_3 = svuzp2(tmp_16_1, tmp_16_1); - - auto pg = svwhilelt_b16_s32(0, svcnth() / 2); - - tmp_16_0 = svsplice(pg, tmp_16_uzp_0, tmp_16_uzp_1); - tmp_16_1 = svsplice(pg, tmp_16_uzp_2, tmp_16_uzp_3); - - out = svqxtnt_s16(svqxtnb_s16(tmp_16_0), tmp_16_1); - - auto out_uzp_0 = svuzp1(out, out); - auto out_uzp_1 = svuzp2(out, out); - - pg = svwhilelt_b8_s32(0, svcntb() / 2); - out = svsplice(pg, out_uzp_0, out_uzp_1); - - return out; -} -#endif /* defined(__ARM_FEATURE_SVE2) */ -} // namespace - template void sve_logits_1d_max(const ITensor *in, ITensor *out, const Window &window) { -- cgit v1.2.1