diff options
author | Sang-Hoon Park <sang-hoon.park@arm.com> | 2021-03-04 17:03:46 +0000 |
---|---|---|
committer | Sang-Hoon Park <sang-hoon.park@arm.com> | 2021-03-05 09:22:27 +0000 |
commit | dcf3c7e1591cfac19ee2b800141df3b3fe45062d (patch) | |
tree | 3384387140c8955c24269f5e52788cdfb1de48d5 /src/core/cpu/kernels | |
parent | ca419dde35118fcfe07fa0a5ce388c0a40b75c49 (diff) | |
download | ComputeLibrary-dcf3c7e1591cfac19ee2b800141df3b3fe45062d.tar.gz |
Move utility functions to NE/SVEMath
To avoid unused-function warnings when only
a subset of data types is selected, the definitions
of the functions are moved.
Partially Resolves: COMPMID-4282
Change-Id: Ic30ddd3f2c88cac5978d27e5f4ada3639b5a04e5
Signed-off-by: Sang-Hoon Park <sang-hoon.park@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5215
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/cpu/kernels')
-rw-r--r-- | src/core/cpu/kernels/softmax/impl/NEON/list.h | 37 | ||||
-rw-r--r-- | src/core/cpu/kernels/softmax/impl/SVE/list.h | 76 |
2 files changed, 0 insertions, 113 deletions
diff --git a/src/core/cpu/kernels/softmax/impl/NEON/list.h b/src/core/cpu/kernels/softmax/impl/NEON/list.h
index 3f9438e0c7..740e6ea9bc 100644
--- a/src/core/cpu/kernels/softmax/impl/NEON/list.h
+++ b/src/core/cpu/kernels/softmax/impl/NEON/list.h
@@ -33,43 +33,6 @@ namespace arm_compute
 {
 namespace cpu
 {
-namespace
-{
-template <typename float_vec_type, typename int_vec_type>
-int_vec_type convert_float_to_int(const float_vec_type &in);
-
-template <typename float_vec_type, typename int_vec_type>
-float_vec_type convert_int_to_float(const int_vec_type &in);
-
-template <>
-uint8x16_t convert_float_to_int<float32x4x4_t, uint8x16_t>(const float32x4x4_t &in)
-{
-    uint8x16_t out;
-    convert_float32x4x4_to_uint8x16(in, out);
-    return out;
-}
-
-template <>
-int8x16_t convert_float_to_int<float32x4x4_t, int8x16_t>(const float32x4x4_t &in)
-{
-    int8x16_t out;
-    convert_float32x4x4_to_int8x16(in, out);
-    return out;
-}
-
-template <>
-float32x4x4_t convert_int_to_float<float32x4x4_t, uint8x16_t>(const uint8x16_t &in)
-{
-    return convert_uint8x16_to_float32x4x4(in);
-}
-
-template <>
-float32x4x4_t convert_int_to_float<float32x4x4_t, int8x16_t>(const int8x16_t &in)
-{
-    return convert_int8x16_to_float32x4x4(in);
-}
-} // namespace
-
 template <typename T>
 void neon_logits_1d_max(const ITensor *in, ITensor *out, const Window &window)
 {
diff --git a/src/core/cpu/kernels/softmax/impl/SVE/list.h b/src/core/cpu/kernels/softmax/impl/SVE/list.h
index 0936bd5a56..d558d7d193 100644
--- a/src/core/cpu/kernels/softmax/impl/SVE/list.h
+++ b/src/core/cpu/kernels/softmax/impl/SVE/list.h
@@ -35,82 +35,6 @@ namespace arm_compute
 {
 namespace cpu
 {
-namespace
-{
-#if defined(__ARM_FEATURE_SVE2)
-template <typename int_vec_type>
-int_vec_type convert_float_to_int(const svfloat32_t &in_0, const svfloat32_t &in_1, const svfloat32_t &in_2, const svfloat32_t &in_3);
-
-template <>
-svuint8_t convert_float_to_int<svuint8_t>(const svfloat32_t &in_0, const svfloat32_t &in_1, const svfloat32_t &in_2, const svfloat32_t &in_3)
-{
-    svuint8_t out;
-    const auto all_true_pg = svptrue_b32();
-    auto tmp_0 = svcvt_u32_f32_z(all_true_pg, in_0);
-    auto tmp_1 = svcvt_u32_f32_z(all_true_pg, in_1);
-    auto tmp_2 = svcvt_u32_f32_z(all_true_pg, in_2);
-    auto tmp_3 = svcvt_u32_f32_z(all_true_pg, in_3);
-
-    auto tmp_16_0 = svqxtnt_u32(svqxtnb_u32(tmp_0), tmp_1);
-    auto tmp_16_1 = svqxtnt_u32(svqxtnb_u32(tmp_2), tmp_3);
-
-    auto tmp_16_uzp_0 = svuzp1(tmp_16_0, tmp_16_0);
-    auto tmp_16_uzp_1 = svuzp2(tmp_16_0, tmp_16_0);
-    auto tmp_16_uzp_2 = svuzp1(tmp_16_1, tmp_16_1);
-    auto tmp_16_uzp_3 = svuzp2(tmp_16_1, tmp_16_1);
-
-    auto pg = svwhilelt_b16_s32(0, svcnth() / 2);
-
-    tmp_16_0 = svsplice(pg, tmp_16_uzp_0, tmp_16_uzp_1);
-    tmp_16_1 = svsplice(pg, tmp_16_uzp_2, tmp_16_uzp_3);
-
-    out = svqxtnt_u16(svqxtnb_u16(tmp_16_0), tmp_16_1);
-
-    auto out_uzp_0 = svuzp1(out, out);
-    auto out_uzp_1 = svuzp2(out, out);
-
-    pg = svwhilelt_b8_s32(0, svcntb() / 2);
-    out = svsplice(pg, out_uzp_0, out_uzp_1);
-
-    return out;
-}
-
-template <>
-svint8_t convert_float_to_int<svint8_t>(const svfloat32_t &in_0, const svfloat32_t &in_1, const svfloat32_t &in_2, const svfloat32_t &in_3)
-{
-    svint8_t out;
-    const auto all_true_pg = svptrue_b32();
-    auto tmp_0 = svcvt_s32_f32_z(all_true_pg, in_0);
-    auto tmp_1 = svcvt_s32_f32_z(all_true_pg, in_1);
-    auto tmp_2 = svcvt_s32_f32_z(all_true_pg, in_2);
-    auto tmp_3 = svcvt_s32_f32_z(all_true_pg, in_3);
-
-    auto tmp_16_0 = svqxtnt_s32(svqxtnb_s32(tmp_0), tmp_1);
-    auto tmp_16_1 = svqxtnt_s32(svqxtnb_s32(tmp_2), tmp_3);
-
-    auto tmp_16_uzp_0 = svuzp1(tmp_16_0, tmp_16_0);
-    auto tmp_16_uzp_1 = svuzp2(tmp_16_0, tmp_16_0);
-    auto tmp_16_uzp_2 = svuzp1(tmp_16_1, tmp_16_1);
-    auto tmp_16_uzp_3 = svuzp2(tmp_16_1, tmp_16_1);
-
-    auto pg = svwhilelt_b16_s32(0, svcnth() / 2);
-
-    tmp_16_0 = svsplice(pg, tmp_16_uzp_0, tmp_16_uzp_1);
-    tmp_16_1 = svsplice(pg, tmp_16_uzp_2, tmp_16_uzp_3);
-
-    out = svqxtnt_s16(svqxtnb_s16(tmp_16_0), tmp_16_1);
-
-    auto out_uzp_0 = svuzp1(out, out);
-    auto out_uzp_1 = svuzp2(out, out);
-
-    pg = svwhilelt_b8_s32(0, svcntb() / 2);
-    out = svsplice(pg, out_uzp_0, out_uzp_1);
-
-    return out;
-}
-#endif /* defined(__ARM_FEATURE_SVE2) */
-} // namespace
-
 template <typename ScalarType>
 void sve_logits_1d_max(const ITensor *in, ITensor *out, const Window &window)
 {