aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/SVEMath.inl
diff options
context:
space:
mode:
authorSang-Hoon Park <sang-hoon.park@arm.com>2021-03-04 17:03:46 +0000
committerSang-Hoon Park <sang-hoon.park@arm.com>2021-03-05 09:22:27 +0000
commitdcf3c7e1591cfac19ee2b800141df3b3fe45062d (patch)
tree3384387140c8955c24269f5e52788cdfb1de48d5 /src/core/NEON/SVEMath.inl
parentca419dde35118fcfe07fa0a5ce388c0a40b75c49 (diff)
downloadComputeLibrary-dcf3c7e1591cfac19ee2b800141df3b3fe45062d.tar.gz
Move utility functions to NE/SVEMath
To avoid unused-function warnings when only a subset of the data types is selected, the definitions of the functions are moved. Partially Resolves: COMPMID-4282 Change-Id: Ic30ddd3f2c88cac5978d27e5f4ada3639b5a04e5 Signed-off-by: Sang-Hoon Park <sang-hoon.park@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5215 Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON/SVEMath.inl')
-rw-r--r--src/core/NEON/SVEMath.inl68
1 files changed, 68 insertions, 0 deletions
diff --git a/src/core/NEON/SVEMath.inl b/src/core/NEON/SVEMath.inl
index cf7f9f5a95..a851b8a07b 100644
--- a/src/core/NEON/SVEMath.inl
+++ b/src/core/NEON/SVEMath.inl
@@ -325,5 +325,73 @@ inline svfloat16_t svpow_f16_z(svbool_t pg, svfloat16_t a, svfloat16_t b)
#endif /* defined(__ARM_FEATURE_SVE2) */
}
+template <>
+inline svuint8_t convert_float_to_int<svuint8_t>(const svfloat32_t &in_0, const svfloat32_t &in_1, const svfloat32_t &in_2, const svfloat32_t &in_3)
+{
+ svuint8_t out;
+ const auto all_true_pg = svptrue_b32();
+ auto tmp_0 = svcvt_u32_f32_z(all_true_pg, in_0);
+ auto tmp_1 = svcvt_u32_f32_z(all_true_pg, in_1);
+ auto tmp_2 = svcvt_u32_f32_z(all_true_pg, in_2);
+ auto tmp_3 = svcvt_u32_f32_z(all_true_pg, in_3);
+
+ auto tmp_16_0 = svqxtnt_u32(svqxtnb_u32(tmp_0), tmp_1);
+ auto tmp_16_1 = svqxtnt_u32(svqxtnb_u32(tmp_2), tmp_3);
+
+ auto tmp_16_uzp_0 = svuzp1(tmp_16_0, tmp_16_0);
+ auto tmp_16_uzp_1 = svuzp2(tmp_16_0, tmp_16_0);
+ auto tmp_16_uzp_2 = svuzp1(tmp_16_1, tmp_16_1);
+ auto tmp_16_uzp_3 = svuzp2(tmp_16_1, tmp_16_1);
+
+ auto pg = svwhilelt_b16_s32(0, svcnth() / 2);
+
+ tmp_16_0 = svsplice(pg, tmp_16_uzp_0, tmp_16_uzp_1);
+ tmp_16_1 = svsplice(pg, tmp_16_uzp_2, tmp_16_uzp_3);
+
+ out = svqxtnt_u16(svqxtnb_u16(tmp_16_0), tmp_16_1);
+
+ auto out_uzp_0 = svuzp1(out, out);
+ auto out_uzp_1 = svuzp2(out, out);
+
+ pg = svwhilelt_b8_s32(0, svcntb() / 2);
+ out = svsplice(pg, out_uzp_0, out_uzp_1);
+
+ return out;
+}
+
+template <>
+inline svint8_t convert_float_to_int<svint8_t>(const svfloat32_t &in_0, const svfloat32_t &in_1, const svfloat32_t &in_2, const svfloat32_t &in_3)
+{
+ svint8_t out;
+ const auto all_true_pg = svptrue_b32();
+ auto tmp_0 = svcvt_s32_f32_z(all_true_pg, in_0);
+ auto tmp_1 = svcvt_s32_f32_z(all_true_pg, in_1);
+ auto tmp_2 = svcvt_s32_f32_z(all_true_pg, in_2);
+ auto tmp_3 = svcvt_s32_f32_z(all_true_pg, in_3);
+
+ auto tmp_16_0 = svqxtnt_s32(svqxtnb_s32(tmp_0), tmp_1);
+ auto tmp_16_1 = svqxtnt_s32(svqxtnb_s32(tmp_2), tmp_3);
+
+ auto tmp_16_uzp_0 = svuzp1(tmp_16_0, tmp_16_0);
+ auto tmp_16_uzp_1 = svuzp2(tmp_16_0, tmp_16_0);
+ auto tmp_16_uzp_2 = svuzp1(tmp_16_1, tmp_16_1);
+ auto tmp_16_uzp_3 = svuzp2(tmp_16_1, tmp_16_1);
+
+ auto pg = svwhilelt_b16_s32(0, svcnth() / 2);
+
+ tmp_16_0 = svsplice(pg, tmp_16_uzp_0, tmp_16_uzp_1);
+ tmp_16_1 = svsplice(pg, tmp_16_uzp_2, tmp_16_uzp_3);
+
+ out = svqxtnt_s16(svqxtnb_s16(tmp_16_0), tmp_16_1);
+
+ auto out_uzp_0 = svuzp1(out, out);
+ auto out_uzp_1 = svuzp2(out, out);
+
+ pg = svwhilelt_b8_s32(0, svcntb() / 2);
+ out = svsplice(pg, out_uzp_0, out_uzp_1);
+
+ return out;
+}
+
} // namespace arm_compute
#endif /* defined(__ARM_FEATURE_SVE) */