From 0870db478fc46213138bcfd6e1536610ea145ecf Mon Sep 17 00:00:00 2001 From: Sang-Hoon Park Date: Tue, 8 Dec 2020 18:42:19 +0000 Subject: Add utility functions for SVE - Few bit-width dependent intrinsics are added. - Few math functions are added. Partially implements: COMPMID-3872 Change-Id: Ia6ab46bd170fec9c7c8d4410b7ef4d84710b68ed Signed-off-by: Sang-Hoon Park Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4718 Tested-by: Arm Jenkins Reviewed-by: Michalis Spyrou Comments-Addressed: Arm Jenkins --- src/core/NEON/SVEMath.h | 62 +++++++ src/core/NEON/SVEMath.inl | 201 +++++++++++++++-------- src/core/NEON/wrapper/intrinsics/intrinsics.h | 11 ++ src/core/NEON/wrapper/intrinsics/svcnt.h | 68 ++++++++ src/core/NEON/wrapper/intrinsics/svcvt.h | 74 +++++++++ src/core/NEON/wrapper/intrinsics/svdup_n.h | 57 +++++++ src/core/NEON/wrapper/intrinsics/svexp.h | 49 ++++++ src/core/NEON/wrapper/intrinsics/svlog.h | 47 ++++++ src/core/NEON/wrapper/intrinsics/svpow.h | 47 ++++++ src/core/NEON/wrapper/intrinsics/svptrue.h | 68 ++++++++ src/core/NEON/wrapper/intrinsics/svreinterpret.h | 57 +++++++ src/core/NEON/wrapper/intrinsics/svsin.h | 47 ++++++ src/core/NEON/wrapper/intrinsics/svwhilelt.h | 73 ++++++++ 13 files changed, 792 insertions(+), 69 deletions(-) create mode 100644 src/core/NEON/wrapper/intrinsics/svcnt.h create mode 100644 src/core/NEON/wrapper/intrinsics/svcvt.h create mode 100644 src/core/NEON/wrapper/intrinsics/svdup_n.h create mode 100644 src/core/NEON/wrapper/intrinsics/svexp.h create mode 100644 src/core/NEON/wrapper/intrinsics/svlog.h create mode 100644 src/core/NEON/wrapper/intrinsics/svpow.h create mode 100644 src/core/NEON/wrapper/intrinsics/svptrue.h create mode 100644 src/core/NEON/wrapper/intrinsics/svreinterpret.h create mode 100644 src/core/NEON/wrapper/intrinsics/svsin.h create mode 100644 src/core/NEON/wrapper/intrinsics/svwhilelt.h (limited to 'src/core') diff --git a/src/core/NEON/SVEMath.h b/src/core/NEON/SVEMath.h index bdf2e894e2..490759c789 100644 --- a/src/core/NEON/SVEMath.h +++ b/src/core/NEON/SVEMath.h @@ -25,6 +25,10 @@ #define ARM_COMPUTE_SVEMATH_H #if defined(__ARM_FEATURE_SVE) +#include "src/core/NEON/wrapper/intrinsics/svcvt.h" +#include "src/core/NEON/wrapper/intrinsics/svdup_n.h" +#include "src/core/NEON/wrapper/intrinsics/svreinterpret.h" +#include "src/core/common/StdTypes.h" #include #include @@ -110,6 +114,64 @@ svfloat16_t svinv_f16_z(svbool_t pg, svfloat16_t x); */ svfloat16_t svlog_f16_z(svbool_t pg, svfloat16_t x); +/** Calculate inverse square root. + * + * @param[in] pg Input reciprocal. + * @param[in] val Input value. + * + * @return The calculated inverse square root. + */ +template +inline VectorType svinvsqrt(svbool_t pg, VectorType val) +{ + auto sqrt_reciprocal = svrsqrte(val); + sqrt_reciprocal = svmul_z(pg, svrsqrts(svmul_z(pg, val, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal); + sqrt_reciprocal = svmul_z(pg, svrsqrts(svmul_z(pg, val, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal); + return sqrt_reciprocal; +} + +/** Calculate sine. + * + * @param[in] pg Input reciprocal. + * @param[in] val Input vector value in radians, F32 format. + * + * @return The calculated sine. + */ +svfloat32_t svsin_f32_z(svbool_t pg, svfloat32_t val); + +/** Calculate sine. + * + * @param[in] pg Input reciprocal. + * @param[in] val Input vector value in radians, F16 format. + * + * @return The calculated sine. + */ +svfloat16_t svsin_f16_z(svbool_t pg, svfloat16_t val); + +/** Calculate n power of a number. + * + * pow(x,n) = e^(n*log(x)) + * + * @param[in] pg Input reciprocal. + * @param[in] a Input vector value in F32 format. + * @param[in] b Powers to raise the input to. + * + * @return The calculated power. + */ +svfloat32_t svpow_f32_z(svbool_t pg, svfloat32_t a, svfloat32_t b); + +/** Calculate n power of a number. + * + * pow(x,n) = e^(n*log(x)) + * + * @param[in] pg Input reciprocal. + * @param[in] a Input vector value in F16 format. + * @param[in] b Powers to raise the input to. + * + * @return The calculated power. + */ +svfloat16_t svpow_f16_z(svbool_t pg, svfloat16_t a, svfloat16_t b); + } // namespace arm_compute #include "src/core/NEON/SVEMath.inl" #endif /* defined(__ARM_FEATURE_SVE) */ diff --git a/src/core/NEON/SVEMath.inl b/src/core/NEON/SVEMath.inl index 5ebfeaa5c5..86592f6dc3 100644 --- a/src/core/NEON/SVEMath.inl +++ b/src/core/NEON/SVEMath.inl @@ -26,6 +26,10 @@ #if defined(__ARM_FEATURE_SVE) +#ifndef M_PI +#define M_PI (3.14159265358979323846) +#endif // M_PI + namespace arm_compute { inline svfloat32_t svtaylor_poly_f32_z(svbool_t pg, svfloat32_t x, const std::array &coeffs) @@ -115,47 +119,23 @@ inline svfloat32_t svexp_f32_z(svbool_t pg, svfloat32_t x) inline svfloat16_t svexp_f16_z(svbool_t pg, svfloat16_t x) { - const auto CONST_LN2 = svdup_n_f16(0.6931471805f); // ln(2) - const auto CONST_INV_LN2 = svdup_n_f16(1.4426950408f); // 1/ln(2) - const auto CONST_INF = svdup_n_f16(std::numeric_limits::infinity()); - const auto CONST_MAX_INPUT = svdup_n_f16(88.7f); - const auto CONST_0 = svdup_n_f16(0.f); - const auto CONST_NEGATIVE_126 = svdup_n_s16(-126); - - /** Exponent polynomial coefficients */ - const std::array exp_tab = - { - { - svdup_n_f16(1.f), - svdup_n_f16(0.0416598916054f), - svdup_n_f16(0.500000596046f), - svdup_n_f16(0.0014122662833f), - svdup_n_f16(1.00000011921f), - svdup_n_f16(0.00833693705499f), - svdup_n_f16(0.166665703058f), - svdup_n_f16(0.000195780929062f), - } - }; - - // Perform range reduction [-log(2),log(2)] - auto m = svcvt_s16_f16_z(pg, svmul_f16_z(pg, x, CONST_INV_LN2)); - auto val = svmls_f16_z(pg, x, svcvt_f16_s16_z(pg, m), CONST_LN2); - - // Polynomial Approximation - auto poly = svtaylor_poly_f16_z(pg, val, exp_tab); - - // Reconstruct - poly = svreinterpret_f16_s16(svqadd_s16(svreinterpret_s16_f16(poly), svlsl_n_s16_z(pg, m, 11))); - - // Handle underflow - svbool_t ltpg = svcmplt_s16(pg, m, CONST_NEGATIVE_126); - poly = svsel_f16(ltpg, CONST_0, poly); + auto bottom = svcvt_f32_z(pg, x); +#if defined(__ARM_FEATURE_SVE2) + auto top = svcvtlt_f32_x(pg, x); + auto pg_top = pg; +#else /* defined(__ARM_FEATURE_SVE2) */ + auto pg_top = svptrue_b16(); + auto top = svcvt_f32_z(pg_top, svreinterpret_f16(svrevh_z(svptrue_b16(), svreinterpret_u32(x)))); +#endif /* defined(__ARM_FEATURE_SVE2) */ - // Handle overflow - svbool_t gtpg = svcmpgt_f16(pg, x, CONST_MAX_INPUT); - poly = svsel_f16(gtpg, CONST_INF, poly); + bottom = svexp_f32_z(pg, bottom); + top = svexp_f32_z(pg_top, top); - return poly; +#if defined(__ARM_FEATURE_SVE2) + return svcvtnt_f16_m(svcvt_f16_z(pg, bottom), pg_top, top); +#else /* defined(__ARM_FEATURE_SVE2) */ + return svtrn1(svcvt_f16_z(pg, bottom), svcvt_f16_z(pg_top, top)); +#endif /* defined(__ARM_FEATURE_SVE2) */ } inline svfloat32_t svtanh_f32_z(svbool_t pg, svfloat32_t val) @@ -190,9 +170,6 @@ inline svfloat16_t svtanh_f16_z(svbool_t pg, svfloat16_t val) inline svfloat32_t svlog_f32_z(svbool_t pg, svfloat32_t x) { -#if defined(__ARM_FEATURE_SVE2) - return svcvt_f32_s32_z(pg, svlogb_f32_z(pg, x)); -#else /* !defined(__ARM_FEATURE_SVE2) */ /** Logarithm polynomial coefficients */ const std::array log_tab = { @@ -222,45 +199,131 @@ inline svfloat32_t svlog_f32_z(svbool_t pg, svfloat32_t x) poly = svmla_f32_z(pg, poly, svcvt_f32_s32_z(pg, m), CONST_LN2); return poly; -#endif /* defined(__ARM_FEATURE_SVE2) */ } inline svfloat16_t svlog_f16_z(svbool_t pg, svfloat16_t x) { + auto bottom = svcvt_f32_z(pg, x); #if defined(__ARM_FEATURE_SVE2) - return svcvt_f16_s16_z(pg, svlogb_f16_z(pg, x)); -#else /* !defined(__ARM_FEATURE_SVE2) */ + auto top = svcvtlt_f32_x(pg, x); + auto pg_top = pg; +#else /* defined(__ARM_FEATURE_SVE2) */ + auto pg_top = svptrue_b16(); + auto top = svcvt_f32_z(pg_top, svreinterpret_f16(svrevh_z(svptrue_b16(), svreinterpret_u32(x)))); +#endif /* defined(__ARM_FEATURE_SVE2) */ - /** Logarithm polynomial coefficients */ - const std::array log_tab - { - { - svdup_n_f16(-2.29561495781f), - svdup_n_f16(-2.47071170807f), - svdup_n_f16(-5.68692588806f), - svdup_n_f16(-0.165253549814f), - svdup_n_f16(5.17591238022f), - svdup_n_f16(0.844007015228f), - svdup_n_f16(4.58445882797f), - svdup_n_f16(0.0141278216615f), - } - }; + bottom = svlog_f32_z(pg, bottom); + top = svlog_f32_z(pg_top, top); - const auto CONST_7 = svdup_n_s16(7); // 7 - const auto CONST_LN2 = svdup_n_f16(0.6931471805f); // ln(2) +#if defined(__ARM_FEATURE_SVE2) + return svcvtnt_f16_m(svcvt_f16_z(pg, bottom), pg_top, top); +#else /* defined(__ARM_FEATURE_SVE2) */ + return svtrn1(svcvt_f16_z(pg, bottom), svcvt_f16_z(pg_top, top)); +#endif /* defined(__ARM_FEATURE_SVE2) */ +} - // Extract exponent - auto m = svsub_s16_z(pg, svasr_n_s16_z(pg, svreinterpret_s16_f16(x), 11), CONST_7); - auto val = svreinterpret_f16_s16(svsub_s16_z(pg, svreinterpret_s16_f16(x), svlsl_n_s16_z(pg, m, 11))); +inline svfloat32_t svsin_f32_z(svbool_t pg, svfloat32_t val) +{ + using ScalarType = float; + using IntType = u32; - // Polynomial Approximation - auto poly = svtaylor_poly_f16_z(pg, val, log_tab); + constexpr float te_sin_coeff2 = 0.166666666666f; // 1/(2*3) + constexpr float te_sin_coeff3 = 0.05f; // 1/(4*5) + constexpr float te_sin_coeff4 = 0.023809523810f; // 1/(6*7) + constexpr float te_sin_coeff5 = 0.013888888889f; // 1/(8*9) - // Reconstruct - poly = svmla_f16_z(pg, poly, svcvt_f16_s16_z(pg, m), CONST_LN2); + const auto pi_v = wrapper::svdup_n(ScalarType(M_PI)); + const auto pio2_v = wrapper::svdup_n(ScalarType(M_PI / 2)); + const auto ipi_v = wrapper::svdup_n(ScalarType(1 / M_PI)); - return poly; + //Find positive or negative + const auto c_v = svabs_z(pg, wrapper::svcvt_z(pg, svmul_z(pg, val, ipi_v))); + const auto sign_v = svcmple(pg, val, wrapper::svdup_n(ScalarType(0))); + const auto odd_v = svcmpne(pg, svand_z(pg, wrapper::svreinterpret(c_v), wrapper::svdup_n(IntType(1))), wrapper::svdup_n(IntType(0))); + + auto neg_v = sveor_z(pg, odd_v, sign_v); + + //Modulus a - (n * int(a*(1/n))) + auto ma = svsub_z(pg, svabs_z(pg, val), svmul_z(pg, pi_v, wrapper::svcvt_z(pg, c_v))); + const auto reb_v = svcmpge(pg, ma, pio2_v); + + //Rebase a between 0 and pi/2 + ma = svsel(reb_v, svsub_z(pg, pi_v, ma), ma); + + //Taylor series + const auto ma2 = svmul_z(pg, ma, ma); + + //2nd elem: x^3 / 3! + auto elem = svmul_z(pg, svmul_z(pg, ma, ma2), wrapper::svdup_n(ScalarType(te_sin_coeff2))); + auto res = svsub_z(pg, ma, elem); + + //3rd elem: x^5 / 5! + elem = svmul_z(pg, svmul_z(pg, elem, ma2), wrapper::svdup_n(ScalarType(te_sin_coeff3))); + res = svadd_z(pg, res, elem); + + //4th elem: x^7 / 7!float32x2_t vsin_f32(float32x2_t val) + elem = svmul_z(pg, svmul_z(pg, elem, ma2), wrapper::svdup_n(ScalarType(te_sin_coeff4))); + res = svsub_z(pg, res, elem); + + //5th elem: x^9 / 9! + elem = svmul_z(pg, svmul_z(pg, elem, ma2), wrapper::svdup_n(ScalarType(te_sin_coeff5))); + res = svadd_z(pg, res, elem); + + //Change of sign + res = svneg_m(res, neg_v, res); + return res; +} + +inline svfloat16_t svsin_f16_z(svbool_t pg, svfloat16_t val) +{ + auto bottom = svcvt_f32_z(pg, val); +#if defined(__ARM_FEATURE_SVE2) + auto top = svcvtlt_f32_x(pg, val); + auto pg_top = pg; +#else /* defined(__ARM_FEATURE_SVE2) */ + auto pg_top = svptrue_b16(); + auto top = svcvt_f32_z(pg_top, svreinterpret_f16(svrevh_z(svptrue_b16(), svreinterpret_u32(val)))); +#endif /* defined(__ARM_FEATURE_SVE2) */ + + bottom = svsin_f32_z(pg, bottom); + top = svsin_f32_z(pg_top, top); + +#if defined(__ARM_FEATURE_SVE2) + return svcvtnt_f16_m(svcvt_f16_z(pg, bottom), pg_top, top); +#else /* defined(__ARM_FEATURE_SVE2) */ + return svtrn1(svcvt_f16_z(pg, bottom), svcvt_f16_z(pg_top, top)); #endif /* defined(__ARM_FEATURE_SVE2) */ } + +inline svfloat32_t svpow_f32_z(svbool_t pg, svfloat32_t a, svfloat32_t b) +{ + return svexp_f32_z(pg, svmul_z(pg, b, svlog_f32_z(pg, a))); +} + +inline svfloat16_t svpow_f16_z(svbool_t pg, svfloat16_t a, svfloat16_t b) +{ + auto a_bottom = svcvt_f32_z(pg, a); + auto b_bottom = svcvt_f32_z(pg, b); + +#if defined(__ARM_FEATURE_SVE2) + auto pg_top = pg; + auto a_top = svcvtlt_f32_x(pg, a); + auto b_top = svcvtlt_f32_x(pg, b) +#else /* defined(__ARM_FEATURE_SVE2) */ + auto pg_top = svptrue_b16(); + auto a_top = svcvt_f32_z(pg_top, svreinterpret_f16(svrevh_z(svptrue_b16(), svreinterpret_u32(a)))); + auto b_top = svcvt_f32_z(pg_top, svreinterpret_f16(svrevh_z(svptrue_b16(), svreinterpret_u32(b)))); +#endif /* defined(__ARM_FEATURE_SVE2) */ + + auto res_bottom = svpow_f32_z(pg, a_bottom, b_bottom); + auto res_top = svpow_f32_z(pg_top, a_top, b_top); + +#if defined(__ARM_FEATURE_SVE2) + return svcvtnt_f16_m(svcvt_f16_z(pg, res_bottom), pg_top, res_top); +#else /* defined(__ARM_FEATURE_SVE2) */ + return svtrn1(svcvt_f16_z(pg, res_bottom), svcvt_f16_z(pg_top, res_top)); +#endif /* defined(__ARM_FEATURE_SVE2) */ +} + } // namespace arm_compute #endif /* defined(__ARM_FEATURE_SVE) */ diff --git a/src/core/NEON/wrapper/intrinsics/intrinsics.h b/src/core/NEON/wrapper/intrinsics/intrinsics.h index c6bad3f9dd..6cf7f9d287 100644 --- a/src/core/NEON/wrapper/intrinsics/intrinsics.h +++ b/src/core/NEON/wrapper/intrinsics/intrinsics.h @@ -73,4 +73,15 @@ #include "src/core/NEON/wrapper/intrinsics/tanh.h" #include "src/core/NEON/wrapper/intrinsics/tbl.h" +#if defined(__ARM_FEATURE_SVE) +#include "src/core/NEON/wrapper/intrinsics/svcnt.h" +#include "src/core/NEON/wrapper/intrinsics/svcvt.h" +#include "src/core/NEON/wrapper/intrinsics/svdup_n.h" +#include "src/core/NEON/wrapper/intrinsics/svexp.h" +#include "src/core/NEON/wrapper/intrinsics/svlog.h" +#include "src/core/NEON/wrapper/intrinsics/svptrue.h" +#include "src/core/NEON/wrapper/intrinsics/svsin.h" +#include "src/core/NEON/wrapper/intrinsics/svwhilelt.h" +#endif /* defined(__ARM_FEATURE_SVE) */ + #endif /* ARM_COMPUTE_WRAPPER_INTRINSICS_H */ diff --git a/src/core/NEON/wrapper/intrinsics/svcnt.h b/src/core/NEON/wrapper/intrinsics/svcnt.h new file mode 100644 index 0000000000..e530e7c83f --- /dev/null +++ b/src/core/NEON/wrapper/intrinsics/svcnt.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef SRC_CORE_NEON_WRAPPER_INTRINSICS_SVCNT_H +#define SRC_CORE_NEON_WRAPPER_INTRINSICS_SVCNT_H +#if defined(__ARM_FEATURE_SVE) +#include +namespace arm_compute +{ +namespace wrapper +{ +template +inline uint64_t svcnt_size(); + +template <> +inline uint64_t svcnt_size<64>() +{ + return svcntd(); +} + +template <> +inline uint64_t svcnt_size<32>() +{ + return svcntw(); +} + +template <> +inline uint64_t svcnt_size<16>() +{ + return svcnth(); +} + +template <> +inline uint64_t svcnt_size<8>() +{ + return svcntb(); +} + +template +inline uint64_t svcnt() +{ + return svcnt_size(); +} +} // namespace wrapper +} // namespace arm_compute + +#endif /* defined(__ARM_FEATURE_SVE) */ +#endif /* SRC_CORE_NEON_WRAPPER_INTRINSICS_SVCNT_H */ \ No newline at end of file diff --git a/src/core/NEON/wrapper/intrinsics/svcvt.h b/src/core/NEON/wrapper/intrinsics/svcvt.h new file mode 100644 index 0000000000..746b004d7d --- /dev/null +++ b/src/core/NEON/wrapper/intrinsics/svcvt.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef SRC_CORE_NEON_WRAPPER_INTRINSICS_SVCVT_H +#define SRC_CORE_NEON_WRAPPER_INTRINSICS_SVCVT_H +#if defined(__ARM_FEATURE_SVE) +#include +namespace arm_compute +{ +namespace wrapper +{ +#define SVCVT_Z_TO_F32_IMPL(vtype) \ + template \ + inline typename std::enable_if::value, svfloat32_t>::type svcvt_z(svbool_t pg, const vtype &a) \ + { \ + return svcvt_f32_z(pg, a); \ + } + +SVCVT_Z_TO_F32_IMPL(svuint32_t) +SVCVT_Z_TO_F32_IMPL(svint32_t) +SVCVT_Z_TO_F32_IMPL(svfloat16_t) + +#undef SVCVT_Z_TO_F32_IMPL + +#define SVCVT_Z_TO_F16_IMPL(vtype) \ + template \ + inline typename std::enable_if::value, svfloat16_t>::type svcvt_z(svbool_t pg, const vtype &a) \ + { \ + return svcvt_f16_z(pg, a); \ + } + +SVCVT_Z_TO_F16_IMPL(svuint32_t) +SVCVT_Z_TO_F16_IMPL(svint32_t) +SVCVT_Z_TO_F16_IMPL(svfloat32_t) + +#undef SVCVT_Z_TO_F16_IMPL + +#define SVCVT_Z_TO_S32_IMPL(vtype) \ + template \ + inline typename std::enable_if::value, svint32_t>::type svcvt_z(svbool_t pg, const vtype &a) \ + { \ + return svcvt_s32_z(pg, a); \ + } + +SVCVT_Z_TO_S32_IMPL(svfloat16_t) +SVCVT_Z_TO_S32_IMPL(svfloat32_t) + +#undef SVCVT_Z_TO_S32_IMPL + +} // namespace wrapper +} // namespace arm_compute + +#endif /* defined(__ARM_FEATURE_SVE) */ +#endif /* SRC_CORE_NEON_WRAPPER_INTRINSICS_SVCVT_H */ \ No newline at end of file diff --git a/src/core/NEON/wrapper/intrinsics/svdup_n.h b/src/core/NEON/wrapper/intrinsics/svdup_n.h new file mode 100644 index 0000000000..b1aed97d9c --- /dev/null +++ b/src/core/NEON/wrapper/intrinsics/svdup_n.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef SRC_CORE_NEON_WRAPPER_INTRINSICS_SVDUP_N_H +#define SRC_CORE_NEON_WRAPPER_INTRINSICS_SVDUP_N_H +#if defined(__ARM_FEATURE_SVE) +#include +namespace arm_compute +{ +namespace wrapper +{ +#define SVDUP_N_IMPL(etype, vtype, postfix) \ + inline vtype svdup_n(etype a) \ + { \ + return svdup_n_##postfix(a); \ + } + +SVDUP_N_IMPL(int8_t, svint8_t, s8) +SVDUP_N_IMPL(int16_t, svint16_t, s16) +SVDUP_N_IMPL(int32_t, svint32_t, s32) +SVDUP_N_IMPL(int64_t, svint64_t, s64) +SVDUP_N_IMPL(uint8_t, svuint8_t, u8) +SVDUP_N_IMPL(uint16_t, svuint16_t, u16) +SVDUP_N_IMPL(uint32_t, svuint32_t, u32) +SVDUP_N_IMPL(uint64_t, svuint64_t, u64) +SVDUP_N_IMPL(float16_t, svfloat16_t, f16) +SVDUP_N_IMPL(float, svfloat32_t, f32) +SVDUP_N_IMPL(float64_t, svfloat64_t, f64) +SVDUP_N_IMPL(bfloat16_t, svbfloat16_t, bf16) + +#undef SVDUP_N_IMPL + +} // namespace wrapper +} // namespace arm_compute + +#endif /* defined(__ARM_FEATURE_SVE) */ +#endif /* SRC_CORE_NEON_WRAPPER_INTRINSICS_SVDUP_N_H */ \ No newline at end of file diff --git a/src/core/NEON/wrapper/intrinsics/svexp.h b/src/core/NEON/wrapper/intrinsics/svexp.h new file mode 100644 index 0000000000..d6ce9a77d1 --- /dev/null +++ b/src/core/NEON/wrapper/intrinsics/svexp.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef SRC_CORE_NEON_WRAPPER_INTRINSICS_SVEXP_H +#define SRC_CORE_NEON_WRAPPER_INTRINSICS_SVEXP_H + +#if defined(__ARM_FEATURE_SVE) +#include "src/core/NEON/SVEMath.h" +#include + +namespace arm_compute +{ +namespace wrapper +{ +#define SVEXP_IMPL(vtype, postfix) \ + inline vtype svexp_z(svbool_t pg, const vtype &a) \ + { \ + return svexp_##postfix##_z(pg, a); \ + } + +SVEXP_IMPL(svfloat32_t, f32) +SVEXP_IMPL(svfloat16_t, f16) + +#undef SVEXP_IMPL +} // namespace wrapper +} // namespace arm_compute + +#endif /* defined(__ARM_FEATURE_SVE) */ +#endif /* SRC_CORE_NEON_WRAPPER_INTRINSICS_SVEXP_H */ \ No newline at end of file diff --git a/src/core/NEON/wrapper/intrinsics/svlog.h b/src/core/NEON/wrapper/intrinsics/svlog.h new file mode 100644 index 0000000000..5b505ae1e3 --- /dev/null +++ b/src/core/NEON/wrapper/intrinsics/svlog.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef SRC_CORE_NEON_WRAPPER_INTRINSICS_SVLOG_H +#define SRC_CORE_NEON_WRAPPER_INTRINSICS_SVLOG_H +#if defined(__ARM_FEATURE_SVE) +#include "src/core/NEON/SVEMath.h" +#include + +namespace arm_compute +{ +namespace wrapper +{ +#define SVLOG_IMPL(vtype, postfix) \ + inline vtype svlog_z(svbool_t pg, const vtype &a) \ + { \ + return svlog_##postfix##_z(pg, a); \ + } + +SVLOG_IMPL(svfloat32_t, f32) +SVLOG_IMPL(svfloat16_t, f16) + +#undef SVLOG_IMPL +} // namespace wrapper +} // namespace arm_compute +#endif /* defined(__ARM_FEATURE_SVE) */ +#endif /* SRC_CORE_NEON_WRAPPER_INTRINSICS_SVLOG_H */ \ No newline at end of file diff --git a/src/core/NEON/wrapper/intrinsics/svpow.h b/src/core/NEON/wrapper/intrinsics/svpow.h new file mode 100644 index 0000000000..e89a4ab8f6 --- /dev/null +++ b/src/core/NEON/wrapper/intrinsics/svpow.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef SRC_CORE_NEON_WRAPPER_INTRINSICS_SVPOW_H +#define SRC_CORE_NEON_WRAPPER_INTRINSICS_SVPOW_H +#if defined(__ARM_FEATURE_SVE) +#include "src/core/NEON/SVEMath.h" +namespace arm_compute +{ +namespace wrapper +{ +#define SVPOW_Z_IMPL(type, postfix) \ + inline type svpow_z(svbool_t pg, const type &a, const type &b) \ + { \ + return svpow_##postfix##_z(pg, a, b); \ + } + +SVPOW_Z_IMPL(svfloat32_t, f32) +SVPOW_Z_IMPL(svfloat16_t, f16) + +#undef SVPOW_Z_IMPL + +} // namespace wrapper +} // namespace arm_compute + +#endif /* defined(__ARM_FEATURE_SVE) */ +#endif /* SRC_CORE_NEON_WRAPPER_INTRINSICS_SVPOW_H */ \ No newline at end of file diff --git a/src/core/NEON/wrapper/intrinsics/svptrue.h b/src/core/NEON/wrapper/intrinsics/svptrue.h new file mode 100644 index 0000000000..53407e5301 --- /dev/null +++ b/src/core/NEON/wrapper/intrinsics/svptrue.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef SRC_CORE_NEON_WRAPPER_INTRINSICS_SVPTRUE_H +#define SRC_CORE_NEON_WRAPPER_INTRINSICS_SVPTRUE_H +#if defined(__ARM_FEATURE_SVE) +#include +namespace arm_compute +{ +namespace wrapper +{ +template +inline svbool_t svptrue_size(); + +template <> +inline svbool_t svptrue_size<64>() +{ + return svptrue_b64(); +} + +template <> +inline svbool_t svptrue_size<32>() +{ + return svptrue_b32(); +} + +template <> +inline svbool_t svptrue_size<16>() +{ + return svptrue_b16(); +} + +template <> +inline svbool_t svptrue_size<8>() +{ + return svptrue_b8(); +} + +template +svbool_t svptrue() +{ + return svptrue_size(); +} +} // namespace wrapper +} // namespace arm_compute + +#endif /* defined(__ARM_FEATURE_SVE) */ +#endif /* SRC_CORE_NEON_WRAPPER_INTRINSICS_SVPTRUE_H */ \ No newline at end of file diff --git a/src/core/NEON/wrapper/intrinsics/svreinterpret.h b/src/core/NEON/wrapper/intrinsics/svreinterpret.h new file mode 100644 index 0000000000..e98742676d --- /dev/null +++ b/src/core/NEON/wrapper/intrinsics/svreinterpret.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef SRC_CORE_NEON_WRAPPER_INTRINSICS_SVREINTERPRET_H +#define SRC_CORE_NEON_WRAPPER_INTRINSICS_SVREINTERPRET_H +#if defined(__ARM_FEATURE_SVE) +#include +namespace arm_compute +{ +namespace wrapper +{ +#define SVREINTERPRET_TO_F32_IMPL(vtype) \ + template \ + inline typename std::enable_if::value, svfloat32_t>::type svreinterpret(const vtype &a) \ + { \ + return svreinterpret_f32(a); \ + } + +SVREINTERPRET_TO_F32_IMPL(svuint32_t) +#undef SVREINTERPRET_TO_F32_IMPL + +#define SVREINTERPRET_TO_U32_IMPL(vtype) \ + template \ + inline typename std::enable_if::value, svuint32_t>::type svreinterpret(const vtype &a) \ + { \ + return svreinterpret_u32(a); \ + } + +SVREINTERPRET_TO_U32_IMPL(svint32_t) +SVREINTERPRET_TO_U32_IMPL(svfloat32_t) +#undef SVREINTERPRET_TO_U32_IMPL + +} // namespace wrapper +} // namespace arm_compute + +#endif /* defined(__ARM_FEATURE_SVE) */ +#endif /* SRC_CORE_NEON_WRAPPER_INTRINSICS_SVREINTERPRET_H */ \ No newline at end of file diff --git a/src/core/NEON/wrapper/intrinsics/svsin.h b/src/core/NEON/wrapper/intrinsics/svsin.h new file mode 100644 index 0000000000..05d88d0250 --- /dev/null +++ b/src/core/NEON/wrapper/intrinsics/svsin.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef SRC_CORE_NEON_WRAPPER_INTRINSICS_SVSIN_H +#define SRC_CORE_NEON_WRAPPER_INTRINSICS_SVSIN_H +#if defined(__ARM_FEATURE_SVE) +#include "src/core/NEON/SVEMath.h" +namespace arm_compute +{ +namespace wrapper +{ +#define SVSIN_Z_IMPL(type, postfix) \ + inline type svsin_z(svbool_t pg, const type &val) \ + { \ + return svsin_##postfix##_z(pg, val); \ + } + +SVSIN_Z_IMPL(svfloat32_t, f32) +SVSIN_Z_IMPL(svfloat16_t, f16) + +#undef SVSIN_Z_IMPL + +} // namespace wrapper +} // namespace arm_compute + +#endif /* defined(__ARM_FEATURE_SVE) */ +#endif /* SRC_CORE_NEON_WRAPPER_INTRINSICS_SVSIN_H */ \ No newline at end of file diff --git a/src/core/NEON/wrapper/intrinsics/svwhilelt.h b/src/core/NEON/wrapper/intrinsics/svwhilelt.h new file mode 100644 index 0000000000..ef58217dc4 --- /dev/null +++ b/src/core/NEON/wrapper/intrinsics/svwhilelt.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef SRC_CORE_NEON_WRAPPER_INTRINSICS_SVWHILELT_H +#define SRC_CORE_NEON_WRAPPER_INTRINSICS_SVWHILELT_H +#if defined(__ARM_FEATURE_SVE) +#include +namespace arm_compute +{ +namespace wrapper +{ +#define SVWHILELT_IMPL(type) \ + template \ + inline svbool_t svwhilelt_size(type a, type b); \ + \ + template <> \ + inline svbool_t svwhilelt_size<64>(type a, type b) \ + { \ + return svwhilelt_b64(a, b); \ + } \ + template <> \ + inline svbool_t svwhilelt_size<32>(type a, type b) \ + { \ + return svwhilelt_b32(a, b); \ + } \ + template <> \ + inline svbool_t svwhilelt_size<16>(type a, type b) \ + { \ + return svwhilelt_b16(a, b); \ + } \ + template <> \ + inline svbool_t svwhilelt_size<8>(type a, type b) \ + { \ + return svwhilelt_b8(a, b); \ + } + +SVWHILELT_IMPL(int32_t) +SVWHILELT_IMPL(uint32_t) +SVWHILELT_IMPL(int64_t) +SVWHILELT_IMPL(uint64_t) + +#undef SVWHILELT_IMPL + +template +inline svbool_t svwhilelt(IndexType a, IndexType b) +{ + return svwhilelt_size(a, b); +} +} // namespace wrapper +} // namespace arm_compute + +#endif /* defined(__ARM_FEATURE_SVE) */ +#endif /* SRC_CORE_NEON_WRAPPER_INTRINSICS_SVWHILELT_H */ \ No newline at end of file -- cgit v1.2.1