diff options
Diffstat (limited to 'src/core/NEON/SVEAsymm.h')
-rw-r--r-- | src/core/NEON/SVEAsymm.h | 81 |
1 file changed, 36 insertions, 45 deletions
diff --git a/src/core/NEON/SVEAsymm.h b/src/core/NEON/SVEAsymm.h index 4b0ecd9eea..a448cde475 100644 --- a/src/core/NEON/SVEAsymm.h +++ b/src/core/NEON/SVEAsymm.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020 Arm Limited. + * Copyright (c) 2020-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,8 +24,9 @@ #ifndef ARM_COMPUTE_SVEASYMM_H #define ARM_COMPUTE_SVEASYMM_H -#if defined(__ARM_FEATURE_SVE2) +#if defined(ARM_COMPUTE_ENABLE_SVE2) #include "src/core/NEON/SVEMath.h" + #include <arm_sve.h> namespace arm_compute @@ -67,18 +68,21 @@ svint8_t svmla_qasymm8_signed_z(svbool_t pg, svint8_t vd, svfloat32_t vs, svfloa */ inline svfloat32x4_t svdequantize_z(svbool_t pg, const svuint8_t &qv, float scale, int32_t offset) { - const auto voffset = svdup_n_s32(offset); - const auto vscale = svdup_n_f32(scale); - const svfloat32x4_t vdequantized_input = - { - { { - svmul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svreinterpret_s32_u32(svmovlb_u32(svmovlb_u16(qv))), voffset)), vscale), - svmul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svreinterpret_s32_u32(svmovlt_u32(svmovlb_u16(qv))), voffset)), vscale), - svmul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svreinterpret_s32_u32(svmovlb_u32(svmovlt_u16(qv))), voffset)), vscale), - svmul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svreinterpret_s32_u32(svmovlt_u32(svmovlt_u16(qv))), voffset)), vscale), - } - } - }; + const auto voffset = svdup_n_s32(offset); + const auto vscale = svdup_n_f32(scale); + const svfloat32x4_t vdequantized_input = svcreate4_f32( + svmul_f32_z(pg, + svcvt_f32_s32_z(pg, svsub_s32_z(pg, svreinterpret_s32_u32(svmovlb_u32(svmovlb_u16(qv))), voffset)), + vscale), + svmul_f32_z(pg, + svcvt_f32_s32_z(pg, svsub_s32_z(pg, svreinterpret_s32_u32(svmovlt_u32(svmovlb_u16(qv))), voffset)), + vscale), + svmul_f32_z(pg, + svcvt_f32_s32_z(pg, svsub_s32_z(pg, svreinterpret_s32_u32(svmovlb_u32(svmovlt_u16(qv))), voffset)), + vscale), + svmul_f32_z(pg, + svcvt_f32_s32_z(pg, svsub_s32_z(pg, 
svreinterpret_s32_u32(svmovlt_u32(svmovlt_u16(qv))), voffset)), + vscale)); return vdequantized_input; } @@ -106,18 +110,14 @@ inline svfloat32x4_t svdequantize_z(svbool_t pg, const svuint8_t &qv, const Unif */ inline svfloat32x4_t svdequantize_z(svbool_t pg, const svint8_t &qv, float scale, int32_t offset) { - const auto voffset = svdup_n_s32(offset); - const auto vscale = svdup_n_f32(scale); - const svfloat32x4_t vdequantized_input = - { - { { - svmul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svmovlb_s32(svmovlb_s16(qv)), voffset)), vscale), - svmul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svmovlt_s32(svmovlb_s16(qv)), voffset)), vscale), - svmul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svmovlb_s32(svmovlt_s16(qv)), voffset)), vscale), - svmul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svmovlt_s32(svmovlt_s16(qv)), voffset)), vscale), - } - } - }; + const auto voffset = svdup_n_s32(offset); + const auto vscale = svdup_n_f32(scale); + const svfloat32x4_t vdequantized_input = svcreate4_f32( + svmul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svmovlb_s32(svmovlb_s16(qv)), voffset)), vscale), + svmul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svmovlt_s32(svmovlb_s16(qv)), voffset)), vscale), + svmul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svmovlb_s32(svmovlt_s16(qv)), voffset)), vscale), + svmul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svmovlt_s32(svmovlt_s16(qv)), voffset)), vscale)); + return vdequantized_input; } @@ -145,15 +145,11 @@ inline svfloat32x4_t svdequantize_z(svbool_t pg, const svint8_t &qv, const Unifo inline svfloat32x4_t svdequantize_z(svbool_t pg, const svint8_t &qv, const svfloat32x4_t vscale) { const svfloat32x4_t vdequantized_input = - { - { { - svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlb_s32(svmovlb_s16(qv))), svget4_f32(vscale, 0)), - svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlt_s32(svmovlb_s16(qv))), svget4_f32(vscale, 1)), - svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlb_s32(svmovlt_s16(qv))), svget4_f32(vscale, 
2)), - svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlt_s32(svmovlt_s16(qv))), svget4_f32(vscale, 3)), - } - } - }; + svcreate4_f32(svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlb_s32(svmovlb_s16(qv))), svget4_f32(vscale, 0)), + svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlt_s32(svmovlb_s16(qv))), svget4_f32(vscale, 1)), + svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlb_s32(svmovlt_s16(qv))), svget4_f32(vscale, 2)), + svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlt_s32(svmovlt_s16(qv))), svget4_f32(vscale, 3))); + return vdequantized_input; } @@ -168,15 +164,10 @@ inline svfloat32x4_t svdequantize_z(svbool_t pg, const svint8_t &qv, float scale { const auto vscale = svdup_n_f32(scale); const svfloat32x4_t vdequantized_input = - { - { { - svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlb_s32(svmovlb_s16(qv))), vscale), - svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlt_s32(svmovlb_s16(qv))), vscale), - svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlb_s32(svmovlt_s16(qv))), vscale), - svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlt_s32(svmovlt_s16(qv))), vscale), - } - } - }; + svcreate4_f32(svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlb_s32(svmovlb_s16(qv))), vscale), + svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlt_s32(svmovlb_s16(qv))), vscale), + svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlb_s32(svmovlt_s16(qv))), vscale), + svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlt_s32(svmovlt_s16(qv))), vscale)); return vdequantized_input; } @@ -258,5 +249,5 @@ inline svuint16x2_t svquantize_qasymm16_z(svbool_t pg, const svfloat32x4_t qv, c } } // namespace arm_compute #include "src/core/NEON/SVEAsymm.inl" -#endif /* defined(__ARM_FEATURE_SVE2) */ +#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */ #endif // ARM_COMPUTE_NEASYMM_H |