aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/SVEAsymm.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/NEON/SVEAsymm.h')
-rw-r--r--src/core/NEON/SVEAsymm.h81
1 files changed, 36 insertions, 45 deletions
diff --git a/src/core/NEON/SVEAsymm.h b/src/core/NEON/SVEAsymm.h
index 4b0ecd9eea..a448cde475 100644
--- a/src/core/NEON/SVEAsymm.h
+++ b/src/core/NEON/SVEAsymm.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020 Arm Limited.
+ * Copyright (c) 2020-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,8 +24,9 @@
#ifndef ARM_COMPUTE_SVEASYMM_H
#define ARM_COMPUTE_SVEASYMM_H
-#if defined(__ARM_FEATURE_SVE2)
+#if defined(ARM_COMPUTE_ENABLE_SVE2)
#include "src/core/NEON/SVEMath.h"
+
#include <arm_sve.h>
namespace arm_compute
@@ -67,18 +68,21 @@ svint8_t svmla_qasymm8_signed_z(svbool_t pg, svint8_t vd, svfloat32_t vs, svfloa
*/
inline svfloat32x4_t svdequantize_z(svbool_t pg, const svuint8_t &qv, float scale, int32_t offset)
{
// Diff note: the removed ('-') and added ('+') versions below evaluate the same four
// expressions; the change only swaps the nested brace initializer of svfloat32x4_t
// for a call to svcreate4_f32.
//
// Each tuple element is (widened qv - offset), converted to f32 and multiplied by scale,
// with every arithmetic step predicated on pg (the _z intrinsic forms).
// The unsigned input is widened in two steps with svmovlb/svmovlt (inner call first,
// then the outer call) and reinterpreted as s32 before the subtraction.
// Element order, written as (outer, inner): (lb, lb), (lt, lb), (lb, lt), (lt, lt).
- const auto voffset = svdup_n_s32(offset);
- const auto vscale = svdup_n_f32(scale);
- const svfloat32x4_t vdequantized_input =
- {
- { {
- svmul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svreinterpret_s32_u32(svmovlb_u32(svmovlb_u16(qv))), voffset)), vscale),
- svmul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svreinterpret_s32_u32(svmovlt_u32(svmovlb_u16(qv))), voffset)), vscale),
- svmul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svreinterpret_s32_u32(svmovlb_u32(svmovlt_u16(qv))), voffset)), vscale),
- svmul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svreinterpret_s32_u32(svmovlt_u32(svmovlt_u16(qv))), voffset)), vscale),
- }
- }
- };
+ const auto voffset = svdup_n_s32(offset);
+ const auto vscale = svdup_n_f32(scale);
+ const svfloat32x4_t vdequantized_input = svcreate4_f32(
+ svmul_f32_z(pg,
+ svcvt_f32_s32_z(pg, svsub_s32_z(pg, svreinterpret_s32_u32(svmovlb_u32(svmovlb_u16(qv))), voffset)),
+ vscale),
+ svmul_f32_z(pg,
+ svcvt_f32_s32_z(pg, svsub_s32_z(pg, svreinterpret_s32_u32(svmovlt_u32(svmovlb_u16(qv))), voffset)),
+ vscale),
+ svmul_f32_z(pg,
+ svcvt_f32_s32_z(pg, svsub_s32_z(pg, svreinterpret_s32_u32(svmovlb_u32(svmovlt_u16(qv))), voffset)),
+ vscale),
+ svmul_f32_z(pg,
+ svcvt_f32_s32_z(pg, svsub_s32_z(pg, svreinterpret_s32_u32(svmovlt_u32(svmovlt_u16(qv))), voffset)),
+ vscale));
return vdequantized_input;
}
@@ -106,18 +110,14 @@ inline svfloat32x4_t svdequantize_z(svbool_t pg, const svuint8_t &qv, const Unif
*/
inline svfloat32x4_t svdequantize_z(svbool_t pg, const svint8_t &qv, float scale, int32_t offset)
{
// Diff note: the removed ('-') and added ('+') versions below evaluate the same four
// expressions; the change swaps the nested brace initializer of svfloat32x4_t for a
// call to svcreate4_f32 and adds a blank line before the return.
//
// Each tuple element is (widened qv - offset), converted to f32 and multiplied by scale,
// with every arithmetic step predicated on pg (the _z intrinsic forms).
// The signed input is widened in two steps with svmovlb/svmovlt (inner call first,
// then the outer call), so no reinterpret is needed before the s32 subtraction.
// Element order, written as (outer, inner): (lb, lb), (lt, lb), (lb, lt), (lt, lt).
- const auto voffset = svdup_n_s32(offset);
- const auto vscale = svdup_n_f32(scale);
- const svfloat32x4_t vdequantized_input =
- {
- { {
- svmul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svmovlb_s32(svmovlb_s16(qv)), voffset)), vscale),
- svmul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svmovlt_s32(svmovlb_s16(qv)), voffset)), vscale),
- svmul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svmovlb_s32(svmovlt_s16(qv)), voffset)), vscale),
- svmul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svmovlt_s32(svmovlt_s16(qv)), voffset)), vscale),
- }
- }
- };
+ const auto voffset = svdup_n_s32(offset);
+ const auto vscale = svdup_n_f32(scale);
+ const svfloat32x4_t vdequantized_input = svcreate4_f32(
+ svmul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svmovlb_s32(svmovlb_s16(qv)), voffset)), vscale),
+ svmul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svmovlt_s32(svmovlb_s16(qv)), voffset)), vscale),
+ svmul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svmovlb_s32(svmovlt_s16(qv)), voffset)), vscale),
+ svmul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svmovlt_s32(svmovlt_s16(qv)), voffset)), vscale));
+
return vdequantized_input;
}
@@ -145,15 +145,11 @@ inline svfloat32x4_t svdequantize_z(svbool_t pg, const svint8_t &qv, const Unifo
inline svfloat32x4_t svdequantize_z(svbool_t pg, const svint8_t &qv, const svfloat32x4_t vscale)
{
// Diff note: the removed ('-') and added ('+') versions below evaluate the same four
// expressions; the change swaps the nested brace initializer of svfloat32x4_t for a
// call to svcreate4_f32 and adds a blank line before the return.
//
// No offset is subtracted in this overload: each tuple element i is the widened part
// of qv converted to f32 and multiplied by svget4_f32(vscale, i), i.e. the scale is
// supplied as one vector per output element rather than as a single scalar.
// Element order, written as (outer, inner) widening call: (lb, lb), (lt, lb), (lb, lt), (lt, lt).
const svfloat32x4_t vdequantized_input =
- {
- { {
- svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlb_s32(svmovlb_s16(qv))), svget4_f32(vscale, 0)),
- svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlt_s32(svmovlb_s16(qv))), svget4_f32(vscale, 1)),
- svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlb_s32(svmovlt_s16(qv))), svget4_f32(vscale, 2)),
- svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlt_s32(svmovlt_s16(qv))), svget4_f32(vscale, 3)),
- }
- }
- };
+ svcreate4_f32(svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlb_s32(svmovlb_s16(qv))), svget4_f32(vscale, 0)),
+ svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlt_s32(svmovlb_s16(qv))), svget4_f32(vscale, 1)),
+ svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlb_s32(svmovlt_s16(qv))), svget4_f32(vscale, 2)),
+ svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlt_s32(svmovlt_s16(qv))), svget4_f32(vscale, 3)));
+
return vdequantized_input;
}
@@ -168,15 +164,10 @@ inline svfloat32x4_t svdequantize_z(svbool_t pg, const svint8_t &qv, float scale
{
const auto vscale = svdup_n_f32(scale);
const svfloat32x4_t vdequantized_input =
- {
- { {
- svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlb_s32(svmovlb_s16(qv))), vscale),
- svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlt_s32(svmovlb_s16(qv))), vscale),
- svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlb_s32(svmovlt_s16(qv))), vscale),
- svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlt_s32(svmovlt_s16(qv))), vscale),
- }
- }
- };
+ svcreate4_f32(svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlb_s32(svmovlb_s16(qv))), vscale),
+ svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlt_s32(svmovlb_s16(qv))), vscale),
+ svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlb_s32(svmovlt_s16(qv))), vscale),
+ svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlt_s32(svmovlt_s16(qv))), vscale));
return vdequantized_input;
}
@@ -258,5 +249,5 @@ inline svuint16x2_t svquantize_qasymm16_z(svbool_t pg, const svfloat32x4_t qv, c
}
} // namespace arm_compute
#include "src/core/NEON/SVEAsymm.inl"
-#endif /* defined(__ARM_FEATURE_SVE2) */
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */
#endif // ARM_COMPUTE_SVEASYMM_H