aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON
diff options
context:
space:
mode:
authorMichalis Spyrou <michalis.spyrou@arm.com>2022-05-03 17:15:42 +0100
committerMichalis Spyrou <michalis.spyrou@arm.com>2022-05-06 12:14:26 +0000
commit168d6a83b8c5c66c6a961c2b9d747685319b56dc (patch)
tree783b8c8c50e1227a3e4e84b7e0b927f263a8af4f /src/core/NEON
parentbc5c407b52c2ea3762b7efa526a971d8180d25c7 (diff)
downloadComputeLibrary-168d6a83b8c5c66c6a961c2b9d747685319b56dc.tar.gz
Use svcreate instead of list initializations.
Partially resolves COMPMID-5250 when building with SVE2. Change-Id: I16bd74d4cd6c70371efd8235c507ba5e7f8f906f Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/7498 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Pablo Marquez Tello <pablo.tello@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON')
-rw-r--r--src/core/NEON/SVEAsymm.h74
-rw-r--r--src/core/NEON/SVESymm.h32
2 files changed, 38 insertions, 68 deletions
diff --git a/src/core/NEON/SVEAsymm.h b/src/core/NEON/SVEAsymm.h
index 40b8e64b67..eea2627c62 100644
--- a/src/core/NEON/SVEAsymm.h
+++ b/src/core/NEON/SVEAsymm.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2021 Arm Limited.
+ * Copyright (c) 2020-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -67,18 +67,13 @@ svint8_t svmla_qasymm8_signed_z(svbool_t pg, svint8_t vd, svfloat32_t vs, svfloa
*/
inline svfloat32x4_t svdequantize_z(svbool_t pg, const svuint8_t &qv, float scale, int32_t offset)
{
- const auto voffset = svdup_n_s32(offset);
- const auto vscale = svdup_n_f32(scale);
- const svfloat32x4_t vdequantized_input =
- {
- { {
- svmul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svreinterpret_s32_u32(svmovlb_u32(svmovlb_u16(qv))), voffset)), vscale),
- svmul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svreinterpret_s32_u32(svmovlt_u32(svmovlb_u16(qv))), voffset)), vscale),
- svmul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svreinterpret_s32_u32(svmovlb_u32(svmovlt_u16(qv))), voffset)), vscale),
- svmul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svreinterpret_s32_u32(svmovlt_u32(svmovlt_u16(qv))), voffset)), vscale),
- }
- }
- };
+ const auto voffset = svdup_n_s32(offset);
+ const auto vscale = svdup_n_f32(scale);
+ const svfloat32x4_t vdequantized_input = svcreate4_f32(
+ svmul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svreinterpret_s32_u32(svmovlb_u32(svmovlb_u16(qv))), voffset)), vscale),
+ svmul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svreinterpret_s32_u32(svmovlt_u32(svmovlb_u16(qv))), voffset)), vscale),
+ svmul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svreinterpret_s32_u32(svmovlb_u32(svmovlt_u16(qv))), voffset)), vscale),
+ svmul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svreinterpret_s32_u32(svmovlt_u32(svmovlt_u16(qv))), voffset)), vscale));
return vdequantized_input;
}
@@ -106,18 +101,14 @@ inline svfloat32x4_t svdequantize_z(svbool_t pg, const svuint8_t &qv, const Unif
*/
inline svfloat32x4_t svdequantize_z(svbool_t pg, const svint8_t &qv, float scale, int32_t offset)
{
- const auto voffset = svdup_n_s32(offset);
- const auto vscale = svdup_n_f32(scale);
- const svfloat32x4_t vdequantized_input =
- {
- { {
- svmul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svmovlb_s32(svmovlb_s16(qv)), voffset)), vscale),
- svmul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svmovlt_s32(svmovlb_s16(qv)), voffset)), vscale),
- svmul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svmovlb_s32(svmovlt_s16(qv)), voffset)), vscale),
- svmul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svmovlt_s32(svmovlt_s16(qv)), voffset)), vscale),
- }
- }
- };
+ const auto voffset = svdup_n_s32(offset);
+ const auto vscale = svdup_n_f32(scale);
+ const svfloat32x4_t vdequantized_input = svcreate4_f32(
+ svmul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svmovlb_s32(svmovlb_s16(qv)), voffset)), vscale),
+ svmul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svmovlt_s32(svmovlb_s16(qv)), voffset)), vscale),
+ svmul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svmovlb_s32(svmovlt_s16(qv)), voffset)), vscale),
+ svmul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svmovlt_s32(svmovlt_s16(qv)), voffset)), vscale));
+
return vdequantized_input;
}
@@ -144,16 +135,12 @@ inline svfloat32x4_t svdequantize_z(svbool_t pg, const svint8_t &qv, const Unifo
*/
inline svfloat32x4_t svdequantize_z(svbool_t pg, const svint8_t &qv, const svfloat32x4_t vscale)
{
- const svfloat32x4_t vdequantized_input =
- {
- { {
- svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlb_s32(svmovlb_s16(qv))), svget4_f32(vscale, 0)),
- svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlt_s32(svmovlb_s16(qv))), svget4_f32(vscale, 1)),
- svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlb_s32(svmovlt_s16(qv))), svget4_f32(vscale, 2)),
- svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlt_s32(svmovlt_s16(qv))), svget4_f32(vscale, 3)),
- }
- }
- };
+ const svfloat32x4_t vdequantized_input = svcreate4_f32(
+ svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlb_s32(svmovlb_s16(qv))), svget4_f32(vscale, 0)),
+ svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlt_s32(svmovlb_s16(qv))), svget4_f32(vscale, 1)),
+ svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlb_s32(svmovlt_s16(qv))), svget4_f32(vscale, 2)),
+ svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlt_s32(svmovlt_s16(qv))), svget4_f32(vscale, 3)));
+
return vdequantized_input;
}
@@ -166,17 +153,12 @@ inline svfloat32x4_t svdequantize_z(svbool_t pg, const svint8_t &qv, const svflo
*/
inline svfloat32x4_t svdequantize_z(svbool_t pg, const svint8_t &qv, float scale)
{
- const auto vscale = svdup_n_f32(scale);
- const svfloat32x4_t vdequantized_input =
- {
- { {
- svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlb_s32(svmovlb_s16(qv))), vscale),
- svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlt_s32(svmovlb_s16(qv))), vscale),
- svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlb_s32(svmovlt_s16(qv))), vscale),
- svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlt_s32(svmovlt_s16(qv))), vscale),
- }
- }
- };
+ const auto vscale = svdup_n_f32(scale);
+ const svfloat32x4_t vdequantized_input = svcreate4_f32(
+ svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlb_s32(svmovlb_s16(qv))), vscale),
+ svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlt_s32(svmovlb_s16(qv))), vscale),
+ svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlb_s32(svmovlt_s16(qv))), vscale),
+ svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlt_s32(svmovlt_s16(qv))), vscale));
return vdequantized_input;
}
diff --git a/src/core/NEON/SVESymm.h b/src/core/NEON/SVESymm.h
index c71d273b67..6808577681 100644
--- a/src/core/NEON/SVESymm.h
+++ b/src/core/NEON/SVESymm.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2021 Arm Limited.
+ * Copyright (c) 2020-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -42,15 +42,8 @@ namespace arm_compute
*/
inline svfloat32x2_t svdequantize_qsymm16_z(svbool_t pg, const svint16_t &qv, float scale)
{
- const auto vscale = svdup_n_f32(scale);
- const svfloat32x2_t vdequantized_input =
- {
- { {
- svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlb_s32(qv)), vscale),
- svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlt_s32(qv)), vscale)
- }
- }
- };
+ const auto vscale = svdup_n_f32(scale);
+ const svfloat32x2_t vdequantized_input = svcreate2_f32(svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlb_s32(qv)), vscale), svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlt_s32(qv)), vscale));
return vdequantized_input;
}
@@ -83,18 +76,13 @@ inline svint16_t svquantize_qsymm16_z(svbool_t pg, const svfloat32x2_t qv, float
*/
inline svfloat32x4_t svdequantize_z(svbool_t pg, const svint16x2_t qv, const UniformQuantizationInfo &qi)
{
- const float scale = qi.scale;
- const auto vscale = svdup_n_f32(scale);
- const svfloat32x4_t vdequantized_input =
- {
- { {
- svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlb_s32(svget2_s16(qv, 0))), vscale),
- svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlt_s32(svget2_s16(qv, 0))), vscale),
- svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlb_s32(svget2_s16(qv, 1))), vscale),
- svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlt_s32(svget2_s16(qv, 1))), vscale),
- }
- }
- };
+ const float scale = qi.scale;
+ const auto vscale = svdup_n_f32(scale);
+ const svfloat32x4_t vdequantized_input = svcreate4_f32(
+ svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlb_s32(svget2_s16(qv, 0))), vscale),
+ svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlt_s32(svget2_s16(qv, 0))), vscale),
+ svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlb_s32(svget2_s16(qv, 1))), vscale),
+ svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlt_s32(svget2_s16(qv, 1))), vscale));
return vdequantized_input;
}