diff options
author | Sang-Hoon Park <sang-hoon.park@arm.com> | 2020-12-08 18:50:56 +0000 |
---|---|---|
committer | Sang-Hoon Park <sang-hoon.park@arm.com> | 2021-01-07 13:11:57 +0000 |
commit | af1870b38bd4f86ccbb4152a506586afd6c64e02 (patch) | |
tree | 75e99bdda471dd44b986c689b6bd5799bc36b0d8 /src/core/NEON/SVEMath.inl | |
parent | 7e5b7bfc06c0bd8aecd809817866733c4fdf07fe (diff) | |
download | ComputeLibrary-af1870b38bd4f86ccbb4152a506586afd6c64e02.tar.gz |
Add SVE support to elementwise unary kernels
It also includes decoupling of kernels using different
data types.
Partially implements: COMPMID-3872
Change-Id: I226cb9e55a5d9f8a0c63e37631f087af45f2d640
Signed-off-by: Sang-Hoon Park <sang-hoon.park@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4711
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Diffstat (limited to 'src/core/NEON/SVEMath.inl')
-rw-r--r-- | src/core/NEON/SVEMath.inl | 6 |
1 files changed, 3 insertions, 3 deletions
diff --git a/src/core/NEON/SVEMath.inl b/src/core/NEON/SVEMath.inl index fbf90f9b04..f201e92738 100644 --- a/src/core/NEON/SVEMath.inl +++ b/src/core/NEON/SVEMath.inl @@ -308,15 +308,15 @@ inline svfloat16_t svpow_f16_z(svbool_t pg, svfloat16_t a, svfloat16_t b) #if defined(__ARM_FEATURE_SVE2) auto pg_top = pg; auto a_top = svcvtlt_f32_x(pg, a); - auto b_top = svcvtlt_f32_x(pg, b) + auto b_top = svcvtlt_f32_x(pg, b); #else /* defined(__ARM_FEATURE_SVE2) */ auto pg_top = svptrue_b16(); auto a_top = svcvt_f32_z(pg_top, svreinterpret_f16(svrevh_z(svptrue_b16(), svreinterpret_u32(a)))); auto b_top = svcvt_f32_z(pg_top, svreinterpret_f16(svrevh_z(svptrue_b16(), svreinterpret_u32(b)))); #endif /* defined(__ARM_FEATURE_SVE2) */ - auto res_bottom = svpow_f32_z(pg, a_bottom, b_bottom); - auto res_top = svpow_f32_z(pg_top, a_top, b_top); + auto res_bottom = svpow_f32_z(pg, a_bottom, b_bottom); + auto res_top = svpow_f32_z(pg_top, a_top, b_top); #if defined(__ARM_FEATURE_SVE2) return svcvtnt_f16_m(svcvt_f16_z(pg, res_bottom), pg_top, res_top); |