diff options
author | Viet-Hoa Do <viet-hoa.do@arm.com> | 2022-06-15 16:47:17 +0100 |
---|---|---|
committer | Viet-Hoa Do <viet-hoa.do@arm.com> | 2022-06-16 14:41:40 +0000 |
commit | f1f777998b83289b868c86681d5f99b10119bee6 (patch) | |
tree | 16f8657d110d703fdf138493950ff72fd2b592d7 /src/cpu/kernels/softmax/generic/sve2/impl.cpp | |
parent | 586fff9f1e30ad06b52aec3332f956cc25cfe56f (diff) | |
download | ComputeLibrary-f1f777998b83289b868c86681d5f99b10119bee6.tar.gz |
Fix SVE2 implementation of quantized SoftMax 1D
* Fix integer overflow in subtraction step.
* Fix incorrect vector used when converting the result to qasymm8_signed.
Resolves: COMPMID-5389
Signed-off-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Change-Id: Id745f2eb2a1b0823b02b136560351b5f8fb85624
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/7738
Reviewed-by: Pablo Marquez Tello <pablo.tello@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/cpu/kernels/softmax/generic/sve2/impl.cpp')
-rw-r--r-- | src/cpu/kernels/softmax/generic/sve2/impl.cpp | 20 |
1 files changed, 10 insertions, 10 deletions
diff --git a/src/cpu/kernels/softmax/generic/sve2/impl.cpp b/src/cpu/kernels/softmax/generic/sve2/impl.cpp index 9cdfe61446..8f677c62d4 100644 --- a/src/cpu/kernels/softmax/generic/sve2/impl.cpp +++ b/src/cpu/kernels/softmax/generic/sve2/impl.cpp @@ -80,13 +80,13 @@ void sve2_softmax_logits_1d_quantized(const ITensor *in, const ITensor *max, voi svbool_t pg_3 = svunpkhi(svunpkhi(pg)); do { - auto vec_elements = svld1(pg, in_ptr + x); - vec_elements = svsub_z(pg, vec_max, vec_elements); + const auto vec_elements = svld1(pg, in_ptr + x); + const auto vec_elements_sub = svreinterpret_u8(svsub_z(pg, vec_max, vec_elements)); - auto vec_elements_flt_0 = svcvt_f32_z(pg_0, svunpklo(svunpklo(vec_elements))); - auto vec_elements_flt_1 = svcvt_f32_z(pg_1, svunpkhi(svunpklo(vec_elements))); - auto vec_elements_flt_2 = svcvt_f32_z(pg_2, svunpklo(svunpkhi(vec_elements))); - auto vec_elements_flt_3 = svcvt_f32_z(pg_3, svunpkhi(svunpkhi(vec_elements))); + auto vec_elements_flt_0 = svcvt_f32_z(pg_0, svunpklo(svunpklo(vec_elements_sub))); + auto vec_elements_flt_1 = svcvt_f32_z(pg_1, svunpkhi(svunpklo(vec_elements_sub))); + auto vec_elements_flt_2 = svcvt_f32_z(pg_2, svunpklo(svunpkhi(vec_elements_sub))); + auto vec_elements_flt_3 = svcvt_f32_z(pg_3, svunpkhi(svunpkhi(vec_elements_sub))); if(is_log) { @@ -180,10 +180,10 @@ void sve2_softmax_logits_1d_quantized(const ITensor *in, const ITensor *max, voi if(is_qasymm8_signed) { const auto offset_vec = svdup_n_f32(128.f); - res_0 = svsub_z(pg_0, vec_in_0, offset_vec); - res_1 = svsub_z(pg_1, vec_in_1, offset_vec); - res_2 = svsub_z(pg_2, vec_in_2, offset_vec); - res_3 = svsub_z(pg_3, vec_in_3, offset_vec); + res_0 = svsub_z(pg_0, res_0, offset_vec); + res_1 = svsub_z(pg_1, res_1, offset_vec); + res_2 = svsub_z(pg_2, res_2, offset_vec); + res_3 = svsub_z(pg_3, res_3, offset_vec); } } |