From 32e0646234e5900f0b0cfa00ddac46bbf4cabb04 Mon Sep 17 00:00:00 2001 From: Sang-Hoon Park Date: Tue, 23 Mar 2021 15:34:04 +0000 Subject: Fix indexing of SVE ArithmeticAddition kernel The kernel used the wrong index when different data types with different widths are used as sources. The increment of the index for the loop inside the kernel and the offset of that index to load multiple source vectors have been corrected. Resolves: COMPMID-4303 Change-Id: Ib1ad431dc80c937d7f19bafe5cb57fc52b6f3735 Signed-off-by: Sang-Hoon Park Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5304 Reviewed-by: Michele Di Giorgio Reviewed-by: Pablo Marquez Tello Reviewed-by: Georgios Pinitas Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins --- src/core/cpu/kernels/add/sve/integer.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src/core') diff --git a/src/core/cpu/kernels/add/sve/integer.cpp b/src/core/cpu/kernels/add/sve/integer.cpp index 5bd2e12665..ae74bfa3eb 100644 --- a/src/core/cpu/kernels/add/sve/integer.cpp +++ b/src/core/cpu/kernels/add/sve/integer.cpp @@ -154,9 +154,9 @@ void add_s16_u8_s16_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, const auto vsrc1_0 = svreinterpret_s16_u16(svunpklo(vsrc1_u8)); const auto vsrc1_1 = svreinterpret_s16_u16(svunpkhi(vsrc1_u8)); svst1_s16(pg_0, output_ptr + x, svadd_s16_z(pg_0, vsrc0_0, vsrc1_0)); - svst1_s16(pg_1, output_ptr + x, svadd_s16_z(pg_1, vsrc0_1, vsrc1_1)); + svst1_s16(pg_1, output_ptr + x + svcnth(), svadd_s16_z(pg_1, vsrc0_1, vsrc1_1)); - x += svcnth(); + x += svcntb(); pg_u = svwhilelt_b8(x, window_end_x); pg_0 = svwhilelt_b16(x, window_end_x); pg_1 = svwhilelt_b16(x + static_cast<int>(svcnth()), window_end_x); @@ -172,15 +172,15 @@ void add_s16_u8_s16_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, do { const auto vsrc0_0 = svld1_s16(pg_0, input1_ptr + x); - const auto vsrc0_1 = svld1_s16(pg_1, input1_ptr + x); + const auto vsrc0_1 = svld1_s16(pg_1, input1_ptr +
x + svcnth()); const auto vsrc1_u8 = svld1_u8(pg_u, input2_ptr + x); const auto vsrc1_0 = svreinterpret_s16_u16(svunpklo(vsrc1_u8)); const auto vsrc1_1 = svreinterpret_s16_u16(svunpkhi(vsrc1_u8)); svst1_s16(pg_0, output_ptr + x, svqadd(vsrc0_0, vsrc1_0)); - svst1_s16(pg_1, output_ptr + x, svqadd(vsrc0_1, vsrc1_1)); + svst1_s16(pg_1, output_ptr + x + svcnth(), svqadd(vsrc0_1, vsrc1_1)); - x += svcnth(); + x += svcntb(); pg_u = svwhilelt_b8(x, window_end_x); pg_0 = svwhilelt_b16(x, window_end_x); pg_1 = svwhilelt_b16(x + static_cast<int>(svcnth()), window_end_x); -- cgit v1.2.1