diff options
author | Sang-Hoon Park <sang-hoon.park@arm.com> | 2021-03-23 15:34:04 +0000 |
---|---|---|
committer | Sang-Hoon Park <sang-hoon.park@arm.com> | 2021-03-24 10:32:51 +0000 |
commit | 32e0646234e5900f0b0cfa00ddac46bbf4cabb04 (patch) | |
tree | 4a2fc0842303a10ae761da7b12ec60e2797d0c12 /src/core/cpu/kernels/add/sve/integer.cpp | |
parent | b5323cf4d0de1d49057c7fca79c45d711a3efe8a (diff) | |
download | ComputeLibrary-32e0646234e5900f0b0cfa00ddac46bbf4cabb04.tar.gz |
Fix indexing of SVE ArithmeticAddition kernel
The kernel used the wrong index when data types
with different widths were used as sources.
The increment of the index for the loop inside the kernel
and the offset of that index used to load multiple source vectors
have been corrected.
Resolves: COMPMID-4303
Change-Id: Ib1ad431dc80c937d7f19bafe5cb57fc52b6f3735
Signed-off-by: Sang-Hoon Park <sang-hoon.park@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5304
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-by: Pablo Marquez Tello <pablo.tello@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/cpu/kernels/add/sve/integer.cpp')
-rw-r--r-- | src/core/cpu/kernels/add/sve/integer.cpp | 10 |
1 files changed, 5 insertions, 5 deletions
diff --git a/src/core/cpu/kernels/add/sve/integer.cpp b/src/core/cpu/kernels/add/sve/integer.cpp index 5bd2e12665..ae74bfa3eb 100644 --- a/src/core/cpu/kernels/add/sve/integer.cpp +++ b/src/core/cpu/kernels/add/sve/integer.cpp @@ -154,9 +154,9 @@ void add_s16_u8_s16_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, const auto vsrc1_0 = svreinterpret_s16_u16(svunpklo(vsrc1_u8)); const auto vsrc1_1 = svreinterpret_s16_u16(svunpkhi(vsrc1_u8)); svst1_s16(pg_0, output_ptr + x, svadd_s16_z(pg_0, vsrc0_0, vsrc1_0)); - svst1_s16(pg_1, output_ptr + x, svadd_s16_z(pg_1, vsrc0_1, vsrc1_1)); + svst1_s16(pg_1, output_ptr + x + svcnth(), svadd_s16_z(pg_1, vsrc0_1, vsrc1_1)); - x += svcnth(); + x += svcntb(); pg_u = svwhilelt_b8(x, window_end_x); pg_0 = svwhilelt_b16(x, window_end_x); pg_1 = svwhilelt_b16(x + static_cast<int>(svcnth()), window_end_x); @@ -172,15 +172,15 @@ void add_s16_u8_s16_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, do { const auto vsrc0_0 = svld1_s16(pg_0, input1_ptr + x); - const auto vsrc0_1 = svld1_s16(pg_1, input1_ptr + x); + const auto vsrc0_1 = svld1_s16(pg_1, input1_ptr + x + svcnth()); const auto vsrc1_u8 = svld1_u8(pg_u, input2_ptr + x); const auto vsrc1_0 = svreinterpret_s16_u16(svunpklo(vsrc1_u8)); const auto vsrc1_1 = svreinterpret_s16_u16(svunpkhi(vsrc1_u8)); svst1_s16(pg_0, output_ptr + x, svqadd(vsrc0_0, vsrc1_0)); - svst1_s16(pg_1, output_ptr + x, svqadd(vsrc0_1, vsrc1_1)); + svst1_s16(pg_1, output_ptr + x + svcnth(), svqadd(vsrc0_1, vsrc1_1)); - x += svcnth(); + x += svcntb(); pg_u = svwhilelt_b8(x, window_end_x); pg_0 = svwhilelt_b16(x, window_end_x); pg_1 = svwhilelt_b16(x + static_cast<int>(svcnth()), window_end_x); |