aboutsummaryrefslogtreecommitdiff
path: root/src/core
diff options
context:
space:
mode:
author Sang-Hoon Park <sang-hoon.park@arm.com> 2021-03-23 15:34:04 +0000
committer Sang-Hoon Park <sang-hoon.park@arm.com> 2021-03-24 10:32:51 +0000
commit 32e0646234e5900f0b0cfa00ddac46bbf4cabb04 (patch)
tree 4a2fc0842303a10ae761da7b12ec60e2797d0c12 /src/core
parent b5323cf4d0de1d49057c7fca79c45d711a3efe8a (diff)
download ComputeLibrary-32e0646234e5900f0b0cfa00ddac46bbf4cabb04.tar.gz
Fix indexing of SVE ArithmeticAddition kernel
The kernel used the wrong index when source data types of different widths were used. The increment of the index for the loop inside the kernel and the offset of that index used to load multiple source vectors have been corrected. Resolves: COMPMID-4303 Change-Id: Ib1ad431dc80c937d7f19bafe5cb57fc52b6f3735 Signed-off-by: Sang-Hoon Park <sang-hoon.park@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5304 Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com> Reviewed-by: Pablo Marquez Tello <pablo.tello@arm.com> Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core')
-rw-r--r-- src/core/cpu/kernels/add/sve/integer.cpp 10
1 files changed, 5 insertions, 5 deletions
diff --git a/src/core/cpu/kernels/add/sve/integer.cpp b/src/core/cpu/kernels/add/sve/integer.cpp
index 5bd2e12665..ae74bfa3eb 100644
--- a/src/core/cpu/kernels/add/sve/integer.cpp
+++ b/src/core/cpu/kernels/add/sve/integer.cpp
@@ -154,9 +154,9 @@ void add_s16_u8_s16_sve(const ITensor *src0, const ITensor *src1, ITensor *dst,
const auto vsrc1_0 = svreinterpret_s16_u16(svunpklo(vsrc1_u8));
const auto vsrc1_1 = svreinterpret_s16_u16(svunpkhi(vsrc1_u8));
svst1_s16(pg_0, output_ptr + x, svadd_s16_z(pg_0, vsrc0_0, vsrc1_0));
- svst1_s16(pg_1, output_ptr + x, svadd_s16_z(pg_1, vsrc0_1, vsrc1_1));
+ svst1_s16(pg_1, output_ptr + x + svcnth(), svadd_s16_z(pg_1, vsrc0_1, vsrc1_1));
- x += svcnth();
+ x += svcntb();
pg_u = svwhilelt_b8(x, window_end_x);
pg_0 = svwhilelt_b16(x, window_end_x);
pg_1 = svwhilelt_b16(x + static_cast<int>(svcnth()), window_end_x);
@@ -172,15 +172,15 @@ void add_s16_u8_s16_sve(const ITensor *src0, const ITensor *src1, ITensor *dst,
do
{
const auto vsrc0_0 = svld1_s16(pg_0, input1_ptr + x);
- const auto vsrc0_1 = svld1_s16(pg_1, input1_ptr + x);
+ const auto vsrc0_1 = svld1_s16(pg_1, input1_ptr + x + svcnth());
const auto vsrc1_u8 = svld1_u8(pg_u, input2_ptr + x);
const auto vsrc1_0 = svreinterpret_s16_u16(svunpklo(vsrc1_u8));
const auto vsrc1_1 = svreinterpret_s16_u16(svunpkhi(vsrc1_u8));
svst1_s16(pg_0, output_ptr + x, svqadd(vsrc0_0, vsrc1_0));
- svst1_s16(pg_1, output_ptr + x, svqadd(vsrc0_1, vsrc1_1));
+ svst1_s16(pg_1, output_ptr + x + svcnth(), svqadd(vsrc0_1, vsrc1_1));
- x += svcnth();
+ x += svcntb();
pg_u = svwhilelt_b8(x, window_end_x);
pg_0 = svwhilelt_b16(x, window_end_x);
pg_1 = svwhilelt_b16(x + static_cast<int>(svcnth()), window_end_x);