about | summary | refs | log | tree | commit | diff
path: root/src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.cpp
diff options
context:
space:
mode:
author Giorgio Arena <giorgio.arena@arm.com> 2018-07-16 17:20:38 +0100
committer Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:54:54 +0000
commit a855af10a486c53c2271361cb87f349eca64b749 (patch)
tree b326b63bdcaf76c9620b1bbf22942d4683503a65 /src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.cpp
parent 5a3ee4f708a9e1642b0211955ff905e7b67e831d (diff)
download ComputeLibrary-a855af10a486c53c2271361cb87f349eca64b749.tar.gz
COMPMID-1401 Implement NEFullyConnectedLayer for QASYMM8
Change-Id: I0404df6d369855e2f458f2db8f26e81c80a1ee87
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/140148
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.cpp')
-rw-r--r-- src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.cpp 10
1 files changed, 8 insertions, 2 deletions
diff --git a/src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.cpp
index ee334dfca0..af84d024d5 100644
--- a/src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.cpp
@@ -193,11 +193,14 @@ void NEGEMMLowpOffsetContributionKernel::run(const Window &window, const ThreadI
Window win_vector_sum_row(collapsed_window);
win_vector_sum_row.set(Window::DimX, Window::Dimension(0, 0, 0));
win_vector_sum_row.set(Window::DimY, Window::Dimension(0, 0, 0));
+ win_vector_sum_row.set(Window::DimZ, Window::Dimension(0, 0, 0));
Iterator vector_sum_col(_vector_sum_col, win_vector_sum_col);
Iterator vector_sum_row(_vector_sum_row, win_vector_sum_row);
Iterator mm_result(_mm_result, window);
+ const size_t sum_row_stride_y = _vector_sum_row->info()->strides_in_bytes().y();
+
execute_window_loop(collapsed_window, [&](const Coordinates & id)
{
// Compute the leftover term due to a_offset.
@@ -217,7 +220,7 @@ void NEGEMMLowpOffsetContributionKernel::run(const Window &window, const ThreadI
a_offset_term_s32.val[3] = vmulq_n_s32(a_offset_term_s32.val[3], _a_offset);
// Compute the leftover term due to b_offset.
- int32x4_t b_offset_term_s32 = vld1q_dup_s32(reinterpret_cast<const int32_t *>(vector_sum_row.ptr()) + id.y());
+ int32x4_t b_offset_term_s32 = vld1q_dup_s32(reinterpret_cast<const int32_t *>(vector_sum_row.ptr() + id.z() * sum_row_stride_y) + id.y());
b_offset_term_s32 = vmulq_n_s32(b_offset_term_s32, _b_offset);
// Add a_offset_term_s32 and b_offset_term_s32
@@ -266,14 +269,17 @@ void NEGEMMLowpOffsetContributionKernel::run(const Window &window, const ThreadI
Window win_vector_sum_row(collapsed_window);
win_vector_sum_row.set(Window::DimX, Window::Dimension(0, 0, 0));
win_vector_sum_row.set(Window::DimY, Window::Dimension(0, 0, 0));
+ win_vector_sum_row.set(Window::DimZ, Window::Dimension(0, 0, 0));
Iterator vector_sum_row(_vector_sum_row, win_vector_sum_row);
Iterator mm_result(_mm_result, window);
+ const size_t sum_row_stride_y = _vector_sum_row->info()->strides_in_bytes().y();
+
execute_window_loop(window, [&](const Coordinates & id)
{
// Compute the leftover term due to b_offset.
- int32x4_t b_offset_term_s32 = vld1q_dup_s32(reinterpret_cast<const int32_t *>(vector_sum_row.ptr()) + id.y());
+ int32x4_t b_offset_term_s32 = vld1q_dup_s32(reinterpret_cast<const int32_t *>(vector_sum_row.ptr() + id.z() * sum_row_stride_y) + id.y());
b_offset_term_s32 = vmulq_n_s32(b_offset_term_s32, _b_offset);
int32x4x4_t in_s32 =