diff options
author | Giorgio Arena <giorgio.arena@arm.com> | 2018-07-16 17:20:38 +0100 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:54:54 +0000 |
commit | a855af10a486c53c2271361cb87f349eca64b749 (patch) | |
tree | b326b63bdcaf76c9620b1bbf22942d4683503a65 /src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.cpp | |
parent | 5a3ee4f708a9e1642b0211955ff905e7b67e831d (diff) | |
download | ComputeLibrary-a855af10a486c53c2271361cb87f349eca64b749.tar.gz |
COMPMID-1401 Implement NEFullyConnectedLayer for QASYMM8
Change-Id: I0404df6d369855e2f458f2db8f26e81c80a1ee87
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/140148
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.cpp')
-rw-r--r-- | src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.cpp | 10 |
1 file changed, 8 insertions, 2 deletions
```diff
diff --git a/src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.cpp
index ee334dfca0..af84d024d5 100644
--- a/src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.cpp
@@ -193,11 +193,14 @@ void NEGEMMLowpOffsetContributionKernel::run(const Window &window, const ThreadI
             Window win_vector_sum_row(collapsed_window);
             win_vector_sum_row.set(Window::DimX, Window::Dimension(0, 0, 0));
             win_vector_sum_row.set(Window::DimY, Window::Dimension(0, 0, 0));
+            win_vector_sum_row.set(Window::DimZ, Window::Dimension(0, 0, 0));
 
             Iterator vector_sum_col(_vector_sum_col, win_vector_sum_col);
             Iterator vector_sum_row(_vector_sum_row, win_vector_sum_row);
             Iterator mm_result(_mm_result, window);
 
+            const size_t sum_row_stride_y = _vector_sum_row->info()->strides_in_bytes().y();
+
             execute_window_loop(collapsed_window, [&](const Coordinates & id)
             {
                 // Compute the leftover term due to a_offset.
@@ -217,7 +220,7 @@ void NEGEMMLowpOffsetContributionKernel::run(const Window &window, const ThreadI
                 a_offset_term_s32.val[3] = vmulq_n_s32(a_offset_term_s32.val[3], _a_offset);
 
                 // Compute the leftover term due to b_offset.
-                int32x4_t b_offset_term_s32 = vld1q_dup_s32(reinterpret_cast<const int32_t *>(vector_sum_row.ptr()) + id.y());
+                int32x4_t b_offset_term_s32 = vld1q_dup_s32(reinterpret_cast<const int32_t *>(vector_sum_row.ptr() + id.z() * sum_row_stride_y) + id.y());
                 b_offset_term_s32 = vmulq_n_s32(b_offset_term_s32, _b_offset);
 
                 // Add a_offset_term_s32 and b_offset_term_s32
@@ -266,14 +269,17 @@ void NEGEMMLowpOffsetContributionKernel::run(const Window &window, const ThreadI
             Window win_vector_sum_row(collapsed_window);
             win_vector_sum_row.set(Window::DimX, Window::Dimension(0, 0, 0));
             win_vector_sum_row.set(Window::DimY, Window::Dimension(0, 0, 0));
+            win_vector_sum_row.set(Window::DimZ, Window::Dimension(0, 0, 0));
 
             Iterator vector_sum_row(_vector_sum_row, win_vector_sum_row);
             Iterator mm_result(_mm_result, window);
 
+            const size_t sum_row_stride_y = _vector_sum_row->info()->strides_in_bytes().y();
+
             execute_window_loop(window, [&](const Coordinates & id)
             {
                 // Compute the leftover term due to b_offset.
-                int32x4_t b_offset_term_s32 = vld1q_dup_s32(reinterpret_cast<const int32_t *>(vector_sum_row.ptr()) + id.y());
+                int32x4_t b_offset_term_s32 = vld1q_dup_s32(reinterpret_cast<const int32_t *>(vector_sum_row.ptr() + id.z() * sum_row_stride_y) + id.y());
                 b_offset_term_s32 = vmulq_n_s32(b_offset_term_s32, _b_offset);
 
                 int32x4x4_t in_s32 =
```