diff options
author | Michalis Spyrou <michalis.spyrou@arm.com> | 2020-10-09 11:52:10 +0100 |
---|---|---|
committer | Sheri Zhang <sheri.zhang@arm.com> | 2020-10-09 12:43:56 +0000 |
commit | c226853f80d53619a2f49e646635e04ee0885c3b (patch) | |
tree | 252620ed7db38810c1590d7caab62625ec07ad6f /src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp | |
parent | 4adaddbaa633a4025f29f2e0a63c7126d9d7c530 (diff) | |
download | ComputeLibrary-c226853f80d53619a2f49e646635e04ee0885c3b.tar.gz |
COMPMID-3794: Fix window loops causing performance regression
Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com>
Change-Id: Id4d95c6ce5fed91bb079b8bfe1abceedefd20c97
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4117
Reviewed-by: Sheri Zhang <sheri.zhang@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp')
-rw-r--r-- | src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp | 8 |
1 files changed, 4 insertions, 4 deletions
diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp b/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp
index c022fa05a0..8c11574755 100644
--- a/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp
+++ b/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp
@@ -110,7 +110,7 @@ output_stage_nchw(ITensor *input, const ITensor *bias, const Window &window, ITe
     execute_window_loop(win, [&](const Coordinates & id)
     {
         int x = window_start_x;
-        for(; x < (window_end_x - window_step_x); x += window_step_x)
+        for(; x <= (window_end_x - window_step_x); x += window_step_x)
         {
             // Get bias and pointer to input
             const auto in_ptr = reinterpret_cast<const T *>(in.ptr()) + x;
@@ -175,7 +175,7 @@ output_stage_nhwc(ITensor *input, const ITensor *bias, const Window &window, ITe
     execute_window_loop(win, [&](const Coordinates &)
     {
         int x = window_start_x;
-        for(; x < (window_end_x - window_step_x); x += window_step_x)
+        for(; x <= (window_end_x - window_step_x); x += window_step_x)
         {
             // Get bias and pointer to input
             const auto in_ptr = reinterpret_cast<const T *>(in.ptr());
@@ -238,7 +238,7 @@ void output_stage_nchw(ITensor *input, const ITensor *bias, const Window &window
     {
 
         int x = window_start_x;
-        for(; x < (window_end_x - window_step_x); x += window_step_x)
+        for(; x <= (window_end_x - window_step_x); x += window_step_x)
         {
             // Get bias and pointer to input
             const auto in_ptr = reinterpret_cast<int32_t *>(in.ptr()) + x;
@@ -323,7 +323,7 @@ void output_stage_nhwc(ITensor *input, const ITensor *bias, const Window &window
     execute_window_loop(win, [&](const Coordinates &)
     {
         int x = window_start_x;
-        for(; x < (window_end_x - window_step_x); x += window_step_x)
+        for(; x <= (window_end_x - window_step_x); x += window_step_x)
         {
             // Get bias and pointer to input
             const auto in_ptr = reinterpret_cast<int32_t *>(in.ptr()) + x;