about | summary | refs | log | tree | commit | diff
path: root/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
diff options
context:
space:
mode:
author Michalis Spyrou <michalis.spyrou@arm.com> 2020-10-09 11:52:10 +0100
committer Sheri Zhang <sheri.zhang@arm.com> 2020-10-09 12:43:56 +0000
commit c226853f80d53619a2f49e646635e04ee0885c3b (patch)
tree 252620ed7db38810c1590d7caab62625ec07ad6f /src/core/NEON/kernels/NEPoolingLayerKernel.cpp
parent 4adaddbaa633a4025f29f2e0a63c7126d9d7c530 (diff)
download ComputeLibrary-c226853f80d53619a2f49e646635e04ee0885c3b.tar.gz
COMPMID-3794: Fix window loops causing performance regression
Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com>
Change-Id: Id4d95c6ce5fed91bb079b8bfe1abceedefd20c97
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4117
Reviewed-by: Sheri Zhang <sheri.zhang@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/NEPoolingLayerKernel.cpp')
-rw-r--r-- src/core/NEON/kernels/NEPoolingLayerKernel.cpp 10
1 files changed, 5 insertions, 5 deletions
diff --git a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
index 1310ef3521..397eae94ea 100644
--- a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
@@ -1316,7 +1316,7 @@ void NEPoolingLayerKernel::pooling2_f16_nhwc_maxpool_indices(const Window &windo
(_input->info()->strides_in_bytes().z());
int x_off = window_start_x;
- for(; x_off < (window_end_x - window_step_x); x_off += window_step_x)
+ for(; x_off <= (window_end_x - window_step_x); x_off += window_step_x)
{
const auto in_x0_ptr = reinterpret_cast<const float16_t *>(input.ptr() + in_x0_offset) + x_off;
const auto in_x1_ptr = reinterpret_cast<const float16_t *>(input.ptr() + in_x1_offset) + x_off;
@@ -1432,7 +1432,7 @@ void NEPoolingLayerKernel::poolingMxN_f16_nhwc(const Window &window_input, const
const int pool_end_x = std::min(pool_size_x, window_input.y().end() + pool_limit_x);
int x_off = window_start_x;
- for(; x_off < (window_end_x - window_step_x); x_off += window_step_x)
+ for(; x_off <= (window_end_x - window_step_x); x_off += window_step_x)
{
if(pooling_type != PoolingType::MAX)
{
@@ -1943,7 +1943,7 @@ void NEPoolingLayerKernel::poolingMxN_f32_nhwc(const Window &window_input, const
const int pool_end_x = std::min(pool_size_x, window_input.y().end() + pool_limit_x);
int x_off = window_start_x;
- for(; x_off < (window_end_x - window_step_x); x_off += window_step_x)
+ for(; x_off <= (window_end_x - window_step_x); x_off += window_step_x)
{
if(pooling_type != PoolingType::MAX)
{
@@ -2113,7 +2113,7 @@ void NEPoolingLayerKernel::pooling2_f32_nhwc_maxpool_indices(const Window &windo
(_input->info()->strides_in_bytes().z());
int x_off = window_start_x;
- for(; x_off < (window_end_x - window_step_x); x_off += window_step_x)
+ for(; x_off <= (window_end_x - window_step_x); x_off += window_step_x)
{
const auto in_x0_ptr = reinterpret_cast<const float *>(input.ptr() + in_x0_offset);
const auto in_x1_ptr = reinterpret_cast<const float *>(input.ptr() + in_x1_offset);
@@ -2337,7 +2337,7 @@ void NEPoolingLayerKernel::poolingMxN_q8_nhwc(const Window &window_input, const
const int pool_end_x = std::min(pool_size_x, window_input.y().end() + pool_limit_x);
int x_off = window_start_x;
- for(; x_off < (window_end_x - window_step_x); x_off += window_step_x)
+ for(; x_off <= (window_end_x - window_step_x); x_off += window_step_x)
{
if(pooling_type != PoolingType::MAX)
{