aboutsummaryrefslogtreecommitdiff
path: root/src/core/cpu/kernels/pooling/neon/fp32.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/cpu/kernels/pooling/neon/fp32.cpp')
-rw-r--r--src/core/cpu/kernels/pooling/neon/fp32.cpp20
1 files changed, 11 insertions, 9 deletions
diff --git a/src/core/cpu/kernels/pooling/neon/fp32.cpp b/src/core/cpu/kernels/pooling/neon/fp32.cpp
index a2bd4a6bb3..4e41fdec7f 100644
--- a/src/core/cpu/kernels/pooling/neon/fp32.cpp
+++ b/src/core/cpu/kernels/pooling/neon/fp32.cpp
@@ -58,9 +58,11 @@ void pooling2_f32_maxpool_indices(const ITensor *src, ITensor *dst0, ITensor *ds
float32x4_t vres;
float res;
- const int pad_right = src->info()->padding().right;
- const int in_stride_y = static_cast<int>(src->info()->strides_in_bytes().y());
- const int in_stride_z = static_cast<int>(src->info()->strides_in_bytes().z());
+ const int pad_right = src->info()->padding().right;
+ const int pad_left = src->info()->padding().left;
+ const int pad_horizontal = pad_right + pad_left;
+ const int in_stride_y = static_cast<int>(src->info()->strides_in_bytes().y());
+ const int in_stride_z = static_cast<int>(src->info()->strides_in_bytes().z());
execute_window_loop(window_out, [&](const Coordinates & id)
{
@@ -97,9 +99,9 @@ void pooling2_f32_maxpool_indices(const ITensor *src, ITensor *dst0, ITensor *ds
const uint32_t offset_base = offset_no_padding<float>(in.offset(), id, *src->info(), pool_stride_x, pool_stride_y, DataLayout::NHWC);
const uint32_t offset_x0 = (uint32_t)offset_base / sizeof(float) + x_off;
- const uint32_t offset_x1 = (uint32_t)offset_x0 + in_stride_y / sizeof(float) - pad_right;
- const uint32_t offset_x2 = (uint32_t)offset_x0 + in_stride_z / sizeof(float) - pad_right * src->info()->tensor_shape()[1];
- const uint32_t offset_x3 = (uint32_t)offset_x2 + in_stride_y / sizeof(float) - pad_right;
+ const uint32_t offset_x1 = (uint32_t)offset_x0 + in_stride_y / sizeof(float) - pad_horizontal;
+ const uint32_t offset_x2 = (uint32_t)offset_x0 + in_stride_z / sizeof(float) - pad_horizontal * src->info()->tensor_shape()[1];
+ const uint32_t offset_x3 = (uint32_t)offset_x2 + in_stride_y / sizeof(float) - pad_horizontal;
const uint32x4_t voffset_x0 = { offset_x0, offset_x0 + 1, offset_x0 + 2, offset_x0 + 3 };
const uint32x4_t voffset_x1 = { offset_x1, offset_x1 + 1, offset_x1 + 2, offset_x1 + 3 };
const uint32x4_t voffset_x2 = { offset_x2, offset_x2 + 1, offset_x2 + 2, offset_x2 + 3 };
@@ -126,9 +128,9 @@ void pooling2_f32_maxpool_indices(const ITensor *src, ITensor *dst0, ITensor *ds
const uint32_t offset_base = offset_no_padding<float>(in.offset(), id, *src->info(), pool_stride_x, pool_stride_y, DataLayout::NHWC);
const uint32_t offset_x0 = (uint32_t)offset_base / sizeof(float) + x_off;
- const uint32_t offset_x1 = (uint32_t)offset_x0 + in_stride_y / sizeof(float) - pad_right;
- const uint32_t offset_x2 = (uint32_t)offset_x0 + in_stride_z / sizeof(float) - pad_right * src->info()->tensor_shape()[1];
- const uint32_t offset_x3 = (uint32_t)offset_x2 + in_stride_y / sizeof(float) - pad_right;
+ const uint32_t offset_x1 = (uint32_t)offset_x0 + in_stride_y / sizeof(float) - pad_horizontal;
+ const uint32_t offset_x2 = (uint32_t)offset_x0 + in_stride_z / sizeof(float) - pad_horizontal * src->info()->tensor_shape()[1];
+ const uint32_t offset_x3 = (uint32_t)offset_x2 + in_stride_y / sizeof(float) - pad_horizontal;
const uint32_t tmp_idx0 = (x0 >= x1) ? offset_x0 : offset_x1;
const uint32_t tmp_idx1 = (x2 >= x3) ? offset_x2 : offset_x3;
const uint32_t tmp_idx2 = (std::max(x0, x1) >= std::max(x2, x3)) ? tmp_idx0 : tmp_idx1;