From cab1ab92813a346779bacd728ef8d7d4159abac6 Mon Sep 17 00:00:00 2001 From: Michele Di Giorgio Date: Fri, 12 Feb 2021 17:34:17 +0000 Subject: Fix data layout retention and handling of leftovers when there is no padding Resolves COMPMID-4258 Change-Id: I8782bf725cd0d376d538021406eb1f5be962b2cb Signed-off-by: Michele Di Giorgio Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/c/VisualCompute/ComputeLibrary/+/298627 Reviewed-by: Teresa Charlin Reyes Reviewed-by: Georgios Pinitas Comments-Addressed: Teresa Charlin Reyes Tested-by: Georgios Pinitas Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5082 Reviewed-by: TeresaARM --- .../arm_conv/pooling/pooling_depthfirst_generic.hpp | 17 ++++++++++++++++- .../pooling/pooling_depthfirst_generic_quantized.hpp | 17 ++++++++++++++++- src/core/cpu/kernels/CpuPoolingKernel.cpp | 6 ++++-- 3 files changed, 36 insertions(+), 4 deletions(-) diff --git a/src/core/NEON/kernels/arm_conv/pooling/pooling_depthfirst_generic.hpp b/src/core/NEON/kernels/arm_conv/pooling/pooling_depthfirst_generic.hpp index fa06a0078b..5979862ed8 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/pooling_depthfirst_generic.hpp +++ b/src/core/NEON/kernels/arm_conv/pooling/pooling_depthfirst_generic.hpp @@ -191,6 +191,13 @@ class PoolingDepthfirstGeneric : public PoolingCommon(std::max(end_in_i - height, 0)); const auto valid_rows = input_rows() - pad_top - pad_bottom; + // Compute the number of pooling window rows which are contained in + // either the valid region of the input tensor, or the padding. + const auto padded_bottom = std::min( + start_in_i + m_args.pool_window.rows, height + padding.bottom + ); + const auto n_total_rows = padded_bottom - start_in_i; + auto outptr_col = outptr_row; auto inptr_row = inptr_batch + (start_in_i + pad_top) * ld_input_row; @@ -205,6 +212,13 @@ class PoolingDepthfirstGeneric : public PoolingCommon(std::max(0, end_in_j - width)); const auto valid_cols = input_cols() - pad_left - pad_right; + // Compute the number of pooling window columns which are contained + // in either the valid region of the input tensor, or the padding. + const auto padded_right = std::min( + start_in_j + m_args.pool_window.cols, width + padding.right + ); + const auto n_total_cols = padded_right - start_in_j; + // Construct the input pointer array - fill in all valid points // contiguously. const TInput **ptrs = inptr_array; @@ -222,7 +236,8 @@ class PoolingDepthfirstGeneric : public PoolingCommon(-std::min(start_in_i, 0)); const auto pad_bottom = static_cast(-std::min(static_cast(height) - end_in_i, 0)); + // Compute the number of pooling window rows which are contained in + // either the valid region of the input tensor, or the padding. + const auto padded_bottom = std::min( + start_in_i + m_args.pool_window.rows, height + padding.bottom + ); + const auto n_total_rows = padded_bottom - start_in_i; + for (int out_j = 0, start_in_j = -padding.left; out_j < static_cast(output_width); out_j++, start_in_j += m_args.pool_stride.cols) @@ -201,6 +208,13 @@ class PoolingDepthfirstGenericQuantized : public PoolingCommon(-std::min(start_in_j, 0)); const auto pad_right = static_cast(-std::min(static_cast(width) - end_in_j, 0)); + // Compute the number of pooling window columns which are contained + // in either the valid region of the input tensor, or the padding. + const auto padded_right = std::min( + start_in_j + m_args.pool_window.cols, width + padding.right + ); + const auto n_total_cols = padded_right - start_in_j; + // Construct the input pointer array - fill in all valid points // contiguously. const TInput **ptrs = inptr_array; @@ -221,7 +235,8 @@ class PoolingDepthfirstGenericQuantized : public PoolingCommon validate_and_configure_window(ITensorInfo *src, ITenso dst_shape.set(1, pooled_h); TensorInfo dst_info(src->clone()->set_tensor_shape(dst_shape)); win = calculate_max_window(dst_info, Steps(num_elems_processed_per_iteration)); - AccessWindowStatic src_access(src, -pool_pad_left, -pool_pad_top, src_width + border_size.right, src_height + border_size.bottom); + AccessWindowStatic src_access(src, -pool_pad_left, -pool_pad_top, ceil_to_multiple(src_width + border_size.right, pool_size_x), src_height + border_size.bottom); AccessWindowHorizontal dst_access(dst, 0, num_elems_horizontal_window); if(indices) { @@ -368,6 +368,8 @@ std::pair validate_and_configure_window(ITensorInfo *src, ITenso window_changed = update_window_and_padding(win, src_access, dst_access); } dst_access.set_valid_region(win, ValidRegion(Coordinates(), dst->tensor_shape())); + + border_size = src->padding(); } Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{}; @@ -529,7 +531,7 @@ void CpuPoolingKernel::run_op(ITensorPack &tensors, const Window &window, const window_src.set(Window::DimZ, Window::Dimension(0, src->info()->dimension(2), pool_stride_y)); } - const auto *uk = get_implementation(src->info()->data_type(), src->info()->data_layout(), _pool_stride_x, _pool_size); + const auto *uk = get_implementation(src->info()->data_type(), _data_layout, _pool_stride_x, _pool_size); ARM_COMPUTE_ERROR_ON(uk == nullptr || uk->ukernel == nullptr); uk->ukernel(src, dst, indices, _pool_info, window_src, window); -- cgit v1.2.1