From 621965e3e9ef301d2668c60702f5fb79daea8d26 Mon Sep 17 00:00:00 2001 From: Michalis Spyrou Date: Mon, 8 Jan 2018 17:11:26 +0000 Subject: COMPMID-769 Add asymmetric padding support in NEON kernels. - NEDirectConvolutionLayer - NEDepthwiseConvolutionLayer3x3 Change-Id: Id4d7d17ee334639c059015a290b8fc34712706ee Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/115430 Tested-by: Jenkins Reviewed-by: Anthony Barbier --- .../NEDepthwiseConvolutionLayer3x3Kernel.cpp | 10 +++---- .../kernels/NEDirectConvolutionLayerKernel.cpp | 33 ++++++++++++++-------- 2 files changed, 27 insertions(+), 16 deletions(-) (limited to 'src/core/NEON') diff --git a/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp b/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp index 3cdb39ef94..40a8601aaa 100644 --- a/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp +++ b/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -77,13 +77,13 @@ void NEDepthwiseConvolutionLayer3x3Kernel::configure(const ITensor *input, const _conv_info = conv_info; const unsigned int conv_stride_x = conv_info.stride().first; const unsigned int conv_stride_y = conv_info.stride().second; - const unsigned int conv_pad_x = conv_info.pad().first; - const unsigned int conv_pad_y = conv_info.pad().second; + const unsigned int conv_pad_left = conv_info.pad_left(); + const unsigned int conv_pad_top = conv_info.pad_top(); ARM_COMPUTE_ERROR_ON(conv_stride_x < 1 || conv_stride_x > 3); const unsigned int num_elems_written_per_iteration = 16 >> conv_stride_x; - _border_size = BorderSize(conv_pad_y, conv_pad_x); + _border_size = BorderSize(conv_pad_top, conv_info.pad_right(), conv_info.pad_bottom(), conv_pad_left); // Configure kernel window Window win = calculate_max_window(*output->info(), Steps(num_elems_written_per_iteration)); @@ -91,7 +91,7 @@ void NEDepthwiseConvolutionLayer3x3Kernel::configure(const ITensor *input, const const unsigned int num_x_steps = (output_shape.x() + num_elems_written_per_iteration - 1) / num_elems_written_per_iteration; const int input_num_elems_processed = get_input_num_elems_processed(num_elems_written_per_iteration, conv_stride_x); - AccessWindowStatic input_access(input->info(), -conv_pad_x, -conv_pad_y, (num_x_steps - 1) * input_num_elems_processed + 12, conv_stride_y * (output_shape.y() - 1) + 2); + AccessWindowStatic input_access(input->info(), -conv_pad_left, -conv_pad_top, (num_x_steps - 1) * input_num_elems_processed + 12, conv_stride_y * (output_shape.y() - 1) + 2); AccessWindowStatic weights_access(weights->info(), 0, 0, weights->info()->dimension(0), weights->info()->dimension(1)); AccessWindowStatic output_access(output->info(), 0, 0, num_x_steps * num_elems_written_per_iteration, output_shape.y()); diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp index 
2ba0ef2e69..cb8246d09e 100644 --- a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp +++ b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -1052,8 +1052,6 @@ std::pair validate_and_configure_window(ITensorInfo *input, ITen { // Calculate right and bottom border unsigned int kernel_size = weights->dimension(0); - const unsigned int conv_pad_x = std::get<0>(conv_info.pad()); - const unsigned int conv_pad_y = std::get<1>(conv_info.pad()); const unsigned int conv_stride_x = std::get<0>(conv_info.stride()); const unsigned int conv_stride_y = std::get<1>(conv_info.stride()); const int input_width = input->dimension(0); @@ -1122,12 +1120,22 @@ std::pair validate_and_configure_window(ITensorInfo *input, ITen } } - const int upper_bound_w = ceil_to_multiple(((output->dimension(0) - 1) * conv_stride_x + kernel_size), num_elems_read_per_iteration) - conv_pad_x - input_width; - const int upper_bound_h = ((output->dimension(1) - 1) * conv_stride_y - conv_pad_y + kernel_size) - input_height; - border_size.right = std::max(upper_bound_w, static_cast<int>(kernel_size)); - border_size.bottom = std::max(upper_bound_h, static_cast<int>(kernel_size)); + // Calculate border + int upper_bound_w = ceil_to_multiple(((output->dimension(0) - 1) * conv_stride_x + kernel_size), num_elems_read_per_iteration) - conv_info.pad_left() - conv_info.pad_right() - input_width; + int upper_bound_h = ((output->dimension(1) - 1) * conv_stride_y - conv_info.pad_top() - conv_info.pad_bottom() + kernel_size) - input_height; + + const unsigned int conv_pad_left = std::max(upper_bound_w - static_cast<int>(conv_info.pad_right()), static_cast<int>(kernel_size) / 2); + const unsigned int conv_pad_top = std::max(upper_bound_h - static_cast<int>(conv_info.pad_bottom()), static_cast<int>(kernel_size) / 2); + const unsigned int conv_pad_right = std::max(upper_bound_w - 
static_cast<int>(conv_info.pad_left()), static_cast<int>(kernel_size) / 2); + const unsigned int conv_pad_bottom = std::max(upper_bound_h - static_cast<int>(conv_info.pad_top()), static_cast<int>(kernel_size) / 2); + + border_size.right = conv_pad_right; + border_size.bottom = conv_pad_bottom; + border_size.left = conv_pad_left; + border_size.top = conv_pad_top; + Window win = calculate_max_window(*output, Steps(num_elems_written_per_iteration)); - AccessWindowStatic input_access(input, -conv_pad_x, -conv_pad_y, input_width + border_size.right, input_height + border_size.bottom); + AccessWindowStatic input_access(input, -conv_pad_left, -conv_pad_top, input_width + conv_pad_right, input_height + conv_pad_bottom); AccessWindowStatic weights_access(weights, 0, 0, num_weight_elems_read_per_row, kernel_size); AccessWindowHorizontal output_access(output, 0, num_elems_written_per_iteration); bool window_changed = update_window_and_padding(win, input_access, weights_access, output_access); @@ -1152,15 +1160,18 @@ BorderSize NEDirectConvolutionLayerKernel::border_size() const void NEDirectConvolutionLayerKernel::configure(const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); - const unsigned int conv_pad_x = std::get<0>(conv_info.pad()); - const unsigned int conv_pad_y = std::get<1>(conv_info.pad()); _input = input; _weights = weights; _output = output; _conv_info = conv_info; _kernel_size = weights->info()->dimension(0); - _border_size = BorderSize(conv_pad_y, conv_pad_x); + + const unsigned int conv_pad_left = conv_info.pad_left(); + const unsigned int conv_pad_top = conv_info.pad_top(); + const unsigned int conv_pad_right = conv_info.pad_right(); + const unsigned int conv_pad_bottom = conv_info.pad_bottom(); + _border_size = BorderSize(conv_pad_top, conv_pad_right, conv_pad_bottom, conv_pad_left); // Get convolved dimensions TensorShape output_shape = get_convolved_dimensions(input->info(), 
weights->info(), _kernel_size, conv_info); -- cgit v1.2.1