diff options
Diffstat (limited to 'src/core')
4 files changed, 33 insertions, 28 deletions
diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp index c24420a7e3..29564b36c9 100644 --- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp @@ -121,11 +121,6 @@ void CLDepthwiseConvolutionLayer3x3Kernel::configure(const ICLTensor *input, con const GPUTarget gpu_target = get_arch_from_target(get_target()); // Configure kernel window - const unsigned int conv_pad_left = conv_info.pad_left(); - const unsigned int conv_pad_top = conv_info.pad_top(); - const unsigned int conv_pad_right = conv_info.pad_right(); - const unsigned int conv_pad_bottom = conv_info.pad_bottom(); - unsigned int num_elems_read_per_iteration_x = 0; unsigned int num_elems_read_per_iteration_y = 0; unsigned int num_elems_written_per_iteration_x = 0; @@ -139,8 +134,22 @@ void CLDepthwiseConvolutionLayer3x3Kernel::configure(const ICLTensor *input, con kernel_name = "depthwise_convolution_3x3_f16"; num_elems_written_per_iteration_x = 8 / data_size_from_type(input->info()->data_type()); num_elems_written_per_iteration_y = 1; - num_elems_read_per_iteration_x = 3 + (num_elems_written_per_iteration_x - 1) * _conv_stride_x; num_elems_read_per_iteration_y = 3; + switch(_conv_stride_x) + { + case 1: + num_elems_read_per_iteration_x = 8; + break; + case 2: + num_elems_read_per_iteration_x = 9; + break; + case 3: + num_elems_read_per_iteration_x = 16; + break; + default: + num_elems_read_per_iteration_x = 3 + (num_elems_written_per_iteration_x - 1) * _conv_stride_x; + break; + } } else if(input->info()->data_type() == DataType::F32 && gpu_target == GPUTarget::BIFROST) { @@ -178,18 +187,12 @@ void CLDepthwiseConvolutionLayer3x3Kernel::configure(const ICLTensor *input, con num_elems_read_per_iteration_y = num_elems_written_per_iteration_y + 2; } - // Calculate right and bottom border - int input_width = input->info()->dimension(0) + conv_pad_left + conv_pad_right; - int input_height = input->info()->dimension(1) + conv_pad_top + conv_pad_bottom; - - // Add padding only if necessary or it would always result in a window_changed - input_width = ceil_to_multiple(input_width, num_elems_read_per_iteration_x); - input_height = ceil_to_multiple(input_height, num_elems_read_per_iteration_y); - // Create window and update padding Window win = calculate_max_window(*output->info(), Steps(num_elems_written_per_iteration_x, num_elems_written_per_iteration_y)); - AccessWindowStatic input_access(input->info(), -conv_pad_left, -conv_pad_top, input_width, input_height); + AccessWindowRectangle input_access(input->info(), -_conv_pad_left, -_conv_pad_top, + num_elems_read_per_iteration_x, num_elems_read_per_iteration_y, + _conv_stride_x, _conv_stride_y); AccessWindowStatic weights_access(weights->info(), 0, 0, 3, 3); AccessWindowRectangle output_access(output->info(), 0, 0, num_elems_written_per_iteration_x, num_elems_written_per_iteration_y); diff --git a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp index 6f5c7a35f9..c01a6660a7 100644 --- a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp +++ b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp @@ -241,7 +241,9 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen bool window_changed = false; Window win = calculate_max_window(*output, Steps(num_elems_written_per_iteration_x, num_elems_written_per_iteration_y)); - AccessWindowRectangle input_access(input, -conv_pad_left, -conv_pad_top, num_elems_read_per_iteration_x, num_elems_read_per_iteration_y, conv_stride_x, conv_stride_y); + AccessWindowRectangle input_access(input, -conv_pad_left, -conv_pad_top, + num_elems_read_per_iteration_x, num_elems_read_per_iteration_y, + conv_stride_x, conv_stride_y); AccessWindowStatic weights_access(weights, 0, 0, kernel_size, kernel_size); AccessWindowRectangle output_access(output, 0, 0, num_elems_written_per_iteration_x, num_elems_written_per_iteration_y); diff --git a/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp b/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp index dad4fee837..f5ee608b60 100644 --- a/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp +++ b/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp @@ -238,6 +238,7 @@ void NEDepthwiseConvolutionLayer3x3Kernel::configure_generic() ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(_output->info()->tensor_shape(), output_shape); const unsigned int conv_stride_x = _conv_info.stride().first; + const unsigned int conv_stride_y = _conv_info.stride().second; const unsigned int conv_pad_top = _conv_info.pad_top(); const unsigned int conv_pad_right = _conv_info.pad_right(); const unsigned int conv_pad_bottom = _conv_info.pad_bottom(); @@ -264,15 +265,10 @@ void NEDepthwiseConvolutionLayer3x3Kernel::configure_generic() // Configure kernel window Window win = calculate_max_window(*_output->info(), Steps(_num_elems_written_per_iteration)); - const unsigned int num_x_steps = (output_shape.x() + _num_elems_written_per_iteration - 1) / _num_elems_written_per_iteration; - const int input_num_elems_processed = get_input_num_elems_processed(_num_elems_written_per_iteration, conv_stride_x); - - AccessWindowStatic input_access(_input->info(), - -conv_pad_left, - -conv_pad_top, - (num_x_steps - 1) * input_num_elems_processed + num_elems_read_per_iteration, - _input->info()->tensor_shape().y() + conv_pad_bottom); - AccessWindowStatic weights_access(_weights->info(), 0, 0, _weights->info()->dimension(0), _weights->info()->dimension(1)); + AccessWindowRectangle input_access(_input->info(), -conv_pad_left, -conv_pad_top, + num_elems_read_per_iteration, 3, + conv_stride_x, conv_stride_y); + AccessWindowStatic weights_access(_weights->info(), 0, 0, 3, 3); AccessWindowHorizontal output_access(_output->info(), 0, _num_elems_written_per_iteration); update_window_and_padding(win, input_access, weights_access, output_access); diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp index 4dc186a8a7..285ec2d0a0 100644 --- a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp +++ b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp @@ -1053,8 +1053,8 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen // Calculate right and bottom border unsigned int kernel_size = weights->dimension(0); const int conv_stride_x = std::get<0>(conv_info.stride()); + const int conv_stride_y = std::get<1>(conv_info.stride()); const int input_width = input->dimension(0); - const int input_height = input->dimension(1); switch(kernel_size) { @@ -1135,8 +1135,12 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen border_size.right = conv_pad_right; border_size.bottom = conv_pad_bottom; - Window win = calculate_max_window(*output, Steps(num_elems_written_per_iteration)); - AccessWindowStatic input_access(input, -conv_pad_left, -conv_pad_top, input_width + conv_pad_right, input_height + conv_pad_bottom); + // Configure window + Window win = calculate_max_window(*output, Steps(num_elems_written_per_iteration)); + + AccessWindowRectangle input_access(input, -conv_pad_left, -conv_pad_top, + num_elems_read_per_iteration, kernel_size, + conv_stride_x, conv_stride_y); AccessWindowStatic weights_access(weights, 0, 0, num_weight_elems_read_per_row, kernel_size); AccessWindowHorizontal output_access(output, 0, num_elems_written_per_iteration); bool window_changed = update_window_and_padding(win, input_access, weights_access, output_access); |