From cc9fed5c34e3ab128b2554ab7efaac329ffd49f8 Mon Sep 17 00:00:00 2001 From: Anthony Barbier Date: Wed, 13 Dec 2017 10:46:00 +0000 Subject: COMPMID-747 Fixed AccessWindow in CL Direct convolution Change-Id: I18636cd397c82f6d71751d2076fc100daf98a163 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/113051 Reviewed-by: Michalis Spyrou Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com Reviewed-by: Anthony Barbier --- .../CL/kernels/CLDirectConvolutionLayerKernel.cpp | 74 +++++++++++++++++++--- 1 file changed, 64 insertions(+), 10 deletions(-) (limited to 'src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp') diff --git a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp index b38d9fbb87..4b141f7ecd 100644 --- a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp +++ b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp @@ -136,13 +136,9 @@ std::pair validate_and_configure_window(ITensorInfo *input, ITen unsigned int num_elems_written_per_iteration_x = 0; unsigned int num_elems_written_per_iteration_y = 0; - Window win = Window(); - bool window_changed = false; - if((target == GPUTarget::BIFROST) && (kernel_size <= 5) && (conv_stride_x == 1) && (conv_stride_y == 1) && (data_type == DataType::F32)) { // Configure kernel window - win = calculate_max_window(*output); switch(kernel_size) { @@ -178,12 +174,69 @@ std::pair validate_and_configure_window(ITensorInfo *input, ITen } else { - bool is_stride2 = ((kernel_size != 1) && (conv_stride_x == 2)); - - num_elems_read_per_iteration_x = 8 + 2 * (kernel_size / 2) + (is_stride2 ? 6 + kernel_size / 2 : 0); num_elems_read_per_iteration_y = kernel_size; num_elems_written_per_iteration_x = 8; num_elems_written_per_iteration_y = 1; + switch(kernel_size) + { + case 1: + switch(conv_stride_x) + { + case 1: + num_elems_read_per_iteration_x = 8; + break; + case 2: + num_elems_read_per_iteration_x = 16; + break; + case 3: + switch(input->element_size()) + { + case 1: + num_elems_read_per_iteration_x = 28; + break; + case 2: + num_elems_read_per_iteration_x = 24; + break; + case 4: + num_elems_read_per_iteration_x = 22; + break; + default: + ARM_COMPUTE_ERROR("Invalid data size"); + } + break; + default: + ARM_COMPUTE_ERROR("Invalid convolution stride X"); + } + break; + case 3: + switch(conv_stride_x) + { + case 1: + num_elems_read_per_iteration_x = 10; + break; + case 2: + num_elems_read_per_iteration_x = 17; + break; + default: + ARM_COMPUTE_ERROR("Invalid convolution stride X"); + } + break; + case 5: + switch(conv_stride_x) + { + case 1: + num_elems_read_per_iteration_x = 12; + break; + case 2: + num_elems_read_per_iteration_x = 20; + break; + default: + ARM_COMPUTE_ERROR("Invalid convolution stride X"); + } + break; + default: + ARM_COMPUTE_ERROR("Invalid direct convolution size"); + } } // Calculate right and bottom border @@ -191,11 +244,12 @@ std::pair validate_and_configure_window(ITensorInfo *input, ITen int input_height = input->dimension(1) + conv_pad_top + conv_pad_bottom; // Add padding only if necessary or it would always result in a window_changed - input_width += input_width % num_elems_read_per_iteration_x; - input_height += ((input_height / conv_stride_y) * conv_stride_y) % num_elems_read_per_iteration_y; + input_width = ceil_to_multiple(input_width, num_elems_read_per_iteration_x); + input_height = ceil_to_multiple(input_height, num_elems_read_per_iteration_y); // Create window and update padding - win = calculate_max_window(*output, Steps(num_elems_written_per_iteration_x, num_elems_written_per_iteration_y)); + bool window_changed = false; + Window win = calculate_max_window(*output, Steps(num_elems_written_per_iteration_x, num_elems_written_per_iteration_y)); AccessWindowStatic input_access(input, -conv_pad_left, -conv_pad_top, input_width, input_height); AccessWindowStatic weights_access(weights, 0, 0, kernel_size, kernel_size); -- cgit v1.2.1