diff options
author | Anthony Barbier <anthony.barbier@arm.com> | 2017-12-13 10:46:00 +0000 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:42:33 +0000 |
commit | cc9fed5c34e3ab128b2554ab7efaac329ffd49f8 (patch) | |
tree | c20c799efb64a40480f34c2c7b5550a21d7ea81d /src/core/CL/kernels | |
parent | b660dcf2a7b21d02818e0a5f2c38ecd19d39b5c8 (diff) | |
download | ComputeLibrary-cc9fed5c34e3ab128b2554ab7efaac329ffd49f8.tar.gz |
COMPMID-747 Fixed AccessWindow in CL Direct convolution
Change-Id: I18636cd397c82f6d71751d2076fc100daf98a163
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/113051
Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src/core/CL/kernels')
-rw-r--r-- | src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp | 74 |
1 file changed, 64 insertions, 10 deletions
diff --git a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp index b38d9fbb87..4b141f7ecd 100644 --- a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp +++ b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp @@ -136,13 +136,9 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen unsigned int num_elems_written_per_iteration_x = 0; unsigned int num_elems_written_per_iteration_y = 0; - Window win = Window(); - bool window_changed = false; - if((target == GPUTarget::BIFROST) && (kernel_size <= 5) && (conv_stride_x == 1) && (conv_stride_y == 1) && (data_type == DataType::F32)) { // Configure kernel window - win = calculate_max_window(*output); switch(kernel_size) { @@ -178,12 +174,69 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen } else { - bool is_stride2 = ((kernel_size != 1) && (conv_stride_x == 2)); - - num_elems_read_per_iteration_x = 8 + 2 * (kernel_size / 2) + (is_stride2 ? 
6 + kernel_size / 2 : 0); num_elems_read_per_iteration_y = kernel_size; num_elems_written_per_iteration_x = 8; num_elems_written_per_iteration_y = 1; + switch(kernel_size) + { + case 1: + switch(conv_stride_x) + { + case 1: + num_elems_read_per_iteration_x = 8; + break; + case 2: + num_elems_read_per_iteration_x = 16; + break; + case 3: + switch(input->element_size()) + { + case 1: + num_elems_read_per_iteration_x = 28; + break; + case 2: + num_elems_read_per_iteration_x = 24; + break; + case 4: + num_elems_read_per_iteration_x = 22; + break; + default: + ARM_COMPUTE_ERROR("Invalid data size"); + } + break; + default: + ARM_COMPUTE_ERROR("Invalid convolution stride X"); + } + break; + case 3: + switch(conv_stride_x) + { + case 1: + num_elems_read_per_iteration_x = 10; + break; + case 2: + num_elems_read_per_iteration_x = 17; + break; + default: + ARM_COMPUTE_ERROR("Invalid convolution stride X"); + } + break; + case 5: + switch(conv_stride_x) + { + case 1: + num_elems_read_per_iteration_x = 12; + break; + case 2: + num_elems_read_per_iteration_x = 20; + break; + default: + ARM_COMPUTE_ERROR("Invalid convolution stride X"); + } + break; + default: + ARM_COMPUTE_ERROR("Invalid direct convolution size"); + } } // Calculate right and bottom border @@ -191,11 +244,12 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen int input_height = input->dimension(1) + conv_pad_top + conv_pad_bottom; // Add padding only if necessary or it would always result in a window_changed - input_width += input_width % num_elems_read_per_iteration_x; - input_height += ((input_height / conv_stride_y) * conv_stride_y) % num_elems_read_per_iteration_y; + input_width = ceil_to_multiple(input_width, num_elems_read_per_iteration_x); + input_height = ceil_to_multiple(input_height, num_elems_read_per_iteration_y); // Create window and update padding - win = calculate_max_window(*output, Steps(num_elems_written_per_iteration_x, num_elems_written_per_iteration_y)); + 
bool window_changed = false; + Window win = calculate_max_window(*output, Steps(num_elems_written_per_iteration_x, num_elems_written_per_iteration_y)); AccessWindowStatic input_access(input, -conv_pad_left, -conv_pad_top, input_width, input_height); AccessWindowStatic weights_access(weights, 0, 0, kernel_size, kernel_size); |