aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp
diff options
context:
space:
mode:
author: Anthony Barbier <anthony.barbier@arm.com> 2017-12-13 10:46:00 +0000
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:42:33 +0000
commit: cc9fed5c34e3ab128b2554ab7efaac329ffd49f8 (patch)
tree: c20c799efb64a40480f34c2c7b5550a21d7ea81d /src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp
parent: b660dcf2a7b21d02818e0a5f2c38ecd19d39b5c8 (diff)
download: ComputeLibrary-cc9fed5c34e3ab128b2554ab7efaac329ffd49f8.tar.gz
COMPMID-747 Fixed AccessWindow in CL Direct convolution
Change-Id: I18636cd397c82f6d71751d2076fc100daf98a163
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/113051
Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp')
-rw-r--r-- src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp 74
1 files changed, 64 insertions, 10 deletions
diff --git a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp
index b38d9fbb87..4b141f7ecd 100644
--- a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp
@@ -136,13 +136,9 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
unsigned int num_elems_written_per_iteration_x = 0;
unsigned int num_elems_written_per_iteration_y = 0;
- Window win = Window();
- bool window_changed = false;
-
if((target == GPUTarget::BIFROST) && (kernel_size <= 5) && (conv_stride_x == 1) && (conv_stride_y == 1) && (data_type == DataType::F32))
{
// Configure kernel window
- win = calculate_max_window(*output);
switch(kernel_size)
{
@@ -178,12 +174,69 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
}
else
{
- bool is_stride2 = ((kernel_size != 1) && (conv_stride_x == 2));
-
- num_elems_read_per_iteration_x = 8 + 2 * (kernel_size / 2) + (is_stride2 ? 6 + kernel_size / 2 : 0);
num_elems_read_per_iteration_y = kernel_size;
num_elems_written_per_iteration_x = 8;
num_elems_written_per_iteration_y = 1;
+ switch(kernel_size)
+ {
+ case 1:
+ switch(conv_stride_x)
+ {
+ case 1:
+ num_elems_read_per_iteration_x = 8;
+ break;
+ case 2:
+ num_elems_read_per_iteration_x = 16;
+ break;
+ case 3:
+ switch(input->element_size())
+ {
+ case 1:
+ num_elems_read_per_iteration_x = 28;
+ break;
+ case 2:
+ num_elems_read_per_iteration_x = 24;
+ break;
+ case 4:
+ num_elems_read_per_iteration_x = 22;
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Invalid data size");
+ }
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Invalid convolution stride X");
+ }
+ break;
+ case 3:
+ switch(conv_stride_x)
+ {
+ case 1:
+ num_elems_read_per_iteration_x = 10;
+ break;
+ case 2:
+ num_elems_read_per_iteration_x = 17;
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Invalid convolution stride X");
+ }
+ break;
+ case 5:
+ switch(conv_stride_x)
+ {
+ case 1:
+ num_elems_read_per_iteration_x = 12;
+ break;
+ case 2:
+ num_elems_read_per_iteration_x = 20;
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Invalid convolution stride X");
+ }
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Invalid direct convolution size");
+ }
}
// Calculate right and bottom border
@@ -191,11 +244,12 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
int input_height = input->dimension(1) + conv_pad_top + conv_pad_bottom;
// Add padding only if necessary or it would always result in a window_changed
- input_width += input_width % num_elems_read_per_iteration_x;
- input_height += ((input_height / conv_stride_y) * conv_stride_y) % num_elems_read_per_iteration_y;
+ input_width = ceil_to_multiple(input_width, num_elems_read_per_iteration_x);
+ input_height = ceil_to_multiple(input_height, num_elems_read_per_iteration_y);
// Create window and update padding
- win = calculate_max_window(*output, Steps(num_elems_written_per_iteration_x, num_elems_written_per_iteration_y));
+ bool window_changed = false;
+ Window win = calculate_max_window(*output, Steps(num_elems_written_per_iteration_x, num_elems_written_per_iteration_y));
AccessWindowStatic input_access(input, -conv_pad_left, -conv_pad_top, input_width, input_height);
AccessWindowStatic weights_access(weights, 0, 0, kernel_size, kernel_size);