aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Georgios Pinitas <georgios.pinitas@arm.com> 2018-02-21 14:47:09 +0000
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:47:40 +0000
commit: 1a03d76786a59a7d20deabb02f047516e98680d4 (patch)
tree: f679ef0f00662a72880e5e98be8097a7e9ef79e4
parent: aad9f2c976ff9bb7022751f4ee8c659194d2b3a6 (diff)
download: ComputeLibrary-1a03d76786a59a7d20deabb02f047516e98680d4.tar.gz
COMPMID-765: Fix windows in DirectConvLayer and DepthwiseConvLayer
Change-Id: I6c68733c8a2ada12aa3994e3e5213d20222df861
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/121637
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
-rw-r--r-- src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp | 33
-rw-r--r-- src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp | 4
-rw-r--r-- src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp | 14
-rw-r--r-- src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp | 10
-rw-r--r-- tests/validation/CL/DepthwiseConvolutionLayer.cpp | 2
5 files changed, 34 insertions, 29 deletions
diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp
index c24420a7e3..29564b36c9 100644
--- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp
@@ -121,11 +121,6 @@ void CLDepthwiseConvolutionLayer3x3Kernel::configure(const ICLTensor *input, con
const GPUTarget gpu_target = get_arch_from_target(get_target());
// Configure kernel window
- const unsigned int conv_pad_left = conv_info.pad_left();
- const unsigned int conv_pad_top = conv_info.pad_top();
- const unsigned int conv_pad_right = conv_info.pad_right();
- const unsigned int conv_pad_bottom = conv_info.pad_bottom();
-
unsigned int num_elems_read_per_iteration_x = 0;
unsigned int num_elems_read_per_iteration_y = 0;
unsigned int num_elems_written_per_iteration_x = 0;
@@ -139,8 +134,22 @@ void CLDepthwiseConvolutionLayer3x3Kernel::configure(const ICLTensor *input, con
kernel_name = "depthwise_convolution_3x3_f16";
num_elems_written_per_iteration_x = 8 / data_size_from_type(input->info()->data_type());
num_elems_written_per_iteration_y = 1;
- num_elems_read_per_iteration_x = 3 + (num_elems_written_per_iteration_x - 1) * _conv_stride_x;
num_elems_read_per_iteration_y = 3;
+ switch(_conv_stride_x)
+ {
+ case 1:
+ num_elems_read_per_iteration_x = 8;
+ break;
+ case 2:
+ num_elems_read_per_iteration_x = 9;
+ break;
+ case 3:
+ num_elems_read_per_iteration_x = 16;
+ break;
+ default:
+ num_elems_read_per_iteration_x = 3 + (num_elems_written_per_iteration_x - 1) * _conv_stride_x;
+ break;
+ }
}
else if(input->info()->data_type() == DataType::F32 && gpu_target == GPUTarget::BIFROST)
{
@@ -178,18 +187,12 @@ void CLDepthwiseConvolutionLayer3x3Kernel::configure(const ICLTensor *input, con
num_elems_read_per_iteration_y = num_elems_written_per_iteration_y + 2;
}
- // Calculate right and bottom border
- int input_width = input->info()->dimension(0) + conv_pad_left + conv_pad_right;
- int input_height = input->info()->dimension(1) + conv_pad_top + conv_pad_bottom;
-
- // Add padding only if necessary or it would always result in a window_changed
- input_width = ceil_to_multiple(input_width, num_elems_read_per_iteration_x);
- input_height = ceil_to_multiple(input_height, num_elems_read_per_iteration_y);
-
// Create window and update padding
Window win = calculate_max_window(*output->info(), Steps(num_elems_written_per_iteration_x, num_elems_written_per_iteration_y));
- AccessWindowStatic input_access(input->info(), -conv_pad_left, -conv_pad_top, input_width, input_height);
+ AccessWindowRectangle input_access(input->info(), -_conv_pad_left, -_conv_pad_top,
+ num_elems_read_per_iteration_x, num_elems_read_per_iteration_y,
+ _conv_stride_x, _conv_stride_y);
AccessWindowStatic weights_access(weights->info(), 0, 0, 3, 3);
AccessWindowRectangle output_access(output->info(), 0, 0, num_elems_written_per_iteration_x, num_elems_written_per_iteration_y);
diff --git a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp
index 6f5c7a35f9..c01a6660a7 100644
--- a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp
@@ -241,7 +241,9 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
bool window_changed = false;
Window win = calculate_max_window(*output, Steps(num_elems_written_per_iteration_x, num_elems_written_per_iteration_y));
- AccessWindowRectangle input_access(input, -conv_pad_left, -conv_pad_top, num_elems_read_per_iteration_x, num_elems_read_per_iteration_y, conv_stride_x, conv_stride_y);
+ AccessWindowRectangle input_access(input, -conv_pad_left, -conv_pad_top,
+ num_elems_read_per_iteration_x, num_elems_read_per_iteration_y,
+ conv_stride_x, conv_stride_y);
AccessWindowStatic weights_access(weights, 0, 0, kernel_size, kernel_size);
AccessWindowRectangle output_access(output, 0, 0, num_elems_written_per_iteration_x, num_elems_written_per_iteration_y);
diff --git a/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp b/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp
index dad4fee837..f5ee608b60 100644
--- a/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp
+++ b/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp
@@ -238,6 +238,7 @@ void NEDepthwiseConvolutionLayer3x3Kernel::configure_generic()
ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(_output->info()->tensor_shape(), output_shape);
const unsigned int conv_stride_x = _conv_info.stride().first;
+ const unsigned int conv_stride_y = _conv_info.stride().second;
const unsigned int conv_pad_top = _conv_info.pad_top();
const unsigned int conv_pad_right = _conv_info.pad_right();
const unsigned int conv_pad_bottom = _conv_info.pad_bottom();
@@ -264,15 +265,10 @@ void NEDepthwiseConvolutionLayer3x3Kernel::configure_generic()
// Configure kernel window
Window win = calculate_max_window(*_output->info(), Steps(_num_elems_written_per_iteration));
- const unsigned int num_x_steps = (output_shape.x() + _num_elems_written_per_iteration - 1) / _num_elems_written_per_iteration;
- const int input_num_elems_processed = get_input_num_elems_processed(_num_elems_written_per_iteration, conv_stride_x);
-
- AccessWindowStatic input_access(_input->info(),
- -conv_pad_left,
- -conv_pad_top,
- (num_x_steps - 1) * input_num_elems_processed + num_elems_read_per_iteration,
- _input->info()->tensor_shape().y() + conv_pad_bottom);
- AccessWindowStatic weights_access(_weights->info(), 0, 0, _weights->info()->dimension(0), _weights->info()->dimension(1));
+ AccessWindowRectangle input_access(_input->info(), -conv_pad_left, -conv_pad_top,
+ num_elems_read_per_iteration, 3,
+ conv_stride_x, conv_stride_y);
+ AccessWindowStatic weights_access(_weights->info(), 0, 0, 3, 3);
AccessWindowHorizontal output_access(_output->info(), 0, _num_elems_written_per_iteration);
update_window_and_padding(win, input_access, weights_access, output_access);
diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
index 4dc186a8a7..285ec2d0a0 100644
--- a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
@@ -1053,8 +1053,8 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
// Calculate right and bottom border
unsigned int kernel_size = weights->dimension(0);
const int conv_stride_x = std::get<0>(conv_info.stride());
+ const int conv_stride_y = std::get<1>(conv_info.stride());
const int input_width = input->dimension(0);
- const int input_height = input->dimension(1);
switch(kernel_size)
{
@@ -1135,8 +1135,12 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
border_size.right = conv_pad_right;
border_size.bottom = conv_pad_bottom;
- Window win = calculate_max_window(*output, Steps(num_elems_written_per_iteration));
- AccessWindowStatic input_access(input, -conv_pad_left, -conv_pad_top, input_width + conv_pad_right, input_height + conv_pad_bottom);
+ // Configure window
+ Window win = calculate_max_window(*output, Steps(num_elems_written_per_iteration));
+
+ AccessWindowRectangle input_access(input, -conv_pad_left, -conv_pad_top,
+ num_elems_read_per_iteration, kernel_size,
+ conv_stride_x, conv_stride_y);
AccessWindowStatic weights_access(weights, 0, 0, num_weight_elems_read_per_row, kernel_size);
AccessWindowHorizontal output_access(output, 0, num_elems_written_per_iteration);
bool window_changed = update_window_and_padding(win, input_access, weights_access, output_access);
diff --git a/tests/validation/CL/DepthwiseConvolutionLayer.cpp b/tests/validation/CL/DepthwiseConvolutionLayer.cpp
index 20bf6cd46f..8ac882cc60 100644
--- a/tests/validation/CL/DepthwiseConvolutionLayer.cpp
+++ b/tests/validation/CL/DepthwiseConvolutionLayer.cpp
@@ -120,7 +120,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerQuantizedFixture<uin
{
validate(CLAccessor(_target), _reference, tolerance_qasymm8);
}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(),
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(),
framework::dataset::make("DataType", DataType::QASYMM8)),
framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })))
{