aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/kernels/CLIm2ColKernel.cpp
diff options
context:
space:
mode:
authorSiCong Li <sicong.li@arm.com>2020-08-05 15:04:00 +0100
committerSiCong Li <sicong.li@arm.com>2020-08-11 13:04:57 +0000
commitf650ea5be66be5c7b16faf5482e793e3a6d2430c (patch)
treea4140b49225fef07d418a13ac981af02ca94c42f /src/core/CL/kernels/CLIm2ColKernel.cpp
parent77d3d2445fa590dee505c4171daef9dd6c8124ca (diff)
downloadComputeLibrary-f650ea5be66be5c7b16faf5482e793e3a6d2430c.tar.gz
COMPMID-3339: Patch2: Remove paddings from im2col*_nhwc cl kernel
* Remove channel paddings from all nhwc kernels (im2col_3x3_nhwc, im2col_9x9_nhwc, im2col_generic_nhwc) * Validate that input total spatial dimensions (with x and y paddings) are bigger than or equal to the kernel spatial dimension. - Otherwise it would result in invalid memory reads. - This problem likely existed before, but was made obvious with the removal of implicit paddings * Add zero padding validation tests * Fix Im2ColValidationFixture by not permuting the input shape in case of NHWC Change-Id: I1f895e8938af0e9130cb516106f0b4b665531709 Signed-off-by: SiCong Li <sicong.li@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3696 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/kernels/CLIm2ColKernel.cpp')
-rw-r--r--src/core/CL/kernels/CLIm2ColKernel.cpp24
1 files changed, 18 insertions, 6 deletions
diff --git a/src/core/CL/kernels/CLIm2ColKernel.cpp b/src/core/CL/kernels/CLIm2ColKernel.cpp
index de34ed384f..c94e313b9a 100644
--- a/src/core/CL/kernels/CLIm2ColKernel.cpp
+++ b/src/core/CL/kernels/CLIm2ColKernel.cpp
@@ -70,6 +70,13 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() == DataLayout::NHWC && num_groups > 1);
ARM_COMPUTE_RETURN_ERROR_ON((input->dimension(channel_idx) % num_groups) != 0);
+ // Since there's no implicit padding added, check the total input spatial dimensions (with conv paddings) are big enough for the kernel dimensions
+ const unsigned int width_idx = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH);
+ const unsigned int height_idx = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT);
+ const unsigned total_width = input->dimension(width_idx) + conv_info.pad_left() + conv_info.pad_right();
+ const unsigned total_height = input->dimension(height_idx) + conv_info.pad_top() + conv_info.pad_bottom();
+ ARM_COMPUTE_RETURN_ERROR_ON((total_width < kernel_dims.width) || (total_height < kernel_dims.height));
+
if(output->total_size() > 0)
{
const TensorInfo tensor_info_output = output->clone()->set_tensor_shape(compute_im2col_conv_shape(input, kernel_dims, conv_info, has_bias, dilation, num_groups == 1, num_groups));
@@ -106,12 +113,12 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration));
const int xin_start = 0;
- const int xin_end = input->dimension(0) < num_elems_processed_per_iteration ? ceil_to_multiple(input->dimension(0), num_elems_processed_per_iteration) : input->dimension(0);
+ const int xin_end = input->dimension(0);
const int yin_start = 0;
const int yin_end = input->dimension(1);
const int xout_start = 0;
- const int xout_end = input->dimension(0) < num_elems_processed_per_iteration ? output->dimension(0) + (num_elems_processed_per_iteration - input->dimension(0)) : output->dimension(0);
+ const int xout_end = output->dimension(0);
const int yout_start = 0;
const int yout_end = output->dimension(1);
@@ -140,7 +147,6 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
win = calculate_max_window(*input, Steps());
}
}
-
output->set_valid_region(ValidRegion(Coordinates(), output->tensor_shape()));
// set the Z dimension's step same size as the whole dimension so that one can't split across the Z dimension
win.set_dimension_step(Window::DimZ, win[Window::DimZ].end() - win[Window::DimZ].start());
@@ -192,7 +198,7 @@ Im2ColConfiguration configure_opencl_kernel(const ITensorInfo *input, const Size
if(data_layout == DataLayout::NHWC)
{
- num_elems_processed_per_iteration = 2;
+ num_elems_processed_per_iteration = std::min(2U, input_channel);
is_padding_required_nchw = false;
// Only the 3x3 and 9x9 cases are optimized for NHWC
@@ -205,8 +211,14 @@ Im2ColConfiguration configure_opencl_kernel(const ITensorInfo *input, const Size
kernel_name = "im2col9x9_";
}
- build_opts.add_option("-DVECTOR_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration));
- build_opts.add_option("-DLAST_ACCESSED=" + support::cpp11::to_string(std::max(static_cast<int>(input_channel - num_elems_processed_per_iteration), 0)));
+ // Get boundary vector (the first/last vector with potentially a partial vector size) size
+ // If input_channel is a multiple of num_elems_processed_per_iteration, the boundary vec size is the (full) vector size
+ // otherwise, the boundary vec size is the (partial) remainder vector size
+ const unsigned int vec_size = num_elems_processed_per_iteration;
+ const unsigned int partial_vec_size = input_channel % vec_size;
+ const unsigned int boundary_vec_size = vec_size - ((vec_size - partial_vec_size) % vec_size);
+ build_opts.add_option("-DVECTOR_SIZE=" + support::cpp11::to_string(vec_size));
+ build_opts.add_option("-DBOUNDARY_VECTOR_SIZE=" + support::cpp11::to_string(boundary_vec_size));
}
else
{