From f650ea5be66be5c7b16faf5482e793e3a6d2430c Mon Sep 17 00:00:00 2001 From: SiCong Li Date: Wed, 5 Aug 2020 15:04:00 +0100 Subject: COMPMID-3339: Patch2: Remove paddings from im2col*_nhwc cl kernel * Remove channel paddings from all nhwc kernels (im2col_3x3_nhwc, im2col_9x9_nhwc, im2col_generic_nhwc) * Validate that input total spatial dimensions (with x and y paddings) are bigger than or equal to the kernel spatial dimension. - Otherwise it would result in invalid memory reads. - This problem likely existed before, but was made obvious with the removal of implicit paddings * Add zero padding validation tests * Fix Im2ColValidationFixture by not permuting the input shape in case of NHWC Change-Id: I1f895e8938af0e9130cb516106f0b4b665531709 Signed-off-by: SiCong Li Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3696 Tested-by: Arm Jenkins Reviewed-by: Gian Marco Iodice Comments-Addressed: Arm Jenkins --- src/core/CL/kernels/CLIm2ColKernel.cpp | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) (limited to 'src/core/CL/kernels/CLIm2ColKernel.cpp') diff --git a/src/core/CL/kernels/CLIm2ColKernel.cpp b/src/core/CL/kernels/CLIm2ColKernel.cpp index de34ed384f..c94e313b9a 100644 --- a/src/core/CL/kernels/CLIm2ColKernel.cpp +++ b/src/core/CL/kernels/CLIm2ColKernel.cpp @@ -70,6 +70,13 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() == DataLayout::NHWC && num_groups > 1); ARM_COMPUTE_RETURN_ERROR_ON((input->dimension(channel_idx) % num_groups) != 0); + // Since there's no implicit padding added, check the total input spatial dimensions (with conv paddings) are big enough for the kernel dimensions + const unsigned int width_idx = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH); + const unsigned int height_idx = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT); + const unsigned total_width = input->dimension(width_idx) + conv_info.pad_left() + conv_info.pad_right(); + const unsigned total_height = input->dimension(height_idx) + conv_info.pad_top() + conv_info.pad_bottom(); + ARM_COMPUTE_RETURN_ERROR_ON((total_width < kernel_dims.width) || (total_height < kernel_dims.height)); + if(output->total_size() > 0) { const TensorInfo tensor_info_output = output->clone()->set_tensor_shape(compute_im2col_conv_shape(input, kernel_dims, conv_info, has_bias, dilation, num_groups == 1, num_groups)); @@ -106,12 +113,12 @@ std::pair validate_and_configure_window(ITensorInfo *input, ITen win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration)); const int xin_start = 0; - const int xin_end = input->dimension(0) < num_elems_processed_per_iteration ? ceil_to_multiple(input->dimension(0), num_elems_processed_per_iteration) : input->dimension(0); + const int xin_end = input->dimension(0); const int yin_start = 0; const int yin_end = input->dimension(1); const int xout_start = 0; - const int xout_end = input->dimension(0) < num_elems_processed_per_iteration ? output->dimension(0) + (num_elems_processed_per_iteration - input->dimension(0)) : output->dimension(0); + const int xout_end = output->dimension(0); const int yout_start = 0; const int yout_end = output->dimension(1); @@ -140,7 +147,6 @@ std::pair validate_and_configure_window(ITensorInfo *input, ITen win = calculate_max_window(*input, Steps()); } } - output->set_valid_region(ValidRegion(Coordinates(), output->tensor_shape())); // set the Z dimension's step same size as the whole dimension so that one can't split across the Z dimension win.set_dimension_step(Window::DimZ, win[Window::DimZ].end() - win[Window::DimZ].start()); @@ -192,7 +198,7 @@ Im2ColConfiguration configure_opencl_kernel(const ITensorInfo *input, const Size if(data_layout == DataLayout::NHWC) { - num_elems_processed_per_iteration = 2; + num_elems_processed_per_iteration = std::min(2U, input_channel); is_padding_required_nchw = false; // Only the 3x3 and 9x9 cases are optimized for NHWC @@ -205,8 +211,14 @@ Im2ColConfiguration configure_opencl_kernel(const ITensorInfo *input, const Size kernel_name = "im2col9x9_"; } - build_opts.add_option("-DVECTOR_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)); - build_opts.add_option("-DLAST_ACCESSED=" + support::cpp11::to_string(std::max(static_cast(input_channel - num_elems_processed_per_iteration), 0))); + // Get boundary vector (the first/last vector with potentially a partial vector size) size + // If input_channel is a multiple of num_elems_processed_per_iteration, the boundary vec size is the (full) vector size + // otherwise, the boundary vec size is the (partial) remainder vector size + const unsigned int vec_size = num_elems_processed_per_iteration; + const unsigned int partial_vec_size = input_channel % vec_size; + const unsigned int boundary_vec_size = vec_size - ((vec_size - partial_vec_size) % vec_size); + build_opts.add_option("-DVECTOR_SIZE=" + support::cpp11::to_string(vec_size)); + build_opts.add_option("-DBOUNDARY_VECTOR_SIZE=" + support::cpp11::to_string(boundary_vec_size)); } else { -- cgit v1.2.1