From 7da29b6b12ff319ed2b6e2c46588dfa1991556fb Mon Sep 17 00:00:00 2001 From: Alex Gilday Date: Fri, 23 Mar 2018 14:16:00 +0000 Subject: COMPMID-1017: Implement dilated convolution in NEON, OpenCL, and GC Change-Id: If4626ec9e215e14dffe22e80812da5bac84a52e2 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/125734 Reviewed-by: Anthony Barbier Tested-by: Jenkins --- src/core/CL/cl_kernels/im2col.cl | 7 +- src/core/CL/kernels/CLIm2ColKernel.cpp | 136 +++++++++++---------- .../GLES_COMPUTE/cs_shaders/convolution_layer.cs | 5 +- src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp | 12 +- src/core/NEON/kernels/NEIm2ColKernel.cpp | 45 ++++--- src/core/Utils.cpp | 11 +- src/runtime/CL/functions/CLConvolutionLayer.cpp | 18 +-- .../CL/functions/CLGEMMConvolutionLayer.cpp | 16 +-- .../GLES_COMPUTE/functions/GCConvolutionLayer.cpp | 7 +- src/runtime/NEON/functions/NEConvolutionLayer.cpp | 19 +-- .../NEON/functions/NEGEMMConvolutionLayer.cpp | 17 +-- 11 files changed, 161 insertions(+), 132 deletions(-) (limited to 'src') diff --git a/src/core/CL/cl_kernels/im2col.cl b/src/core/CL/cl_kernels/im2col.cl index 75d99bda85..1e85e1b303 100644 --- a/src/core/CL/cl_kernels/im2col.cl +++ b/src/core/CL/cl_kernels/im2col.cl @@ -680,6 +680,7 @@ __kernel void im2col_generic_padx0_pady0_dchw( * @note The pad_left, pad_right, pad_top and pad_bottom must be passed at compile time using -DPAD_LEFT, -DPAD_RIGHT, -DPAD_TOP and -DPAD_BOTTOM: e.g. -DPAD_LEFT=1, -DPAD_RIGHT=2, -DPAD_TOP=3 and -DPAD_BOTTOM=2 * @note The zero value to store in case we load values out-of-bounds must be passed at compile time using -DPAD_VALUE: e.g. -DPAD_VALUE=0.0 * @note The stride along the X and Y directions must be passed at compile time using -DSTRIDE_X and -DSTRIDE_Y: e.g. -DSTRIDE_X=1 and -DSTRIDE_Y=1 + * @note The dilation_x and dilation_y must be passed at compile time using -DDILATION_X and -DDILATION_Y: e.g. -DDILATION_X=1, -DDILATION_Y=1 * @note In case biases will be added to the convolution -DHAS_BIAS has to be passed to append the final matrix with 1 in each row. * * @param[in] src_ptr Pointer to the source tensor. Supported data types: QS8/QASYMM8/QS16/F16/F32 @@ -722,10 +723,12 @@ __kernel void im2col_generic_dchw( __global DATA_TYPE *output_ptr = ((__global DATA_TYPE *)(dst_ptr + dst_offset_first_element_in_bytes + yo * dst_stride_y + batch * dst_stride_w)) + xo; // Linearize convolution elements - for(int y = yi, y_e = yi + KERNEL_HEIGHT; y < y_e; ++y) + for(int yk = 0; yk < KERNEL_HEIGHT; ++yk) { - for(int x = xi, x_e = xi + KERNEL_WIDTH; x < x_e; ++x, ++output_ptr) + int y = yi + yk * DILATION_Y; + for(int xk = 0; xk < KERNEL_WIDTH; ++xk, ++output_ptr) { + int x = xi + xk * DILATION_X; #if PAD_LEFT == 0 && PAD_TOP == 0 && PAD_RIGHT == 0 && PAD_BOTTOM == 0 *output_ptr = *((__global DATA_TYPE *)(input_ptr + x * src_stride_x + y * src_stride_y)); #else // PAD_LEFT == 0 && PAD_TOP == 0 && PAD_RIGHT == 0 && PAD_BOTTOM == 0 diff --git a/src/core/CL/kernels/CLIm2ColKernel.cpp b/src/core/CL/kernels/CLIm2ColKernel.cpp index 9bc4787384..cc19d3c263 100644 --- a/src/core/CL/kernels/CLIm2ColKernel.cpp +++ b/src/core/CL/kernels/CLIm2ColKernel.cpp @@ -41,11 +41,12 @@ using namespace arm_compute; namespace { -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, bool has_bias) +Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, bool has_bias, const Size2D &dilation) { ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QASYMM8, DataType::QS16, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::QASYMM8 && has_bias); ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output); + ARM_COMPUTE_RETURN_ERROR_ON((dilation.x() < 1) || (dilation.y() < 1)); // Checks performed when output is configured if(output->total_size() != 0) @@ -63,12 +64,12 @@ CLIm2ColKernel::CLIm2ColKernel() { } -void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias) +void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); // Perform validation step - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), has_bias)); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), has_bias, dilation)); _input = input; _output = output; @@ -107,7 +108,7 @@ void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const _convolved_dims = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), kernel_dims.width, kernel_dims.height, - conv_info); + conv_info, dilation); build_opts.add_option("-DKERNEL_WIDTH=" + support::cpp11::to_string(kernel_dims.width)); build_opts.add_option("-DKERNEL_HEIGHT=" + support::cpp11::to_string(kernel_dims.height)); @@ -122,77 +123,82 @@ void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const build_opts.add_option("-DPAD_BOTTOM=" + support::cpp11::to_string(conv_info.pad_bottom())); build_opts.add_option("-DSRC_WIDTH=" + support::cpp11::to_string(input->info()->dimension(0))); build_opts.add_option("-DSRC_HEIGHT=" + support::cpp11::to_string(input->info()->dimension(1))); + build_opts.add_option("-DDILATION_X=" + support::cpp11::to_string(dilation.x())); + build_opts.add_option("-DDILATION_Y=" + support::cpp11::to_string(dilation.y())); build_opts.add_option_if_else(is_data_type_quantized(data_type), "-DPAD_VALUE=" + support::cpp11::to_string(input->info()->quantization_info().offset), "-DPAD_VALUE=0"); const bool squared_im2col = kernel_dims.width == kernel_dims.height; - if(squared_im2col && !is_data_type_fixed_point(data_type)) + if(dilation == Size2D(1U, 1U)) { - // Check if we can run an optimized im2col - switch(kernel_dims.width) + if(squared_im2col && !is_data_type_fixed_point(data_type)) { - case 1: - // Optimized im2col1x1 if stride_x = 1 and conv_info.has_padding() = false - if(conv_info.stride().first == 1 && !conv_info.has_padding()) - { - // Set hint for LWS + // Check if we can run an optimized im2col + switch(kernel_dims.width) + { + case 1: + // Optimized im2col1x1 if stride_x = 1 and conv_info.has_padding() = false + if(conv_info.stride().first == 1 && !conv_info.has_padding()) + { + // Set hint for LWS + _lws_hint = cl::NDRange(1, 1, 8); + _num_elems_processed_per_iteration = 4; + is_optimized_path = true; + kernel_name = "im2col1x1_stridex1_dchw"; + } + break; + case 3: _lws_hint = cl::NDRange(1, 1, 8); - _num_elems_processed_per_iteration = 4; + _num_elems_processed_per_iteration = 1; is_optimized_path = true; - kernel_name = "im2col1x1_stridex1_dchw"; - } - break; - case 3: - _lws_hint = cl::NDRange(1, 1, 8); - _num_elems_processed_per_iteration = 1; - is_optimized_path = true; - kernel_name = "im2col3x3_dchw"; - break; - case 5: - _num_elems_processed_per_iteration = 1; - is_optimized_path = true; - kernel_name = "im2col5x5_dchw"; - break; - case 11: - // Optimized im2col11x11 if pad_x = pad_y = 0 - if(!conv_info.has_padding()) - { + kernel_name = "im2col3x3_dchw"; + break; + case 5: _num_elems_processed_per_iteration = 1; is_optimized_path = true; - kernel_name = "im2col11x11_padx0_pady0_dchw"; - } - break; - default: - is_optimized_path = false; - break; - } - } - else if(kernel_dims.width > 1 && !conv_info.has_padding()) - { - _num_elems_processed_per_iteration = 1; - kernel_name = "im2col_generic_padx0_pady0_dchw"; - - // Optimized im2col is performed using one or more vector operations with the specified vector size - // and a remainder. For example, for 5x5 convolutions, im2col is performed using vectors of size 4 - // and scalars; for 7x7 convolutions, using vectors of size 4 and vectors of size 3. - // Using the vector size of 4 is always safe since OpenCL supports vectors of size 2 and 3. - // Using the vector size of 8, however, may be faster. - size_t vector_size = 4; - // For 2x2 convolutions, use vectors of size 2. (For 3x3 convolutions, im2col_kernel3x3_padx0_pady0 - // is used instead.) - if(kernel_dims.width < vector_size) - { - vector_size = kernel_dims.width; + kernel_name = "im2col5x5_dchw"; + break; + case 11: + // Optimized im2col11x11 if pad_x = pad_y = 0 + if(!conv_info.has_padding()) + { + _num_elems_processed_per_iteration = 1; + is_optimized_path = true; + kernel_name = "im2col11x11_padx0_pady0_dchw"; + } + break; + default: + is_optimized_path = false; + break; + } } - // Local work size and vector size optimized for the 11x11 AlexNet convolution on Bifrost. - if(gpu_target_is_in(gpu_target, GPUTarget::G71, GPUTarget::G72) && kernel_dims.width == 11) + else if(kernel_dims.width > 1 && !conv_info.has_padding()) { - _lws_hint = cl::NDRange(1, 1, 1); - vector_size = 8; + _num_elems_processed_per_iteration = 1; + kernel_name = "im2col_generic_padx0_pady0_dchw"; + + // Optimized im2col is performed using one or more vector operations with the specified vector size + // and a remainder. For example, for 5x5 convolutions, im2col is performed using vectors of size 4 + // and scalars; for 7x7 convolutions, using vectors of size 4 and vectors of size 3. + // Using the vector size of 4 is always safe since OpenCL supports vectors of size 2 and 3. + // Using the vector size of 8, however, may be faster. + size_t vector_size = 4; + // For 2x2 convolutions, use vectors of size 2. (For 3x3 convolutions, im2col_kernel3x3_padx0_pady0 + // is used instead.) + if(kernel_dims.width < vector_size) + { + vector_size = kernel_dims.width; + } + // Local work size and vector size optimized for the 11x11 AlexNet convolution on Bifrost. + if(gpu_target_is_in(gpu_target, GPUTarget::G71, GPUTarget::G72) && kernel_dims.width == 11) + { + _lws_hint = cl::NDRange(1, 1, 1); + vector_size = 8; + } + const size_t width_mod_vector_size = kernel_dims.width % vector_size; + build_opts.add_option("-DVECTOR_SIZE=" + support::cpp11::to_string(vector_size)); + build_opts.add_option("-DWIDTH_MOD_VECTOR_SIZE=" + support::cpp11::to_string(width_mod_vector_size)); } - const size_t width_mod_vector_size = kernel_dims.width % vector_size; - build_opts.add_option("-DVECTOR_SIZE=" + support::cpp11::to_string(vector_size)); - build_opts.add_option("-DWIDTH_MOD_VECTOR_SIZE=" + support::cpp11::to_string(width_mod_vector_size)); } _run_func = &CLIm2ColKernel::run_generic; } @@ -206,7 +212,7 @@ void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const // Create kernel _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); - // Configure kernel window + // Configure kernel window Window win; if(is_optimized_path) { @@ -250,12 +256,12 @@ void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const _config_id += support::cpp11::to_string(output->info()->dimension(1)); } -Status CLIm2ColKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias) +Status CLIm2ColKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation) { ARM_COMPUTE_UNUSED(kernel_dims); ARM_COMPUTE_UNUSED(conv_info); ARM_COMPUTE_UNUSED(has_bias); - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, has_bias)); + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, has_bias, dilation)); return Status{}; } diff --git a/src/core/GLES_COMPUTE/cs_shaders/convolution_layer.cs b/src/core/GLES_COMPUTE/cs_shaders/convolution_layer.cs index 2701f5b262..ad3f14d442 100644 --- a/src/core/GLES_COMPUTE/cs_shaders/convolution_layer.cs +++ b/src/core/GLES_COMPUTE/cs_shaders/convolution_layer.cs @@ -164,6 +164,7 @@ void main() * @note STRIDE_X/STRIDE_Y must be passed for stride info, e.g. "#define STRIDE_X xxx" * @note CONVOLVED_WIDTH/CONVOLVED_HEIGHT must be passed for convolved dimension, e.g. "#define CONVOLVED_WIDTH xxx" * @note SRC_WIDTH/SRC_HEIGHT must be passed for input dimension, e.g. "#define SRC_WIDTH xxx" + * @note DILATION_X/DILATION_Y must be passed for dilation sizes, e.g. "#define DILATION_X xxx" * @note In case biases will be added to the convolution "#define HAS_BIAS" has to be passed to append the final matrix with 1 in each row. * * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16/F32 @@ -210,9 +211,9 @@ void main(void) uint src_pos = 0u; // Linearize convolution elements - for(uint y = yi, y_e = yi + uint(KERNEL_HEIGHT); y < y_e; ++y) + for(uint y = yi, y_e = yi + uint(KERNEL_HEIGHT) * uint(DILATION_Y); y < y_e; y += uint(DILATION_Y)) { - for(uint x = xi, x_e = xi + uint(KERNEL_WIDTH); x < x_e; ++x, TENSOR_OFFSET_ADVANCE(dst_iter, 1u)) + for(uint x = xi, x_e = xi + uint(KERNEL_WIDTH) * uint(DILATION_X); x < x_e; x += uint(DILATION_X), TENSOR_OFFSET_ADVANCE(dst_iter, 1u)) { #if PAD_LEFT == 0 && PAD_TOP == 0 && PAD_RIGHT == 0 && PAD_BOTTOM == 0 src_pos = TENSOR_OFFSET_ADVANCE_IN_BYTES(src_iter, x * src_attrs.stride_x + y * src_attrs.stride_y); diff --git a/src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp index 47bfebcc09..eb790471fb 100644 --- a/src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp +++ b/src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp @@ -65,7 +65,7 @@ GCIm2ColKernel::GCIm2ColKernel() { } -void GCIm2ColKernel::configure(const IGCTensor *input, IGCTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias) +void GCIm2ColKernel::configure(const IGCTensor *input, IGCTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); @@ -98,7 +98,8 @@ void GCIm2ColKernel::configure(const IGCTensor *input, IGCTensor *output, const && (std::equal(input->info()->tensor_shape().cbegin() + 3, input->info()->tensor_shape().cend(), output->info()->tensor_shape().cbegin() + 1)) - && ((stride_x == 1) && (stride_y == 1) && !conv_info.has_padding()); + && ((stride_x == 1) && (stride_y == 1) && !conv_info.has_padding()) + && (dilation == Size2D(1U, 1U)); std::string kernel_name = "im2col_generic"; if(!run_img2col_reduced) @@ -111,7 +112,7 @@ void GCIm2ColKernel::configure(const IGCTensor *input, IGCTensor *output, const build_opts.emplace("#define IM2COL_GENERIC"); _convolved_dims = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), kernel_dims.width, kernel_dims.height, - conv_info); + conv_info, dilation); _num_elems_processed_per_iteration = 2; build_opts.emplace("#define KERNEL_WIDTH " + support::cpp11::to_string(kernel_dims.width)); @@ -127,6 +128,8 @@ void GCIm2ColKernel::configure(const IGCTensor *input, IGCTensor *output, const build_opts.emplace("#define PAD_BOTTOM " + support::cpp11::to_string(conv_info.pad_bottom())); build_opts.emplace("#define SRC_WIDTH " + support::cpp11::to_string(input->info()->dimension(0))); build_opts.emplace("#define SRC_HEIGHT " + support::cpp11::to_string(input->info()->dimension(1))); + build_opts.emplace("#define DILATION_X " + support::cpp11::to_string(dilation.x())); + build_opts.emplace("#define DILATION_Y " + support::cpp11::to_string(dilation.y())); _run_func = &GCIm2ColKernel::run_generic; } @@ -206,11 +209,12 @@ void GCIm2ColKernel::configure(const IGCTensor *input, IGCTensor *output, const IGCKernel::configure(win); } -Status GCIm2ColKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias) +Status GCIm2ColKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation) { ARM_COMPUTE_UNUSED(kernel_dims); ARM_COMPUTE_UNUSED(conv_info); ARM_COMPUTE_UNUSED(has_bias); + ARM_COMPUTE_UNUSED(dilation); ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output)); return Status{}; } diff --git a/src/core/NEON/kernels/NEIm2ColKernel.cpp b/src/core/NEON/kernels/NEIm2ColKernel.cpp index dee1608c43..348722c55d 100644 --- a/src/core/NEON/kernels/NEIm2ColKernel.cpp +++ b/src/core/NEON/kernels/NEIm2ColKernel.cpp @@ -45,12 +45,13 @@ using namespace arm_compute; namespace { Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, - bool has_bias, bool is_fully_connected, bool is_flatten) + bool has_bias, bool is_fully_connected, bool is_flatten, const Size2D &dilation) { ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QASYMM8, DataType::QS16, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(input, output); ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::QASYMM8 && has_bias); + ARM_COMPUTE_RETURN_ERROR_ON((dilation.x() < 1) || (dilation.y() < 1)); if(is_flatten) /* Called by FlattenLayer */ { @@ -59,7 +60,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c } else if(!is_fully_connected) /* Called by ConvolutionLayer */ { - std::pair out_dims = scaled_dimensions(input->dimension(0), input->dimension(1), kernel_dims.width, kernel_dims.height, conv_info); + std::pair out_dims = scaled_dimensions(input->dimension(0), input->dimension(1), kernel_dims.width, kernel_dims.height, conv_info, dilation); ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(0) != (input->dimension(2) * kernel_dims.area() + (has_bias ? 1 : 0))); ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(1) != (out_dims.first * out_dims.second)); ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(2) != 1); @@ -91,11 +92,13 @@ inline void linearize_volume(const uint8_t *const in_ptr, int input_stride_y, int input_stride_z, int fixed_point_position, - int pad_value) + int pad_value, + int dilation_x, + int dilation_y) { const int kernel_size2 = kernel_width * kernel_height; - const int x_e = top_left_x + kernel_width; - const int y_e = top_left_y + kernel_height; + const int x_e = top_left_x + kernel_width * dilation_x; + const int y_e = top_left_y + kernel_height * dilation_y; // Linearize volume int d = 0; @@ -104,12 +107,12 @@ inline void linearize_volume(const uint8_t *const in_ptr, // 2) to have an optimized im2col for the first convolution layer where usually we have 3 IFMs for(; d <= (kernel_depth - 3); d += 3) { - for(int y = top_left_y; y < y_e; ++y) + for(int y = top_left_y; y < y_e; y += dilation_y) { if((y < 0 || y >= input_h) && has_pads) { // All the values will be the offset (will be zeros when not quantized) - for(int x = top_left_x; x < x_e; ++x, ++out_ptr) + for(int x = top_left_x; x < x_e; x += dilation_x, ++out_ptr) { *(out_ptr + 0 * kernel_size2) = pad_value; *(out_ptr + 1 * kernel_size2) = pad_value; @@ -118,7 +121,7 @@ inline void linearize_volume(const uint8_t *const in_ptr, } else { - for(int x = top_left_x; x < x_e; ++x, ++out_ptr) + for(int x = top_left_x; x < x_e; x += dilation_x, ++out_ptr) { if((x < 0 || x >= input_w) && has_pads) { @@ -141,7 +144,7 @@ inline void linearize_volume(const uint8_t *const in_ptr, // Left over for(; d < kernel_depth; d++) { - for(int y = top_left_y; y < y_e; ++y) + for(int y = top_left_y; y < y_e; y += dilation_y) { if((y < 0 || y >= input_h) && has_pads) { @@ -151,7 +154,7 @@ inline void linearize_volume(const uint8_t *const in_ptr, } else { - for(int x = top_left_x; x < x_e; ++x, ++out_ptr) + for(int x = top_left_x; x < x_e; x += dilation_x, ++out_ptr) { if((x < 0 || x >= input_w) && has_pads) { @@ -251,7 +254,9 @@ void NEIm2ColKernel::run_generic(const Window &window) input_stride_y, input_stride_z, _input->info()->fixed_point_position(), - offset); + offset, + _dilation.x(), + _dilation.y()); }, in, out); } @@ -309,27 +314,28 @@ void NEIm2ColKernel::run_reduced(const Window &window) } NEIm2ColKernel::NEIm2ColKernel() - : _func(), _input(nullptr), _output(nullptr), _convolved_dims(), _conv_info(), _kernel_width(0), _kernel_height(0), _has_bias(false) + : _func(), _input(nullptr), _output(nullptr), _convolved_dims(), _conv_info(), _kernel_width(0), _kernel_height(0), _has_bias(false), _dilation(1U, 1U) { } void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, - bool has_bias, bool is_fully_connected, bool is_flatten) + bool has_bias, bool is_fully_connected, bool is_flatten, const Size2D &dilation) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); // Perform validation step ARM_COMPUTE_UNUSED(is_fully_connected, is_flatten); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), kernel_dims, conv_info, has_bias, is_fully_connected, is_flatten)); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), kernel_dims, conv_info, has_bias, is_fully_connected, is_flatten, dilation)); _input = input; _output = output; _conv_info = conv_info; _kernel_width = kernel_dims.width; - _kernel_height = kernel_dims.height, + _kernel_height = kernel_dims.height; + _dilation = dilation; _convolved_dims = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), _kernel_width, _kernel_height, - _conv_info); + _conv_info, _dilation); _has_bias = has_bias; unsigned int stride_x = 0; @@ -340,7 +346,8 @@ void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size && (std::equal(input->info()->tensor_shape().cbegin() + 3, input->info()->tensor_shape().cend(), output->info()->tensor_shape().cbegin() + 1)) - && ((stride_x == 1) && (stride_y == 1) && !conv_info.has_padding()); + && ((stride_x == 1) && (stride_y == 1) && !conv_info.has_padding()) + && ((dilation.x() == 1) && (dilation.y() == 1)); Window window = calculate_max_window(*input->info(), Steps()); @@ -407,9 +414,9 @@ void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size } Status NEIm2ColKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, - bool has_bias, bool is_fully_connected, bool is_flatten) + bool has_bias, bool is_fully_connected, bool is_flatten, const Size2D &dilation) { - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, kernel_dims, conv_info, has_bias, is_fully_connected, is_flatten)); + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, kernel_dims, conv_info, has_bias, is_fully_connected, is_flatten, dilation)); return Status{}; } diff --git a/src/core/Utils.cpp b/src/core/Utils.cpp index f4b45532cf..4a237f9daa 100644 --- a/src/core/Utils.cpp +++ b/src/core/Utils.cpp @@ -292,7 +292,8 @@ const std::pair arm_compute::deconvolution_output_di const std::pair arm_compute::scaled_dimensions(unsigned int width, unsigned int height, unsigned int kernel_width, unsigned int kernel_height, - const PadStrideInfo &pad_stride_info) + const PadStrideInfo &pad_stride_info, + const Size2D &dilation) { const unsigned int pad_left = pad_stride_info.pad_left(); const unsigned int pad_top = pad_stride_info.pad_top(); @@ -305,12 +306,12 @@ const std::pair arm_compute::scaled_dimensions(unsig switch(pad_stride_info.round()) { case DimensionRoundingType::FLOOR: - w = static_cast(std::floor((static_cast(width + pad_left + pad_right - kernel_width) / stride_x) + 1)); - h = static_cast(std::floor((static_cast(height + pad_top + pad_bottom - kernel_height) / stride_y) + 1)); + w = static_cast(std::floor((static_cast(width + pad_left + pad_right - (dilation.x() * (kernel_width - 1) + 1)) / stride_x) + 1)); + h = static_cast(std::floor((static_cast(height + pad_top + pad_bottom - (dilation.y() * (kernel_height - 1) + 1)) / stride_y) + 1)); break; case DimensionRoundingType::CEIL: - w = static_cast(std::ceil((static_cast(width + pad_left + pad_right - kernel_width) / stride_x) + 1)); - h = static_cast(std::ceil((static_cast(height + pad_top + pad_bottom - kernel_height) / stride_y) + 1)); + w = static_cast(std::ceil((static_cast(width + pad_left + pad_right - (dilation.x() * (kernel_width - 1) + 1)) / stride_x) + 1)); + h = static_cast(std::ceil((static_cast(height + pad_top + pad_bottom - (dilation.y() * (kernel_height - 1) + 1)) / stride_y) + 1)); break; default: ARM_COMPUTE_ERROR("Unsupported rounding type"); diff --git a/src/runtime/CL/functions/CLConvolutionLayer.cpp b/src/runtime/CL/functions/CLConvolutionLayer.cpp index 1a486ce5c7..64bda93ff0 100644 --- a/src/runtime/CL/functions/CLConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLConvolutionLayer.cpp @@ -42,13 +42,14 @@ CLConvolutionLayer::CLConvolutionLayer(std::shared_ptr memory_ma { } -void CLConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info) +void CLConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info, + const Size2D &dilation) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); - ARM_COMPUTE_ERROR_THROW_ON(CLConvolutionLayer::validate(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info, weights_info)); + ARM_COMPUTE_ERROR_THROW_ON(CLConvolutionLayer::validate(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info, weights_info, dilation)); switch(CLConvolutionLayer::get_convolution_method(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info, - weights_info, CLScheduler::get().target())) + weights_info, CLScheduler::get().target(), dilation)) { case ConvolutionMethod::DIRECT: { @@ -60,7 +61,7 @@ void CLConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, c case ConvolutionMethod::GEMM: { auto f = arm_compute::support::cpp14::make_unique(_memory_manager); - f->configure(input, weights, biases, output, conv_info, weights_info); + f->configure(input, weights, biases, output, conv_info, weights_info, dilation); _function = std::move(f); break; } @@ -71,14 +72,14 @@ void CLConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, c } Status CLConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info) + const WeightsInfo &weights_info, const Size2D &dilation) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); //Configure if the parameters match the direct convolution or the gemm-based const GPUTarget gpu_target = CLScheduler::get().target(); - switch(CLConvolutionLayer::get_convolution_method(input, weights, biases, output, conv_info, weights_info, gpu_target)) + switch(CLConvolutionLayer::get_convolution_method(input, weights, biases, output, conv_info, weights_info, gpu_target, dilation)) { case ConvolutionMethod::DIRECT: { @@ -89,7 +90,7 @@ Status CLConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo case ConvolutionMethod::GEMM: { // Validate gemm-based convolution layer - CLGEMMConvolutionLayer::validate(input, weights, biases, output, conv_info, weights_info); + CLGEMMConvolutionLayer::validate(input, weights, biases, output, conv_info, weights_info, dilation); break; } default: @@ -101,7 +102,7 @@ Status CLConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo } ConvolutionMethod CLConvolutionLayer::get_convolution_method(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info, const GPUTarget gpu_target) + const WeightsInfo &weights_info, const GPUTarget gpu_target, const Size2D &dilation) { ARM_COMPUTE_UNUSED(input); ARM_COMPUTE_UNUSED(weights); @@ -110,6 +111,7 @@ ConvolutionMethod CLConvolutionLayer::get_convolution_method(const ITensorInfo * ARM_COMPUTE_UNUSED(conv_info); ARM_COMPUTE_UNUSED(weights_info); ARM_COMPUTE_UNUSED(gpu_target); + ARM_COMPUTE_UNUSED(dilation); return ConvolutionMethod::GEMM; } diff --git a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp index bc339f176f..e7ad62f5ff 100644 --- a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp @@ -151,7 +151,8 @@ Status CLGEMMConvolutionLayer::validate_mm(const ITensorInfo *input, const ITens return Status{}; } -void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info) +void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info, + const Size2D &dilation) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); @@ -160,7 +161,8 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor * biases != nullptr ? biases->info() : nullptr, output->info(), conv_info, - weights_info)); + weights_info, + dilation)); _is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type()); @@ -187,7 +189,7 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor * const unsigned int kernel_width = weights->info()->dimension(0); const unsigned int kernel_height = weights->info()->dimension(1); std::tie(conv_w, conv_h) = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), kernel_width, kernel_height, - conv_info); + conv_info, dilation); unsigned int mat_weights_cols = weights->info()->dimension(3); unsigned int mat_weights_rows = weights->info()->dimension(0) * weights->info()->dimension(1) * weights->info()->dimension(2) + bias_element; @@ -224,7 +226,7 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor * _memory_group.manage(&_gemm_output); // Configure im2col - _im2col_kernel.configure(input, &_im2col_output, Size2D(kernel_width, kernel_height), conv_info, append_bias); + _im2col_kernel.configure(input, &_im2col_output, Size2D(kernel_width, kernel_height), conv_info, append_bias, dilation); // Configure GEMM configure_mm(&_im2col_output, weights, &_gemm_output); @@ -260,7 +262,7 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor * } Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info) + const WeightsInfo &weights_info, const Size2D &dilation) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights_info.are_reshaped(), "Weights already reshaped are not supported!"); @@ -282,7 +284,7 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI const unsigned int kernel_width = weights->dimension(0); const unsigned int kernel_height = weights->dimension(1); - std::tie(conv_w, conv_h) = scaled_dimensions(input->dimension(0), input->dimension(1), kernel_width, kernel_height, conv_info); + std::tie(conv_w, conv_h) = scaled_dimensions(input->dimension(0), input->dimension(1), kernel_width, kernel_height, conv_info, dilation); unsigned int mat_weights_cols = weights->dimension(3); unsigned int mat_weights_rows = weights->dimension(0) * weights->dimension(1) * weights->dimension(2) + bias_element; @@ -298,7 +300,7 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI shape_im2col.set(2, 1); TensorInfo im2col_reshaped_info(shape_im2col, 1, dt, input->fixed_point_position()); im2col_reshaped_info.set_quantization_info(input->quantization_info()); - CLIm2ColKernel::validate(input, &im2col_reshaped_info, Size2D(kernel_width, kernel_height), conv_info, append_bias); + CLIm2ColKernel::validate(input, &im2col_reshaped_info, Size2D(kernel_width, kernel_height), conv_info, append_bias, dilation); // Create GEMM output tensor TensorShape shape_gemm = im2col_reshaped_info.tensor_shape(); diff --git a/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp index f4c073668a..c2b7e02284 100644 --- a/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp +++ b/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp @@ -102,7 +102,8 @@ void GCConvolutionLayer::configure_mm(const IGCTensor *input, const IGCTensor *w _mm_kernel.configure(input, weights, output, 1.f, is_interleaved_transposed); } -void GCConvolutionLayer::configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info) +void GCConvolutionLayer::configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info, + const Size2D &dilation) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); @@ -136,7 +137,7 @@ void GCConvolutionLayer::configure(const IGCTensor *input, const IGCTensor *weig const unsigned int kernel_width = (_are_weights_reshaped) ? weights_info.kernel_size().first : weights->info()->dimension(0); const unsigned int kernel_height = (_are_weights_reshaped) ? weights_info.kernel_size().second : weights->info()->dimension(1); std::tie(conv_w, conv_h) = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), kernel_width, kernel_height, - conv_info); + conv_info, dilation); // Check if its a "fully connected" convolution _is_fully_connected_convolution = ((conv_w == 1) && (conv_h == 1)); @@ -229,7 +230,7 @@ void GCConvolutionLayer::configure(const IGCTensor *input, const IGCTensor *weig input->info()->extend_padding(border_size); _fill_border.configure(input, border_size, BorderMode::CONSTANT, PixelValue(0)); // for PAD of im2col fp16: consider it as border } - _input_im2col_kernel.configure(input, &_input_im2col_reshaped, Size2D(kernel_width, kernel_height), conv_info, _append_bias); + _input_im2col_kernel.configure(input, &_input_im2col_reshaped, Size2D(kernel_width, kernel_height), conv_info, _append_bias, dilation); // Configure matrix multiply if(run_interleaved) diff --git a/src/runtime/NEON/functions/NEConvolutionLayer.cpp b/src/runtime/NEON/functions/NEConvolutionLayer.cpp index d4421e8429..e659495b7c 100644 --- a/src/runtime/NEON/functions/NEConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEConvolutionLayer.cpp @@ -40,14 +40,15 @@ NEConvolutionLayer::NEConvolutionLayer(std::shared_ptr memory_ma { } -void NEConvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info) +void NEConvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info, + const Size2D &dilation) { // Perform validate step ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); - ARM_COMPUTE_ERROR_THROW_ON(NEConvolutionLayer::validate(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info, weights_info)); + ARM_COMPUTE_ERROR_THROW_ON(NEConvolutionLayer::validate(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info, weights_info, dilation)); switch(NEConvolutionLayer::get_convolution_method(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info, - weights_info)) + weights_info, dilation)) { case ConvolutionMethod::WINOGRAD: { @@ -59,7 +60,7 @@ void NEConvolutionLayer::configure(ITensor *input, const ITensor *weights, const case ConvolutionMethod::GEMM: { auto f = arm_compute::support::cpp14::make_unique(_memory_manager); - f->configure(input, weights, biases, output, conv_info, weights_info); + f->configure(input, weights, biases, output, conv_info, weights_info, dilation); _function = std::move(f); break; } @@ -77,9 +78,9 @@ void NEConvolutionLayer::configure(ITensor *input, const ITensor *weights, const } Status NEConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info) + const WeightsInfo &weights_info, const Size2D &dilation) { - switch(NEConvolutionLayer::get_convolution_method(input, weights, biases, output, conv_info, weights_info)) + switch(NEConvolutionLayer::get_convolution_method(input, weights, biases, output, conv_info, weights_info, dilation)) { case ConvolutionMethod::WINOGRAD: //Validate Winograd @@ -87,7 +88,7 @@ Status NEConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo break; case ConvolutionMethod::GEMM: //Validate Gemm-based Convolution - NEGEMMConvolutionLayer::validate(input, weights, biases, output, conv_info, weights_info); + NEGEMMConvolutionLayer::validate(input, weights, biases, output, conv_info, weights_info, dilation); break; case ConvolutionMethod::DIRECT: //Validate Gemm-based Convolution @@ -101,12 +102,12 @@ Status NEConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo } ConvolutionMethod NEConvolutionLayer::get_convolution_method(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info) + const WeightsInfo &weights_info, const Size2D &dilation) { ARM_COMPUTE_UNUSED(output); ARM_COMPUTE_UNUSED(weights_info); if((input->data_type() == DataType::F32) && (weights->dimension(0) == 3) && (weights->dimension(1) == 3) && (weights->num_dimensions() <= 4) && (conv_info.stride().first == 1) - && (conv_info.stride().second == 1) && (biases != nullptr)) + && (conv_info.stride().second == 1) && (biases != nullptr) && (dilation == Size2D(1U, 1U))) { return ConvolutionMethod::WINOGRAD; } diff --git a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp index 3b8b4243e5..d9707d95e0 100644 --- a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp @@ -170,7 +170,7 @@ Status validate_and_initialize_values(const ITensorInfo *input, const ITensorInf bool &are_weights_reshaped, unsigned int &kernel_width, unsigned int &kernel_height, bool &is_fully_connected_convolution, bool &is_interleaved, bool &is_quantized, unsigned int &mat_weights_cols, unsigned int &mat_weights_rows, - unsigned int &conv_w, unsigned int &conv_h) + unsigned int &conv_w, unsigned int &conv_h, const Size2D &dilation) { ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QASYMM8, DataType::QS16, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); @@ -205,7 +205,7 @@ Status validate_and_initialize_values(const ITensorInfo *input, const ITensorInf mat_weights_rows = weights->dimension(0) * weights->dimension(1) * weights->dimension(2) + (append_bias ? 1 : 0); std::tie(conv_w, conv_h) = scaled_dimensions(input->dimension(0), input->dimension(1), kernel_width, kernel_height, - conv_info); + conv_info, dilation); // Check if its a "fully connected" convolution is_fully_connected_convolution = ((conv_w == 1) && (conv_h == 1)); @@ -246,7 +246,8 @@ void NEGEMMConvolutionLayer::configure_mm(const ITensor *input, const ITensor *w } } -void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info) +void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info, + const Size2D &dilation) { // Perform validate step ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); @@ -262,7 +263,7 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig Status status = validate_and_initialize_values(input->info(), weights->info(), (biases == nullptr) ? nullptr : biases->info(), conv_info, weights_info, dt, _append_bias, _are_weights_reshaped, kernel_width, kernel_height, _is_fully_connected_convolution, _is_interleaved, _is_quantized, - mat_weights_cols, mat_weights_rows, conv_w, conv_h); + mat_weights_cols, mat_weights_rows, conv_w, conv_h, dilation); ARM_COMPUTE_ERROR_THROW_ON(status); @@ -362,7 +363,7 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig // Configure kernels // Configure im2col - _input_im2col_kernel.configure(input, &_input_im2col_reshaped, Size2D(kernel_width, kernel_height), conv_info, _append_bias); + _input_im2col_kernel.configure(input, &_input_im2col_reshaped, Size2D(kernel_width, kernel_height), conv_info, _append_bias, false, false, dilation); // Configure matrix multiply if(run_optimised) @@ -420,7 +421,7 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig } Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info) + const WeightsInfo &weights_info, const Size2D &dilation) { ARM_COMPUTE_UNUSED(output); @@ -439,7 +440,7 @@ Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI Status status = validate_and_initialize_values(input, weights, biases, conv_info, weights_info, dt, append_bias, are_weights_reshaped, kernel_width, kernel_height, is_fully_connected_convolution, is_interleaved, is_quantized, mat_weights_cols, mat_weights_rows, - conv_w, conv_h); + conv_w, conv_h, dilation); const Size2D kernel_weights = Size2D(kernel_width, kernel_height); @@ -517,7 +518,7 @@ Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI shape_im2col.set(1, mat_input_rows); shape_im2col.set(2, 1); TensorInfo im2_col_info = input->clone()->set_tensor_shape(shape_im2col); - ARM_COMPUTE_RETURN_ON_ERROR(NEIm2ColKernel::validate(input, &im2_col_info, kernel_weights, conv_info, append_bias, false)); + ARM_COMPUTE_RETURN_ON_ERROR(NEIm2ColKernel::validate(input, &im2_col_info, kernel_weights, conv_info, append_bias, false, false, dilation)); // Create GEMM output tensor TensorShape shape_gemm(im2_col_info.tensor_shape()); -- cgit v1.2.1