From b9070a42a44ec1a0102e2f0b04523d2e96392903 Mon Sep 17 00:00:00 2001 From: Matthew Jackson Date: Thu, 22 Aug 2019 16:13:27 +0100 Subject: COMPMID-2605: Add asymmetric padding support for Deconvolution layer Change-Id: I63b773bdce25f1342ccd3a08ded623a1508f70fe Signed-off-by: Matthew Jackson Reviewed-on: https://review.mlplatform.org/c/1797 Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins Reviewed-by: Michele Di Giorgio Reviewed-by: Giuseppe Rossini --- arm_compute/core/Utils.h | 16 ++--- .../kernels/CLDeconvolutionLayerUpsampleKernel.cpp | 9 ++- .../kernels/CLDeconvolutionReshapeOutputKernel.cpp | 17 ++--- src/core/CPP/kernels/CPPUpsampleKernel.cpp | 8 +-- src/core/Utils.cpp | 21 ++++--- src/graph/nodes/DeconvolutionLayerNode.cpp | 5 +- .../CL/functions/CLDirectDeconvolutionLayer.cpp | 44 ++++++++----- .../CL/functions/CLGEMMDeconvolutionLayer.cpp | 4 +- .../NEON/functions/NEDeconvolutionLayer.cpp | 72 +++++++++++++++------- tests/validation/CL/DeconvolutionLayer.cpp | 22 +++++-- tests/validation/NEON/DeconvolutionLayer.cpp | 35 ++++++++--- .../fixtures/DeconvolutionLayerFixture.h | 24 +++++++- tests/validation/reference/DeconvolutionLayer.cpp | 50 ++++++++++----- 13 files changed, 218 insertions(+), 109 deletions(-) diff --git a/arm_compute/core/Utils.h b/arm_compute/core/Utils.h index 6ffab4b9ea..0ce2ee0161 100644 --- a/arm_compute/core/Utils.h +++ b/arm_compute/core/Utils.h @@ -853,21 +853,17 @@ PadStrideInfo calculate_same_pad(TensorShape input_shape, TensorShape weights_sh /** Returns expected width and height of the deconvolution's output tensor. * - * @param[in] in_width Width of input tensor (Number of columns) - * @param[in] in_height Height of input tensor (Number of rows) - * @param[in] kernel_width Kernel width. - * @param[in] kernel_height Kernel height. - * @param[in] padx X axis padding. - * @param[in] pady Y axis padding. - * @param[in] stride_x X axis input stride. - * @param[in] stride_y Y axis input stride. + * @param[in] in_width Width of input tensor (Number of columns) + * @param[in] in_height Height of input tensor (Number of rows) + * @param[in] kernel_width Kernel width. + * @param[in] kernel_height Kernel height. + * @param[in] pad_stride_info Pad and stride information. * * @return A pair with the new width in the first position and the new height in the second. */ std::pair deconvolution_output_dimensions(unsigned int in_width, unsigned int in_height, unsigned int kernel_width, unsigned int kernel_height, - unsigned int padx, unsigned int pady, - unsigned int stride_x, unsigned int stride_y); + const PadStrideInfo &pad_stride_info); /** Returns expected width and height of output scaled tensor depending on dimensions rounding mode. * diff --git a/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp index 50f654680c..4ae9cabd1f 100644 --- a/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp +++ b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp @@ -56,7 +56,6 @@ Status CLDeconvolutionLayerUpsampleKernel::validate(const ITensorInfo *input, co ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(idx_w) == 0); ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(idx_h) == 0); - ARM_COMPUTE_RETURN_ERROR_ON(!info.padding_is_symmetric()); ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(idx_c) != output->dimension(idx_c)); for(size_t i = 3; i < Coordinates::num_max_dimensions; ++i) @@ -104,12 +103,12 @@ void CLDeconvolutionLayerUpsampleKernel::run(const Window &window, cl::CommandQu const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); - const int out_start_x = _info.pad().first; - const int out_end_x = _output->info()->dimension(idx_w) - _info.pad().first + _info.stride().first - 1; + const int out_start_x = _info.pad_left(); + const int out_end_x = _output->info()->dimension(idx_w) - _info.pad_right() + _info.stride().first - 1; const int out_step_x = _info.stride().first; - const int out_start_y = _info.pad().second; - const int out_end_y = _output->info()->dimension(idx_h) - _info.pad().second + _info.stride().second - 1; + const int out_start_y = _info.pad_top(); + const int out_end_y = _output->info()->dimension(idx_h) - _info.pad_bottom() + _info.stride().second - 1; const int out_step_y = _info.stride().second; switch(data_layout) diff --git a/src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.cpp b/src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.cpp index 71218f5b52..69e5eff213 100644 --- a/src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.cpp +++ b/src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.cpp @@ -40,8 +40,6 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, con { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output, input_info, weights_info); const DataLayout data_layout = input_info->data_layout(); - const unsigned int stride_x = deconv_info.stride().first; - const unsigned int stride_y = deconv_info.stride().second; const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); @@ -77,8 +75,8 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, con if(output->total_size() != 0) { - auto out_dims = deconvolution_output_dimensions(input_info->dimension(idx_w), input_info->dimension(idx_h), weights_info->dimension(idx_w), weights_info->dimension(idx_h), - 0, 0, stride_x, stride_y); + const PadStrideInfo stride_info(deconv_info.stride().first, deconv_info.stride().second); + auto out_dims = deconvolution_output_dimensions(input_info->dimension(idx_w), input_info->dimension(idx_h), weights_info->dimension(idx_w), weights_info->dimension(idx_h), stride_info); const TensorShape output_shape = misc::shape_calculator::compute_deconvolution_output_shape(out_dims, *input_info, *weights_info); @@ -92,14 +90,11 @@ std::pair validate_and_configure_window(const ITensorInfo *input ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); const DataLayout data_layout = input_info->data_layout(); + const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + const PadStrideInfo stride_info(deconv_info.stride().first, deconv_info.stride().second); - const unsigned int stride_x = deconv_info.stride().first; - const unsigned int stride_y = deconv_info.stride().second; - const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); - const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); - - auto out_dims = deconvolution_output_dimensions(input_info->dimension(idx_w), input_info->dimension(idx_h), weights_info->dimension(idx_w), weights_info->dimension(idx_h), - 0, 0, stride_x, stride_y); + auto out_dims = deconvolution_output_dimensions(input_info->dimension(idx_w), input_info->dimension(idx_h), weights_info->dimension(idx_w), weights_info->dimension(idx_h), stride_info); const TensorShape output_shape = misc::shape_calculator::compute_deconvolution_output_shape(out_dims, *input_info, *weights_info); diff --git a/src/core/CPP/kernels/CPPUpsampleKernel.cpp b/src/core/CPP/kernels/CPPUpsampleKernel.cpp index ad2d54a0f8..e63808f80e 100644 --- a/src/core/CPP/kernels/CPPUpsampleKernel.cpp +++ b/src/core/CPP/kernels/CPPUpsampleKernel.cpp @@ -76,10 +76,10 @@ void CPPUpsampleKernel::run(const Window &window, const ThreadInfo &info) const int height_scaled = _output->info()->dimension(1); const int stride_x = _info.stride().first; const int stride_y = _info.stride().second; - const int start_x = _info.pad().first; - const int start_y = _info.pad().second; - const int end_y = height_scaled - _info.pad().second; - const int end_x = width_scaled - _info.pad().first; + const int start_x = _info.pad_left(); + const int start_y = _info.pad_top(); + const int end_x = width_scaled - _info.pad_right(); + const int end_y = height_scaled - _info.pad_bottom(); const size_t element_size = _input->info()->element_size(); //The fill value is normally 0, but for QASYMM8 the '0' corresponds to the offset diff --git a/src/core/Utils.cpp b/src/core/Utils.cpp index 122373f5c6..d11788acd3 100644 --- a/src/core/Utils.cpp +++ b/src/core/Utils.cpp @@ -374,15 +374,22 @@ PadStrideInfo arm_compute::calculate_same_pad(TensorShape input_shape, TensorSha return same_info; } -std::pair arm_compute::deconvolution_output_dimensions( - unsigned int in_width, unsigned int in_height, unsigned int kernel_width, unsigned int kernel_height, unsigned int padx, unsigned int pady, - unsigned int stride_x, unsigned int stride_y) +std::pair arm_compute::deconvolution_output_dimensions(unsigned int in_width, unsigned int in_height, + unsigned int kernel_width, unsigned int kernel_height, + const PadStrideInfo &pad_stride_info) { + const unsigned int pad_left = pad_stride_info.pad_left(); + const unsigned int pad_top = pad_stride_info.pad_top(); + const unsigned int pad_right = pad_stride_info.pad_right(); + const unsigned int pad_bottom = pad_stride_info.pad_bottom(); + const unsigned int stride_x = pad_stride_info.stride().first; + const unsigned int stride_y = pad_stride_info.stride().second; + ARM_COMPUTE_ERROR_ON(in_width < 1 || in_height < 1); - ARM_COMPUTE_ERROR_ON(((in_width - 1) * stride_x + kernel_width) < 2 * padx); - ARM_COMPUTE_ERROR_ON(((in_height - 1) * stride_y + kernel_height) < 2 * pady); - const int w = stride_x * (in_width - 1) + kernel_width - 2 * padx; - const int h = stride_y * (in_height - 1) + kernel_height - 2 * pady; + ARM_COMPUTE_ERROR_ON(((in_width - 1) * stride_x + kernel_width) < (pad_left + pad_right)); + ARM_COMPUTE_ERROR_ON(((in_height - 1) * stride_y + kernel_height) < (pad_top + pad_bottom)); + const int w = stride_x * (in_width - 1) + kernel_width - (pad_left + pad_right); + const int h = stride_y * (in_height - 1) + kernel_height - (pad_top + pad_bottom); return std::make_pair(w, h); } diff --git a/src/graph/nodes/DeconvolutionLayerNode.cpp b/src/graph/nodes/DeconvolutionLayerNode.cpp index 28c75297a5..d4a5b769e7 100644 --- a/src/graph/nodes/DeconvolutionLayerNode.cpp +++ b/src/graph/nodes/DeconvolutionLayerNode.cpp @@ -56,10 +56,7 @@ TensorDescriptor DeconvolutionLayerNode::compute_output_descriptor(const TensorD const unsigned int kernel_width = get_dimension_size(weights_descriptor, DataLayoutDimension::WIDTH); const unsigned int kernel_height = get_dimension_size(weights_descriptor, DataLayoutDimension::HEIGHT); - std::tie(output_width, output_height) = deconvolution_output_dimensions(input_width, input_height, - kernel_width, kernel_height, - info.pad().first, info.pad().second, - info.stride().first, info.stride().second); + std::tie(output_width, output_height) = deconvolution_output_dimensions(input_width, input_height, kernel_width, kernel_height, info); const DataLayout data_layout = input_descriptor.layout; TensorDescriptor output_descriptor = input_descriptor; diff --git a/src/runtime/CL/functions/CLDirectDeconvolutionLayer.cpp b/src/runtime/CL/functions/CLDirectDeconvolutionLayer.cpp index c1a39ef26a..b8089d8229 100644 --- a/src/runtime/CL/functions/CLDirectDeconvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLDirectDeconvolutionLayer.cpp @@ -63,13 +63,8 @@ Status CLDirectDeconvolutionLayer::validate(const ITensorInfo *input, const ITen ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) != weights->dimension(idx_h)); ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) < 1); - ARM_COMPUTE_RETURN_ERROR_ON(!info.padding_is_symmetric()); - const unsigned int stride_x = info.stride().first; - const unsigned int stride_y = info.stride().second; - - auto out_dims = deconvolution_output_dimensions(input->dimension(idx_w), input->dimension(idx_h), weights->dimension(idx_w), weights->dimension(idx_h), - info.pad().first, info.pad().second, stride_x, stride_y); + auto out_dims = deconvolution_output_dimensions(input->dimension(idx_w), input->dimension(idx_h), weights->dimension(idx_w), weights->dimension(idx_h), info); const TensorShape output_shape = compute_deconvolution_output_shape(out_dims, *input, *weights); @@ -92,9 +87,11 @@ Status CLDirectDeconvolutionLayer::validate(const ITensorInfo *input, const ITen ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(idx_h) != output_shape[idx_h], "Output's height is invalid."); ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(idx_c) != output_shape[idx_c], "Output's depth is invalid."); - unsigned int padx = 0; - unsigned int pady = 0; - const TensorShape scale_out_shape = compute_deconvolution_upsampled_shape(*input, *weights, stride_x, stride_y, out_dims, padx, pady); + unsigned int deconv_pad_x = 0; + unsigned int deconv_pad_y = 0; + const unsigned int stride_x = info.stride().first; + const unsigned int stride_y = info.stride().second; + const TensorShape scale_out_shape = compute_deconvolution_upsampled_shape(*input, *weights, stride_x, stride_y, out_dims, deconv_pad_x, deconv_pad_y); TensorInfo scale_out_info(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(scale_out_shape).set_data_layout(data_layout)); const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL); @@ -109,6 +106,10 @@ void CLDirectDeconvolutionLayer::configure(ICLTensor *input, ICLTensor *weights, { ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); + const unsigned int pad_left = info.pad_left(); + const unsigned int pad_right = info.pad_right(); + const unsigned int pad_top = info.pad_top(); + const unsigned int pad_bottom = info.pad_bottom(); const unsigned int stride_x = info.stride().first; const unsigned int stride_y = info.stride().second; @@ -122,8 +123,7 @@ void CLDirectDeconvolutionLayer::configure(ICLTensor *input, ICLTensor *weights, _weights_flipped.allocator()->init(weights->info()->clone()->set_data_layout(data_layout)); _flip_weights.configure(weights, &_weights_flipped, &_flip_axis); - auto out_dims = deconvolution_output_dimensions(input->info()->dimension(idx_w), input->info()->dimension(idx_h), weights->info()->dimension(idx_w), weights->info()->dimension(idx_h), - info.pad().first, info.pad().second, stride_x, stride_y); + auto out_dims = deconvolution_output_dimensions(input->info()->dimension(idx_w), input->info()->dimension(idx_h), weights->info()->dimension(idx_w), weights->info()->dimension(idx_h), info); const TensorShape output_shape = compute_deconvolution_output_shape(out_dims, *input->info(), *weights->info()); @@ -138,16 +138,30 @@ void CLDirectDeconvolutionLayer::configure(ICLTensor *input, ICLTensor *weights, _memory_group.manage(&_scaled_output); // Find the upsampled dimensions and the padding needed for the convolution with stride 1 in order to match output shape - unsigned int padx = 0; - unsigned int pady = 0; - const TensorShape scale_out_shape = compute_deconvolution_upsampled_shape(*input->info(), *weights->info(), stride_x, stride_y, out_dims, padx, pady); + unsigned int deconv_pad_x = 0; + unsigned int deconv_pad_y = 0; + const TensorShape scale_out_shape = compute_deconvolution_upsampled_shape(*input->info(), *weights->info(), stride_x, stride_y, out_dims, deconv_pad_x, deconv_pad_y); + + unsigned int deconv_pad_left = pad_right > pad_left ? pad_right - pad_left : 0; + unsigned int deconv_pad_right = pad_left > pad_right ? pad_left - pad_right : 0; + deconv_pad_x -= deconv_pad_left + deconv_pad_right; + ARM_COMPUTE_ERROR_ON((deconv_pad_x % 2) != 0); + deconv_pad_left += deconv_pad_x / 2; + deconv_pad_right += deconv_pad_x / 2; + + unsigned int deconv_pad_top = pad_bottom > pad_top ? pad_bottom - pad_top : 0; + unsigned int deconv_pad_bottom = pad_top > pad_bottom ? pad_top - pad_bottom : 0; + deconv_pad_y -= deconv_pad_top + deconv_pad_bottom; + ARM_COMPUTE_ERROR_ON((deconv_pad_y % 2) != 0); + deconv_pad_top += deconv_pad_y / 2; + deconv_pad_bottom += deconv_pad_y / 2; TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(), input->info()->quantization_info()); scale_out_info.set_data_layout(data_layout); _scaled_output.allocator()->init(scale_out_info); // configure scale function - const PadStrideInfo upsample_info(stride_x, stride_y, padx / 2, pady / 2); + const PadStrideInfo upsample_info(stride_x, stride_y, deconv_pad_left, deconv_pad_right, deconv_pad_top, deconv_pad_bottom, DimensionRoundingType::FLOOR); _scale_f.configure(input, &_scaled_output, upsample_info); // Setup the function to convolve the upscaled output diff --git a/src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp b/src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp index 36a120e4ef..78e1ae7f05 100644 --- a/src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp @@ -151,8 +151,8 @@ Status CLGEMMDeconvolutionLayer::validate(const ITensorInfo *input, const ITenso ARM_COMPUTE_RETURN_ON_ERROR(CLGEMM::validate(&input->clone()->set_tensor_shape(nhwc_input_shape).set_is_resizable(true), &reshaped_t_info, nullptr, &gemm_output_info, 1.0f, 0.0f, gemm_info)); } - auto out_dims = deconvolution_output_dimensions(input->dimension(idx_w), input->dimension(idx_h), weights->dimension(idx_w), weights->dimension(idx_h), - 0, 0, deconv_info.stride().first, deconv_info.stride().second); + const PadStrideInfo stride_info(deconv_info.stride().first, deconv_info.stride().second); + auto out_dims = deconvolution_output_dimensions(input->dimension(idx_w), input->dimension(idx_h), weights->dimension(idx_w), weights->dimension(idx_h), stride_info); const TensorShape deconv_shape = misc::shape_calculator::compute_deconvolution_output_shape(out_dims, *input, *weights); TensorInfo col2im_output_info = gemm_output_info.clone()->set_tensor_shape(deconv_shape).set_is_resizable(true); diff --git a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp index 1f2cc3d73b..bbb91b4651 100644 --- a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp @@ -64,13 +64,8 @@ Status NEDeconvolutionLayer::validate(const ITensorInfo *input, const ITensorInf const unsigned int height_idx = get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::HEIGHT); ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(width_idx) != weights->dimension(height_idx)); ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(width_idx) < 1); - ARM_COMPUTE_RETURN_ERROR_ON(!info.padding_is_symmetric()); - const unsigned int stride_x = info.stride().first; - const unsigned int stride_y = info.stride().second; - - auto out_dims = deconvolution_output_dimensions(input->dimension(width_idx), input->dimension(height_idx), weights->dimension(width_idx), weights->dimension(height_idx), - info.pad().first, info.pad().second, stride_x, stride_y); + auto out_dims = deconvolution_output_dimensions(input->dimension(width_idx), input->dimension(height_idx), weights->dimension(width_idx), weights->dimension(height_idx), info); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); if(bias != nullptr) @@ -96,9 +91,11 @@ Status NEDeconvolutionLayer::validate(const ITensorInfo *input, const ITensorInf ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimZ) != output_shape.z(), "Output's depth is invalid."); } - unsigned int padx = 0; - unsigned int pady = 0; - const TensorShape scale_out_shape = compute_deconvolution_upsampled_shape(*input, *weights, stride_x, stride_y, out_dims, padx, pady); + unsigned int deconv_pad_x = 0; + unsigned int deconv_pad_y = 0; + const unsigned int stride_x = info.stride().first; + const unsigned int stride_y = info.stride().second; + const TensorShape scale_out_shape = compute_deconvolution_upsampled_shape(*input, *weights, stride_x, stride_y, out_dims, deconv_pad_x, deconv_pad_y); TensorInfo scale_out_info(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(scale_out_shape)); const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL); @@ -126,14 +123,17 @@ void NEDeconvolutionLayer::configure(ITensor *input, const ITensor *weights, con _is_prepared = false; _is_nchw = data_layout == DataLayout::NCHW; + const unsigned int pad_left = info.pad_left(); + const unsigned int pad_right = info.pad_right(); + const unsigned int pad_top = info.pad_top(); + const unsigned int pad_bottom = info.pad_bottom(); const unsigned int stride_x = info.stride().first; const unsigned int stride_y = info.stride().second; const unsigned int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); const unsigned int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); - auto out_dims = deconvolution_output_dimensions(input->info()->dimension(width_idx), input->info()->dimension(height_idx), weights->info()->dimension(width_idx), - weights->info()->dimension(height_idx), - info.pad().first, info.pad().second, stride_x, stride_y); + auto out_dims = deconvolution_output_dimensions(input->info()->dimension(width_idx), input->info()->dimension(height_idx), + weights->info()->dimension(width_idx), weights->info()->dimension(height_idx), info); const TensorShape output_shape = compute_deconvolution_output_shape(out_dims, *input->info(), *weights->info()); // Output auto initialization if not yet initialized @@ -157,16 +157,30 @@ void NEDeconvolutionLayer::configure(ITensor *input, const ITensor *weights, con _permuted_weights.info()->set_data_layout(DataLayout::NCHW); // Find the upsampled dimensions and the padding needed for the convolution with stride 1 in order to match output shape - unsigned int padx = 0; - unsigned int pady = 0; - const TensorShape scale_out_shape = compute_deconvolution_upsampled_shape(*_permuted_input.info(), *_permuted_weights.info(), stride_x, stride_y, out_dims, padx, - pady); + unsigned int deconv_pad_x = 0; + unsigned int deconv_pad_y = 0; + const TensorShape scale_out_shape = compute_deconvolution_upsampled_shape(*_permuted_input.info(), *_permuted_weights.info(), stride_x, stride_y, out_dims, + deconv_pad_x, deconv_pad_y); + + unsigned int deconv_pad_left = pad_right > pad_left ? pad_right - pad_left : 0; + unsigned int deconv_pad_right = pad_left > pad_right ? pad_left - pad_right : 0; + deconv_pad_x -= deconv_pad_left + deconv_pad_right; + ARM_COMPUTE_ERROR_ON((deconv_pad_x % 2) != 0); + deconv_pad_left += deconv_pad_x / 2; + deconv_pad_right += deconv_pad_x / 2; + + unsigned int deconv_pad_top = pad_bottom > pad_top ? pad_bottom - pad_top : 0; + unsigned int deconv_pad_bottom = pad_top > pad_bottom ? pad_top - pad_bottom : 0; + deconv_pad_y -= deconv_pad_top + deconv_pad_bottom; + ARM_COMPUTE_ERROR_ON((deconv_pad_y % 2) != 0); + deconv_pad_top += deconv_pad_y / 2; + deconv_pad_bottom += deconv_pad_y / 2; TensorInfo scale_out_info(scale_out_shape, 1, _permuted_input.info()->data_type(), _permuted_input.info()->quantization_info()); scale_out_info.set_data_layout(DataLayout::NCHW); _scaled_output.allocator()->init(scale_out_info); - const PadStrideInfo upsample_info(stride_x, stride_y, padx / 2, pady / 2); + const PadStrideInfo upsample_info(stride_x, stride_y, deconv_pad_left, deconv_pad_right, deconv_pad_top, deconv_pad_bottom, DimensionRoundingType::FLOOR); _upsample_f.configure(&_permuted_input, &_scaled_output, upsample_info); _weights_flipped.allocator()->init(*_permuted_weights.info()->clone()); @@ -189,14 +203,30 @@ void NEDeconvolutionLayer::configure(ITensor *input, const ITensor *weights, con else { // Find the upsampled dimensions and the padding needed for the convolution with stride 1 in order to match output shape - unsigned int padx = 0; - unsigned int pady = 0; - const TensorShape scale_out_shape = compute_deconvolution_upsampled_shape(*input->info(), *weights->info(), stride_x, stride_y, out_dims, padx, pady); + unsigned int deconv_pad_x = 0; + unsigned int deconv_pad_y = 0; + const TensorShape scale_out_shape = compute_deconvolution_upsampled_shape(*input->info(), *weights->info(), stride_x, stride_y, + out_dims, deconv_pad_x, deconv_pad_y); + + unsigned int deconv_pad_left = pad_right > pad_left ? pad_right - pad_left : 0; + unsigned int deconv_pad_right = pad_left > pad_right ? pad_left - pad_right : 0; + deconv_pad_x -= deconv_pad_left + deconv_pad_right; + ARM_COMPUTE_ERROR_ON((deconv_pad_x % 2) != 0); + deconv_pad_left += deconv_pad_x / 2; + deconv_pad_right += deconv_pad_x / 2; + + unsigned int deconv_pad_top = pad_bottom > pad_top ? pad_bottom - pad_top : 0; + unsigned int deconv_pad_bottom = pad_top > pad_bottom ? pad_top - pad_bottom : 0; + deconv_pad_y -= deconv_pad_top + deconv_pad_bottom; + ARM_COMPUTE_ERROR_ON((deconv_pad_y % 2) != 0); + deconv_pad_top += deconv_pad_y / 2; + deconv_pad_bottom += deconv_pad_y / 2; TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(), input->info()->quantization_info()); scale_out_info.set_data_layout(data_layout); _scaled_output.allocator()->init(scale_out_info); - const PadStrideInfo upsample_info(stride_x, stride_y, padx / 2, pady / 2); + + const PadStrideInfo upsample_info(stride_x, stride_y, deconv_pad_left, deconv_pad_right, deconv_pad_top, deconv_pad_bottom, DimensionRoundingType::FLOOR); _upsample_f.configure(input, &_scaled_output, upsample_info); _weights_flipped.allocator()->init(weights->info()->clone()->set_data_layout(data_layout)); diff --git a/tests/validation/CL/DeconvolutionLayer.cpp b/tests/validation/CL/DeconvolutionLayer.cpp index 44b3428c52..9dafd1ef89 100644 --- a/tests/validation/CL/DeconvolutionLayer.cpp +++ b/tests/validation/CL/DeconvolutionLayer.cpp @@ -55,6 +55,9 @@ const auto data4x4 = datasets::SmallDeconvolutionShapes() * framework::dataset:: const auto data3x3 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 2) * framework::dataset::make("PadY", 0, 2) * framework::dataset::make("NumKernels", { 3 }); +const auto data3x3_asymm = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 2) * framework::dataset::make("StrideY", 1, 2) * framework::dataset::make("PadLeft", 0, 1) + * framework::dataset::make("PadRight", 0, 1) * framework::dataset::make("PadTop", 0, 1) * framework::dataset::make("PadBottom", 0, 1) * framework::dataset::make("NumKernels", { 3 }); + const auto data3x3_precommit = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 2) * framework::dataset::make("StrideY", 1, 2) * framework::dataset::make("PadX", 0, 2) * framework::dataset::make("PadY", 0, 2) * framework::dataset::make("NumKernels", { 3 }); @@ -120,16 +123,19 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip( // *INDENT-ON* template -using CLDeconvolutionLayerFixture4x4 = DeconvolutionValidationFixture; +using CLDeconvolutionLayerFixture4x4 = DeconvolutionValidationFixture; + +template +using CLDeconvolutionLayerFixture3x3 = DeconvolutionValidationFixture; template -using CLDeconvolutionLayerFixture3x3 = DeconvolutionValidationFixture; +using CLDeconvolutionLayerAsymmFixture3x3 = DeconvolutionValidationAsymmFixture; template -using CLDeconvolutionLayerFixture2x2 = DeconvolutionValidationFixture; +using CLDeconvolutionLayerFixture2x2 = DeconvolutionValidationFixture; template -using CLDeconvolutionLayerFixture1x1 = DeconvolutionValidationFixture; +using CLDeconvolutionLayerFixture1x1 = DeconvolutionValidationFixture; TEST_SUITE(Float) TEST_SUITE(FP32) @@ -153,6 +159,14 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLDeconvolutionLayerFixture3x3, framewor // Validate output validate(CLAccessor(_target), _reference, tolerance_fp32); } +FIXTURE_DATA_TEST_CASE(RunAsymm, CLDeconvolutionLayerAsymmFixture3x3, framework::DatasetMode::NIGHTLY, combine(combine(combine(data3x3_asymm, framework::dataset::make("DataType", + DataType::F32)), + data_layouts_dataset), + add_bias_dataset)) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_fp32); +} FIXTURE_DATA_TEST_CASE(RunLarge, CLDeconvolutionLayerFixture3x3, framework::DatasetMode::NIGHTLY, combine(combine(combine(data3x3, framework::dataset::make("DataType", DataType::F32)), data_layouts_dataset), add_bias_dataset)) diff --git a/tests/validation/NEON/DeconvolutionLayer.cpp b/tests/validation/NEON/DeconvolutionLayer.cpp index 727f501393..500ef10661 100644 --- a/tests/validation/NEON/DeconvolutionLayer.cpp +++ b/tests/validation/NEON/DeconvolutionLayer.cpp @@ -56,6 +56,9 @@ const auto data4x4 = datasets::SmallDeconvolutionShapes() * framework::dataset:: const auto data3x3 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 2) * framework::dataset::make("PadY", 0, 2) * framework::dataset::make("NumKernels", { 3 }); +const auto data3x3_asymm = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 2) * framework::dataset::make("StrideY", 1, 2) * framework::dataset::make("PadLeft", 0, 1) + * framework::dataset::make("PadRight", 0, 1) * framework::dataset::make("PadTop", 0, 1) * framework::dataset::make("PadBottom", 0, 1) * framework::dataset::make("NumKernels", { 3 }); + const auto data3x3_precommit = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 2) * framework::dataset::make("StrideY", 1, 2) * framework::dataset::make("PadX", 0, 2) * framework::dataset::make("PadY", 0, 2) * framework::dataset::make("NumKernels", { 3 }); @@ -74,13 +77,14 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, (combine(datasets::Sm input_shape, data_type) { // Create shapes - const unsigned int kernel_size_x = 3; - const unsigned int kernel_size_y = 3; - const unsigned int num_kernels = 1; - const TensorShape weights_shape(kernel_size_x, kernel_size_y, input_shape.z(), num_kernels); - const TensorShape bias_shape(num_kernels); - auto out_dim = deconvolution_output_dimensions(input_shape.x(), input_shape.y(), kernel_size_x, kernel_size_y, 1, 1, 1, 1); - TensorShape output_shape = compute_deconvolution_output_shape(out_dim, TensorInfo(input_shape, 1, data_type), TensorInfo(weights_shape, 1, data_type)); + const unsigned int kernel_size_x = 3; + const unsigned int kernel_size_y = 3; + const unsigned int num_kernels = 1; + const TensorShape weights_shape(kernel_size_x, kernel_size_y, input_shape.z(), num_kernels); + const TensorShape bias_shape(num_kernels); + const PadStrideInfo info(1, 1, 1, 1); + auto out_dim = deconvolution_output_dimensions(input_shape.x(), input_shape.y(), kernel_size_x, kernel_size_y, info); + TensorShape output_shape = compute_deconvolution_output_shape(out_dim, TensorInfo(input_shape, 1, data_type), TensorInfo(weights_shape, 1, data_type)); // Create tensors Tensor src = create_tensor(input_shape, data_type, 1); @@ -157,13 +161,16 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip( // *INDENT-ON* template -using NEDeconvolutionLayerFixture4x4 = DeconvolutionValidationFixture; +using NEDeconvolutionLayerFixture4x4 = DeconvolutionValidationFixture; + +template +using NEDeconvolutionLayerFixture3x3 = DeconvolutionValidationFixture; template -using NEDeconvolutionLayerFixture3x3 = DeconvolutionValidationFixture; +using NEDeconvolutionLayerAsymmFixture3x3 = DeconvolutionValidationAsymmFixture; template -using NEDeconvolutionLayerFixture1x1 = DeconvolutionValidationFixture; +using NEDeconvolutionLayerFixture1x1 = DeconvolutionValidationFixture; TEST_SUITE(Float) TEST_SUITE(FP32) @@ -185,6 +192,14 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEDeconvolutionLayerFixture3x3, framewor // Validate output validate(Accessor(_target), _reference, tolerance_fp32); } +FIXTURE_DATA_TEST_CASE(RunAsymm, NEDeconvolutionLayerAsymmFixture3x3, framework::DatasetMode::NIGHTLY, combine(combine(combine(data3x3_asymm, framework::dataset::make("DataType", + DataType::F32)), + data_layouts_dataset), + add_bias_dataset)) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_fp32); +} FIXTURE_DATA_TEST_CASE(RunLarge, NEDeconvolutionLayerFixture3x3, framework::DatasetMode::NIGHTLY, combine(combine(combine(data3x3, framework::dataset::make("DataType", DataType::F32)), data_layouts_dataset), add_bias_dataset)) diff --git a/tests/validation/fixtures/DeconvolutionLayerFixture.h b/tests/validation/fixtures/DeconvolutionLayerFixture.h index 9f90f07c97..a25a65f997 100644 --- a/tests/validation/fixtures/DeconvolutionLayerFixture.h +++ b/tests/validation/fixtures/DeconvolutionLayerFixture.h @@ -218,7 +218,27 @@ public: const TensorShape weights_shape(kernel_size_x, kernel_size_y, input_shape.z(), num_kernels); const TensorShape bias_shape(num_kernels); const PadStrideInfo info(sx, sy, padx, pady, DimensionRoundingType::CEIL); - auto out_dim = deconvolution_output_dimensions(input_shape.x(), input_shape.y(), kernel_size_x, kernel_size_y, padx, pady, sx, sy); + auto out_dim = deconvolution_output_dimensions(input_shape.x(), input_shape.y(), kernel_size_x, kernel_size_y, info); + TensorInfo input_info(input_shape, 1, data_type); + TensorInfo weights_info(weights_shape, 1, data_type); + TensorShape output_shape = compute_deconvolution_output_shape(out_dim, input_info, weights_info); + DeconvolutionLayerFixtureBase::setup(input_shape, weights_shape, bias_shape, output_shape, info, data_type, data_layout, QuantizationInfo(), add_bias); + } +}; + +template +class DeconvolutionValidationAsymmFixture : public DeconvolutionLayerFixtureBase +{ +public: + template + void setup(TensorShape input_shape, unsigned int sx, unsigned int sy, unsigned int pad_left, unsigned int pad_right, unsigned int pad_top, + unsigned int pad_bottom, unsigned int num_kernels, DataType data_type, DataLayout data_layout, bool add_bias) + { + ARM_COMPUTE_ERROR_ON_MSG(kernel_size_x != kernel_size_y, "Only square kernels supported"); + const TensorShape weights_shape(kernel_size_x, kernel_size_y, input_shape.z(), num_kernels); + const TensorShape bias_shape(num_kernels); + const PadStrideInfo info(sx, sy, pad_left, pad_right, pad_top, pad_bottom, DimensionRoundingType::CEIL); + auto out_dim = deconvolution_output_dimensions(input_shape.x(), input_shape.y(), kernel_size_x, kernel_size_y, info); TensorInfo input_info(input_shape, 1, data_type); TensorInfo weights_info(weights_shape, 1, data_type); TensorShape output_shape = compute_deconvolution_output_shape(out_dim, input_info, weights_info); @@ -238,7 +258,7 @@ public: const TensorShape weights_shape(kernel_size_x, kernel_size_y, input_shape.z(), num_kernels); const TensorShape bias_shape(num_kernels); const PadStrideInfo info(sx, sy, padx, pady, DimensionRoundingType::CEIL); - auto out_dim = deconvolution_output_dimensions(input_shape.x(), input_shape.y(), kernel_size_x, kernel_size_y, padx, pady, sx, sy); + auto out_dim = deconvolution_output_dimensions(input_shape.x(), input_shape.y(), kernel_size_x, kernel_size_y, info); TensorInfo input_info(input_shape, 1, data_type, quantization_info); TensorInfo weights_info(weights_shape, 1, data_type, quantization_info); TensorShape output_shape = compute_deconvolution_output_shape(out_dim, input_info, weights_info); diff --git a/tests/validation/reference/DeconvolutionLayer.cpp b/tests/validation/reference/DeconvolutionLayer.cpp index af59830722..343ea5e725 100644 --- a/tests/validation/reference/DeconvolutionLayer.cpp +++ b/tests/validation/reference/DeconvolutionLayer.cpp @@ -38,21 +38,44 @@ SimpleTensor deconvolution_layer(const SimpleTensor &src, const SimpleTens const PadStrideInfo &info) { // Create reference - const int stride_x = info.stride().first; - const int stride_y = info.stride().second; - const int weights_width = weights.shape().x(); - const int weights_height = weights.shape().y(); - const int weights_upper_dims = weights.shape().total_size() / (weights_width * weights_height); + const unsigned int pad_left = info.pad_left(); + const unsigned int pad_right = info.pad_right(); + const unsigned int pad_top = info.pad_top(); + const unsigned int pad_bottom = info.pad_bottom(); + const int stride_x = info.stride().first; + const int stride_y = info.stride().second; + const int weights_width = weights.shape().x(); + const int weights_height = weights.shape().y(); + const int weights_upper_dims = weights.shape().total_size() / (weights_width * weights_height); + + ARM_COMPUTE_ERROR_ON(pad_left > (weights.shape().x() - 1)); + ARM_COMPUTE_ERROR_ON(pad_right > (weights.shape().x() - 1)); + ARM_COMPUTE_ERROR_ON(pad_top > (weights.shape().y() - 1)); + ARM_COMPUTE_ERROR_ON(pad_bottom > (weights.shape().y() - 1)); // Find the upsampled dimensions unsigned int out_x = (src.shape().x() - 1) * stride_x + 1; unsigned int out_y = (src.shape().y() - 1) * stride_y + 1; // Find the padding needed for the convolution with stride 1 in order to match output shape - unsigned int padx = output_shape.x() - (out_x - weights_width + 1); - unsigned int pady = output_shape.y() - (out_y - weights_height + 1); - out_x += padx; - out_y += pady; + unsigned int deconv_pad_x = output_shape.x() - (out_x - weights_width + 1); + unsigned int deconv_pad_y = output_shape.y() - (out_y - weights_height + 1); + out_x += deconv_pad_x; + out_y += deconv_pad_y; + + unsigned int deconv_pad_left = pad_right > pad_left ? pad_right - pad_left : 0; + unsigned int deconv_pad_right = pad_left > pad_right ? pad_left - pad_right : 0; + deconv_pad_x -= deconv_pad_left + deconv_pad_right; + ARM_COMPUTE_ERROR_ON((deconv_pad_x % 2) != 0); + deconv_pad_left += deconv_pad_x / 2; + deconv_pad_right += deconv_pad_x / 2; + + unsigned int deconv_pad_top = pad_bottom > pad_top ? pad_bottom - pad_top : 0; + unsigned int deconv_pad_bottom = pad_top > pad_bottom ? pad_top - pad_bottom : 0; + deconv_pad_y -= deconv_pad_top + deconv_pad_bottom; + ARM_COMPUTE_ERROR_ON((deconv_pad_y % 2) != 0); + deconv_pad_top += deconv_pad_y / 2; + deconv_pad_bottom += deconv_pad_y / 2; TensorShape scaled_shape = src.shape(); scaled_shape.set(0, out_x); @@ -64,7 +87,6 @@ SimpleTensor deconvolution_layer(const SimpleTensor &src, const SimpleTens const int width_scaled = scaled.shape().x(); const int height_scaled = scaled.shape().y(); const int num_2d_slices = src.shape().total_size() / (width_in * height_in); - ARM_COMPUTE_ERROR_ON(info.pad().first > (weights.shape().x() - 1)); if(src.data_type() == DataType::QASYMM8) { @@ -94,10 +116,10 @@ SimpleTensor deconvolution_layer(const SimpleTensor &src, const SimpleTens { const int offset_slice_in = slice * width_in * height_in; const int offset_slice_out = slice * width_scaled * height_scaled; - const int start_x = padx / 2; - const int start_y = pady / 2; - const int end_y = height_scaled - pady / 2; - const int end_x = width_scaled - padx / 2; + const int start_x = deconv_pad_left; + const int start_y = deconv_pad_top; + const int end_x = width_scaled - deconv_pad_right; + const int end_y = height_scaled - deconv_pad_bottom; for(int yi = start_y, in_y = 0; yi < end_y; yi += stride_y, in_y++) { -- cgit v1.2.1