aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Annop Wongwathanarat <annop.wongwathanarat@arm.com> 2023-02-09 16:33:13 +0000
committer: Annop Wongwathanarat <annop.wongwathanarat@arm.com> 2023-02-14 09:54:13 +0000
commit: c4ed2fdf5fdc11ca54770c8a9a11576053fc2b0b (patch)
tree: d29bbf86b0c5a60ae1acdb87b0bdfff60de8b4ba
parent: a4ff9d0489e67dcda6e26cbcf9fed3b4e23ae17a (diff)
download: ComputeLibrary-c4ed2fdf5fdc11ca54770c8a9a11576053fc2b0b.tar.gz
Extend skip upsampling for deconvolution for non-1x1 kernels
Skip the upsampling step for deconvolution when the input strides are 1, regardless of kernel size. This is achieved by setting the correct padding for the unit-stride convolution. Resolve: [ONCPUML-1183] Change-Id: Ief88f9fe30f6f56d3358e3cf6a506ab8b5691f18 Signed-off-by: Annop Wongwathanarat <annop.wongwathanarat@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9134 Reviewed-by: Gunes Bayir <gunes.bayir@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Benchmark: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r-- src/runtime/NEON/functions/NEDeconvolutionLayer.cpp 36
1 files changed, 25 insertions, 11 deletions
diff --git a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
index c30870dc52..96ffedd5b4 100644
--- a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
@@ -142,14 +142,26 @@ Status NEDeconvolutionLayer::validate(const ITensorInfo *input, const ITensorInf
const TensorShape scale_out_shape = compute_deconvolution_upsampled_shape(*input, *weights, stride_x, stride_y, out_dims, deconv_pad_x, deconv_pad_y);
TensorInfo scale_out_info(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(scale_out_shape));
- const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
+ const PadStrideInfo upsample_info = compute_upsample_info(info, deconv_pad_x, deconv_pad_y);
+
+ // Do not perform upsampling when the operation uses unit stride in all dimensions
+ const bool do_upsampling = stride_x != 1 || stride_y != 1;
const unsigned int batches_idx = get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::BATCHES);
const unsigned int channel_idx = get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::CHANNEL);
ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(batches_idx) != scale_out_info.dimension(batches_idx));
ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(channel_idx) != scale_out_info.dimension(channel_idx));
- ARM_COMPUTE_RETURN_ON_ERROR(NEConvolutionLayer::validate(&scale_out_info, weights, bias, output, conv_info, WeightsInfo(), Size2D(1U, 1U), ActivationLayerInfo(), enable_fast_math));
+ if (do_upsampling)
+ {
+ const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
+ ARM_COMPUTE_RETURN_ON_ERROR(NEConvolutionLayer::validate(&scale_out_info, weights, bias, output, conv_info, WeightsInfo(), Size2D(1U, 1U), ActivationLayerInfo(), enable_fast_math));
+ }
+ else
+ {
+ const PadStrideInfo conv_info(1, 1, upsample_info.pad_left(), upsample_info.pad_right(), upsample_info.pad_top(), upsample_info.pad_bottom(), DimensionRoundingType::CEIL);
+ ARM_COMPUTE_RETURN_ON_ERROR(NEConvolutionLayer::validate(input, weights, bias, output, conv_info, WeightsInfo(), Size2D(1U, 1U), ActivationLayerInfo(), enable_fast_math));
+ }
return Status{};
}
@@ -177,9 +189,6 @@ void NEDeconvolutionLayer::configure(ITensor *input, const ITensor *weights, con
const unsigned int stride_x = info.stride().first;
const unsigned int stride_y = info.stride().second;
- // Do not perform upsampling when input is unit stride and weight shape is 1x1
- _do_upsampling = stride_x != 1 || stride_y != 1 || weights->info()->dimension(width_idx) != 1 || weights->info()->dimension(height_idx) != 1;
-
// Output auto initialization if not yet initialized
auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->quantization_info());
@@ -189,9 +198,15 @@ void NEDeconvolutionLayer::configure(ITensor *input, const ITensor *weights, con
_flip_weights.configure(weights, &_weights_flipped, &_flip_axis);
// setup the function to convolve the upscaled output
- const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
uint32_t deconv_pad_x = 0;
uint32_t deconv_pad_y = 0;
+ const TensorShape scale_out_shape = compute_deconvolution_upsampled_shape(*input->info(), *weights->info(),
+ stride_x, stride_y,
+ out_dims, deconv_pad_x, deconv_pad_y);
+ const PadStrideInfo upsample_info = compute_upsample_info(info, deconv_pad_x, deconv_pad_y);
+
+ // Do not perform upsampling when the operation uses unit stride in all dimensions
+ _do_upsampling = stride_x != 1 || stride_y != 1;
// Setup flip axis data
_flip_axis.allocator()->allocate();
@@ -203,16 +218,14 @@ void NEDeconvolutionLayer::configure(ITensor *input, const ITensor *weights, con
if (_do_upsampling)
{
_memory_group.manage(&_scaled_output);
- const TensorShape scale_out_shape = compute_deconvolution_upsampled_shape(*input->info(), *weights->info(),
- stride_x, stride_y,
- out_dims, deconv_pad_x, deconv_pad_y);
-
- const PadStrideInfo upsample_info = compute_upsample_info(info, deconv_pad_x, deconv_pad_y);
+ const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(), input->info()->quantization_info());
scale_out_info.set_data_layout(data_layout);
_scaled_output.allocator()->init(scale_out_info);
+ // Minor optimization: In the upsampling step, we do not need to allocate space for the padding in the upsampled image.
+ // The padding amount can be given as input to the convolution layer.
_upsample_f.configure(input, &_scaled_output, upsample_info);
_conv_f.configure(&_scaled_output, &_weights_flipped, bias, output, conv_info, WeightsInfo(), Size2D(1U, 1U), ActivationLayerInfo(), enable_fast_math);
@@ -221,6 +234,7 @@ void NEDeconvolutionLayer::configure(ITensor *input, const ITensor *weights, con
}
else
{
+ const PadStrideInfo conv_info(1, 1, upsample_info.pad_left(), upsample_info.pad_right(), upsample_info.pad_top(), upsample_info.pad_bottom(), DimensionRoundingType::CEIL);
_conv_f.configure(input, &_weights_flipped, bias, output, conv_info, WeightsInfo(), Size2D(1U, 1U), ActivationLayerInfo(), enable_fast_math);
}
}