From b609c93e81b1ff60764c5b724f4f5511215c84d6 Mon Sep 17 00:00:00 2001 From: Annop Wongwathanarat Date: Mon, 16 Jan 2023 14:36:45 +0000 Subject: Skip upsampling for deconvolution when not needed If the input tensor's stride is 1 and the kernel size is 1x1, skip upsampling step and pass the input tensor pointer for convolution directly. Partially resolve: [ONCPUML-1137] Change-Id: I9de9444ff99cf35d44a51ccbe0fa6facc1035d27 Signed-off-by: Annop Wongwathanarat Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8994 Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins Reviewed-by: Gunes Bayir Benchmark: Arm Jenkins --- .../runtime/NEON/functions/NEDeconvolutionLayer.h | 1 + .../NEON/functions/NEDeconvolutionLayer.cpp | 51 ++++++++++++++-------- 2 files changed, 34 insertions(+), 18 deletions(-) diff --git a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h index 15124d6041..869df69f11 100644 --- a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h @@ -148,6 +148,7 @@ private: ITensor *_input; PadStrideInfo _info; bool _is_prepared; + bool _do_upsampling; }; } // arm_compute #endif /* ARM_COMPUTE_NEDECONVOLUTIONLAYER_H */ diff --git a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp index 1a75c14896..c30870dc52 100644 --- a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp @@ -77,7 +77,8 @@ NEDeconvolutionLayer::NEDeconvolutionLayer(std::shared_ptr memor _original_weights(nullptr), _input(nullptr), _info(), - _is_prepared(false) + _is_prepared(false), + _do_upsampling(true) { } @@ -176,11 +177,13 @@ void NEDeconvolutionLayer::configure(ITensor *input, const ITensor *weights, con const unsigned int stride_x = info.stride().first; const unsigned int stride_y = info.stride().second; + // Do not perform 
upsampling when input is unit stride and weight shape is 1x1 + _do_upsampling = stride_x != 1 || stride_y != 1 || weights->info()->dimension(width_idx) != 1 || weights->info()->dimension(height_idx) != 1; + // Output auto initialization if not yet initialized auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->quantization_info()); _flip_axis.allocator()->init(TensorInfo(TensorShape(2U), 1, DataType::U32)); - _memory_group.manage(&_scaled_output); _weights_flipped.allocator()->init(weights->info()->clone()->set_data_layout(data_layout)); _flip_weights.configure(weights, &_weights_flipped, &_flip_axis); @@ -190,27 +193,36 @@ void NEDeconvolutionLayer::configure(ITensor *input, const ITensor *weights, con uint32_t deconv_pad_x = 0; uint32_t deconv_pad_y = 0; - const TensorShape scale_out_shape = compute_deconvolution_upsampled_shape(*input->info(), *weights->info(), - stride_x, stride_y, - out_dims, deconv_pad_x, deconv_pad_y); - - const PadStrideInfo upsample_info = compute_upsample_info(info, deconv_pad_x, deconv_pad_y); - - TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(), input->info()->quantization_info()); - scale_out_info.set_data_layout(data_layout); - _scaled_output.allocator()->init(scale_out_info); - - _upsample_f.configure(input, &_scaled_output, upsample_info); - - _conv_f.configure(&_scaled_output, &_weights_flipped, bias, output, conv_info, WeightsInfo(), Size2D(1U, 1U), ActivationLayerInfo(), enable_fast_math); - // Setup flip axis data _flip_axis.allocator()->allocate(); auto axis_data = reinterpret_cast<uint32_t *>(_flip_axis.buffer()); axis_data[0] = static_cast<uint32_t>(width_idx); axis_data[1] = static_cast<uint32_t>(height_idx); - _scaled_output.allocator()->allocate(); + // Setup convolution and upsampling, if needed + if (_do_upsampling) + { + _memory_group.manage(&_scaled_output); + const TensorShape scale_out_shape = compute_deconvolution_upsampled_shape(*input->info(), *weights->info(), + stride_x, 
stride_y, + out_dims, deconv_pad_x, deconv_pad_y); + + const PadStrideInfo upsample_info = compute_upsample_info(info, deconv_pad_x, deconv_pad_y); + + TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(), input->info()->quantization_info()); + scale_out_info.set_data_layout(data_layout); + _scaled_output.allocator()->init(scale_out_info); + + _upsample_f.configure(input, &_scaled_output, upsample_info); + + _conv_f.configure(&_scaled_output, &_weights_flipped, bias, output, conv_info, WeightsInfo(), Size2D(1U, 1U), ActivationLayerInfo(), enable_fast_math); + + _scaled_output.allocator()->allocate(); + } + else + { + _conv_f.configure(input, &_weights_flipped, bias, output, conv_info, WeightsInfo(), Size2D(1U, 1U), ActivationLayerInfo(), enable_fast_math); + } } void NEDeconvolutionLayer::run() @@ -219,7 +231,10 @@ void NEDeconvolutionLayer::run() MemoryGroupResourceScope scope_mg(_memory_group); - _upsample_f.run(); + if(_do_upsampling) + { + _upsample_f.run(); + } _conv_f.run(); } -- cgit v1.2.1