diff options
Diffstat (limited to 'src/runtime/NEON')
-rw-r--r-- | src/runtime/NEON/functions/NEDeconvolutionLayer.cpp | 51 |
1 files changed, 33 insertions, 18 deletions
diff --git a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp index 1a75c14896..c30870dc52 100644 --- a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp @@ -77,7 +77,8 @@ NEDeconvolutionLayer::NEDeconvolutionLayer(std::shared_ptr<IMemoryManager> memor _original_weights(nullptr), _input(nullptr), _info(), - _is_prepared(false) + _is_prepared(false), + _do_upsampling(true) { } @@ -176,11 +177,13 @@ void NEDeconvolutionLayer::configure(ITensor *input, const ITensor *weights, con const unsigned int stride_x = info.stride().first; const unsigned int stride_y = info.stride().second; + // Do not perform upsampling when input is unit stride and weight shape is 1x1 + _do_upsampling = stride_x != 1 || stride_y != 1 || weights->info()->dimension(width_idx) != 1 || weights->info()->dimension(height_idx) != 1; + // Output auto initialization if not yet initialized auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->quantization_info()); _flip_axis.allocator()->init(TensorInfo(TensorShape(2U), 1, DataType::U32)); - _memory_group.manage(&_scaled_output); _weights_flipped.allocator()->init(weights->info()->clone()->set_data_layout(data_layout)); _flip_weights.configure(weights, &_weights_flipped, &_flip_axis); @@ -190,27 +193,36 @@ void NEDeconvolutionLayer::configure(ITensor *input, const ITensor *weights, con uint32_t deconv_pad_x = 0; uint32_t deconv_pad_y = 0; - const TensorShape scale_out_shape = compute_deconvolution_upsampled_shape(*input->info(), *weights->info(), - stride_x, stride_y, - out_dims, deconv_pad_x, deconv_pad_y); - - const PadStrideInfo upsample_info = compute_upsample_info(info, deconv_pad_x, deconv_pad_y); - - TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(), input->info()->quantization_info()); - scale_out_info.set_data_layout(data_layout); - _scaled_output.allocator()->init(scale_out_info); - - _upsample_f.configure(input, &_scaled_output, upsample_info); - - _conv_f.configure(&_scaled_output, &_weights_flipped, bias, output, conv_info, WeightsInfo(), Size2D(1U, 1U), ActivationLayerInfo(), enable_fast_math); - // Setup flip axis data _flip_axis.allocator()->allocate(); auto axis_data = reinterpret_cast<uint32_t *>(_flip_axis.buffer()); axis_data[0] = static_cast<uint32_t>(width_idx); axis_data[1] = static_cast<uint32_t>(height_idx); - _scaled_output.allocator()->allocate(); + // Setup convolution and upsampling, if needed + if (_do_upsampling) + { + _memory_group.manage(&_scaled_output); + const TensorShape scale_out_shape = compute_deconvolution_upsampled_shape(*input->info(), *weights->info(), + stride_x, stride_y, + out_dims, deconv_pad_x, deconv_pad_y); + + const PadStrideInfo upsample_info = compute_upsample_info(info, deconv_pad_x, deconv_pad_y); + + TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(), input->info()->quantization_info()); + scale_out_info.set_data_layout(data_layout); + _scaled_output.allocator()->init(scale_out_info); + + _upsample_f.configure(input, &_scaled_output, upsample_info); + + _conv_f.configure(&_scaled_output, &_weights_flipped, bias, output, conv_info, WeightsInfo(), Size2D(1U, 1U), ActivationLayerInfo(), enable_fast_math); + + _scaled_output.allocator()->allocate(); + } + else + { + _conv_f.configure(input, &_weights_flipped, bias, output, conv_info, WeightsInfo(), Size2D(1U, 1U), ActivationLayerInfo(), enable_fast_math); + } } void NEDeconvolutionLayer::run() @@ -219,7 +231,10 @@ void NEDeconvolutionLayer::run() MemoryGroupResourceScope scope_mg(_memory_group); - _upsample_f.run(); + if(_do_upsampling) + { + _upsample_f.run(); + } _conv_f.run(); } |