about summary refs log tree commit diff
diff options
context:
space:
mode:
authorAnnop Wongwathanarat <annop.wongwathanarat@arm.com>2023-01-16 14:36:45 +0000
committerAnnop Wongwathanarat <annop.wongwathanarat@arm.com>2023-01-30 15:59:43 +0000
commitb609c93e81b1ff60764c5b724f4f5511215c84d6 (patch)
tree3822ed89e9b99778994362cadb1f62b97aef6764
parentfbbfa5388d7e430ff7e9a3fb0bff8045574706e4 (diff)
downloadComputeLibrary-b609c93e81b1ff60764c5b724f4f5511215c84d6.tar.gz
Skip upsampling for deconvolution when not needed
If the input tensor's stride is 1 and the kernel size is 1x1, skip the upsampling step and pass the input tensor pointer directly to the convolution.

Partially resolves: [ONCPUML-1137]

Change-Id: I9de9444ff99cf35d44a51ccbe0fa6facc1035d27
Signed-off-by: Annop Wongwathanarat <annop.wongwathanarat@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8994
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r--arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h1
-rw-r--r--src/runtime/NEON/functions/NEDeconvolutionLayer.cpp51
2 files changed, 34 insertions, 18 deletions
diff --git a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
index 15124d6041..869df69f11 100644
--- a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
@@ -148,6 +148,7 @@ private:
ITensor *_input;
PadStrideInfo _info;
bool _is_prepared;
+ bool _do_upsampling;
};
} // arm_compute
#endif /* ARM_COMPUTE_NEDECONVOLUTIONLAYER_H */
diff --git a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
index 1a75c14896..c30870dc52 100644
--- a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
@@ -77,7 +77,8 @@ NEDeconvolutionLayer::NEDeconvolutionLayer(std::shared_ptr<IMemoryManager> memor
_original_weights(nullptr),
_input(nullptr),
_info(),
- _is_prepared(false)
+ _is_prepared(false),
+ _do_upsampling(true)
{
}
@@ -176,11 +177,13 @@ void NEDeconvolutionLayer::configure(ITensor *input, const ITensor *weights, con
const unsigned int stride_x = info.stride().first;
const unsigned int stride_y = info.stride().second;
+ // Do not perform upsampling when input is unit stride and weight shape is 1x1
+ _do_upsampling = stride_x != 1 || stride_y != 1 || weights->info()->dimension(width_idx) != 1 || weights->info()->dimension(height_idx) != 1;
+
// Output auto initialization if not yet initialized
auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->quantization_info());
_flip_axis.allocator()->init(TensorInfo(TensorShape(2U), 1, DataType::U32));
- _memory_group.manage(&_scaled_output);
_weights_flipped.allocator()->init(weights->info()->clone()->set_data_layout(data_layout));
_flip_weights.configure(weights, &_weights_flipped, &_flip_axis);
@@ -190,27 +193,36 @@ void NEDeconvolutionLayer::configure(ITensor *input, const ITensor *weights, con
uint32_t deconv_pad_x = 0;
uint32_t deconv_pad_y = 0;
- const TensorShape scale_out_shape = compute_deconvolution_upsampled_shape(*input->info(), *weights->info(),
- stride_x, stride_y,
- out_dims, deconv_pad_x, deconv_pad_y);
-
- const PadStrideInfo upsample_info = compute_upsample_info(info, deconv_pad_x, deconv_pad_y);
-
- TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(), input->info()->quantization_info());
- scale_out_info.set_data_layout(data_layout);
- _scaled_output.allocator()->init(scale_out_info);
-
- _upsample_f.configure(input, &_scaled_output, upsample_info);
-
- _conv_f.configure(&_scaled_output, &_weights_flipped, bias, output, conv_info, WeightsInfo(), Size2D(1U, 1U), ActivationLayerInfo(), enable_fast_math);
-
// Setup flip axis data
_flip_axis.allocator()->allocate();
auto axis_data = reinterpret_cast<uint32_t *>(_flip_axis.buffer());
axis_data[0] = static_cast<uint32_t>(width_idx);
axis_data[1] = static_cast<uint32_t>(height_idx);
- _scaled_output.allocator()->allocate();
+ // Setup convolution and upsampling, if needed
+ if (_do_upsampling)
+ {
+ _memory_group.manage(&_scaled_output);
+ const TensorShape scale_out_shape = compute_deconvolution_upsampled_shape(*input->info(), *weights->info(),
+ stride_x, stride_y,
+ out_dims, deconv_pad_x, deconv_pad_y);
+
+ const PadStrideInfo upsample_info = compute_upsample_info(info, deconv_pad_x, deconv_pad_y);
+
+ TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(), input->info()->quantization_info());
+ scale_out_info.set_data_layout(data_layout);
+ _scaled_output.allocator()->init(scale_out_info);
+
+ _upsample_f.configure(input, &_scaled_output, upsample_info);
+
+ _conv_f.configure(&_scaled_output, &_weights_flipped, bias, output, conv_info, WeightsInfo(), Size2D(1U, 1U), ActivationLayerInfo(), enable_fast_math);
+
+ _scaled_output.allocator()->allocate();
+ }
+ else
+ {
+ _conv_f.configure(input, &_weights_flipped, bias, output, conv_info, WeightsInfo(), Size2D(1U, 1U), ActivationLayerInfo(), enable_fast_math);
+ }
}
void NEDeconvolutionLayer::run()
@@ -219,7 +231,10 @@ void NEDeconvolutionLayer::run()
MemoryGroupResourceScope scope_mg(_memory_group);
- _upsample_f.run();
+ if(_do_upsampling)
+ {
+ _upsample_f.run();
+ }
_conv_f.run();
}