From 061dd36716a17c1065de6143f61e41d03896d541 Mon Sep 17 00:00:00 2001 From: Michele Di Giorgio Date: Wed, 17 Oct 2018 17:10:27 +0100 Subject: COMPMID-1451: Fix allocation of weights in Deconvolution Change-Id: If3ca0b034a7448df1e5349b51a2b124f1b4e99c1 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/153956 Reviewed-by: Georgios Pinitas Tested-by: bsgcomp --- .../runtime/CL/functions/CLDeconvolutionLayer.h | 2 +- .../runtime/NEON/functions/NEDeconvolutionLayer.h | 1 + src/runtime/CL/functions/CLDeconvolutionLayer.cpp | 20 +++++++++++++++----- .../NEON/functions/NEDeconvolutionLayer.cpp | 22 ++++++++++++++++++---- 4 files changed, 35 insertions(+), 10 deletions(-) diff --git a/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h index 39cbe0cafa..7a58c5acef 100644 --- a/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h @@ -123,7 +123,7 @@ private: CLConvolutionLayer _conv_f; CPPFlipWeightsKernel _flip_weights; CLTensor _scaled_output; - ICLTensor *_weights; + ICLTensor *_original_weights; CLTensor _weights_flipped; bool _is_prepared; }; diff --git a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h index 73870093b7..277945d617 100644 --- a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h @@ -125,6 +125,7 @@ private: CPPFlipWeightsKernel _flip_weights; Tensor _scaled_output; Tensor _weights_flipped; + const ITensor *_original_weights; ITensor *_input; PadStrideInfo _info; std::pair _inner_border; diff --git a/src/runtime/CL/functions/CLDeconvolutionLayer.cpp b/src/runtime/CL/functions/CLDeconvolutionLayer.cpp index 951d1ec4f0..bbf4e66adb 100644 --- a/src/runtime/CL/functions/CLDeconvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLDeconvolutionLayer.cpp @@ -42,7 +42,7 @@ CLDeconvolutionLayer::CLDeconvolutionLayer(std::shared_ptr memor _conv_f(), _flip_weights(), _scaled_output(), - _weights(), + _original_weights(nullptr), _weights_flipped(), _is_prepared(false) { @@ -120,7 +120,7 @@ void CLDeconvolutionLayer::configure(ICLTensor *input, ICLTensor *weights, const const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); - _weights = weights; + _original_weights = weights; _weights_flipped.allocator()->init(weights->info()->clone()->set_data_layout(data_layout)); _flip_weights.configure(weights, &_weights_flipped); @@ -138,7 +138,6 @@ void CLDeconvolutionLayer::configure(ICLTensor *input, ICLTensor *weights, const _is_prepared = false; _memory_group.manage(&_scaled_output); - _memory_group.manage(&_weights_flipped); // Find the upsampled dimensions and the padding needed for the convolution with stride 1 in order to match output shape unsigned int padx = 0; @@ -175,14 +174,25 @@ void CLDeconvolutionLayer::prepare() { if(!_is_prepared) { + ARM_COMPUTE_ERROR_ON(!_original_weights->is_used()); + + // Run weights flipping and mark original weights tensor as unused _weights_flipped.allocator()->allocate(); _weights_flipped.map(true); - _weights->map(CLScheduler::get().queue(), true); + _original_weights->map(CLScheduler::get().queue(), true); CPPScheduler::get().schedule(&_flip_weights, Window::DimZ); _weights_flipped.unmap(); - _weights->unmap(CLScheduler::get().queue()); + _original_weights->unmap(CLScheduler::get().queue()); + _original_weights->mark_as_unused(); + + // Prepare convolution _conv_f.prepare(); + if(!_weights_flipped.is_used()) + { + _weights_flipped.allocator()->free(); + } + _is_prepared = true; } } diff --git a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp index cbe7c51662..23def5959b 100644 --- a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp @@ -39,6 +39,7 @@ NEDeconvolutionLayer::NEDeconvolutionLayer(std::shared_ptr memor _flip_weights(), _scaled_output(), _weights_flipped(), + _original_weights(nullptr), _input(nullptr), _info(), _inner_border(), @@ -104,10 +105,11 @@ void NEDeconvolutionLayer::configure(ITensor *input, const ITensor *weights, con { ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); - _input = input; - _info = info; - _inner_border = std::make_pair(inner_border_right, inner_border_top); - _is_prepared = false; + _input = input; + _original_weights = weights; + _info = info; + _inner_border = std::make_pair(inner_border_right, inner_border_top); + _is_prepared = false; const unsigned int stride_x = info.stride().first; const unsigned int stride_y = info.stride().second; @@ -160,9 +162,21 @@ void NEDeconvolutionLayer::prepare() { if(!_is_prepared) { + ARM_COMPUTE_ERROR_ON(!_original_weights->is_used()); + + // Run weights flipping and mark original weights tensor as unused _weights_flipped.allocator()->allocate(); CPPScheduler::get().schedule(&_flip_weights, Window::DimZ); + _original_weights->mark_as_unused(); + + // Prepare convolution _conv_f.prepare(); + + if(!_weights_flipped.is_used()) + { + _weights_flipped.allocator()->free(); + } + _is_prepared = true; } } -- cgit v1.2.1