From 061dd36716a17c1065de6143f61e41d03896d541 Mon Sep 17 00:00:00 2001
From: Michele Di Giorgio <michele.digiorgio@arm.com>
Date: Wed, 17 Oct 2018 17:10:27 +0100
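Subject: COMPMID-1451: Fix allocation of weights in Deconvolution

Keep a pointer to the original weights in both CLDeconvolutionLayer and
NEDeconvolutionLayer so that prepare() can:
 - assert that the original weights are still marked as used,
 - flip them into _weights_flipped and mark the originals as unused,
 - free _weights_flipped once the convolution has been prepared, if it
   is no longer marked as used.

In CLDeconvolutionLayer, _weights is renamed to _original_weights and
_weights_flipped is no longer registered with the memory group; it is
still allocated explicitly in prepare().
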
Change-Id: If3ca0b034a7448df1e5349b51a2b124f1b4e99c1
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/153956
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Tested-by: bsgcomp <bsgcomp@arm.com>
---
 .../runtime/CL/functions/CLDeconvolutionLayer.h    |  2 +-
 .../runtime/NEON/functions/NEDeconvolutionLayer.h  |  1 +
 src/runtime/CL/functions/CLDeconvolutionLayer.cpp  | 20 +++++++++++++++-----
 .../NEON/functions/NEDeconvolutionLayer.cpp        | 22 ++++++++++++++++++----
 4 files changed, 35 insertions(+), 10 deletions(-)
diff --git a/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h
index 39cbe0cafa..7a58c5acef 100644
--- a/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h
@@ -123,7 +123,7 @@ private:
     CLConvolutionLayer           _conv_f;
     CPPFlipWeightsKernel         _flip_weights;
     CLTensor                     _scaled_output;
-    ICLTensor                   *_weights;
+    ICLTensor                   *_original_weights;
     CLTensor                     _weights_flipped;
     bool                         _is_prepared;
 };
diff --git a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
index 73870093b7..277945d617 100644
--- a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
@@ -125,6 +125,7 @@ private:
     CPPFlipWeightsKernel _flip_weights;
     Tensor               _scaled_output;
     Tensor               _weights_flipped;
+    const ITensor       *_original_weights;
     ITensor             *_input;
     PadStrideInfo        _info;
     std::pair<unsigned int, unsigned int> _inner_border;
diff --git a/src/runtime/CL/functions/CLDeconvolutionLayer.cpp b/src/runtime/CL/functions/CLDeconvolutionLayer.cpp
index 951d1ec4f0..bbf4e66adb 100644
--- a/src/runtime/CL/functions/CLDeconvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLDeconvolutionLayer.cpp
@@ -42,7 +42,7 @@ CLDeconvolutionLayer::CLDeconvolutionLayer(std::shared_ptr<IMemoryManager> memor
       _conv_f(),
       _flip_weights(),
       _scaled_output(),
-      _weights(),
+      _original_weights(nullptr),
       _weights_flipped(),
       _is_prepared(false)
 {
@@ -120,7 +120,7 @@ void CLDeconvolutionLayer::configure(ICLTensor *input, ICLTensor *weights, const
     const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
     const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
 
-    _weights = weights;
+    _original_weights = weights;
     _weights_flipped.allocator()->init(weights->info()->clone()->set_data_layout(data_layout));
     _flip_weights.configure(weights, &_weights_flipped);
 
@@ -138,7 +138,6 @@ void CLDeconvolutionLayer::configure(ICLTensor *input, ICLTensor *weights, const
     _is_prepared = false;
 
     _memory_group.manage(&_scaled_output);
-    _memory_group.manage(&_weights_flipped);
 
     // Find the upsampled dimensions and the padding needed for the convolution with stride 1 in order to match output shape
     unsigned int      padx            = 0;
@@ -175,14 +174,25 @@ void CLDeconvolutionLayer::prepare()
 {
     if(!_is_prepared)
     {
+        ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
+
+        // Run weights flipping and mark original weights tensor as unused
         _weights_flipped.allocator()->allocate();
         _weights_flipped.map(true);
-        _weights->map(CLScheduler::get().queue(), true);
+        _original_weights->map(CLScheduler::get().queue(), true);
         CPPScheduler::get().schedule(&_flip_weights, Window::DimZ);
         _weights_flipped.unmap();
-        _weights->unmap(CLScheduler::get().queue());
+        _original_weights->unmap(CLScheduler::get().queue());
+        _original_weights->mark_as_unused();
+
+        // Prepare convolution
         _conv_f.prepare();
 
+        if(!_weights_flipped.is_used())
+        {
+            _weights_flipped.allocator()->free();
+        }
+
         _is_prepared = true;
     }
 }
diff --git a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
index cbe7c51662..23def5959b 100644
--- a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
@@ -39,6 +39,7 @@ NEDeconvolutionLayer::NEDeconvolutionLayer(std::shared_ptr<IMemoryManager> memor
       _flip_weights(),
       _scaled_output(),
       _weights_flipped(),
+      _original_weights(nullptr),
       _input(nullptr),
       _info(),
       _inner_border(),
@@ -104,10 +105,11 @@ void NEDeconvolutionLayer::configure(ITensor *input, const ITensor *weights, con
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
 
-    _input        = input;
-    _info         = info;
-    _inner_border = std::make_pair(inner_border_right, inner_border_top);
-    _is_prepared  = false;
+    _input            = input;
+    _original_weights = weights;
+    _info             = info;
+    _inner_border     = std::make_pair(inner_border_right, inner_border_top);
+    _is_prepared      = false;
 
     const unsigned int stride_x = info.stride().first;
     const unsigned int stride_y = info.stride().second;
@@ -160,9 +162,21 @@ void NEDeconvolutionLayer::prepare()
 {
     if(!_is_prepared)
     {
+        ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
+
+        // Run weights flipping and mark original weights tensor as unused
         _weights_flipped.allocator()->allocate();
         CPPScheduler::get().schedule(&_flip_weights, Window::DimZ);
+        _original_weights->mark_as_unused();
+
+        // Prepare convolution
         _conv_f.prepare();
+
+        if(!_weights_flipped.is_used())
+        {
+            _weights_flipped.allocator()->free();
+        }
+
         _is_prepared = true;
     }
 }
-- 
cgit v1.2.1