aboutsummaryrefslogtreecommitdiff
path: root/src/runtime
diff options
context:
space:
mode:
authorGeorgios Pinitas <georgios.pinitas@arm.com>2018-11-22 19:38:27 +0000
committerGeorgios Pinitas <georgios.pinitas@arm.com>2018-11-23 14:04:27 +0000
commitca1250d548961f43ecf8499a78b5cc0c9609a37a (patch)
tree14a5ad99569819091e861595a1dc0dd592ed98be /src/runtime
parentfea8ec3da3afd0aee3b9c228f46e7dbd52e7de2b (diff)
downloadComputeLibrary-ca1250d548961f43ecf8499a78b5cc0c9609a37a.tar.gz
COMPMID-1451: Fix MemoryManager usage.
- Fixes NEWinogradConvolutionLayer memory manager usage. - Moves allocations into the prepare stage for GEMMDispatchWrapper. Change-Id: Ic1c709ee473eb4968f5a081f2bc26960f882f8db
Diffstat (limited to 'src/runtime')
-rw-r--r--src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp3
-rw-r--r--src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp2
-rw-r--r--src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp13
-rw-r--r--src/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.cpp6
4 files changed, 15 insertions, 9 deletions
diff --git a/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp b/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp
index 922f757497..25be4a5349 100644
--- a/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp
+++ b/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp
@@ -229,8 +229,6 @@ void Fallback<TypeInput, TypeOutput>::configure(const ITensor *a, const ITensor
const unsigned int alignment = 128;
const size_t B_pretranspose_size = _gemm_kernel_asm->get_B_pretransposed_array_size();
_pretranspose.allocator()->init(TensorInfo(TensorShape{ (B_pretranspose_size + alignment /* FIXME: remove alignment after COMPMID-1088 */) }, 1, DataType::S8), alignment);
- _pretranspose.allocator()->allocate();
- ARM_COMPUTE_ERROR_ON_NULLPTR(_pretranspose.buffer());
}
}
@@ -242,6 +240,7 @@ void Fallback<TypeInput, TypeOutput>::prepare()
// Pretranspose B if required
if(_gemm_kernel_asm->B_pretranspose_required())
{
+ _pretranspose.allocator()->allocate();
ARM_COMPUTE_ERROR_ON(_pretranspose.buffer() == nullptr);
const int ldb = _b->info()->strides_in_bytes().y() / sizeof(TypeInput);
const auto in1_ptr = reinterpret_cast<const TypeInput *>(_b->buffer() + _b->info()->offset_first_element_in_bytes());
diff --git a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
index 0232a83171..be7cc2d0e1 100644
--- a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
@@ -90,7 +90,7 @@ void NEConvolutionLayerReshapeWeights::run()
}
NEGEMMConvolutionLayer::NEGEMMConvolutionLayer(const std::shared_ptr<IMemoryManager> &memory_manager)
- : _memory_group(memory_manager), _reshape_weights(), _im2col_kernel(), _mm_gemm(), _mm_gemmlowp(memory_manager), _gemmlowp_output_stage(), _col2im_kernel(), _activationlayer_function(),
+ : _memory_group(memory_manager), _reshape_weights(), _im2col_kernel(), _mm_gemm(memory_manager), _mm_gemmlowp(memory_manager), _gemmlowp_output_stage(), _col2im_kernel(), _activationlayer_function(),
_add_bias_kernel(), _reshape_layer(), _original_weights(nullptr), _im2col_output(), _weights_reshaped(), _gemm_output(), _tmp_output(), _data_layout(DataLayout::NCHW), _append_bias(false),
_skip_im2col(false), _skip_col2im(false), _is_quantized(false), _is_activationlayer_enabled(false), _is_prepared(false)
{
diff --git a/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp b/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp
index c8e3b3b38c..e37f8abfaf 100644
--- a/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp
@@ -464,6 +464,7 @@ void NEWinogradConvolutionLayer::configure(const ITensor *input, const ITensor *
transform_weights_kernel->configure(&_weights_hwio, &_kernel_storage, kernel_matrix_stride, out_channels, in_channels);
//The biases tensor has not been allocated at this point in time, the output transform will add the biases to the final result in the run() method
+ _memory_group.manage(&_output_nhwc);
transform_output_kernel->configure(biases, &_output_workspace,
output_matrix_stride, &_output_nhwc,
in_shape.n_batches, output_shape.n_rows, output_shape.n_cols, out_channels);
@@ -483,16 +484,16 @@ void NEWinogradConvolutionLayer::configure(const ITensor *input, const ITensor *
in_shape.n_batches, output_shape.n_rows, output_shape.n_cols, out_channels);
}
- _weights_hwio.allocator()->allocate();
_gemm_function.configure(&_input_workspace, &_kernel_storage, nullptr, &_output_workspace, 1.0f, 0.f);
_input_workspace.allocator()->allocate();
- _kernel_storage.allocator()->allocate();
_output_workspace.allocator()->allocate();
// Reorder the convoluted output to ACL's ordering NCHW
- _permute_output.configure(&_output_nhwc, _output, PermutationVector(1U, 2U, 0U));
-
- _output_nhwc.allocator()->allocate();
+ if(data_layout == DataLayout::NCHW)
+ {
+ _permute_output.configure(&_output_nhwc, _output, PermutationVector(1U, 2U, 0U));
+ _output_nhwc.allocator()->allocate();
+ }
_transform_input_kernel = std::move(transform_input_kernel);
_transform_weights_kernel = std::move(transform_weights_kernel);
@@ -656,10 +657,12 @@ void NEWinogradConvolutionLayer::prepare()
if(!_is_prepared)
{
// Permute weights
+ _weights_hwio.allocator()->allocate();
_permute_weights.run();
_weights->mark_as_unused();
// Transform weights
+ _kernel_storage.allocator()->allocate();
NEScheduler::get().schedule(_transform_weights_kernel.get(), Window::DimX);
_weights_hwio.allocator()->free();
diff --git a/src/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.cpp b/src/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.cpp
index c87e82afb8..dcb2f856f0 100644
--- a/src/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.cpp
+++ b/src/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.cpp
@@ -53,6 +53,7 @@ void NEGEMMInterleavedWrapper::prepare()
{
if(_pretranspose_b)
{
+ _transformed_b.allocator()->allocate();
NEScheduler::get().schedule(_prepare_b.get(), Window::DimX);
_b->mark_as_unused();
}
@@ -264,6 +265,9 @@ void NEGEMMInterleavedWrapper::configure(const ITensor *a, const ITensor *b, ITe
ARM_COMPUTE_ERROR_ON(_matrix_multiply == nullptr);
_transformed_a.allocator()->allocate();
_tmp_c.allocator()->allocate();
- _transformed_b.allocator()->allocate();
+ if(!_pretranspose_b)
+ {
+ _transformed_b.allocator()->allocate();
+ }
}
} // namespace arm_compute