COMPMID-1145: (API) Introduce prepare() stage (NEON/CL/GLES)

Change-Id: I5b46764f9c3154ec3e3b9c951cc9e6dfbcb81dfb
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/134255
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Pablo Tello <pablo.tello@arm.com>
Reviewed-by: Michele DiGiorgio <michele.digiorgio@arm.com>
author: Georgios Pinitas <georgios.pinitas@arm.com> 2018-06-05 14:56:06 +0100
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:53:09 +0000
commit: 72219330fd85b1271e714d4ba894d6d8e26340c9 (patch)
tree: 9ae0510087a1ca77b1695252a8621de3f2ab98af /src/runtime/GLES_COMPUTE
parent: c42f28d45e9b990276d54880d2cee9c9ee675a41 (diff)
download: ComputeLibrary-72219330fd85b1271e714d4ba894d6d8e26340c9.tar.gz
3 files changed, 79 insertions, 43 deletions
diff --git a/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp
index d1ef87d32c..67b2ae9d61 100644
--- a/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp
+++ b/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp
@@ -37,7 +37,7 @@
 using namespace arm_compute;
 
 GCConvolutionLayerReshapeWeights::GCConvolutionLayerReshapeWeights()
-    : _weights_reshape_kernel(), _weights_reshaped()
+    : _weights_reshape_kernel()
 {
 }
 
@@ -68,7 +68,7 @@ void GCConvolutionLayerReshapeWeights::run()
 
 GCConvolutionLayer::GCConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
     : _memory_group(std::move(memory_manager)), _reshape_weights(), _input_im2col_kernel(), _mm_gemm(), _output_col2im_kernel(), _fill_border(), _activationlayer_function(), _original_weights(nullptr),
-      _input_im2col_reshaped(), _input_interleaved_reshaped(), _weights_reshaped(), _weights_transposed(), _gemm_output(), _tmp_output(), _is_first_run(true), _is_activationlayer_enabled(false)
+      _input_im2col_reshaped(), _input_interleaved_reshaped(), _weights_reshaped(), _weights_transposed(), _gemm_output(), _tmp_output(), _is_activationlayer_enabled(false), _is_prepared(false)
 {
 }
 
@@ -97,7 +97,7 @@ void GCConvolutionLayer::configure(const IGCTensor *input, const IGCTensor *weig
     ARM_COMPUTE_ERROR_ON(weights->info()->dimension(2) != input->info()->dimension(2));
     ARM_COMPUTE_ERROR_ON(weights->info()->num_dimensions() > 4);
 
-    _is_first_run     = true;
+    _is_prepared      = false;
     _original_weights = weights;
 
     if(biases != nullptr)
@@ -184,9 +184,6 @@ void GCConvolutionLayer::configure(const IGCTensor *input, const IGCTensor *weig
 
     ARM_COMPUTE_ERROR_ON_MSG((output->info()->dimension(0) != conv_w) || (output->info()->dimension(1) != conv_h), "Output shape does not match the expected one");
 
-    // Allocate intermediate tensor
-    _weights_reshaped.allocator()->allocate();
-
     //Configure Activation Layer
     _is_activationlayer_enabled = act_info.enabled();
 
@@ -200,17 +197,7 @@ void GCConvolutionLayer::configure(const IGCTensor *input, const IGCTensor *weig
 
 void GCConvolutionLayer::run()
 {
-    // Run weights reshaping (Runs once for every configure)
-    if(_is_first_run)
-    {
-        ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
-
-        _reshape_weights.run();
-        _is_first_run = false;
-
-        // Mark original weights tensor as unused
-        _original_weights->mark_as_unused();
-    }
+    prepare();
 
     _memory_group.acquire();
 
@@ -221,17 +208,34 @@ void GCConvolutionLayer::run()
 
     // Run gemm on reshaped matrices
     _mm_gemm.run();
-
     GCScheduler::get().memory_barrier();
+
     // Reshape output matrix
     GCScheduler::get().dispatch(_output_col2im_kernel, false);
+    GCScheduler::get().memory_barrier();
 
     _memory_group.release();
 
-    GCScheduler::get().memory_barrier();
     // Run Activation Layer
     if(_is_activationlayer_enabled)
     {
         _activationlayer_function.run();
     }
 }
+
+void GCConvolutionLayer::prepare()
+{
+    if(!_is_prepared)
+    {
+        ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
+
+        // Allocate the reshaped weights tensor, then run the weights reshaping (once per configure)
+        _weights_reshaped.allocator()->allocate();
+        _reshape_weights.run();
+
+        // Mark original weights tensor as unused
+        _original_weights->mark_as_unused();
+
+        _is_prepared = true;
+    }
+}
diff --git a/src/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp
index a300033bb2..ab2c6c2813 100644
--- a/src/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp
+++ b/src/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp
@@ -40,7 +40,7 @@ void GCFullyConnectedLayerReshapeWeights::configure(const IGCTensor *input, IGCT
 
 GCFullyConnectedLayer::GCFullyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager)
     : _memory_group(std::move(memory_manager)), _im2col_kernel(), _reshape_weights_kernel(), _mm_kernel(), _accumulate_biases_kernel(), _im2col_output(), _reshape_weights_output(),
-      _are_weights_reshaped(true), _is_fc_after_conv(true), _accumulate_biases(false)
+      _original_weights(nullptr), _are_weights_reshaped(true), _is_fc_after_conv(true), _accumulate_biases(false)
 {
 }
 
@@ -86,6 +86,7 @@ void GCFullyConnectedLayer::configure(const IGCTensor *input, const IGCTensor *w
     ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output);
     ARM_COMPUTE_ERROR_ON(weights->info()->num_dimensions() > 2);
 
+    _original_weights     = weights;
     _are_weights_reshaped = transpose_weights ? are_weights_reshaped : true;
     _is_fc_after_conv     = true;
     _accumulate_biases    = false;
@@ -141,25 +142,13 @@ void GCFullyConnectedLayer::configure(const IGCTensor *input, const IGCTensor *w
         configure_fc_fc(input, weights_to_use, output);
     }
 
-    // Allocate the transpose tensor if the are_weights_reshaped flag is false and once all the configure methods have been called
-    if(!_are_weights_reshaped && !retain_internal_weights)
-    {
-        // Allocate the tensor for the weights reshaped
-        _reshape_weights_output.allocator()->allocate();
-    }
-
     ARM_COMPUTE_ERROR_ON(retain_internal_weights && _reshape_weights_output.gc_buffer() == 0);
     _are_weights_reshaped = _are_weights_reshaped || retain_internal_weights;
 }
 
 void GCFullyConnectedLayer::run()
 {
-    // Reshape of the weights (happens only once)
-    if(!_are_weights_reshaped)
-    {
-        _are_weights_reshaped = true;
-        _reshape_weights_kernel.run();
-    }
+    prepare();
 
     _memory_group.acquire();
 
@@ -187,3 +176,21 @@ void GCFullyConnectedLayer::run()
 
     _memory_group.release();
 }
+
+void GCFullyConnectedLayer::prepare()
+{
+    // Reshape of the weights (happens only once)
+    if(!_are_weights_reshaped)
+    {
+        ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
+
+        // Allocate the reshaped weights output tensor, then run the reshape weights kernel
+        _reshape_weights_output.allocator()->allocate();
+        _reshape_weights_kernel.run();
+
+        // Mark original weights tensor as unused
+        _original_weights->mark_as_unused();
+
+        _are_weights_reshaped = true;
+    }
+}
\ No newline at end of file
diff --git a/src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp b/src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp
index 79f8f71713..8ae91ee82c 100644
--- a/src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp
+++ b/src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp
@@ -73,8 +73,8 @@ Status validate_arguments(const ITensorInfo *a, const ITensorInfo *b, const IGCT
 } // namespace
 
 GCGEMM::GCGEMM(std::shared_ptr<IMemoryManager> memory_manager)
-    : _memory_group(std::move(memory_manager)), _interleave_kernel(), _transpose_kernel(), _mm_kernel(), _ma_kernel(), _tmp_a(), _tmp_b(), _is_interleaved_transposed(false), _run_addition(false),
-      _is_first_run(true), _reshape_b_only_on_first_run(false)
+    : _memory_group(std::move(memory_manager)), _interleave_kernel(), _transpose_kernel(), _mm_kernel(), _ma_kernel(), _tmp_a(), _tmp_b(), _original_b(nullptr), _is_interleaved_transposed(false),
+      _run_addition(false), _reshape_b_only_on_first_run(false), _is_prepared(false)
 {
 }
 
@@ -87,6 +87,8 @@ void GCGEMM::configure(const IGCTensor *a, const IGCTensor *b, const IGCTensor *
 
     // Check if we need to reshape the matrix B only on the first run
     _reshape_b_only_on_first_run = gemm_info.reshape_b_only_on_first_run();
+    _is_prepared                 = false;
+    _original_b                  = b;
 
     const IGCTensor *matrix_a = a;
     const IGCTensor *matrix_b = b;
@@ -136,7 +138,10 @@ void GCGEMM::configure(const IGCTensor *a, const IGCTensor *b, const IGCTensor *
     {
         // Allocate intermediate tensors
         _tmp_a.allocator()->allocate();
-        _tmp_b.allocator()->allocate();
+        if(!_reshape_b_only_on_first_run)
+        {
+            _tmp_b.allocator()->allocate();
+        }
     }
 
     // Configure matrix addition kernel
@@ -155,23 +160,21 @@ Status GCGEMM::validate(const ITensorInfo *a, const ITensorInfo *b, const IGCTen
 
 void GCGEMM::run()
 {
+    prepare();
+
     _memory_group.acquire();
+
     if(_is_interleaved_transposed)
     {
         // Run interleave kernel
         GCScheduler::get().dispatch(_interleave_kernel, false);
 
-        if(_is_first_run)
-        {
-            // Run transpose kernel
-            GCScheduler::get().dispatch(_transpose_kernel, false);
-            _is_first_run = false;
-        }
-        else if(!_reshape_b_only_on_first_run)
+        if(!_reshape_b_only_on_first_run)
         {
             // Run transpose kernel
             GCScheduler::get().dispatch(_transpose_kernel, false);
         }
+
         GCScheduler::get().memory_barrier();
     }
 
@@ -184,5 +187,27 @@ void GCGEMM::run()
         GCScheduler::get().memory_barrier();
         GCScheduler::get().dispatch(_ma_kernel);
     }
+
     _memory_group.release();
 }
+
+void GCGEMM::prepare()
+{
+    if(!_is_prepared)
+    {
+        if(_is_interleaved_transposed && _reshape_b_only_on_first_run)
+        {
+            ARM_COMPUTE_ERROR_ON(!_original_b->is_used());
+
+            // Allocate _tmp_b, then dispatch the transpose kernel followed by a memory barrier
+            _tmp_b.allocator()->allocate();
+            GCScheduler::get().dispatch(_transpose_kernel, false);
+            GCScheduler::get().memory_barrier();
+
+            // Mark the original matrix B tensor as unused
+            _original_b->mark_as_unused();
+        }
+
+        _is_prepared = true;
+    }
+}
author	Georgios Pinitas <georgios.pinitas@arm.com>	2018-06-05 14:56:06 +0100
committer	Anthony Barbier <anthony.barbier@arm.com>	2018-11-02 16:53:09 +0000
commit	72219330fd85b1271e714d4ba894d6d8e26340c9 (patch)
tree	9ae0510087a1ca77b1695252a8621de3f2ab98af /src/runtime/GLES_COMPUTE
parent	c42f28d45e9b990276d54880d2cee9c9ee675a41 (diff)
download	ComputeLibrary-72219330fd85b1271e714d4ba894d6d8e26340c9.tar.gz