From 3f217ec4ff11e20fe686beb9a28d0bbd80a56cd6 Mon Sep 17 00:00:00 2001
From: Isabella Gottardi <isabella.gottardi@arm.com>
Date: Mon, 12 Feb 2018 14:59:19 +0000
Subject: COMPMID-908 - Merge Activation layer with Convolution Layer (NEON.
 CL, GLES)

Change-Id: Iab06d0768ecf805b841e601185608aae88cf9166
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/120874
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
---
 .../GLES_COMPUTE/functions/GCConvolutionLayer.cpp  | 22 +++++++++++++++++++---
 .../functions/GCDirectConvolutionLayer.cpp         | 11 +++++++----
 2 files changed, 26 insertions(+), 7 deletions(-)

(limited to 'src/runtime/GLES_COMPUTE/functions')
diff --git a/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp
index c2b7e02284..b1c8665216 100644
--- a/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp
+++ b/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp
@@ -92,8 +92,9 @@ void GCConvolutionLayerReshapeWeights::run()
 }
 
 GCConvolutionLayer::GCConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
-    : _memory_group(std::move(memory_manager)), _reshape_weights(), _input_im2col_kernel(), _input_interleave_kernel(), _mm_kernel(), _output_col2im_kernel(), _fill_border(), _input_im2col_reshaped(),
-      _input_interleaved_reshaped(), _weights_reshaped(), _weights_transposed(), _gemm_output(), _tmp_output(), _append_bias(false), _is_fully_connected_convolution(false), _are_weights_reshaped(false)
+    : _memory_group(std::move(memory_manager)), _reshape_weights(), _input_im2col_kernel(), _input_interleave_kernel(), _mm_kernel(), _output_col2im_kernel(), _fill_border(), _activationlayer_function(),
+      _input_im2col_reshaped(), _input_interleaved_reshaped(), _weights_reshaped(), _weights_transposed(), _gemm_output(), _tmp_output(), _append_bias(false), _is_fully_connected_convolution(false),
+      _are_weights_reshaped(false), _is_activationlayer_enabled(false)
 {
 }
 
@@ -103,7 +104,7 @@ void GCConvolutionLayer::configure_mm(const IGCTensor *input, const IGCTensor *w
 }
 
 void GCConvolutionLayer::configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info,
-                                   const Size2D &dilation)
+                                   const Size2D &dilation, const ActivationLayerInfo &act_info)
 {
     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
     ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
@@ -256,6 +257,14 @@ void GCConvolutionLayer::configure(const IGCTensor *input, const IGCTensor *weig
     {
         _weights_reshaped.allocator()->allocate();
     }
+
+    //Configure Activation Layer
+    _is_activationlayer_enabled = act_info.enabled();
+
+    if(_is_activationlayer_enabled)
+    {
+        _activationlayer_function.configure(output, nullptr, act_info);
+    }
 }
 
 void GCConvolutionLayer::run()
@@ -290,4 +299,11 @@ void GCConvolutionLayer::run()
     GCScheduler::get().dispatch(_output_col2im_kernel, false);
 
     _memory_group.release();
+
+    GCScheduler::get().memory_barrier();
+    // Run Activation Layer
+    if(_is_activationlayer_enabled)
+    {
+        _activationlayer_function.run();
+    }
 }
diff --git a/src/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.cpp
index a2607d4c2d..c0cf09836f 100644
--- a/src/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.cpp
+++ b/src/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.cpp
@@ -39,26 +39,27 @@ GCDirectConvolutionLayer::GCDirectConvolutionLayer()
 {
 }
 
-void GCDirectConvolutionLayer::configure(IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info)
+void GCDirectConvolutionLayer::configure(IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info,
+                                         const ActivationLayerInfo &act_info)
 {
     int kernel_size = weights->info()->dimension(0);
 
     if(kernel_size == 1)
     {
         auto k = arm_compute::support::cpp14::make_unique<GCDirectConvolutionLayer1x1Kernel>();
-        k->configure(input, weights, biases, output, conv_info);
+        k->configure(input, weights, biases, output, conv_info, act_info);
         _kernel = std::move(k);
     }
     else if(kernel_size == 3)
     {
         auto k = arm_compute::support::cpp14::make_unique<GCDirectConvolutionLayer3x3Kernel>();
-        k->configure(input, weights, biases, output, conv_info);
+        k->configure(input, weights, biases, output, conv_info, act_info);
         _kernel = std::move(k);
     }
     else if(kernel_size == 5)
     {
         auto k = arm_compute::support::cpp14::make_unique<GCDirectConvolutionLayer5x5Kernel>();
-        k->configure(input, weights, biases, output, conv_info);
+        k->configure(input, weights, biases, output, conv_info, act_info);
         _kernel = std::move(k);
     }
     else
@@ -79,4 +80,6 @@ void GCDirectConvolutionLayer::run()
     GCScheduler::get().dispatch(_border_handler, false);
     GCScheduler::get().memory_barrier();
     GCScheduler::get().dispatch(*_kernel);
+    GCScheduler::get().memory_barrier();
+    GCScheduler::get().dispatch(_shift_handler);
 }
-- 
cgit v1.2.1