author     Isabella Gottardi <isabella.gottardi@arm.com>  2018-02-12 14:59:19 +0000
committer  Anthony Barbier <anthony.barbier@arm.com>      2018-11-02 16:49:16 +0000
commit     3f217ec4ff11e20fe686beb9a28d0bbd80a56cd6 (patch)
tree       81db8baab925af5b416b66d0328be2eb49543824 /src/runtime
parent     d9eb27597eabe5b7c17520f4f9b3f8a282d72573 (diff)
download   ComputeLibrary-3f217ec4ff11e20fe686beb9a28d0bbd80a56cd6.tar.gz
COMPMID-908 - Merge Activation layer with Convolution Layer (NEON, CL, GLES)
Change-Id: Iab06d0768ecf805b841e601185608aae88cf9166
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/120874
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
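
The change threads an ActivationLayerInfo parameter through the public configure()/validate() interfaces of the NEON, CL and GLES convolution functions, so the activation can be applied by the convolution function itself instead of by a separate layer. A minimal sketch of how a caller might use the merged CL interface after this patch is shown below; the tensor shapes, padding and fill step are illustrative assumptions, not taken from the patch.

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h"

using namespace arm_compute;

int main()
{
    // Create a default CL context and command queue for the scheduler.
    CLScheduler::get().default_init();

    // Illustrative shapes: a 224x224 RGB input convolved with sixteen 3x3 filters.
    CLTensor src, weights, biases, dst;
    src.allocator()->init(TensorInfo(TensorShape(224U, 224U, 3U), 1, DataType::F32));
    weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 3U, 16U), 1, DataType::F32));
    biases.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(224U, 224U, 16U), 1, DataType::F32));

    CLConvolutionLayer conv;
    // The trailing ActivationLayerInfo is the argument introduced by this patch:
    // when enabled, the activation is applied in place on the convolution output.
    conv.configure(&src, &weights, &biases, &dst,
                   PadStrideInfo(1, 1, 1, 1), // stride 1, pad 1
                   WeightsInfo(),             // weights not pre-reshaped
                   Size2D(1U, 1U),            // no dilation
                   ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));

    src.allocator()->allocate();
    weights.allocator()->allocate();
    biases.allocator()->allocate();
    dst.allocator()->allocate();
    // ... fill src, weights and biases ...

    conv.run();                // convolution followed by the fused ReLU
    CLScheduler::get().sync(); // wait for the command queue to drain
    return 0;
}
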
Diffstat (limited to 'src/runtime')
-rw-r--r--  src/runtime/CL/functions/CLConvolutionLayer.cpp                  17
-rw-r--r--  src/runtime/CL/functions/CLDirectConvolutionLayer.cpp            28
-rw-r--r--  src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp              36
-rw-r--r--  src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp          30
-rw-r--r--  src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp        22
-rw-r--r--  src/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.cpp  11
-rw-r--r--  src/runtime/NEON/functions/NEConvolutionLayer.cpp                24
-rw-r--r--  src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp          27
-rw-r--r--  src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp            44
-rw-r--r--  src/runtime/NEON/functions/NEWinogradLayer.cpp                   21
10 files changed, 202 insertions, 58 deletions
diff --git a/src/runtime/CL/functions/CLConvolutionLayer.cpp b/src/runtime/CL/functions/CLConvolutionLayer.cpp
index 64bda93ff0..bcb5424aab 100644
--- a/src/runtime/CL/functions/CLConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLConvolutionLayer.cpp
@@ -43,13 +43,13 @@ CLConvolutionLayer::CLConvolutionLayer(std::shared_ptr<IMemoryManager> memory_ma
}
void CLConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info,
- const Size2D &dilation)
+ const Size2D &dilation, const ActivationLayerInfo &act_info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
- ARM_COMPUTE_ERROR_THROW_ON(CLConvolutionLayer::validate(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info, weights_info, dilation));
+ ARM_COMPUTE_ERROR_THROW_ON(CLConvolutionLayer::validate(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info, weights_info, dilation, act_info));
switch(CLConvolutionLayer::get_convolution_method(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info,
- weights_info, CLScheduler::get().target(), dilation))
+ weights_info, act_info, CLScheduler::get().target(), dilation))
{
case ConvolutionMethod::DIRECT:
{
@@ -72,25 +72,25 @@ void CLConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, c
}
Status CLConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- const WeightsInfo &weights_info, const Size2D &dilation)
+ const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
//Configure if the parameters match the direct convolution or the gemm-based
const GPUTarget gpu_target = CLScheduler::get().target();
- switch(CLConvolutionLayer::get_convolution_method(input, weights, biases, output, conv_info, weights_info, gpu_target, dilation))
+ switch(CLConvolutionLayer::get_convolution_method(input, weights, biases, output, conv_info, weights_info, act_info, gpu_target, dilation))
{
case ConvolutionMethod::DIRECT:
{
// Validate direct convolution layer
- CLDirectConvolutionLayer::validate(input, weights, biases, output, conv_info);
+ CLDirectConvolutionLayer::validate(input, weights, biases, output, conv_info, act_info);
break;
}
case ConvolutionMethod::GEMM:
{
// Validate gemm-based convolution layer
- CLGEMMConvolutionLayer::validate(input, weights, biases, output, conv_info, weights_info, dilation);
+ CLGEMMConvolutionLayer::validate(input, weights, biases, output, conv_info, weights_info, dilation, act_info);
break;
}
default:
@@ -102,7 +102,7 @@ Status CLConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo
}
ConvolutionMethod CLConvolutionLayer::get_convolution_method(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- const WeightsInfo &weights_info, const GPUTarget gpu_target, const Size2D &dilation)
+ const WeightsInfo &weights_info, const ActivationLayerInfo &act_info, const GPUTarget gpu_target, const Size2D &dilation)
{
ARM_COMPUTE_UNUSED(input);
ARM_COMPUTE_UNUSED(weights);
@@ -112,6 +112,7 @@ ConvolutionMethod CLConvolutionLayer::get_convolution_method(const ITensorInfo *
ARM_COMPUTE_UNUSED(weights_info);
ARM_COMPUTE_UNUSED(gpu_target);
ARM_COMPUTE_UNUSED(dilation);
+ ARM_COMPUTE_UNUSED(act_info);
return ConvolutionMethod::GEMM;
}
diff --git a/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp b/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp
index c48865a0cc..c451bd4b4c 100644
--- a/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp
@@ -33,11 +33,11 @@
using namespace arm_compute;
CLDirectConvolutionLayer::CLDirectConvolutionLayer()
- : _direct_conv_kernel(), _input_border_handler()
+ : _direct_conv_kernel(), _input_border_handler(), _activationlayer_function(), _is_activationlayer_enabled(false)
{
}
-void CLDirectConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info)
+void CLDirectConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info)
{
// Set GPU target
_direct_conv_kernel.set_target(CLScheduler::get().target());
@@ -55,11 +55,25 @@ void CLDirectConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weig
// Tune kernels
CLScheduler::get().tune_kernel_static(_direct_conv_kernel);
+
+ _is_activationlayer_enabled = act_info.enabled();
+
+ //Configure Activation Layer
+ if(_is_activationlayer_enabled)
+ {
+ _activationlayer_function.configure(output, nullptr, act_info);
+ }
}
-Status CLDirectConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info)
+Status CLDirectConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
+ const ActivationLayerInfo &act_info)
{
- return CLDirectConvolutionLayerKernel::validate(input, weights, biases, output, conv_info, CLScheduler::get().target());
+ ARM_COMPUTE_RETURN_ON_ERROR(CLDirectConvolutionLayerKernel::validate(input, weights, biases, output, conv_info, CLScheduler::get().target()));
+ if(act_info.enabled())
+ {
+ ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(output, nullptr, act_info));
+ }
+ return Status{};
}
void CLDirectConvolutionLayer::run()
@@ -69,4 +83,10 @@ void CLDirectConvolutionLayer::run()
// Run direct convolution
CLScheduler::get().enqueue(_direct_conv_kernel);
+
+ //Run Activation Layer
+ if(_is_activationlayer_enabled)
+ {
+ _activationlayer_function.run();
+ }
}
diff --git a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
index f43e100565..084c4df718 100644
--- a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
@@ -90,8 +90,8 @@ void CLConvolutionLayerReshapeWeights::run()
}
CLGEMMConvolutionLayer::CLGEMMConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(memory_manager), _reshape_weights(), _im2col_kernel(), _mm_gemm(memory_manager), _mm_gemmlowp(memory_manager), _gemmlowp_output_stage(), _col2im_kernel(), _original_weights(nullptr),
- _im2col_output(), _weights_reshaped(), _gemm_output(), _tmp_output(), _is_quantized(false), _is_first_run(true)
+ : _memory_group(memory_manager), _reshape_weights(), _im2col_kernel(), _mm_gemm(memory_manager), _mm_gemmlowp(memory_manager), _gemmlowp_output_stage(), _col2im_kernel(), _activationlayer_function(),
+ _original_weights(nullptr), _im2col_output(), _weights_reshaped(), _gemm_output(), _tmp_output(), _is_quantized(false), _is_first_run(true), _is_activationlayer_enabled(false)
{
}
@@ -152,7 +152,7 @@ Status CLGEMMConvolutionLayer::validate_mm(const ITensorInfo *input, const ITens
}
void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info,
- const Size2D &dilation)
+ const Size2D &dilation, const ActivationLayerInfo &act_info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
@@ -162,7 +162,8 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *
output->info(),
conv_info,
weights_info,
- dilation));
+ dilation,
+ act_info));
_is_first_run = true;
_original_weights = weights;
@@ -260,11 +261,19 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *
// Allocate intermediate tensor
_weights_reshaped.allocator()->allocate();
+ //Configure Activation Layer
+ _is_activationlayer_enabled = act_info.enabled();
+
+ if(_is_activationlayer_enabled)
+ {
+ _activationlayer_function.configure(output, nullptr, act_info);
+ }
+
ARM_COMPUTE_UNUSED(weights_info);
}
Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- const WeightsInfo &weights_info, const Size2D &dilation)
+ const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights_info.are_reshaped(), "Weights already reshaped are not supported!");
@@ -274,6 +283,11 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(2) != input->dimension(2));
ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 4);
+ if(act_info.enabled())
+ {
+ ARM_COMPUTE_ERROR_ON(act_info.b() > act_info.a());
+ }
+
const bool is_quantized = is_data_type_quantized_asymmetric(input->data_type());
const bool append_bias = (biases != nullptr) && (!is_quantized);
const unsigned bias_element = (append_bias) ? 1 : 0;
@@ -343,6 +357,12 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
}
+ //Validate Activation Layer
+ if(act_info.enabled())
+ {
+ CLActivationLayer::validate(output, nullptr, act_info);
+ }
+
return Status{};
}
@@ -383,5 +403,11 @@ void CLGEMMConvolutionLayer::run()
// Reshape output matrix
CLScheduler::get().enqueue(_col2im_kernel, false);
+ //Run Activation Layer if enabled
+ if(_is_activationlayer_enabled)
+ {
+ _activationlayer_function.run();
+ }
+
_memory_group.release();
}
diff --git a/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp b/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp
index a861e0072e..7af36bf06b 100644
--- a/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp
@@ -32,11 +32,12 @@
using namespace arm_compute;
CLWinogradConvolutionLayer::CLWinogradConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(memory_manager), _batched_mm(memory_manager), _input_transform(), _filter_transform(), _output_transform(), _input0(), _input1(), _batched_mm_output(), _is_first_run(true)
+ : _memory_group(memory_manager), _batched_mm(memory_manager), _input_transform(), _filter_transform(), _output_transform(), _activationlayer_function(), _input0(), _input1(), _batched_mm_output(),
+ _is_first_run(true), _is_activationlayer_enabled(false)
{
}
-void CLWinogradConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info)
+void CLWinogradConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info)
{
// TODO(COMPMID-1013): This part will be removed
// Get indeces for the width and height
@@ -73,13 +74,21 @@ void CLWinogradConvolutionLayer::configure(ICLTensor *input, const ICLTensor *we
_output_transform.configure(&_batched_mm_output, biases, output, Size2D(kernel_w, kernel_h), Size2D(output_convolved_shape[idx_width], output_convolved_shape[idx_height]), Size2D(num_tiles_x,
num_tiles_y));
+ // Configure activation layer
+ _is_activationlayer_enabled = act_info.enabled();
+ if(_is_activationlayer_enabled)
+ {
+ _activationlayer_function.configure(output, nullptr, act_info);
+ }
+
// Allocate temporary tensors
_input0.allocator()->allocate();
_input1.allocator()->allocate();
_batched_mm_output.allocator()->allocate();
}
-Status CLWinogradConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info)
+Status CLWinogradConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
+ const ActivationLayerInfo &act_info)
{
// TODO(COMPMID-1013): This part will be removed
// Get indeces for the width and height
@@ -107,17 +116,23 @@ Status CLWinogradConvolutionLayer::validate(const ITensorInfo *input, const ITen
const TensorInfo input1 = weights->clone()->set_tensor_shape(input1_shape);
ARM_COMPUTE_RETURN_ON_ERROR(CLWinogradFilterTransformKernel::validate(weights, &input1, Size2D(2U, 2U)));
- // Configure batched matrix multiply
+ // Validate batched matrix multiply
TensorShape batched_mm_output_shape = input0.tensor_shape();
batched_mm_output_shape[0] = input1.tensor_shape()[0];
const TensorInfo batched_mm_output = input0.clone()->set_tensor_shape(batched_mm_output_shape);
ARM_COMPUTE_RETURN_ON_ERROR(CLGEMM::validate(&input0, &input1, nullptr, &batched_mm_output, 1.0f, 0.0f, GEMMInfo(false, false, true /* Reshape weights only for the first run*/)));
- // Configure output transform
+ // Validate output transform
ARM_COMPUTE_RETURN_ON_ERROR(CLWinogradOutputTransformKernel::validate(&batched_mm_output, biases, output, Size2D(kernel_w, kernel_h), Size2D(output_convolved_shape[idx_width],
output_convolved_shape[idx_height]),
Size2D(num_tiles_x, num_tiles_y)));
+ // Validate Activation Layer
+ if(act_info.enabled())
+ {
+ CLActivationLayer::validate(output, nullptr, act_info);
+ }
+
return Status{};
}
@@ -142,5 +157,10 @@ void CLWinogradConvolutionLayer::run()
// Run output transform
CLScheduler::get().enqueue(_output_transform);
+ if(_is_activationlayer_enabled)
+ {
+ _activationlayer_function.run();
+ }
+
_memory_group.release();
}
diff --git a/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp
index c2b7e02284..b1c8665216 100644
--- a/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp
+++ b/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp
@@ -92,8 +92,9 @@ void GCConvolutionLayerReshapeWeights::run()
}
GCConvolutionLayer::GCConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _reshape_weights(), _input_im2col_kernel(), _input_interleave_kernel(), _mm_kernel(), _output_col2im_kernel(), _fill_border(), _input_im2col_reshaped(),
- _input_interleaved_reshaped(), _weights_reshaped(), _weights_transposed(), _gemm_output(), _tmp_output(), _append_bias(false), _is_fully_connected_convolution(false), _are_weights_reshaped(false)
+ : _memory_group(std::move(memory_manager)), _reshape_weights(), _input_im2col_kernel(), _input_interleave_kernel(), _mm_kernel(), _output_col2im_kernel(), _fill_border(), _activationlayer_function(),
+ _input_im2col_reshaped(), _input_interleaved_reshaped(), _weights_reshaped(), _weights_transposed(), _gemm_output(), _tmp_output(), _append_bias(false), _is_fully_connected_convolution(false),
+ _are_weights_reshaped(false), _is_activationlayer_enabled(false)
{
}
@@ -103,7 +104,7 @@ void GCConvolutionLayer::configure_mm(const IGCTensor *input, const IGCTensor *w
}
void GCConvolutionLayer::configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info,
- const Size2D &dilation)
+ const Size2D &dilation, const ActivationLayerInfo &act_info)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
@@ -256,6 +257,14 @@ void GCConvolutionLayer::configure(const IGCTensor *input, const IGCTensor *weig
{
_weights_reshaped.allocator()->allocate();
}
+
+ //Configure Activation Layer
+ _is_activationlayer_enabled = act_info.enabled();
+
+ if(_is_activationlayer_enabled)
+ {
+ _activationlayer_function.configure(output, nullptr, act_info);
+ }
}
void GCConvolutionLayer::run()
@@ -290,4 +299,11 @@ void GCConvolutionLayer::run()
GCScheduler::get().dispatch(_output_col2im_kernel, false);
_memory_group.release();
+
+ GCScheduler::get().memory_barrier();
+ // Run Activation Layer
+ if(_is_activationlayer_enabled)
+ {
+ _activationlayer_function.run();
+ }
}
diff --git a/src/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.cpp
index a2607d4c2d..c0cf09836f 100644
--- a/src/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.cpp
+++ b/src/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.cpp
@@ -39,26 +39,27 @@ GCDirectConvolutionLayer::GCDirectConvolutionLayer()
{
}
-void GCDirectConvolutionLayer::configure(IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info)
+void GCDirectConvolutionLayer::configure(IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info,
+ const ActivationLayerInfo &act_info)
{
int kernel_size = weights->info()->dimension(0);
if(kernel_size == 1)
{
auto k = arm_compute::support::cpp14::make_unique<GCDirectConvolutionLayer1x1Kernel>();
- k->configure(input, weights, biases, output, conv_info);
+ k->configure(input, weights, biases, output, conv_info, act_info);
_kernel = std::move(k);
}
else if(kernel_size == 3)
{
auto k = arm_compute::support::cpp14::make_unique<GCDirectConvolutionLayer3x3Kernel>();
- k->configure(input, weights, biases, output, conv_info);
+ k->configure(input, weights, biases, output, conv_info, act_info);
_kernel = std::move(k);
}
else if(kernel_size == 5)
{
auto k = arm_compute::support::cpp14::make_unique<GCDirectConvolutionLayer5x5Kernel>();
- k->configure(input, weights, biases, output, conv_info);
+ k->configure(input, weights, biases, output, conv_info, act_info);
_kernel = std::move(k);
}
else
@@ -79,4 +80,6 @@ void GCDirectConvolutionLayer::run()
GCScheduler::get().dispatch(_border_handler, false);
GCScheduler::get().memory_barrier();
GCScheduler::get().dispatch(*_kernel);
+ GCScheduler::get().memory_barrier();
+ GCScheduler::get().dispatch(_shift_handler);
}
diff --git a/src/runtime/NEON/functions/NEConvolutionLayer.cpp b/src/runtime/NEON/functions/NEConvolutionLayer.cpp
index e659495b7c..badeb07405 100644
--- a/src/runtime/NEON/functions/NEConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEConvolutionLayer.cpp
@@ -41,33 +41,33 @@ NEConvolutionLayer::NEConvolutionLayer(std::shared_ptr<IMemoryManager> memory_ma
}
void NEConvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info,
- const Size2D &dilation)
+ const Size2D &dilation, const ActivationLayerInfo &act_info)
{
// Perform validate step
ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
- ARM_COMPUTE_ERROR_THROW_ON(NEConvolutionLayer::validate(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info, weights_info, dilation));
+ ARM_COMPUTE_ERROR_THROW_ON(NEConvolutionLayer::validate(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info, weights_info, dilation, act_info));
switch(NEConvolutionLayer::get_convolution_method(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info,
- weights_info, dilation))
+ weights_info, dilation, act_info))
{
case ConvolutionMethod::WINOGRAD:
{
auto f = arm_compute::support::cpp14::make_unique<NEWinogradLayer>(_memory_manager);
- f->configure(input, weights, biases, output, conv_info);
+ f->configure(input, weights, biases, output, conv_info, act_info);
_function = std::move(f);
break;
}
case ConvolutionMethod::GEMM:
{
auto f = arm_compute::support::cpp14::make_unique<NEGEMMConvolutionLayer>(_memory_manager);
- f->configure(input, weights, biases, output, conv_info, weights_info, dilation);
+ f->configure(input, weights, biases, output, conv_info, weights_info, dilation, act_info);
_function = std::move(f);
break;
}
case ConvolutionMethod::DIRECT:
{
auto f = arm_compute::support::cpp14::make_unique<NEDirectConvolutionLayer>(_memory_manager);
- f->configure(input, weights, biases, output, conv_info);
+ f->configure(input, weights, biases, output, conv_info, act_info);
_function = std::move(f);
break;
}
@@ -78,9 +78,9 @@ void NEConvolutionLayer::configure(ITensor *input, const ITensor *weights, const
}
Status NEConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- const WeightsInfo &weights_info, const Size2D &dilation)
+ const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info)
{
- switch(NEConvolutionLayer::get_convolution_method(input, weights, biases, output, conv_info, weights_info, dilation))
+ switch(NEConvolutionLayer::get_convolution_method(input, weights, biases, output, conv_info, weights_info, dilation, act_info))
{
case ConvolutionMethod::WINOGRAD:
//Validate Winograd
@@ -88,11 +88,11 @@ Status NEConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo
break;
case ConvolutionMethod::GEMM:
//Validate Gemm-based Convolution
- NEGEMMConvolutionLayer::validate(input, weights, biases, output, conv_info, weights_info, dilation);
+ NEGEMMConvolutionLayer::validate(input, weights, biases, output, conv_info, weights_info, dilation, act_info);
break;
case ConvolutionMethod::DIRECT:
//Validate Gemm-based Convolution
- NEDirectConvolutionLayer::validate(input, weights, biases, output, conv_info);
+ NEDirectConvolutionLayer::validate(input, weights, biases, output, conv_info, act_info);
default:
ARM_COMPUTE_ERROR("Not supported.");
break;
@@ -102,10 +102,12 @@ Status NEConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo
}
ConvolutionMethod NEConvolutionLayer::get_convolution_method(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- const WeightsInfo &weights_info, const Size2D &dilation)
+ const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info)
{
ARM_COMPUTE_UNUSED(output);
ARM_COMPUTE_UNUSED(weights_info);
+ ARM_COMPUTE_UNUSED(act_info);
+
if((input->data_type() == DataType::F32) && (weights->dimension(0) == 3) && (weights->dimension(1) == 3) && (weights->num_dimensions() <= 4) && (conv_info.stride().first == 1)
&& (conv_info.stride().second == 1) && (biases != nullptr) && (dilation == Size2D(1U, 1U)))
{
diff --git a/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp
index c26c99a0f8..00776d7cf6 100644
--- a/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -34,11 +34,12 @@
using namespace arm_compute;
NEDirectConvolutionLayer::NEDirectConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _output_stage_kernel(), _conv_kernel(), _input_border_handler(), _accumulator(), _has_bias(false), _is_fixed_point(false)
+ : _memory_group(std::move(memory_manager)), _output_stage_kernel(), _conv_kernel(), _input_border_handler(), _activationlayer_function(), _accumulator(), _has_bias(false), _is_fixed_point(false),
+ _is_activationlayer_enabled(false)
{
}
-void NEDirectConvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output, const PadStrideInfo &conv_info)
+void NEDirectConvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info)
{
// Free accumulator
if(_accumulator.buffer() != nullptr)
@@ -73,9 +74,17 @@ void NEDirectConvolutionLayer::configure(ITensor *input, const ITensor *weights,
// Add zero padding XY
_input_border_handler.configure(input, _conv_kernel.border_size(), BorderMode::CONSTANT, PixelValue(static_cast<float>(0.f)));
+
+ //Configure Activation Layer
+ _is_activationlayer_enabled = act_info.enabled();
+ if(_is_activationlayer_enabled)
+ {
+ _activationlayer_function.configure(output, nullptr, act_info);
+ }
}
-Status NEDirectConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *output, const PadStrideInfo &conv_info)
+Status NEDirectConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *output, const PadStrideInfo &conv_info,
+ const ActivationLayerInfo &act_info)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
@@ -101,6 +110,11 @@ Status NEDirectConvolutionLayer::validate(const ITensorInfo *input, const ITenso
// Validate bias kernel
ARM_COMPUTE_RETURN_ON_ERROR(NEDirectConvolutionLayerOutputStageKernel::validate(&accumulator, bias, output));
+ if(act_info.enabled())
+ {
+ ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(output, nullptr, act_info));
+ }
+
return Status{};
}
@@ -115,5 +129,10 @@ void NEDirectConvolutionLayer::run()
{
NEScheduler::get().schedule(&_output_stage_kernel, Window::DimY);
}
+
+ if(_is_activationlayer_enabled)
+ {
+ _activationlayer_function.run();
+ }
_memory_group.release();
}
diff --git a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
index cdbd32373a..c339947633 100644
--- a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
@@ -165,10 +165,11 @@ TensorShape get_reshaped_weights_shape_conv(const ITensorInfo *weights, bool app
}
}
-Status validate_and_initialize_values(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const PadStrideInfo &conv_info, const WeightsInfo &weights_info, DataType &dt,
+Status validate_and_initialize_values(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const PadStrideInfo &conv_info, const WeightsInfo &weights_info,
+ const ActivationLayerInfo &act_info, DataType &dt,
bool &append_bias,
bool &are_weights_reshaped, unsigned int &kernel_width, unsigned int &kernel_height,
- bool &is_fully_connected_convolution, bool &is_interleaved, bool &is_quantized,
+ bool &is_fully_connected_convolution, bool &is_interleaved, bool &is_quantized, bool &is_activationlayer_enabled,
unsigned int &mat_weights_cols, unsigned int &mat_weights_rows,
unsigned int &conv_w, unsigned int &conv_h, const Size2D &dilation)
{
@@ -210,6 +211,7 @@ Status validate_and_initialize_values(const ITensorInfo *input, const ITensorInf
// Check if its a "fully connected" convolution
is_fully_connected_convolution = ((conv_w == 1) && (conv_h == 1));
is_interleaved = (!is_fully_connected_convolution && !is_quantized);
+ is_activationlayer_enabled = act_info.enabled();
return Status{};
}
@@ -217,8 +219,8 @@ Status validate_and_initialize_values(const ITensorInfo *input, const ITensorInf
NEGEMMConvolutionLayer::NEGEMMConvolutionLayer(const std::shared_ptr<IMemoryManager> &memory_manager)
: _asm_glue(), _memory_group(memory_manager), _input_im2col_kernel(), _input_interleave_kernel(), _reshape_weights(), _mm_kernel(), _mm_gemmlowp(memory_manager), _gemmlowp_output_stage(),
- _output_col2im_kernel(), _original_weights(nullptr), _input_im2col_reshaped(), _input_interleaved_reshaped(), _weights_reshaped(), _gemm_output(), _tmp_output(), _workspace(), _append_bias(false),
- _is_fully_connected_convolution(false), _are_weights_reshaped(false), _is_quantized(false), _is_interleaved(false)
+ _output_col2im_kernel(), _activationlayer_function(), _original_weights(nullptr), _input_im2col_reshaped(), _input_interleaved_reshaped(), _weights_reshaped(), _gemm_output(), _tmp_output(),
+ _workspace(), _append_bias(false), _is_fully_connected_convolution(false), _are_weights_reshaped(false), _is_quantized(false), _is_interleaved(false), _is_activationlayer_enabled(false)
{
}
@@ -247,7 +249,7 @@ void NEGEMMConvolutionLayer::configure_mm(const ITensor *input, const ITensor *w
}
void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info,
- const Size2D &dilation)
+ const Size2D &dilation, const ActivationLayerInfo &act_info)
{
// Perform validate step
ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
@@ -260,9 +262,10 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig
unsigned int conv_w = 0;
unsigned int conv_h = 0;
- Status status = validate_and_initialize_values(input->info(), weights->info(), (biases == nullptr) ? nullptr : biases->info(), conv_info, weights_info, dt, _append_bias, _are_weights_reshaped,
+ Status status = validate_and_initialize_values(input->info(), weights->info(), (biases == nullptr) ? nullptr : biases->info(), conv_info, weights_info, act_info, dt, _append_bias,
+ _are_weights_reshaped,
kernel_width, kernel_height,
- _is_fully_connected_convolution, _is_interleaved, _is_quantized,
+ _is_fully_connected_convolution, _is_interleaved, _is_quantized, _is_activationlayer_enabled,
mat_weights_cols, mat_weights_rows, conv_w, conv_h, dilation);
ARM_COMPUTE_ERROR_THROW_ON(status);
@@ -420,10 +423,16 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig
{
_weights_reshaped.allocator()->allocate();
}
+
+ //Configure Activation Layer
+ if(_is_activationlayer_enabled)
+ {
+ _activationlayer_function.configure(output, nullptr, act_info);
+ }
}
Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- const WeightsInfo &weights_info, const Size2D &dilation)
+ const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info)
{
ARM_COMPUTE_UNUSED(output);
@@ -433,6 +442,7 @@ Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
bool is_fully_connected_convolution{};
bool is_interleaved{};
bool is_quantized{};
+ bool is_activationlayer_enabled{};
unsigned int kernel_width = 0;
unsigned int kernel_height = 0;
unsigned int mat_weights_cols = 0;
@@ -440,8 +450,8 @@ Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
unsigned int conv_w = 0;
unsigned int conv_h = 0;
- Status status = validate_and_initialize_values(input, weights, biases, conv_info, weights_info, dt, append_bias, are_weights_reshaped, kernel_width, kernel_height,
- is_fully_connected_convolution, is_interleaved, is_quantized, mat_weights_cols, mat_weights_rows,
+ Status status = validate_and_initialize_values(input, weights, biases, conv_info, weights_info, act_info, dt, append_bias, are_weights_reshaped, kernel_width, kernel_height,
+ is_fully_connected_convolution, is_interleaved, is_quantized, is_activationlayer_enabled, mat_weights_cols, mat_weights_rows,
conv_w, conv_h, dilation);
const Size2D kernel_weights = Size2D(kernel_width, kernel_height);
@@ -536,6 +546,15 @@ Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMMatrixMultiplyKernel::validate(&im2_col_info, weights, &gemm_output_info, 1.f, is_interleaved, GEMMReshapeInfo()));
}
+ ARM_COMPUTE_RETURN_ON_ERROR(NECol2ImKernel::validate(&gemm_output_info, output, Size2D(conv_w, conv_h)));
+
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG((output->dimension(0) != conv_w) || (output->dimension(1) != conv_h), "Output shape does not match the expected one");
+
+ if(act_info.enabled())
+ {
+ ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(output, nullptr, act_info));
+ }
+
return Status{};
}
@@ -591,6 +610,11 @@ void NEGEMMConvolutionLayer::run()
// Reshape output matrix
NEScheduler::get().schedule(&_output_col2im_kernel, Window::DimY);
+ if(_is_activationlayer_enabled)
+ {
+ _activationlayer_function.run();
+ }
+
_memory_group.release();
}
} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEWinogradLayer.cpp b/src/runtime/NEON/functions/NEWinogradLayer.cpp
index 0a344f0cae..f82845c7ad 100644
--- a/src/runtime/NEON/functions/NEWinogradLayer.cpp
+++ b/src/runtime/NEON/functions/NEWinogradLayer.cpp
@@ -75,13 +75,13 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights,
} //namespace
NEWinogradLayer::NEWinogradLayer(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _batched_gemm_kernel(nullptr), _transform_input_kernel(nullptr), _transform_output_kernel(nullptr), _transform_weights_kernel(nullptr), _permute_input(),
- _permute_weights(), _permute_output(), _input_workspace(), _output_workspace(), _kernel_storage(), _input_nhwc(), _output_nhwc(), _weights_hwio(), _input(), _weights(), _output(),
- _reshaped_kernel(false)
+ : _memory_group(std::move(memory_manager)), _batched_gemm_kernel(nullptr), _transform_input_kernel(nullptr), _transform_output_kernel(nullptr), _transform_weights_kernel(nullptr),
+ _activationlayer_function(), _permute_input(), _permute_weights(), _permute_output(), _input_workspace(), _output_workspace(), _kernel_storage(), _input_nhwc(), _output_nhwc(), _weights_hwio(),
+ _input(), _weights(), _output(), _reshaped_kernel(false), _is_activationlayer_enabled(false)
{
} /* arm_compute */
-void NEWinogradLayer::configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info)
+void NEWinogradLayer::configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, biases, output);
ARM_COMPUTE_UNUSED(conv_info);
@@ -217,6 +217,13 @@ void NEWinogradLayer::configure(const ITensor *input, const ITensor *weights, co
_transform_weights_kernel = std::move(transform_weights_kernel);
_transform_output_kernel = std::move(transform_output_kernel);
_batched_gemm_kernel = std::move(batched_gemm_kernel);
+
+ //Configure Activation Layer
+ _is_activationlayer_enabled = act_info.enabled();
+ if(_is_activationlayer_enabled)
+ {
+ _activationlayer_function.configure(output, nullptr, act_info);
+ }
}
void NEWinogradLayer::run()
@@ -242,6 +249,12 @@ void NEWinogradLayer::run()
// Reorder the convoluted output to ACL's ordering NCHW
_permute_output.run();
+
+ if(_is_activationlayer_enabled)
+ {
+ _activationlayer_function.run();
+ }
+
_memory_group.release();
}
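
For reference, the wiring added to each of the ten functions above follows the same shape. A condensed, hypothetical sketch of that pattern (the class name is a placeholder and the convolution-specific details are omitted):

// Sketch only: "SomeConvolutionLayer" stands in for any of the functions touched by this patch.
void SomeConvolutionLayer::configure(/* ... */ ICLTensor *output, const ActivationLayerInfo &act_info)
{
    // ... configure the convolution kernels as before ...

    // Remember whether an activation was requested and set it up in place on the output.
    _is_activationlayer_enabled = act_info.enabled();
    if(_is_activationlayer_enabled)
    {
        _activationlayer_function.configure(output, nullptr, act_info);
    }
}

void SomeConvolutionLayer::run()
{
    // ... enqueue/schedule the convolution kernels as before ...

    // Apply the activation last, so it sees the finished convolution output.
    if(_is_activationlayer_enabled)
    {
        _activationlayer_function.run();
    }
}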