From 08346e9b9a7dadd2f0765aea64e656902d843e8a Mon Sep 17 00:00:00 2001
From: Georgios Pinitas <georgios.pinitas@arm.com>
Date: Tue, 16 Oct 2018 19:10:46 +0100
Subject: COMPMID-1451: Fuse RELU, LU_BOUNDED_RELU with requantization in
 NEGEMMConvolutionLayer.

Change-Id: Iea5f2c5bcac8051c4c7655a6eabb2c43772eb31f
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/154104
Tested-by: bsgcomp <bsgcomp@arm.com>
Reviewed-by: Michele DiGiorgio <michele.digiorgio@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
---
 arm_compute/graph/backends/FunctionHelpers.h       | 18 ++++++++--------
 .../graph/nodes/BatchNormalizationLayerNode.h      |  3 +++
 arm_compute/graph/nodes/ConvolutionLayerNode.h     | 24 +++++++++++++++++-----
 3 files changed, 32 insertions(+), 13 deletions(-)

(limited to 'arm_compute/graph')
diff --git a/arm_compute/graph/backends/FunctionHelpers.h b/arm_compute/graph/backends/FunctionHelpers.h
index 56f4173fa0..a1cadcbf4c 100644
--- a/arm_compute/graph/backends/FunctionHelpers.h
+++ b/arm_compute/graph/backends/FunctionHelpers.h
@@ -266,10 +266,11 @@ std::unique_ptr<IFunction> create_convolution_layer(ConvolutionLayerNode &node,
         biases->info()->set_data_type(DataType::S32);
     }
 
-    const PadStrideInfo     conv_info      = node.convolution_info();
-    const unsigned int      num_groups     = node.num_groups();
-    const ConvolutionMethod conv_algorithm = node.convolution_method();
-    const bool              fast_math      = node.fast_math_hint() == FastMathHint::Enabled;
+    const PadStrideInfo       conv_info      = node.convolution_info();
+    const unsigned int        num_groups     = node.num_groups();
+    const ConvolutionMethod   conv_algorithm = node.convolution_method();
+    const bool                fast_math      = node.fast_math_hint() == FastMathHint::Enabled;
+    const ActivationLayerInfo fused_act      = node.fused_activation();
 
     // Create and configure function (we assume that functions have been validated before creation)
     std::shared_ptr<IMemoryManager> mm = get_memory_manager(ctx, TargetInfo::TargetType);
@@ -281,28 +282,28 @@ std::unique_ptr<IFunction> create_convolution_layer(ConvolutionLayerNode &node,
         ARM_COMPUTE_ERROR_ON_MSG(num_groups != 1, "WinogradConvolutionLayer does not support grouping!");
         std::tie(func, func_name) = create_named_memory_managed_function<typename ConvolutionLayerFunctions::WinogradConvolutionLayer>(
                                         std::string("WinogradConvolutionLayer"), mm,
-                                        input, weights, biases, output, conv_info, ActivationLayerInfo(), fast_math);
+                                        input, weights, biases, output, conv_info, fused_act, fast_math);
     }
     else if(conv_algorithm == ConvolutionMethod::Direct)
     {
         ARM_COMPUTE_ERROR_ON_MSG(num_groups != 1, "DirectConvolutionLayer does not support grouping!");
         std::tie(func, func_name) = create_named_function<typename ConvolutionLayerFunctions::DirectConvolutionLayer>(
                                         std::string("DirectConvolutionLayer"),
-                                        input, weights, biases, output, conv_info);
+                                        input, weights, biases, output, conv_info, fused_act);
     }
     else if(conv_algorithm == ConvolutionMethod::GEMM)
     {
         std::tie(func, func_name) = create_named_memory_managed_function<typename ConvolutionLayerFunctions::GEMMConvolutionLayer>(
                                         std::string("GEMMConvolutionLayer"), mm,
                                         input, weights, biases, output, conv_info,
-                                        WeightsInfo(), Size2D(1U, 1U), ActivationLayerInfo(), num_groups);
+                                        WeightsInfo(), Size2D(1U, 1U), fused_act, num_groups);
     }
     else
     {
         std::tie(func, func_name) = create_named_memory_managed_function<typename ConvolutionLayerFunctions::GenericConvolutionLayer>(
                                         std::string("GenericConvolutionLayer"), mm,
                                         input, weights, biases, output, conv_info,
-                                        WeightsInfo(), Size2D(1U, 1U), ActivationLayerInfo(), fast_math, num_groups);
+                                        WeightsInfo(), Size2D(1U, 1U), fused_act, fast_math, num_groups);
     }
 
     // Log info
@@ -321,6 +322,7 @@ std::unique_ptr<IFunction> create_convolution_layer(ConvolutionLayerNode &node,
                                << " Input shape: " << input->info()->tensor_shape()
                                << " Weights shape: " << weights->info()->tensor_shape()
                                << " Output shape: " << output->info()->tensor_shape()
+                               << (fused_act.enabled() ? " " + to_string(fused_act.activation()) : "")
                                << std::endl);
     return func;
 }
diff --git a/arm_compute/graph/nodes/BatchNormalizationLayerNode.h b/arm_compute/graph/nodes/BatchNormalizationLayerNode.h
index a364d1c5ae..b2284782bd 100644
--- a/arm_compute/graph/nodes/BatchNormalizationLayerNode.h
+++ b/arm_compute/graph/nodes/BatchNormalizationLayerNode.h
@@ -62,6 +62,9 @@ public:
     TensorDescriptor configure_output(size_t idx) const override;
     void accept(INodeVisitor &v) override;
 
+public:
+    static constexpr NodeType node_type = NodeType::BatchNormalizationLayer;
+
 private:
     float               _epsilon;
     ActivationLayerInfo _fused_activation;
diff --git a/arm_compute/graph/nodes/ConvolutionLayerNode.h b/arm_compute/graph/nodes/ConvolutionLayerNode.h
index 0698ac1360..eecb927ab2 100644
--- a/arm_compute/graph/nodes/ConvolutionLayerNode.h
+++ b/arm_compute/graph/nodes/ConvolutionLayerNode.h
@@ -80,6 +80,16 @@ public:
      * @return Number of groups in convolution
      */
     unsigned int num_groups() const;
+    /** Returns fused activation
+     *
+     * @return Fused activation
+     */
+    ActivationLayerInfo fused_activation() const;
+    /** Sets fused activation
+     *
+     * @param[in] fused_activation Fused activation to set
+     */
+    void set_fused_activation(ActivationLayerInfo fused_activation);
     /** Computes convolution output descriptor
      *
      * @param[in] input_descriptor   Input descriptor
@@ -98,12 +108,16 @@ public:
     TensorDescriptor configure_output(size_t idx) const override;
     void accept(INodeVisitor &v) override;
 
+public:
+    static constexpr NodeType node_type = NodeType::ConvolutionLayer;
+
 private:
-    PadStrideInfo     _info;
-    unsigned int      _num_groups;
-    ConvolutionMethod _method;
-    FastMathHint      _fast_math_hint;
-    QuantizationInfo  _out_quant_info;
+    PadStrideInfo       _info;
+    unsigned int        _num_groups;
+    ConvolutionMethod   _method;
+    FastMathHint        _fast_math_hint;
+    QuantizationInfo    _out_quant_info;
+    ActivationLayerInfo _fused_activation;
 };
 } // namespace graph
 } // namespace arm_compute
-- 
cgit v1.2.1