COMPMID-1451: Fuse activation in DepthwiseConvolution.

Change-Id: Id964d9068e18aaa13ab8adcbf7a9375b034ea6c3 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/154651 Tested-by: bsgcomp <bsgcomp@arm.com> Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
author: Georgios Pinitas <georgios.pinitas@arm.com> 2018-10-22 16:17:20 +0100
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:55:45 +0000
commit: 60e98253f1e3df1723e7b8f4c996b544aa7c7205 (patch)
tree: 45ca11d6fb0a16974fc8681bc7161a6ad2b1af2e /arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
parent: c04a0e8f93c620d05444251e1ae55dcf8c660a1b (diff)
download: ComputeLibrary-60e98253f1e3df1723e7b8f4c996b544aa7c7205.tar.gz
1 files changed, 17 insertions, 4 deletions
diff --git a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
index b7398f628a..288d5136d2 100644
--- a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
@@ -35,6 +35,7 @@
 #include "arm_compute/runtime/IFunction.h"
 #include "arm_compute/runtime/IMemoryManager.h"
 #include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
 #include "arm_compute/runtime/NEON/functions/NEPermute.h"
 #include "arm_compute/runtime/Tensor.h"
 
@@ -62,8 +63,10 @@ public:
      * @param[out]     output           Destination tensor. Data type supported: same as @p input.
      * @param[in]      conv_info        Padding and stride information to use for the convolution.
      * @param[in]      depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+     * @param[in]      act_info         (Optional) Activation layer information in case of a fused activation.
      */
-    void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1);
+    void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
+                   unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo());
 
     /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayer3x3
      *
@@ -74,10 +77,12 @@ public:
      * @param[in] output           Destination tensor. Data type supported: same as @p input.
      * @param[in] conv_info        Padding and stride information to use for the convolution.
      * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+     * @param[in] act_info         (Optional) Activation layer information in case of a fused activation.
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
+                           unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo());
 
     // Inherited methods overriden:
     void run() override;
@@ -89,6 +94,7 @@ private:
     NEPermute                                 _permute_input;
     NEPermute                                 _permute_weights;
     NEPermute                                 _permute_output;
+    NEActivationLayer                         _activationlayer_function;
     Tensor                                    _accumulator;
     Tensor                                    _permuted_input;
     Tensor                                    _permuted_weights;
@@ -100,6 +106,7 @@ private:
     bool                                      _is_nchw;
     bool                                      _is_first_run;
     bool                                      _permute;
+    bool                                      _is_activationlayer_enabled;
 };
 
 /** Basic function to execute a generic depthwise convolution. This function calls the following NEON kernels:
@@ -132,8 +139,10 @@ public:
      *                                  Data type supported: Same as @p input, S32 when input is QASYMM8.
      * @param[in]      conv_info        Padding and stride information to use for the convolution.
      * @param[in]      depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+     * @param[in]      act_info         (Optional) Activation layer information in case of a fused activation.
      */
-    void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1);
+    void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
+                   unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo());
 
     /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayer
      *
@@ -144,10 +153,12 @@ public:
      *                             Data type supported: Same as @p input, S32 when input is QASYMM8.
      * @param[in] conv_info        Padding and stride information to use for the convolution.
      * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+     * @param[in] act_info         (Optional) Activation layer information in case of a fused activation.
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
+                           unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo());
 
     // Inherited methods overriden:
     void run() override;
@@ -164,6 +175,7 @@ private:
     NEPermute                                 _permute_input;
     NEPermute                                 _permute_weights;
     NEPermute                                 _permute_output;
+    NEActivationLayer                         _activationlayer_function;
     Tensor                                    _input_reshaped;
     Tensor                                    _weights_reshaped;
     Tensor                                    _v2mm_output;
@@ -174,6 +186,7 @@ private:
     bool                                      _is_prepared;
     bool                                      _is_quantized;
     bool                                      _is_nhwc;
+    bool                                      _is_activationlayer_enabled;
     const ITensor                            *_original_weights;
 };
 }
author	Georgios Pinitas <georgios.pinitas@arm.com>	2018-10-22 16:17:20 +0100
committer	Anthony Barbier <anthony.barbier@arm.com>	2018-11-02 16:55:45 +0000
commit	60e98253f1e3df1723e7b8f4c996b544aa7c7205 (patch)
tree	45ca11d6fb0a16974fc8681bc7161a6ad2b1af2e /arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
parent	c04a0e8f93c620d05444251e1ae55dcf8c660a1b (diff)
download	ComputeLibrary-60e98253f1e3df1723e7b8f4c996b544aa7c7205.tar.gz