author    SiCongLi <sicong.li@arm.com>   2021-10-18 09:38:33 +0100
committer SiCong Li <sicong.li@arm.com>  2021-11-01 15:18:12 +0000
commit    579ca84bd8ef5a91eded65c4dc5e0b9f7de8bef1 (patch)
tree      0c2ceba8ad5b2c944bce00055fe1ec7ac84b49f3 /arm_compute
parent    48717a3d38fef8d316cd4b9fd9a3bc1a43db736b (diff)
download  ComputeLibrary-579ca84bd8ef5a91eded65c4dc5e0b9f7de8bef1.tar.gz
Add PostOp support to GEMM and CLGEMM operators and functions Part 2
* Implement PostOp interface changes
* Remove spaces around "=" in TypePrinter

Partially resolves COMPMID-4435

Signed-off-by: SiCongLi <sicong.li@arm.com>
Change-Id: If1e2280554030a0f635e73339a2e86987f6dc41b
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6484
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Sheri Zhang <sheri.zhang@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'arm_compute')
-rw-r--r--  arm_compute/core/Types.h                                    52
-rw-r--r--  arm_compute/runtime/CL/functions/CLConvolutionLayer.h      11
-rw-r--r--  arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h  13
-rw-r--r--  arm_compute/runtime/FunctionDescriptors.h                   24
4 files changed, 68 insertions(+), 32 deletions(-)
diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h
index bfe85ea937..bff672c361 100644
--- a/arm_compute/core/Types.h
+++ b/arm_compute/core/Types.h
@@ -30,6 +30,7 @@
#include "arm_compute/core/Size3D.h"
#include "arm_compute/core/Strides.h"
#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/experimental/IPostOp.h"
#include "arm_compute/core/utils/misc/Macros.h"
#include "support/Bfloat16.h"
#include "support/Half.h"
@@ -1963,6 +1964,7 @@ struct GEMMRHSMatrixInfo
bool export_to_cl_image{ false }; /**< True if the reshaped rhs has to be exported to cl_image. n0 must be equal to 4 */
};
+class ITensorInfo;
/** GEMM information class. This class stores the necessary information to compute GEMM functions
*
* This object also contains the information about how matrix A and matrix B have been reshaped
@@ -1984,7 +1986,8 @@ public:
_fp_mixed_precision(false),
_broadcast_bias(false),
_pretranspose_B(true),
- _activation_info()
+ _activation_info(),
+ _post_ops()
{
}
/** Constructor
@@ -2002,10 +2005,11 @@ public:
* @param[in] fast_math (Optional) Use a data type of shorter width to improve performance
* @param[in] broadcast_bias (Optional) Broadcast the shape of the bias tensor from a vector to a matrix.
* @param[in] activation_info (Optional) Activation to apply after the matrix multiplication
+ * @param[in] post_ops (Optional) A sequence of post operations that are performed after the main operation.
*/
GEMMInfo(bool is_a_reshaped, bool is_b_reshaped, bool reshape_b_only_on_first_run, int depth_output_gemm3d = 0, bool reinterpret_input_as_3d = false, bool retain_internal_weights = false,
GEMMLowpOutputStageInfo gemmlowp_output_stage = GEMMLowpOutputStageInfo(), bool fp_mixed_precision = false, bool fast_math = false, bool broadcast_bias = false,
- const ActivationLayerInfo &activation_info = ActivationLayerInfo()) noexcept
+ const ActivationLayerInfo &activation_info = ActivationLayerInfo(), const experimental::PostOpList<ITensorInfo *> &post_ops = experimental::PostOpList<ITensorInfo *>()) noexcept
: _is_a_reshaped(is_a_reshaped),
_is_b_reshaped(is_b_reshaped),
_reshape_b_only_on_first_run(reshape_b_only_on_first_run),
@@ -2017,7 +2021,8 @@ public:
_fp_mixed_precision(fp_mixed_precision),
_broadcast_bias(broadcast_bias),
_pretranspose_B(reshape_b_only_on_first_run),
- _activation_info(activation_info)
+ _activation_info(activation_info),
+ _post_ops(post_ops)
{
}
/** Flag which specifies if the matrix A has been reshaped
@@ -2142,20 +2147,37 @@ public:
{
_activation_info = activation_info;
}
+ /** Post operations to apply after the matrix multiplication
+ *
+ * @return experimental::PostOpList object
+ */
+ const experimental::PostOpList<ITensorInfo *> &post_ops() const
+ {
+ return _post_ops;
+ }
+ /** Set post ops
+ *
+ * @param[in] post_ops experimental::PostOpList object to set
+ */
+ void set_post_ops(const experimental::PostOpList<ITensorInfo *> &post_ops)
+ {
+ _post_ops = post_ops;
+ }
private:
- bool _is_a_reshaped;
- bool _is_b_reshaped;
- bool _reshape_b_only_on_first_run;
- int _depth_output_gemm3d;
- bool _reinterpret_input_as_3d;
- bool _retain_internal_weights;
- GEMMLowpOutputStageInfo _gemmlowp_output_stage;
- bool _fast_math;
- bool _fp_mixed_precision;
- bool _broadcast_bias;
- bool _pretranspose_B;
- ActivationLayerInfo _activation_info;
+ bool _is_a_reshaped;
+ bool _is_b_reshaped;
+ bool _reshape_b_only_on_first_run;
+ int _depth_output_gemm3d;
+ bool _reinterpret_input_as_3d;
+ bool _retain_internal_weights;
+ GEMMLowpOutputStageInfo _gemmlowp_output_stage;
+ bool _fast_math;
+ bool _fp_mixed_precision;
+ bool _broadcast_bias;
+ bool _pretranspose_B;
+ ActivationLayerInfo _activation_info;
+ experimental::PostOpList<ITensorInfo *> _post_ops;
};
/** Winograd information */
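Usage sketch (illustrative, not part of this patch): a minimal example of the GEMMInfo post-op accessors added above. The empty PostOpList and the function name are assumptions for illustration; concrete post-op types live in the experimental IPostOp/PostOps headers, which are not shown in this diff.

    #include "arm_compute/core/Types.h"
    #include "arm_compute/core/experimental/IPostOp.h"

    using namespace arm_compute;

    void gemm_info_post_ops_example()
    {
        // Post ops attached to GEMMInfo are keyed on ITensorInfo*.
        experimental::PostOpList<ITensorInfo *> post_ops{};

        GEMMInfo gemm_info{};
        gemm_info.set_post_ops(post_ops);            // attach the (here empty) post-op chain
        const auto &attached = gemm_info.post_ops(); // and read it back
        (void)attached;
    }
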
diff --git a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
index 0f092bdbc2..38a4019609 100644
--- a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
@@ -26,6 +26,7 @@
#include "arm_compute/core/CL/CLCompileContext.h"
#include "arm_compute/core/Types.h"
+#include "arm_compute/core/experimental/IPostOp.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
@@ -118,9 +119,11 @@ public:
* @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation
* available which may introduce a drop of accuracy as well. Default is false
* @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout
+ * @param[in] post_ops (Optional) A sequence of post operations that are performed after the main operation.
*/
void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo(),
- const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false, unsigned int num_groups = 1);
+ const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false, unsigned int num_groups = 1,
+ const experimental::PostOpList<ICLTensor *> &post_ops = experimental::PostOpList<ICLTensor *> {});
/** Set the input and output tensors.
*
* @param[in] compile_context The compile context to be used.
@@ -140,10 +143,11 @@ public:
* @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation
* available which may introduce a drop of accuracy as well. Default is false
* @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout
+ * @param[in] post_ops (Optional) A sequence of post operations that are performed after the main operation.
*/
void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false,
- unsigned int num_groups = 1);
+ unsigned int num_groups = 1, const experimental::PostOpList<ICLTensor *> &post_ops = experimental::PostOpList<ICLTensor *> {});
/** Static function to check if given info will lead to a valid configuration of @ref CLConvolutionLayer
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
@@ -162,12 +166,13 @@ public:
* @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation
* available which may introduce a drop of accuracy as well. Default is false
* @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout
+ * @param[in] post_ops (Optional) A sequence of post operations that are performed after the main operation.
*
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false,
- unsigned int num_groups = 1);
+ unsigned int num_groups = 1, const experimental::PostOpList<ITensorInfo *> &post_ops = experimental::PostOpList<ITensorInfo *> {});
/** Static function to check if given info will return the convolution called by @ref CLConvolutionLayer
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
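Usage sketch (illustrative, not part of this patch): passing the new trailing post_ops argument to CLConvolutionLayer::configure(). Tensor allocation and CL runtime initialisation (e.g. CLScheduler::get().default_init()) are assumed to happen elsewhere; the function name and argument values are illustrative defaults only.

    #include "arm_compute/core/experimental/IPostOp.h"
    #include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h"

    using namespace arm_compute;

    void conv_configure_example(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output)
    {
        // The function-level API takes a post-op list keyed on ICLTensor*.
        experimental::PostOpList<ICLTensor *> post_ops{};

        CLConvolutionLayer conv;
        conv.configure(input, weights, biases, output,
                       PadStrideInfo(1, 1, 0, 0), // conv_info
                       WeightsInfo(),
                       Size2D(1U, 1U),            // dilation
                       ActivationLayerInfo(),
                       false,                     // enable_fast_math
                       1,                         // num_groups
                       post_ops);                 // new optional trailing parameter
    }
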
diff --git a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
index d7a4e7f944..9918a61cab 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
@@ -24,6 +24,7 @@
#ifndef ARM_COMPUTE_CLGEMMCONVOLUTIONLAYER_H
#define ARM_COMPUTE_CLGEMMCONVOLUTIONLAYER_H
+#include "arm_compute/core/experimental/IPostOp.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTypes.h"
#include "arm_compute/runtime/IFunction.h"
@@ -93,9 +94,11 @@ public:
* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
* @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout
+ * @param[in] post_ops (Optional) A sequence of post operations that are performed after the main operation.
*/
void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo(),
- const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), unsigned int num_groups = 1);
+ const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), unsigned int num_groups = 1,
+ const experimental::PostOpList<ICLTensor *> &post_ops = experimental::PostOpList<ICLTensor *> {});
/** Set the input and output tensors.
*
* @param[in] compile_context The compile context to be used.
@@ -114,10 +117,12 @@ public:
* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
* @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout
+ * @param[in] post_ops (Optional) A sequence of post operations that are performed after the main operation.
*/
void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
const WeightsInfo &weights_info = WeightsInfo(),
- const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), unsigned int num_groups = 1);
+ const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), unsigned int num_groups = 1,
+ const experimental::PostOpList<ICLTensor *> &post_ops = experimental::PostOpList<ICLTensor *> {});
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMConvolutionLayer.
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
@@ -135,11 +140,13 @@ public:
* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
* @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout
+ * @param[in] post_ops (Optional) A sequence of post operations that are performed after the main operation.
*
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), unsigned int num_groups = 1);
+ const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), unsigned int num_groups = 1,
+ const experimental::PostOpList<ITensorInfo *> &post_ops = experimental::PostOpList<ITensorInfo *> {});
// Inherited methods overridden:
void run() override;
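Usage sketch (illustrative, not part of this patch): the static validate() overload takes the ITensorInfo* flavour of the post-op list, mirroring the GEMMInfo plumbing above. The function name and argument values are assumptions for illustration.

    #include "arm_compute/core/experimental/IPostOp.h"
    #include "arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h"

    using namespace arm_compute;

    Status gemm_conv_validate_example(const ITensorInfo *input, const ITensorInfo *weights,
                                      const ITensorInfo *biases, const ITensorInfo *output)
    {
        // validate() works on tensor metadata only, so the list is keyed on ITensorInfo*.
        experimental::PostOpList<ITensorInfo *> post_ops{};

        return CLGEMMConvolutionLayer::validate(input, weights, biases, output,
                                                PadStrideInfo(1, 1, 0, 0), // conv_info
                                                WeightsInfo(),
                                                Size2D(1U, 1U),            // dilation
                                                ActivationLayerInfo(),
                                                1,                         // num_groups
                                                post_ops);
    }
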
diff --git a/arm_compute/runtime/FunctionDescriptors.h b/arm_compute/runtime/FunctionDescriptors.h
index 07a8f6600e..face8a6fb4 100644
--- a/arm_compute/runtime/FunctionDescriptors.h
+++ b/arm_compute/runtime/FunctionDescriptors.h
@@ -57,20 +57,22 @@ struct Conv2dInfo
{
Conv2dInfo() = default;
- Conv2dInfo(const PadStrideInfo &conv_info,
- const Size2D &dilation,
- const ActivationLayerInfo &act_info,
- bool enable_fast_math,
- unsigned int num_groups)
- : conv_info(conv_info), dilation(dilation), act_info(act_info), enable_fast_math(enable_fast_math), num_groups(num_groups)
+ Conv2dInfo(const PadStrideInfo &conv_info,
+ const Size2D &dilation,
+ const ActivationLayerInfo &act_info,
+ bool enable_fast_math,
+ unsigned int num_groups,
+ const experimental::PostOpList<ITensorInfo *> &post_ops = experimental::PostOpList<ITensorInfo *> {})
+ : conv_info(conv_info), dilation(dilation), act_info(act_info), enable_fast_math(enable_fast_math), num_groups(num_groups), post_ops(post_ops)
{
}
- PadStrideInfo conv_info{};
- Size2D dilation{ 1U, 1U };
- ActivationLayerInfo act_info{};
- bool enable_fast_math{ false };
- unsigned int num_groups{ 1 };
+ PadStrideInfo conv_info{};
+ Size2D dilation{ 1U, 1U };
+ ActivationLayerInfo act_info{};
+ bool enable_fast_math{ false };
+ unsigned int num_groups{ 1 };
+ experimental::PostOpList<ITensorInfo *> post_ops{};
};
/** Descriptor used by the 3d Convolution function */
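Usage sketch (illustrative, not part of this patch): Conv2dInfo now carries the post-op list alongside its existing fields, and an empty list (the default) means no fused post ops. The function name and values below are illustrative.

    #include "arm_compute/core/experimental/IPostOp.h"
    #include "arm_compute/runtime/FunctionDescriptors.h"

    using namespace arm_compute;

    Conv2dInfo conv2d_info_example()
    {
        experimental::PostOpList<ITensorInfo *> post_ops{}; // empty list == no fused post ops

        return Conv2dInfo(PadStrideInfo(1, 1, 0, 0), // conv_info
                          Size2D(1U, 1U),            // dilation
                          ActivationLayerInfo(),
                          false,                     // enable_fast_math
                          1,                         // num_groups
                          post_ops);
    }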