diff options
author | SiCongLi <sicong.li@arm.com> | 2021-10-18 09:38:33 +0100 |
---|---|---|
committer | SiCong Li <sicong.li@arm.com> | 2021-11-01 15:18:12 +0000 |
commit | 579ca84bd8ef5a91eded65c4dc5e0b9f7de8bef1 (patch) | |
tree | 0c2ceba8ad5b2c944bce00055fe1ec7ac84b49f3 /arm_compute | |
parent | 48717a3d38fef8d316cd4b9fd9a3bc1a43db736b (diff) | |
download | ComputeLibrary-579ca84bd8ef5a91eded65c4dc5e0b9f7de8bef1.tar.gz |
Add PostOp support to GEMM and CLGEMM operators and functions Part 2

* Implement PostOp interface changes
* Remove spaces around "=" in TypePrinter

Partially resolves COMPMID-4435

Signed-off-by: SiCongLi <sicong.li@arm.com>
Change-Id: If1e2280554030a0f635e73339a2e86987f6dc41b
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6484
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Sheri Zhang <sheri.zhang@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'arm_compute')
-rw-r--r-- | arm_compute/core/Types.h | 52 | ||||
-rw-r--r-- | arm_compute/runtime/CL/functions/CLConvolutionLayer.h | 11 | ||||
-rw-r--r-- | arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h | 13 | ||||
-rw-r--r-- | arm_compute/runtime/FunctionDescriptors.h | 24 |
4 files changed, 68 insertions, 32 deletions
diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h index bfe85ea937..bff672c361 100644 --- a/arm_compute/core/Types.h +++ b/arm_compute/core/Types.h @@ -30,6 +30,7 @@ #include "arm_compute/core/Size3D.h" #include "arm_compute/core/Strides.h" #include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/experimental/IPostOp.h" #include "arm_compute/core/utils/misc/Macros.h" #include "support/Bfloat16.h" #include "support/Half.h" @@ -1963,6 +1964,7 @@ struct GEMMRHSMatrixInfo bool export_to_cl_image{ false }; /**< True if the reshaped rhs has to be exported to cl_image. n0 must be equal to 4 */ }; +class ITensorInfo; /** GEMM information class. This class stores the necessary information to compute GEMM functions * * This object also contains the information about how matrix A and matrix B have been reshaped @@ -1984,7 +1986,8 @@ public: _fp_mixed_precision(false), _broadcast_bias(false), _pretranspose_B(true), - _activation_info() + _activation_info(), + _post_ops() { } /** Constructor @@ -2002,10 +2005,11 @@ public: * @param[in] fast_math (Optional) Use a data type of shorter width to improve performance * @param[in] broadcast_bias (Optional) Broadcast the shape of the bias tensor from a vector to a matrix. * @param[in] activation_info (Optional) Activation to apply after the matrix multiplication + * @param[in] post_ops (Optional) A sequence of post operations that are performed after the main operation. 
*/ GEMMInfo(bool is_a_reshaped, bool is_b_reshaped, bool reshape_b_only_on_first_run, int depth_output_gemm3d = 0, bool reinterpret_input_as_3d = false, bool retain_internal_weights = false, GEMMLowpOutputStageInfo gemmlowp_output_stage = GEMMLowpOutputStageInfo(), bool fp_mixed_precision = false, bool fast_math = false, bool broadcast_bias = false, - const ActivationLayerInfo &activation_info = ActivationLayerInfo()) noexcept + const ActivationLayerInfo &activation_info = ActivationLayerInfo(), const experimental::PostOpList<ITensorInfo *> &post_ops = experimental::PostOpList<ITensorInfo *>()) noexcept : _is_a_reshaped(is_a_reshaped), _is_b_reshaped(is_b_reshaped), _reshape_b_only_on_first_run(reshape_b_only_on_first_run), @@ -2017,7 +2021,8 @@ public: _fp_mixed_precision(fp_mixed_precision), _broadcast_bias(broadcast_bias), _pretranspose_B(reshape_b_only_on_first_run), - _activation_info(activation_info) + _activation_info(activation_info), + _post_ops(post_ops) { } /** Flag which specifies if the matrix A has been reshaped @@ -2142,20 +2147,37 @@ public: { _activation_info = activation_info; } + /** Post operations to apply after the matrix multiplication + * + * @return experimental::PostOpList object + */ + const experimental::PostOpList<ITensorInfo *> &post_ops() const + { + return _post_ops; + } + /** Set post ops + * + * @param[in] post_ops experimental::PostOpList object to set + */ + void set_post_ops(const experimental::PostOpList<ITensorInfo *> &post_ops) + { + _post_ops = post_ops; + } private: - bool _is_a_reshaped; - bool _is_b_reshaped; - bool _reshape_b_only_on_first_run; - int _depth_output_gemm3d; - bool _reinterpret_input_as_3d; - bool _retain_internal_weights; - GEMMLowpOutputStageInfo _gemmlowp_output_stage; - bool _fast_math; - bool _fp_mixed_precision; - bool _broadcast_bias; - bool _pretranspose_B; - ActivationLayerInfo _activation_info; + bool _is_a_reshaped; + bool _is_b_reshaped; + bool _reshape_b_only_on_first_run; + int 
_depth_output_gemm3d; + bool _reinterpret_input_as_3d; + bool _retain_internal_weights; + GEMMLowpOutputStageInfo _gemmlowp_output_stage; + bool _fast_math; + bool _fp_mixed_precision; + bool _broadcast_bias; + bool _pretranspose_B; + ActivationLayerInfo _activation_info; + experimental::PostOpList<ITensorInfo *> _post_ops; }; /** Winograd information */ diff --git a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h index 0f092bdbc2..38a4019609 100644 --- a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h @@ -26,6 +26,7 @@ #include "arm_compute/core/CL/CLCompileContext.h" #include "arm_compute/core/Types.h" +#include "arm_compute/core/experimental/IPostOp.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" @@ -118,9 +119,11 @@ public: * @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation * available which may introduce a drop of accuracy as well. Default is false * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout + * @param[in] post_ops (Optional) A sequence of post operations that are performed after the main operation. 
*/ void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo(), - const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false, unsigned int num_groups = 1); + const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false, unsigned int num_groups = 1, + const experimental::PostOpList<ICLTensor *> &post_ops = experimental::PostOpList<ICLTensor *> {}); /** Set the input and output tensors. * * @param[in] compile_context The compile context to be used. @@ -140,10 +143,11 @@ public: * @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation * available which may introduce a drop of accuracy as well. Default is false * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout + * @param[in] post_ops (Optional) A sequence of post operations that are performed after the main operation. */ void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false, - unsigned int num_groups = 1); + unsigned int num_groups = 1, const experimental::PostOpList<ICLTensor *> &post_ops = experimental::PostOpList<ICLTensor *> {}); /** Static function to check if given info will lead to a valid configuration of @ref CLConvolutionLayer * * @param[in] input Source tensor. 
3 lower dimensions represent a single input [width, height, IFM], @@ -162,12 +166,13 @@ public: * @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation * available which may introduce a drop of accuracy as well. Default is false * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout + * @param[in] post_ops (Optional) A sequence of post operations that are performed after the main operation. * * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false, - unsigned int num_groups = 1); + unsigned int num_groups = 1, const experimental::PostOpList<ITensorInfo *> &post_ops = experimental::PostOpList<ITensorInfo *> {}); /** Static function to check if given info will return the convolution called by @ref CLConvolutionLayer * * @param[in] input Source tensor. 
3 lower dimensions represent a single input [width, height, IFM], diff --git a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h index d7a4e7f944..9918a61cab 100644 --- a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h @@ -24,6 +24,7 @@ #ifndef ARM_COMPUTE_CLGEMMCONVOLUTIONLAYER_H #define ARM_COMPUTE_CLGEMMCONVOLUTIONLAYER_H +#include "arm_compute/core/experimental/IPostOp.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/CLTypes.h" #include "arm_compute/runtime/IFunction.h" @@ -93,9 +94,11 @@ public: * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). * @param[in] act_info (Optional) Activation layer information in case of a fused activation. * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout + * @param[in] post_ops (Optional) A sequence of post operations that are performed after the main operation. */ void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo(), - const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), unsigned int num_groups = 1); + const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), unsigned int num_groups = 1, + const experimental::PostOpList<ICLTensor *> &post_ops = experimental::PostOpList<ICLTensor *> {}); /** Set the input and output tensors. * * @param[in] compile_context The compile context to be used. @@ -114,10 +117,12 @@ public: * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). * @param[in] act_info (Optional) Activation layer information in case of a fused activation. 
* @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout + * @param[in] post_ops (Optional) A sequence of post operations that are performed after the main operation. */ void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo(), - const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), unsigned int num_groups = 1); + const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), unsigned int num_groups = 1, + const experimental::PostOpList<ICLTensor *> &post_ops = experimental::PostOpList<ICLTensor *> {}); /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMConvolutionLayer. * * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], @@ -135,11 +140,13 @@ public: * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). * @param[in] act_info (Optional) Activation layer information in case of a fused activation. * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout + * @param[in] post_ops (Optional) A sequence of post operations that are performed after the main operation. 
* * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), unsigned int num_groups = 1); + const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), unsigned int num_groups = 1, + const experimental::PostOpList<ITensorInfo *> &post_ops = experimental::PostOpList<ITensorInfo *> {}); // Inherited methods overridden: void run() override; diff --git a/arm_compute/runtime/FunctionDescriptors.h b/arm_compute/runtime/FunctionDescriptors.h index 07a8f6600e..face8a6fb4 100644 --- a/arm_compute/runtime/FunctionDescriptors.h +++ b/arm_compute/runtime/FunctionDescriptors.h @@ -57,20 +57,22 @@ struct Conv2dInfo { Conv2dInfo() = default; - Conv2dInfo(const PadStrideInfo &conv_info, - const Size2D &dilation, - const ActivationLayerInfo &act_info, - bool enable_fast_math, - unsigned int num_groups) - : conv_info(conv_info), dilation(dilation), act_info(act_info), enable_fast_math(enable_fast_math), num_groups(num_groups) + Conv2dInfo(const PadStrideInfo &conv_info, + const Size2D &dilation, + const ActivationLayerInfo &act_info, + bool enable_fast_math, + unsigned int num_groups, + const experimental::PostOpList<ITensorInfo *> &post_ops = experimental::PostOpList<ITensorInfo *> {}) + : conv_info(conv_info), dilation(dilation), act_info(act_info), enable_fast_math(enable_fast_math), num_groups(num_groups), post_ops(post_ops) { } - PadStrideInfo conv_info{}; - Size2D dilation{ 1U, 1U }; - ActivationLayerInfo act_info{}; - bool enable_fast_math{ false }; - unsigned int num_groups{ 1 }; + PadStrideInfo conv_info{}; + Size2D dilation{ 1U, 1U }; + ActivationLayerInfo act_info{}; + bool enable_fast_math{ false }; + 
unsigned int num_groups{ 1 }; + experimental::PostOpList<ITensorInfo *> post_ops{}; }; /** Descriptor used by the 3d Convolution function */ |