author    SiCongLi <sicong.li@arm.com>   2021-10-18 09:38:33 +0100
committer SiCong Li <sicong.li@arm.com>  2021-11-01 15:18:12 +0000
commit    579ca84bd8ef5a91eded65c4dc5e0b9f7de8bef1 (patch)
tree      0c2ceba8ad5b2c944bce00055fe1ec7ac84b49f3 /arm_compute
parent    48717a3d38fef8d316cd4b9fd9a3bc1a43db736b (diff)
download  ComputeLibrary-579ca84bd8ef5a91eded65c4dc5e0b9f7de8bef1.tar.gz
Add PostOp support to GEMM and CLGEMM operators and functions Part 2
* Implement PostOp interface changes
* Remove spaces around "=" in TypePrinter

Partially resolves COMPMID-4435

Signed-off-by: SiCongLi <sicong.li@arm.com>
Change-Id: If1e2280554030a0f635e73339a2e86987f6dc41b
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6484
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Sheri Zhang <sheri.zhang@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'arm_compute')
-rw-r--r--  arm_compute/core/Types.h                                    52
-rw-r--r--  arm_compute/runtime/CL/functions/CLConvolutionLayer.h      11
-rw-r--r--  arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h  13
-rw-r--r--  arm_compute/runtime/FunctionDescriptors.h                   24
4 files changed, 68 insertions(+), 32 deletions(-)
diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h
index bfe85ea937..bff672c361 100644
--- a/arm_compute/core/Types.h
+++ b/arm_compute/core/Types.h
@@ -30,6 +30,7 @@
#include "arm_compute/core/Size3D.h"
#include "arm_compute/core/Strides.h"
#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/experimental/IPostOp.h"
#include "arm_compute/core/utils/misc/Macros.h"
#include "support/Bfloat16.h"
#include "support/Half.h"
@@ -1963,6 +1964,7 @@ struct GEMMRHSMatrixInfo
bool export_to_cl_image{ false }; /**< True if the reshaped rhs has to be exported to cl_image. n0 must be equal to 4 */
};
+class ITensorInfo;
/** GEMM information class. This class stores the necessary information to compute GEMM functions
*
* This object also contains the information about how matrix A and matrix B have been reshaped
@@ -1984,7 +1986,8 @@ public:
_fp_mixed_precision(false),
_broadcast_bias(false),
_pretranspose_B(true),
- _activation_info()
+ _activation_info(),
+ _post_ops()
{
}
/** Constructor
@@ -2002,10 +2005,11 @@ public:
* @param[in] fast_math (Optional) Use a data type of shorter width to improve performance
* @param[in] broadcast_bias (Optional) Broadcast the shape of the bias tensor from a vector to a matrix.
* @param[in] activation_info (Optional) Activation to apply after the matrix multiplication
+ * @param[in] post_ops (Optional) A sequence of post operations that are performed after the main operation.
*/
GEMMInfo(bool is_a_reshaped, bool is_b_reshaped, bool reshape_b_only_on_first_run, int depth_output_gemm3d = 0, bool reinterpret_input_as_3d = false, bool retain_internal_weights = false,
GEMMLowpOutputStageInfo gemmlowp_output_stage = GEMMLowpOutputStageInfo(), bool fp_mixed_precision = false, bool fast_math = false, bool broadcast_bias = false,
- const ActivationLayerInfo &activation_info = ActivationLayerInfo()) noexcept
+ const ActivationLayerInfo &activation_info = ActivationLayerInfo(), const experimental::PostOpList<ITensorInfo *> &post_ops = experimental::PostOpList<ITensorInfo *>()) noexcept
: _is_a_reshaped(is_a_reshaped),
_is_b_reshaped(is_b_reshaped),
_reshape_b_only_on_first_run(reshape_b_only_on_first_run),
@@ -2017,7 +2021,8 @@ public:
_fp_mixed_precision(fp_mixed_precision),
_broadcast_bias(broadcast_bias),
_pretranspose_B(reshape_b_only_on_first_run),
- _activation_info(activation_info)
+ _activation_info(activation_info),
+ _post_ops(post_ops)
{
}
/** Flag which specifies if the matrix A has been reshaped
@@ -2142,20 +2147,37 @@ public:
{
_activation_info = activation_info;
}
+ /** Post operations to apply after the matrix multiplication
+ *
+ * @return experimental::PostOpList object
+ */
+ const experimental::PostOpList<ITensorInfo *> &post_ops() const
+ {
+ return _post_ops;
+ }
+ /** Set post ops
+ *
+ * @param[in] post_ops experimental::PostOpList object to set
+ */
+ void set_post_ops(const experimental::PostOpList<ITensorInfo *> &post_ops)
+ {
+ _post_ops = post_ops;
+ }
private:
- bool _is_a_reshaped;
- bool _is_b_reshaped;
- bool _reshape_b_only_on_first_run;
- int _depth_output_gemm3d;
- bool _reinterpret_input_as_3d;
- bool _retain_internal_weights;
- GEMMLowpOutputStageInfo _gemmlowp_output_stage;
- bool _fast_math;
- bool _fp_mixed_precision;
- bool _broadcast_bias;
- bool _pretranspose_B;
- ActivationLayerInfo _activation_info;
+ bool _is_a_reshaped;
+ bool _is_b_reshaped;
+ bool _reshape_b_only_on_first_run;
+ int _depth_output_gemm3d;
+ bool _reinterpret_input_as_3d;
+ bool _retain_internal_weights;
+ GEMMLowpOutputStageInfo _gemmlowp_output_stage;
+ bool _fast_math;
+ bool _fp_mixed_precision;
+ bool _broadcast_bias;
+ bool _pretranspose_B;
+ ActivationLayerInfo _activation_info;
+ experimental::PostOpList<ITensorInfo *> _post_ops;
};
/** Winograd information */
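Usage sketch (illustrative, not part of this patch): a minimal example of the GEMMInfo post-op accessors added above. The empty PostOpList and the function name are assumptions for illustration; concrete post-op types live in the experimental IPostOp/PostOps headers, which are not shown in this diff.

    #include "arm_compute/core/Types.h"
    #include "arm_compute/core/experimental/IPostOp.h"

    using namespace arm_compute;

    void gemm_info_post_ops_example()
    {
        // Post ops attached to GEMMInfo are keyed on ITensorInfo*.
        experimental::PostOpList<ITensorInfo *> post_ops{};

        GEMMInfo gemm_info{};
        gemm_info.set_post_ops(post_ops);            // attach the (here empty) post-op chain
        const auto &attached = gemm_info.post_ops(); // and read it back
        (void)attached;
    }
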
diff --git a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
index 0f092bdbc2..38a4019609 100644
--- a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
@@ -26,6 +26,7 @@
#include "arm_compute/core/CL/CLCompileContext.h"
#include "arm_compute/core/Types.h"
+#include "arm_compute/core/experimental/IPostOp.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
@@ -118,9 +119,11 @@ public:
* @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation
* available which may introduce a drop of accuracy as well. Default is false
* @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout
+ * @param[in] post_ops (Optional) A sequence of post operations that are performed after the main operation.
*/
void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo(),
- const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false, unsigned int num_groups = 1);
+ const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false, unsigned int num_groups = 1,
+ const experimental::PostOpList<ICLTensor *> &post_ops = experimental::PostOpList<ICLTensor *> {});
/** Set the input and output tensors.
*
* @param[in] compile_context The compile context to be used.
@@ -140,10 +143,11 @@ public:
* @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation
* available which may introduce a drop of accuracy as well. Default is false
* @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout
+ * @param[in] post_ops (Optional) A sequence of post operations that are performed after the main operation.
*/
void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false,
- unsigned int num_groups = 1);
+ unsigned int num_groups = 1, const experimental::PostOpList<ICLTensor *> &post_ops = experimental::PostOpList<ICLTensor *> {});
/** Static function to check if given info will lead to a valid configuration of @ref CLConvolutionLayer
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
@@ -162,12 +166,13 @@ public:
* @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation
* available which may introduce a drop of accuracy as well. Default is false
* @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout
+ * @param[in] post_ops (Optional) A sequence of post operations that are performed after the main operation.
*
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false,
- unsigned int num_groups = 1);
+ unsigned int num_groups = 1, const experimental::PostOpList<ITensorInfo *> &post_ops = experimental::PostOpList<ITensorInfo *> {});
/** Static function to check if given info will return the convolution called by @ref CLConvolutionLayer
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
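Usage sketch (illustrative, not part of this patch): passing the new trailing post_ops argument to CLConvolutionLayer::configure(). Tensor allocation and CL runtime initialisation (e.g. CLScheduler::get().default_init()) are assumed to happen elsewhere; the function name and argument values are illustrative defaults only.

    #include "arm_compute/core/experimental/IPostOp.h"
    #include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h"

    using namespace arm_compute;

    void conv_configure_example(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output)
    {
        // The function-level API takes a post-op list keyed on ICLTensor*.
        experimental::PostOpList<ICLTensor *> post_ops{};

        CLConvolutionLayer conv;
        conv.configure(input, weights, biases, output,
                       PadStrideInfo(1, 1, 0, 0), // conv_info
                       WeightsInfo(),
                       Size2D(1U, 1U),            // dilation
                       ActivationLayerInfo(),
                       false,                     // enable_fast_math
                       1,                         // num_groups
                       post_ops);                 // new optional trailing parameter
    }
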
diff --git a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
index d7a4e7f944..9918a61cab 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
@@ -24,6 +24,7 @@
#ifndef ARM_COMPUTE_CLGEMMCONVOLUTIONLAYER_H
#define ARM_COMPUTE_CLGEMMCONVOLUTIONLAYER_H
+#include "arm_compute/core/experimental/IPostOp.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTypes.h"
#include "arm_compute/runtime/IFunction.h"
@@ -93,9 +94,11 @@ public:
* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
* @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout
+ * @param[in] post_ops (Optional) A sequence of post operations that are performed after the main operation.
*/
void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo(),
- const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), unsigned int num_groups = 1);
+ const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), unsigned int num_groups = 1,
+ const experimental::PostOpList<ICLTensor *> &post_ops = experimental::PostOpList<ICLTensor *> {});
/** Set the input and output tensors.
*
* @param[in] compile_context The compile context to be used.
@@ -114,10 +117,12 @@ public:
* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
* @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout
+ * @param[in] post_ops (Optional) A sequence of post operations that are performed after the main operation.
*/
void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
const WeightsInfo &weights_info = WeightsInfo(),
- const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), unsigned int num_groups = 1);
+ const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), unsigned int num_groups = 1,
+ const experimental::PostOpList<ICLTensor *> &post_ops = experimental::PostOpList<ICLTensor *> {});
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMConvolutionLayer.
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
@@ -135,11 +140,13 @@ public:
* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
* @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout
+ * @param[in] post_ops (Optional) A sequence of post operations that are performed after the main operation.
*
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), unsigned int num_groups = 1);
+ const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), unsigned int num_groups = 1,
+ const experimental::PostOpList<ITensorInfo *> &post_ops = experimental::PostOpList<ITensorInfo *> {});
// Inherited methods overridden:
void run() override;
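Usage sketch (illustrative, not part of this patch): the static validate() overload takes the ITensorInfo* flavour of the post-op list, mirroring the GEMMInfo plumbing above. The function name and argument values are assumptions for illustration.

    #include "arm_compute/core/experimental/IPostOp.h"
    #include "arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h"

    using namespace arm_compute;

    Status gemm_conv_validate_example(const ITensorInfo *input, const ITensorInfo *weights,
                                      const ITensorInfo *biases, const ITensorInfo *output)
    {
        // validate() works on tensor metadata only, so the list is keyed on ITensorInfo*.
        experimental::PostOpList<ITensorInfo *> post_ops{};

        return CLGEMMConvolutionLayer::validate(input, weights, biases, output,
                                                PadStrideInfo(1, 1, 0, 0), // conv_info
                                                WeightsInfo(),
                                                Size2D(1U, 1U),            // dilation
                                                ActivationLayerInfo(),
                                                1,                         // num_groups
                                                post_ops);
    }
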
diff --git a/arm_compute/runtime/FunctionDescriptors.h b/arm_compute/runtime/FunctionDescriptors.h
index 07a8f6600e..face8a6fb4 100644
--- a/arm_compute/runtime/FunctionDescriptors.h
+++ b/arm_compute/runtime/FunctionDescriptors.h
@@ -57,20 +57,22 @@ struct Conv2dInfo
{
Conv2dInfo() = default;
- Conv2dInfo(const PadStrideInfo &conv_info,
- const Size2D &dilation,
- const ActivationLayerInfo &act_info,
- bool enable_fast_math,
- unsigned int num_groups)
- : conv_info(conv_info), dilation(dilation), act_info(act_info), enable_fast_math(enable_fast_math), num_groups(num_groups)
+ Conv2dInfo(const PadStrideInfo &conv_info,
+ const Size2D &dilation,
+ const ActivationLayerInfo &act_info,
+ bool enable_fast_math,
+ unsigned int num_groups,
+ const experimental::PostOpList<ITensorInfo *> &post_ops = experimental::PostOpList<ITensorInfo *> {})
+ : conv_info(conv_info), dilation(dilation), act_info(act_info), enable_fast_math(enable_fast_math), num_groups(num_groups), post_ops(post_ops)
{
}
- PadStrideInfo conv_info{};
- Size2D dilation{ 1U, 1U };
- ActivationLayerInfo act_info{};
- bool enable_fast_math{ false };
- unsigned int num_groups{ 1 };
+ PadStrideInfo conv_info{};
+ Size2D dilation{ 1U, 1U };
+ ActivationLayerInfo act_info{};
+ bool enable_fast_math{ false };
+ unsigned int num_groups{ 1 };
+ experimental::PostOpList<ITensorInfo *> post_ops{};
};
/** Descriptor used by the 3d Convolution function */
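Usage sketch (illustrative, not part of this patch): Conv2dInfo now carries the post-op list alongside its existing fields, and an empty list (the default) means no fused post ops. The function name and values below are illustrative.

    #include "arm_compute/core/experimental/IPostOp.h"
    #include "arm_compute/runtime/FunctionDescriptors.h"

    using namespace arm_compute;

    Conv2dInfo conv2d_info_example()
    {
        experimental::PostOpList<ITensorInfo *> post_ops{}; // empty list == no fused post ops

        return Conv2dInfo(PadStrideInfo(1, 1, 0, 0), // conv_info
                          Size2D(1U, 1U),            // dilation
                          ActivationLayerInfo(),
                          false,                     // enable_fast_math
                          1,                         // num_groups
                          post_ops);
    }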