From 1af5416917268692fcd4b34b1d7ffebd3a2aea8a Mon Sep 17 00:00:00 2001 From: SiCongLi Date: Wed, 6 Oct 2021 15:25:57 +0100 Subject: Add experimental PostOp interface to ClGemmMatrixMultiplyReshapedKernel Part 1 This interface supports the fusion of multiple elementwise operations Partially resolves: COMPMID-4435 Change-Id: If68dd7dd98dcf239fde7cb1f0a4a6d4d1e899a6f Signed-off-by: SiCongLi Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6483 Tested-by: Arm Jenkins Reviewed-by: Gian Marco Iodice Comments-Addressed: Arm Jenkins --- src/core/CL/CLUtils.h | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 85 insertions(+), 1 deletion(-) (limited to 'src/core/CL/CLUtils.h') diff --git a/src/core/CL/CLUtils.h b/src/core/CL/CLUtils.h index b65d547756..d133e4fe6f 100644 --- a/src/core/CL/CLUtils.h +++ b/src/core/CL/CLUtils.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020 Arm Limited. + * Copyright (c) 2020-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -26,10 +26,13 @@ #define ARM_COMPUTE_CL_CLUTILS_H #include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/core/experimental/IPostOp.h" namespace arm_compute { class TensorShape; +class CLBuildOptions; +class ITensorInfo; /** Create a cl::Image2D object from an OpenCL buffer * @@ -51,6 +54,87 @@ class TensorShape; */ cl::Image2D create_image2d_from_buffer(const cl::Context &ctx, const cl::Buffer &buffer, const TensorShape &shape2d, DataType data_type, size_t image_row_pitch); +namespace experimental +{ +/** @name (EXPERIMENTAL_POST_OPS) + * @{ + */ + +/** Manage validation, building and configurations of PostOp CL kernels */ +class PostOpCLKernelUtils final +{ +public: + /** CL kernel name postfix for post ops */ + using NamePostfix = std::string; + /** CL kernels that supports post ops assign each post op to a 'slot', in accordance with the postfix + * For example, for a kernel with postfix '_act_prelu_eltwiseadd', there are 3 slots + * slot 1: (unary) activation, slot 2: 
pRelu, slot 3: elementwise addition + * + * Some kernels may allow some slots to be optional, to support multiple combinations of post op sequences. + * In such cases, we need to explicitly set up a mapping between each post op and the slots for that kernel. + * For example, suppose we have 2 kernels with postfixes: _eltwiseadd_prelu, _act_eltwiseadd_act_prelu, where the activations in the + * second kernel are optional. Say we want to support an eltwise addition, followed by a prelu (sequence { eltwiseadd, prelu }). + * Now we can choose which one of the 2 kernels to use, since they both support this post op sequence. + * We can either: + * 1. assign the elementwise to slot 1 and prelu to slot 2 of kernel 1 + * { { Eltwise_Add, PRelu } -> {"_eltwiseadd_prelu", {1, 2} } } or + * 2. assign the elementwise to slot 2 and prelu to slot 4 of kernel 2 + * { { Eltwise_Add, PRelu } -> {"_act_eltwiseadd_act_prelu", {2, 4} } } + */ + using Slots = std::vector; + using Config = std::map>; + +public: + explicit PostOpCLKernelUtils(const Config &config); + + /** Check if post op argument tensor shapes are compliant + * All post ops must not alter the shape of the original dst tensor (even after broadcasting) + * + * @param[in] dst Dst tensor to apply the post ops to + * @param[in] post_ops Post ops + * + * @return true if shapes are compliant and false otherwise + */ + static bool are_post_op_shapes_compliant(const ITensorInfo *dst, const experimental::PostOpList &post_ops); + /** Check if the post op sequence is supported in the current configuration + * + * @param[in] post_ops Post ops + * + * @return true if the post op sequence is supported and false otherwise + */ + bool is_post_op_sequence_supported(const PostOpList &post_ops) const; + /** Helper function to set PostOp related build options + * @note Convention + * 1. Each post op "slot" is prefixed with "P", followed by the usual parameters for that post op. + * E.g. 
If the first slot is an activation, we need to pass 3 definitions in this way: + * -P1_ACTIVATION_TYPE=... -P1_ACTIVATION_A_VAL=... -P1_ACTIVATION_B_VAL=... + * + * 2. For multi-ary post ops, to pass the position of the previous op's dest tensor, + * we append "_X_POS_" to the post op type. + * E.g. for a single post op add(dst, x), where dst is the result of the main op. + * In this case, the position of the previous op's dest is 0, so we pass + * -P1_ELTWISE_OP=ADD_X_POS_0 + * + * @param[out] built_opts OpenCL kernel build options + * @param[in] post_ops Post ops + * + */ + void set_post_ops_cl_build_options(CLBuildOptions &built_opts, const PostOpList &post_ops) const; + /** Helper function to set PostOp kernel name + * + * @param[out] kernel_name OpenCL kernel name + * @param[in] post_ops Post ops + * + */ + void set_post_ops_cl_kernel_name(std::string &kernel_name, const PostOpList &post_ops) const; + +private: + Config _supported_config{}; +}; +/** @} */ // end of group (EXPERIMENTAL_POST_OPS) + +} // namespace experimental + } // arm_compute #endif /* ARM_COMPUTE_CL_CLUTILS_H */ -- cgit v1.2.1