From 1af5416917268692fcd4b34b1d7ffebd3a2aea8a Mon Sep 17 00:00:00 2001
From: SiCongLi
Date: Wed, 6 Oct 2021 15:25:57 +0100
Subject: Add experimental PostOp interface to ClGemmMatrixMultiplyReshapedKernel Part 1

This interface supports the fusion of multiple elementwise operations

Partially resolves: COMPMID-4435

Change-Id: If68dd7dd98dcf239fde7cb1f0a4a6d4d1e899a6f
Signed-off-by: SiCongLi
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6483
Tested-by: Arm Jenkins
Reviewed-by: Gian Marco Iodice
Comments-Addressed: Arm Jenkins
---
 src/core/CL/CLUtils.cpp | 97 ++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 95 insertions(+), 2 deletions(-)

(limited to 'src/core/CL/CLUtils.cpp')

diff --git a/src/core/CL/CLUtils.cpp b/src/core/CL/CLUtils.cpp
index 67af240044..1da970e705 100644
--- a/src/core/CL/CLUtils.cpp
+++ b/src/core/CL/CLUtils.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020 Arm Limited.
+ * Copyright (c) 2020-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -21,12 +21,18 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
+#include "arm_compute/core/CL/CLCompileContext.h"
 #include "arm_compute/core/Error.h"
 #include "arm_compute/core/Types.h"
+#include "arm_compute/core/Validate.h"
+#include "support/StringSupport.h"
 
 #include "src/core/CL/CLUtils.h"
+#include "src/core/experimental/PostOp.h"
 
-cl::Image2D arm_compute::create_image2d_from_buffer(const cl::Context &ctx, const cl::Buffer &buffer, const TensorShape &shape2d, DataType data_type, size_t image_row_pitch)
+namespace arm_compute
+{
+cl::Image2D create_image2d_from_buffer(const cl::Context &ctx, const cl::Buffer &buffer, const TensorShape &shape2d, DataType data_type, size_t image_row_pitch)
 {
     cl_channel_type cl_data_type;
 
@@ -62,3 +68,90 @@ cl::Image2D arm_compute::create_image2d_from_buffer(const cl::Context &ctx, cons
 
     return cl::Image2D(cl_image);
 }
+
+namespace experimental
+{
+PostOpCLKernelUtils::PostOpCLKernelUtils(const Config &supported_config)
+    : _supported_config(supported_config)
+{
+    ARM_COMPUTE_ERROR_ON_MSG(supported_config.empty(), "Empty PostOp CL kernel support configuration is not allowed");
+    for(auto it = _supported_config.begin(); it != _supported_config.end(); ++it)
+    {
+        auto post_op_sequence = it->first;
+        auto post_op_slots = std::get<1>(it->second);
+        ARM_COMPUTE_ERROR_ON_MSG(post_op_sequence.size() != post_op_slots.size(), "The number of PostOps must be the same as that of the assigned slots");
+    }
+}
+
+bool PostOpCLKernelUtils::are_post_op_shapes_compliant(const ITensorInfo *dst, const experimental::PostOpList<ITensorInfo *> &post_ops)
+{
+    // All post ops must be elementwise and must not alter the shape of the original dst tensor after broadcasting
+    for(const auto &op : post_ops.get_list())
+    {
+        for(const auto &tensor : op->arguments())
+        {
+            const TensorShape &out_shape = TensorShape::broadcast_shape(dst->tensor_shape(), (*tensor)->tensor_shape());
+            if(detail::have_different_dimensions(out_shape, dst->tensor_shape(), 0))
+            {
+                return false;
+            }
+        }
+    }
+    return true;
+}
+
+bool PostOpCLKernelUtils::is_post_op_sequence_supported(const PostOpList<ITensorInfo *> &post_ops) const
+{
+    if(post_ops.size() == 0)
+    {
+        return true; // Always support cases where no post op is specified
+    }
+    const auto post_op_sequence = get_post_op_sequence(post_ops);
+
+    return _supported_config.find(post_op_sequence) != _supported_config.end();
+}
+
+void PostOpCLKernelUtils::set_post_ops_cl_build_options(CLBuildOptions &build_opts, const PostOpList<ITensorInfo *> &post_ops) const
+{
+    const auto post_op_sequence = get_post_op_sequence(post_ops);
+    const auto slots = std::get<1>(_supported_config.at(post_op_sequence));
+    for(size_t post_op_id = 0; post_op_id < post_ops.size(); ++post_op_id)
+    {
+        const auto &post_op = post_ops.get_list().at(post_op_id);
+        const auto slot_prefix = "-DP" + support::cpp11::to_string(slots[post_op_id]);
+        if(post_op->type() == experimental::PostOpType::Activation)
+        {
+            const auto _post_op = utils::cast::polymorphic_downcast<const experimental::PostOpAct<ITensorInfo *> *>(post_op.get());
+            const auto act_type = slot_prefix + "_ACTIVATION_TYPE=" + lower_string(string_from_activation_func(_post_op->_act_info.activation()));
+            const auto act_a_val = slot_prefix + "_ACTIVATION_A_VAL=" + float_to_string_with_full_precision(_post_op->_act_info.a());
+            const auto act_b_val = slot_prefix + "_ACTIVATION_B_VAL=" + float_to_string_with_full_precision(_post_op->_act_info.b());
+            build_opts.add_option(act_type);
+            build_opts.add_option(act_a_val);
+            build_opts.add_option(act_b_val);
+        }
+        else if(post_op->type() == experimental::PostOpType::Eltwise_Add)
+        {
+            size_t arg_id = 1;
+            const auto eltwise_op = slot_prefix + "_ELTWISE_OP=ADD" + "_X_POS_" + support::cpp11::to_string(post_op->prev_dst_pos());
+            build_opts.add_option(eltwise_op);
+            for(const auto &tensor : post_op->arguments())
+            {
+                const auto height = slot_prefix + "_ELTWISE_ARG" + support::cpp11::to_string(arg_id) + "_HEIGHT=" + support::cpp11::to_string((*tensor)->dimension(1));
+                const auto width = slot_prefix + "_ELTWISE_ARG" + support::cpp11::to_string(arg_id) + "_WIDTH=" + support::cpp11::to_string((*tensor)->dimension(0));
+                build_opts.add_option(height);
+                build_opts.add_option(width);
+                ++arg_id;
+            }
+        }
+    }
+}
+
+void PostOpCLKernelUtils::set_post_ops_cl_kernel_name(std::string &kernel_name, const PostOpList<ITensorInfo *> &post_ops) const
+{
+    const auto post_op_sequence = get_post_op_sequence(post_ops);
+    const auto postfix = std::get<0>(_supported_config.at(post_op_sequence));
+    kernel_name += postfix;
+}
+} // namespace experimental
+
+} // namespace arm_compute
\ No newline at end of file
--
cgit v1.2.1