author     SiCongLi <sicong.li@arm.com>    2021-10-06 15:25:57 +0100
committer  SiCong Li <sicong.li@arm.com>   2021-10-28 11:00:52 +0000
commit     1af5416917268692fcd4b34b1d7ffebd3a2aea8a (patch)
tree       81833ecad401eeb0101fb0d464728df8b699caf8 /arm_compute/core/experimental
parent     49956ccf029ff4c1873e3a6702b5bede95d81f7a (diff)
Add experimental PostOp interface to ClGemmMatrixMultiplyReshapedKernel Part 1
This interface supports the fusion of multiple elementwise operations.

Partially resolves: COMPMID-4435

Change-Id: If68dd7dd98dcf239fde7cb1f0a4a6d4d1e899a6f
Signed-off-by: SiCongLi <sicong.li@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6483
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'arm_compute/core/experimental')
-rw-r--r--  arm_compute/core/experimental/IPostOp.h | 162
-rw-r--r--  arm_compute/core/experimental/Types.h   |   5
2 files changed, 167 insertions, 0 deletions
diff --git a/arm_compute/core/experimental/IPostOp.h b/arm_compute/core/experimental/IPostOp.h
new file mode 100644
index 0000000000..cd6b8fc4cc
--- /dev/null
+++ b/arm_compute/core/experimental/IPostOp.h
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_EXPERIMENTAL_IPOSTOP
+#define ARM_COMPUTE_EXPERIMENTAL_IPOSTOP
+
+#include <memory>
+#include <numeric>
+#include <vector>
+
+namespace arm_compute
+{
+namespace experimental
+{
+/** Type of Post Op */
+enum class PostOpType
+{
+ Activation,
+ Eltwise_Add,
+};
+/** An ordered sequence of type of Post Ops */
+using PostOpTypeSequence = std::vector<PostOpType>;
+/** An elementwise n-ary operation that can be appended to and fused with (at kernel level) other operators.
+ * It contains:
+ * 1. The attributes of the original operator.
+ * 2. Any additional tensor arguments.
+ * 3. The position of the previous op's dst tensor in its argument list ( @ref prev_dst_pos )
+ *
+ * For example, a series of chained ops:
+ *
+ * div(src1, relu(conv(src0, weights, bias, conv_info), act_info), div_info)
+ *
+ * translates to
+ *
+ * dst = conv(src0, weights, bias, conv_info) // main op
+ * dst = relu(dst, act_info) // previous dst is placed in the first (and only) argument
+ * dst = div(src1, dst, div_info) // previous dst is placed in the second argument
+ *
+ * which in turn translates to:
+ *
+ * main op: conv(src0, weights, bias, conv_info)
+ * post op1: relu(act_info, prev_dst_pos = 0)
+ * post op2: div(div_info, src1, prev_dst_pos = 1)
+ *
+ * NOTE: PostOps do not own any resources pointed to by TensorRelatedT if it's a pointer type
+ * NOTE: If TensorRelatedT points to a resource, IPostOp assumes that resource is valid throughout its lifetime
+ * and the lifetime of its copies. This is almost guaranteed as IPostOp is only meant to be used at configure time
+ * after the ITensor or ITensorInfo objects are already constructed
+ */
+template <typename TensorRelatedT>
+struct IPostOp
+{
+ /** Get the arity of the post op
+ * NOTE: this is one fewer than the arity of the original op, because we implicitly pass the previous op's dst
+ * tensor as one of the arguments
+ */
+ size_t arity() const
+ {
+ return arguments().size();
+ }
+ /** The position of the previous op's dst in the current op's argument list */
+ virtual int prev_dst_pos() const = 0;
+ /** The IPostOp type */
+ virtual PostOpType type() const = 0;
+ /** The argument tensors
+ * The order of the argument tensors is strictly preserved
+ */
+ virtual std::vector<TensorRelatedT *> arguments() = 0;
+ virtual std::vector<const TensorRelatedT *> arguments() const = 0;
+ /** Clone method used in cases where PostOps are owned by a unique_ptr
+ * NOTE: This performs a shallow copy of the TensorRelatedT if TensorRelatedT points to a resource
+ */
+ virtual std::unique_ptr<IPostOp<TensorRelatedT>> clone() const = 0;
+ virtual ~IPostOp()
+ {
+ }
+};
+
+/** A sequence of PostOps that can be appended to the end of other operators */
+template <typename TensorRelatedT>
+class PostOpList
+{
+public:
+ /** Constructor */
+ PostOpList() = default;
+ /** Destructor */
+ ~PostOpList() = default;
+ PostOpList(const PostOpList &other)
+ {
+ for(const auto &op : other._post_ops)
+ {
+ this->_post_ops.push_back(op->clone());
+ }
+ }
+ PostOpList &operator=(const PostOpList &other)
+ {
+ PostOpList tmp{ other };
+ std::swap(tmp, *this);
+ return *this;
+ }
+ PostOpList(PostOpList &&other) = default;
+ PostOpList &operator=(PostOpList &&other) = default;
+
+ /** Add a new post op at the end of the list */
+ template <typename OpT, typename... Args>
+ void push_back_op(Args &&... args)
+ {
+ _post_ops.push_back(std::make_unique<OpT>(std::forward<Args>(args)...));
+ }
+
+ /** Number of post ops */
+ size_t size() const
+ {
+ return _post_ops.size();
+ }
+
+ /** Total number of arguments of all post ops */
+ size_t total_num_arguments() const
+ {
+ return std::accumulate(_post_ops.begin(), _post_ops.end(), size_t{ 0 }, [](size_t total_args, const auto & op)
+ {
+ return total_args + op->arity();
+ });
+ }
+
+ /** Get the underlying post op list */
+ std::vector<std::unique_ptr<IPostOp<TensorRelatedT>>> &get_list()
+ {
+ return _post_ops;
+ }
+ const std::vector<std::unique_ptr<IPostOp<TensorRelatedT>>> &get_list() const
+ {
+ return _post_ops;
+ }
+
+private:
+ std::vector<std::unique_ptr<IPostOp<TensorRelatedT>>> _post_ops{};
+};
+
+} // namespace experimental
+} // namespace arm_compute
+#endif //ARM_COMPUTE_EXPERIMENTAL_IPOSTOP
\ No newline at end of file
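As a complement to the header above, here is a minimal sketch (not part of this patch) of how a concrete post op could implement the IPostOp interface and how a PostOpList mirrors the conv -> relu -> add chain described in the IPostOp documentation. The types ExampleActPostOp and ExampleEltwiseAddPostOp are hypothetical, and TensorInfoT is a stand-in for whatever TensorRelatedT the caller instantiates the templates with; only IPostOp, PostOpType and PostOpList come from the patch.

#include "arm_compute/core/experimental/IPostOp.h"

#include <memory>
#include <vector>

using TensorInfoT = void *; // placeholder tensor-related type, for illustration only

// Hypothetical unary post op: relu(dst). It carries no extra tensor arguments,
// so the previous op's dst is the only (implicit) argument at position 0.
struct ExampleActPostOp : public arm_compute::experimental::IPostOp<TensorInfoT>
{
    int prev_dst_pos() const override
    {
        return 0;
    }
    arm_compute::experimental::PostOpType type() const override
    {
        return arm_compute::experimental::PostOpType::Activation;
    }
    std::vector<TensorInfoT *> arguments() override
    {
        return {};
    }
    std::vector<const TensorInfoT *> arguments() const override
    {
        return {};
    }
    std::unique_ptr<arm_compute::experimental::IPostOp<TensorInfoT>> clone() const override
    {
        return std::make_unique<ExampleActPostOp>(*this);
    }
};

// Hypothetical binary post op: add(src1, dst). It carries one extra tensor
// argument (src1) and places the previous op's dst at position 1.
struct ExampleEltwiseAddPostOp : public arm_compute::experimental::IPostOp<TensorInfoT>
{
    explicit ExampleEltwiseAddPostOp(TensorInfoT src) : _src(src)
    {
    }
    int prev_dst_pos() const override
    {
        return 1;
    }
    arm_compute::experimental::PostOpType type() const override
    {
        return arm_compute::experimental::PostOpType::Eltwise_Add;
    }
    std::vector<TensorInfoT *> arguments() override
    {
        return { &_src };
    }
    std::vector<const TensorInfoT *> arguments() const override
    {
        return { &_src };
    }
    std::unique_ptr<arm_compute::experimental::IPostOp<TensorInfoT>> clone() const override
    {
        return std::make_unique<ExampleEltwiseAddPostOp>(*this);
    }
    TensorInfoT _src;
};

int main()
{
    arm_compute::experimental::PostOpList<TensorInfoT> post_ops{};
    post_ops.push_back_op<ExampleActPostOp>();               // post op1: relu(dst), prev_dst_pos = 0
    post_ops.push_back_op<ExampleEltwiseAddPostOp>(nullptr); // post op2: add(src1, dst), prev_dst_pos = 1
    // post_ops.size() == 2; total_num_arguments() == 0 + 1 == 1 extra tensor argument
    return (post_ops.size() == 2 && post_ops.total_num_arguments() == 1) ? 0 : 1;
}

The owning PostOpList is what a fused kernel would be configured with; cloning through the IPostOp interface keeps the list copyable even though the ops are held by unique_ptr.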
diff --git a/arm_compute/core/experimental/Types.h b/arm_compute/core/experimental/Types.h
index a478513b1b..c8755dc26c 100644
--- a/arm_compute/core/experimental/Types.h
+++ b/arm_compute/core/experimental/Types.h
@@ -76,6 +76,11 @@ enum TensorType : int32_t
ACL_VEC_COL_SUM = ACL_SRC_4,
ACL_SHIFTS = ACL_SRC_5,
ACL_MULTIPLIERS = ACL_SRC_6,
+
+ // (EXPERIMENTAL_POST_OPS) Post op arguments begin after everything else
+ EXPERIMENTAL_ACL_POST_OP_ARG = 2048,
+ EXPERIMENTAL_ACL_POST_OP_ARG_FIRST = EXPERIMENTAL_ACL_POST_OP_ARG,
+ EXPERIMENTAL_ACL_POST_OP_ARG_LAST = EXPERIMENTAL_ACL_POST_OP_ARG_FIRST + 1024, // Max number of post op arguments
};
namespace experimental
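To illustrate how the reserved range above might be consumed, here is a small sketch (not from this patch) that maps the i-th extra post op tensor argument onto an experimental argument slot. The helper name slot_for_post_op_arg is hypothetical; only the enumerators come from Types.h.

#include "arm_compute/core/experimental/Types.h"

#include <cassert>
#include <cstddef>
#include <cstdint>

// Map the i-th extra post op tensor argument onto the reserved slot range
// [EXPERIMENTAL_ACL_POST_OP_ARG_FIRST, EXPERIMENTAL_ACL_POST_OP_ARG_LAST].
arm_compute::TensorType slot_for_post_op_arg(std::size_t i)
{
    const auto slot = static_cast<std::int32_t>(arm_compute::EXPERIMENTAL_ACL_POST_OP_ARG_FIRST) + static_cast<std::int32_t>(i);
    assert(slot <= static_cast<std::int32_t>(arm_compute::EXPERIMENTAL_ACL_POST_OP_ARG_LAST));
    return static_cast<arm_compute::TensorType>(slot);
}

Keeping post op arguments in their own block starting at 2048 leaves the regular ACL_SRC_* / ACL_DST_* slots untouched, so a fused kernel can bind its extra tensors without colliding with the main operator's arguments.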