path: root/src/dynamic_fusion/sketch
Diffstat (limited to 'src/dynamic_fusion/sketch')
-rw-r--r--  src/dynamic_fusion/sketch/ArgumentPack.h | 242
-rw-r--r--  src/dynamic_fusion/sketch/OperatorAttributes.cpp | 63
-rw-r--r--  src/dynamic_fusion/sketch/gpu/GpuComponentServices.h | 54
-rw-r--r--  src/dynamic_fusion/sketch/gpu/GpuKernelArgument.cpp | 37
-rw-r--r--  src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h | 128
-rw-r--r--  src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp | 125
-rw-r--r--  src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h | 104
-rw-r--r--  src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.cpp | 291
-rw-r--r--  src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h | 143
-rw-r--r--  src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.cpp | 69
-rw-r--r--  src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.h | 88
-rw-r--r--  src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h | 126
-rw-r--r--  src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp | 87
-rw-r--r--  src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.h | 77
-rw-r--r--  src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.cpp | 172
-rw-r--r--  src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.h | 111
-rw-r--r--  src/dynamic_fusion/sketch/gpu/GpuOperatorProperties.h | 54
-rw-r--r--  src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp | 55
-rw-r--r--  src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp | 76
-rw-r--r--  src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h | 111
-rw-r--r--  src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h | 252
-rw-r--r--  src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h | 66
-rw-r--r--  src/dynamic_fusion/sketch/gpu/components/GpuKernelComponentFactory.h | 64
-rw-r--r--  src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h | 119
-rw-r--r--  src/dynamic_fusion/sketch/gpu/components/Types.h | 52
-rw-r--r--  src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp | 152
-rw-r--r--  src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h | 147
-rw-r--r--  src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp | 57
-rw-r--r--  src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h | 102
-rw-r--r--  src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp | 255
-rw-r--r--  src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.cpp | 109
-rw-r--r--  src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h | 135
-rw-r--r--  src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h | 137
-rw-r--r--  src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp | 400
-rw-r--r--  src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h | 113
-rw-r--r--  src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp | 113
-rw-r--r--  src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h | 85
-rw-r--r--  src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp | 297
-rw-r--r--  src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.h | 92
-rw-r--r--  src/dynamic_fusion/sketch/utils/DependencyGraph.h | 658
40 files changed, 5618 insertions, 0 deletions
diff --git a/src/dynamic_fusion/sketch/ArgumentPack.h b/src/dynamic_fusion/sketch/ArgumentPack.h
new file mode 100644
index 0000000000..f118d7d851
--- /dev/null
+++ b/src/dynamic_fusion/sketch/ArgumentPack.h
@@ -0,0 +1,242 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_ARGUMENTPACK
+#define SRC_DYNAMIC_FUSION_SKETCH_ARGUMENTPACK
+
+#include "arm_compute/core/experimental/Types.h"
+#include <unordered_map>
+#include <vector>
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** This is a generic class that packs the arguments of an operator. For now, it is only used for tensor-related types
+ * Examples of "tensor-related types": @ref ITensorInfo, @ref ITensor, @ref ICLTensor
+ *
+ * The argument id is the position of the argument within the pack, and is represented by @ref TensorType
+ *
+ * @tparam T Tensor-related type
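+ *
+ * A minimal usage sketch (src_info, wei_info and dst_info are illustrative, already-initialized @ref TensorInfo objects):
+ * @code
+ * ArgumentPack<ITensorInfo> pack{};
+ * pack.add_const_tensor(ACL_SRC_0, &src_info);
+ * pack.add_const_tensor(ACL_SRC_1, &wei_info);
+ * pack.add_tensor(ACL_DST_0, &dst_info);
+ * const auto srcs = pack.get_const_src_tensors(); // { &src_info, &wei_info }
+ * const auto dsts = pack.get_dst_tensors();       // { &dst_info }
+ * @endcode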
+ */
+template <typename T>
+class ArgumentPack
+{
+public:
+ /** @ref TensorType encodes the position of a tensor argument within the pack */
+ using Id = TensorType;
+ /** A single argument element within the pack
+ * It contains either a const pointer or a non-const pointer to the Tensor-related type T, but never both at the same time
+ */
+ struct PackElement
+ {
+ PackElement() = default;
+ PackElement(const PackElement &elem) = default;
+ PackElement &operator=(const PackElement &elem) = default;
+ PackElement(PackElement &&elem) = default;
+ PackElement &operator=(PackElement &&elem) = default;
+ PackElement(Id id, T *tensor)
+ : id(id), tensor(tensor), ctensor(nullptr)
+ {
+ }
+ PackElement(Id id, const T *ctensor)
+ : id(id), tensor(nullptr), ctensor(ctensor)
+ {
+ }
+
+ Id id{ ACL_UNKNOWN }; /**< Argument id within the pack */
+ T *tensor{ nullptr }; /**< Non-const pointer to tensor-related object */
+ const T *ctensor
+ {
+ nullptr
+ }; /**< Const pointer to tensor-related object */
+ };
+
+public:
+ /** Default constructor */
+ ArgumentPack() = default;
+ /** Destructor */
+ ~ArgumentPack() = default;
+ /** Allow instances of this class to be copy constructed */
+ ArgumentPack<T>(const ArgumentPack<T> &other) = default;
+ /** Allow instances of this class to be copied */
+ ArgumentPack<T> &operator=(const ArgumentPack<T> &other) = default;
+ /** Allow instances of this class to be move constructed */
+ ArgumentPack<T>(ArgumentPack<T> &&other) = default;
+ /** Allow instances of this class to be moved */
+ ArgumentPack<T> &operator=(ArgumentPack<T> &&other) = default;
+ /** Initializer list Constructor */
+ ArgumentPack(const std::initializer_list<PackElement> &l)
+ : _pack{}
+ {
+ for(const auto &e : l)
+ {
+ _pack[e.id] = e;
+ }
+ }
+ /** Add tensor to the pack
+ *
+ * @param[in] id ID of the tensor to add
+ * @param[in] tensor Tensor to add
+ */
+ void add_tensor(Id id, T *tensor)
+ {
+ _pack[id] = PackElement(id, tensor);
+ }
+ /** Add const tensor to the pack
+ *
+ * @param[in] id ID of the tensor to add
+ * @param[in] tensor Tensor to add
+ */
+ void add_const_tensor(Id id, const T *tensor)
+ {
+ _pack[id] = PackElement(id, tensor);
+ }
+ /** Get tensor of a given id from the pack
+ *
+ * @param[in] id ID of tensor to extract
+ *
+ * @return The pointer to the tensor if it exists and is non-const, otherwise nullptr
+ */
+ T *get_tensor(Id id)
+ {
+ auto it = _pack.find(id);
+ return it != _pack.end() ? it->second.tensor : nullptr;
+ }
+ /** Get constant tensor of a given id
+ *
+ * @param[in] id ID of tensor to extract
+ *
+ * @return The pointer to the tensor (const or non-const) if it exists, otherwise nullptr
+ */
+ const T *get_const_tensor(Id id) const
+ {
+ auto it = _pack.find(id);
+ if(it != _pack.end())
+ {
+ return it->second.ctensor != nullptr ? it->second.ctensor : it->second.tensor;
+ }
+ return nullptr;
+ }
+ /** Remove the tensor stored with the given id
+ *
+ * @param[in] id ID of tensor to remove
+ */
+ void remove_tensor(Id id)
+ {
+ _pack.erase(id);
+ }
+ /** Pack size accessor
+ *
+ * @return Number of tensors registered to the pack
+ */
+ size_t size() const
+ {
+ return _pack.size();
+ }
+ /** Checks if pack is empty
+ *
+ * @return True if empty else false
+ */
+ bool empty() const
+ {
+ return _pack.empty();
+ }
+ /** Get the ACL_SRC_* tensors
+ *
+ * @return std::vector<T *>
+ */
+ std::vector<T *> get_src_tensors()
+ {
+ std::vector<T *> src_tensors{};
+ for(int id = static_cast<int>(TensorType::ACL_SRC); id <= static_cast<int>(TensorType::ACL_SRC_END); ++id)
+ {
+ auto tensor = get_tensor(static_cast<TensorType>(id));
+ if(tensor != nullptr)
+ {
+ src_tensors.push_back(tensor);
+ }
+ }
+ return src_tensors;
+ }
+ /** Get the const ACL_SRC_* tensors
+ *
+ * @return std::vector<const T *>
+ */
+ std::vector<const T *> get_const_src_tensors() const
+ {
+ std::vector<const T *> src_tensors{};
+ for(int id = static_cast<int>(TensorType::ACL_SRC); id <= static_cast<int>(TensorType::ACL_SRC_END); ++id)
+ {
+ auto tensor = get_const_tensor(static_cast<TensorType>(id));
+ if(tensor != nullptr)
+ {
+ src_tensors.push_back(tensor);
+ }
+ }
+ return src_tensors;
+ }
+ /** Get the ACL_DST_* tensors
+ *
+ * @return std::vector<T *>
+ */
+ std::vector<T *> get_dst_tensors()
+ {
+ std::vector<T *> dst_tensors{};
+ for(int id = static_cast<int>(TensorType::ACL_DST); id <= static_cast<int>(TensorType::ACL_DST_END); ++id)
+ {
+ auto tensor = get_tensor(static_cast<TensorType>(id));
+ if(tensor != nullptr)
+ {
+ dst_tensors.push_back(tensor);
+ }
+ }
+ return dst_tensors;
+ }
+ /** Get the const ACL_DST_* tensors
+ *
+ * @return std::vector<const T *>
+ */
+ std::vector<const T *> get_const_dst_tensors() const
+ {
+ std::vector<const T *> dst_tensors{};
+ for(int id = static_cast<int>(TensorType::ACL_DST); id <= static_cast<int>(TensorType::ACL_DST_END); ++id)
+ {
+ auto tensor = get_const_tensor(static_cast<TensorType>(id));
+ if(tensor != nullptr)
+ {
+ dst_tensors.push_back(tensor);
+ }
+ }
+ return dst_tensors;
+ }
+
+private:
+ std::unordered_map<int, PackElement> _pack{}; /**< Container with the packed tensors */
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_ARGUMENTPACK */
diff --git a/src/dynamic_fusion/sketch/OperatorAttributes.cpp b/src/dynamic_fusion/sketch/OperatorAttributes.cpp
new file mode 100644
index 0000000000..51ec444587
--- /dev/null
+++ b/src/dynamic_fusion/sketch/OperatorAttributes.cpp
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/dynamic_fusion/sketch/OperatorAttributes.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+Conv2dAttributes &Conv2dAttributes::pad(const Padding2D &pad)
+{
+ _pad = pad;
+ return *this;
+}
+Padding2D Conv2dAttributes::pad() const
+{
+ return _pad;
+}
+Conv2dAttributes &Conv2dAttributes::stride(const Size2D &stride)
+{
+ _stride = stride;
+ return *this;
+}
+Size2D Conv2dAttributes::stride() const
+{
+ return _stride;
+}
+Conv2dAttributes &Conv2dAttributes::dilation(const Size2D &dilation)
+{
+ _dilation = dilation;
+ return *this;
+}
+Size2D Conv2dAttributes::dilation() const
+{
+ return _dilation;
+}
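+
+// A minimal usage sketch of the fluent setters above (the padding, stride and dilation values are purely illustrative):
+//   Conv2dAttributes attr = Conv2dAttributes().pad({ 1, 1, 1, 1 }).stride({ 1, 1 }).dilation({ 1, 1 });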
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/GpuComponentServices.h b/src/dynamic_fusion/sketch/gpu/GpuComponentServices.h
new file mode 100644
index 0000000000..93881508bb
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/GpuComponentServices.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTSERVICES
+#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTSERVICES
+
+#include "src/dynamic_fusion/sketch/gpu/components/GpuKernelComponentFactory.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** Services that are used throughout the creation phase of workload code
+ */
+class GpuComponentServices
+{
+public:
+ /** Default constructor */
+ GpuComponentServices() = default;
+ /** Get reference to component factory */
+ GpuKernelComponentFactory &component_factory()
+ {
+ return _comp_factory;
+ }
+
+private:
+ GpuKernelComponentFactory _comp_factory{};
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTSERVICES */
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.cpp b/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.cpp
new file mode 100644
index 0000000000..9cecfc2ffd
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+bool operator==(const GpuKernelArgumentInfo &info0, const GpuKernelArgumentInfo &info1)
+{
+ return info0.type == info1.type;
+}
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h b/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h
new file mode 100644
index 0000000000..eb36e91d48
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELARGUMENT
+#define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELARGUMENT
+
+#include "arm_compute/core/TensorInfo.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** Contains the information required to set up a kernel argument at run time
+ */
+struct GpuKernelArgumentInfo
+{
+ /** Enumerates all the tensor argument variants used by all kernel implementations. */
+ enum class Type : int
+ {
+ Scalar,
+
+ Vector,
+
+ Image,
+ Image_Reinterpret_As_3D,
+ Image_Export_To_ClImage2D,
+
+ Image_3D, // 3D Tensor represented as a 2D Image + stride_z
+ Image_3D_Export_To_ClImage2D,
+
+ Tensor_3D,
+ Tensor_4D,
+ Tensor_4D_t_Buffer,
+ Tensor_4D_t_Image
+ };
+ /** Default constructor */
+ GpuKernelArgumentInfo() = default;
+ /** Constructor */
+ GpuKernelArgumentInfo(Type type)
+ : type{ type }
+ {
+ }
+ Type type{ Type::Tensor_4D_t_Buffer };
+};
+
+bool operator==(const GpuKernelArgumentInfo &info0, const GpuKernelArgumentInfo &info1);
+
+/** Kernel argument information linked with its corresponding @ref ITensorInfo
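+ *
+ * A minimal construction sketch (tensor_info is an illustrative, already-initialized @ref TensorInfo):
+ * @code
+ * GpuKernelArgument arg{ tensor_info, GpuKernelArgumentInfo{ GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer } };
+ * const auto arg_id = arg.id(); // Same id as the associated workload tensor
+ * @endcode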
+ */
+class GpuKernelArgument
+{
+public:
+ /** Constructor
+ *
+ * @param[in] tensor_info Associated @ref ITensorInfo
+ * @param[in] kernel_arg_info Associated @ref GpuKernelArgumentInfo
+ */
+ GpuKernelArgument(const ITensorInfo &tensor_info,
+ const GpuKernelArgumentInfo &kernel_arg_info)
+ : _tensor_info{ tensor_info },
+ _kernel_arg_info{ kernel_arg_info }
+ {
+ }
+ /** Get workload tensor id */
+ ITensorInfo::Id id() const
+ {
+ return _tensor_info.id();
+ }
+ /** Get associated @ref ITensorInfo */
+ ITensorInfo *tensor_info()
+ {
+ return &_tensor_info;
+ }
+ /** Get associated @ref ITensorInfo */
+ const ITensorInfo *tensor_info() const
+ {
+ return &_tensor_info;
+ }
+ /** Get associated @ref GpuKernelArgumentInfo */
+ GpuKernelArgumentInfo *kernel_argument_info()
+ {
+ return &_kernel_arg_info;
+ }
+ /** Get associated @ref GpuKernelArgumentInfo */
+ const GpuKernelArgumentInfo *kernel_argument_info() const
+ {
+ return &_kernel_arg_info;
+ }
+ /** Check if the associated workload tensor has a valid id
+ *
+ * @return true if it has a valid id
+ * @return false otherwise
+ */
+ bool has_valid_id() const
+ {
+ return _tensor_info.has_valid_id();
+ }
+
+private:
+ TensorInfo _tensor_info{};
+ GpuKernelArgumentInfo _kernel_arg_info{};
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELARGUMENT */
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp
new file mode 100644
index 0000000000..6e6422c957
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp
@@ -0,0 +1,125 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "GpuKernelComponentGraph.h"
+
+#include "arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+namespace
+{
+/** Automatically create memory descriptors for all tensors in the graph
+ *
+ * @param[in] tensors @ref ITensorInfo map
+ * @param[in] graph @ref DependencyGraph of which the @p tensors are a part
+ *
+ * @return MemoryDescriptorMap An assignment map of @ref MemoryDescriptor for each @ref ITensorInfo in the graph
+ */
+MemoryDescriptorMap assign_memory_descriptors(const std::map<ITensorInfo::Id, const ITensorInfo *> tensors, const DependencyGraph &graph)
+{
+ MemoryDescriptorMap mem_map{};
+ for(auto t_id : graph.all_tensors())
+ {
+ const auto &tensor = tensors.at(t_id);
+ // Only global src and dst tensors to the entire component graph are "User" tensors, which are user-specified memories
+ if(is_in(t_id, graph.global_src_tensors()) || is_in(t_id, graph.global_dst_tensors()))
+ {
+ mem_map[t_id] = MemoryDescriptor{ MemoryType::User };
+ }
+ else
+ {
+ AuxMemoryInfo aux_mem_info{ tensor->total_size() };
+ mem_map[t_id] = MemoryDescriptor{ MemoryType::Auxiliary, aux_mem_info };
+ }
+ }
+ return mem_map;
+}
+
+} // namespace
+
+std::vector<DependencyGraph::TensorId> GpuKernelComponentGraph::get_tensor_ids(const std::vector<const ITensorInfo *> tensors)
+{
+ std::vector<DependencyGraph::TensorId> tensor_ids{};
+ std::transform(
+ std::begin(tensors), std::end(tensors),
+ std::back_inserter(tensor_ids),
+ [](const auto & t)
+ {
+ return t->id();
+ });
+ return tensor_ids;
+}
+
+GpuKernelComponentGraph::GpuKernelComponentGraph(GpuComponentServices *services)
+ : _services{ services }, _components{}, _tensors{}, _dependency_graph{}
+{
+}
+
+GpuKernelComponentStream GpuKernelComponentGraph::fuse() const
+{
+ // Obtain memory descriptor map
+ const auto mem_map = assign_memory_descriptors(_tensors, _dependency_graph);
+ /// @note Fusion constraints (for kernel components) are exactly the same as the invariants of @ref GpuKernelComponentGroup
+ /// Fusion can be framed as a mathematical optimization problem:
+ /// Given fusion constraints, find the "best" fusion patterns possible
+ /// "Best" is ill-defined at the moment. For now we define "best" fusion pattern as one
+ /// which results in the least number of fused kernels ( @ref GpuKernelComponentGroup ) at the end
+
+ /// As the first iteration, we offer a sub-optimal algorithm here which ensures all
+ /// constraints are met, but provides no guarantee that the fusion pattern is optimal
+
+ GpuKernelComponentStream stream{ _services, mem_map };
+ // Break down into linear groups of components (constraint 1), preserving topological order
+ const auto linear_graphs = _dependency_graph.topological_partition();
+
+ // Further divide up the linear groups based on rest of the fusion constraints (rely on component group's invariants)
+ for(const auto &graph : linear_graphs)
+ {
+ for(unsigned int i = 0; i < graph.size(); ++i)
+ {
+ const auto comp = _components.at(graph[i].op).get();
+ // Each new linear graph signals a new component group in the stream
+ if(i == 0)
+ {
+ stream.new_component_group();
+ }
+ // If adding the component violates the component group's invariants / fusion constraints, break up the stream by inserting a new group
+ bool success = stream.add_component(comp);
+ if(!success)
+ {
+ stream.new_component_group();
+ success = stream.add_component(comp);
+ ARM_COMPUTE_ERROR_ON(!success);
+ }
+ }
+ }
+ return stream;
+}
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h
new file mode 100644
index 0000000000..fbcb2c10ea
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELCOMPONENTGRAPH
+#define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELCOMPONENTGRAPH
+
+#include "src/dynamic_fusion/sketch/ArgumentPack.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuComponentServices.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.h"
+#include "src/dynamic_fusion/sketch/utils/DependencyGraph.h"
+
+#include <vector>
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+class IGpuKernelComponent;
+
+/** A multi-input (tensors), multi-output (tensors) acyclic directed graph of gpu kernel components
+ * Its main purposes are:
+ * - Perform "graph-level" optimizations like fusion of kernel components (not the fusion of operators)
+ * - Automatically assign memory descriptors ( @ref MemoryDescriptor ) to all tensors based on the graph topology
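+ *
+ * A rough usage sketch (the component types and their constructor arguments are illustrative only):
+ * @code
+ * GpuComponentServices services{};
+ * GpuKernelComponentGraph graph{ &services };
+ * graph.add_new_component<ClComponentDirectConv2d>(properties, tensors, attributes, settings);
+ * graph.add_new_component<ClComponentStore>(properties, tensors);
+ * GpuKernelComponentStream stream = graph.fuse();
+ * @endcode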
+ */
+class GpuKernelComponentGraph
+{
+public:
+ /** Constructor
+ *
+ * @param[in] services @ref GpuComponentServices to be used by the graph
+ */
+ GpuKernelComponentGraph(GpuComponentServices *services);
+ /** Prevent instances of this class from being copy constructed */
+ GpuKernelComponentGraph(const GpuKernelComponentGraph &graph) = delete;
+ /** Prevent instances of this class from being copied */
+ GpuKernelComponentGraph &operator=(const GpuKernelComponentGraph &graph) = delete;
+ /** Allow instances of this class to be move constructed */
+ GpuKernelComponentGraph(GpuKernelComponentGraph &&graph) = default;
+ /** Allow instances of this class to be moved */
+ GpuKernelComponentGraph &operator=(GpuKernelComponentGraph &&graph) = default;
+ /** Create a new component and add it to the component graph
+ * Component id is automatically allocated
+ *
+ * @tparam T Component type
+ * @tparam Args Component argument types
+ *
+ * @param[in] args Component arguments except for component id, which is auto-allocated
+ */
+ template <typename T, typename... Args>
+ void add_new_component(Args &&... args)
+ {
+ auto comp = _services->component_factory().create<T>(std::forward<Args>(args)...);
+ ArgumentPack<ITensorInfo> tensors = comp->tensors();
+ const auto src_tensor_ids = get_tensor_ids(tensors.get_const_src_tensors());
+ const auto dst_tensor_ids = get_tensor_ids(tensors.get_const_dst_tensors());
+ bool success = _dependency_graph.add_operator(comp->id(), src_tensor_ids, dst_tensor_ids);
+ ARM_COMPUTE_ERROR_ON(!success);
+ _components[comp->id()] = std::move(comp);
+ for(auto t : tensors.get_const_src_tensors())
+ {
+ _tensors[t->id()] = t;
+ }
+ for(auto t : tensors.get_const_dst_tensors())
+ {
+ _tensors[t->id()] = t;
+ }
+ }
+ /** Perform component fusion and serialize the graph into a stream of component groups
+ */
+ GpuKernelComponentStream fuse() const;
+
+private:
+ static std::vector<DependencyGraph::TensorId> get_tensor_ids(const std::vector<const ITensorInfo *> tensors);
+ GpuComponentServices *_services;
+ std::map<ComponentId, std::unique_ptr<IGpuKernelComponent>> _components;
+ std::map<ITensorInfo::Id, const ITensorInfo *> _tensors;
+ DependencyGraph _dependency_graph{};
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELCOMPONENTGRAPH */
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.cpp b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.cpp
new file mode 100644
index 0000000000..3af4c1429d
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.cpp
@@ -0,0 +1,291 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "GpuKernelComponentGroup.h"
+
+#include "arm_compute/core/ITensorInfo.h"
+#include "arm_compute/core/Validate.h"
+#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+bool GpuKernelComponentGroup::add_component(ComponentPtr component)
+{
+ // note: Constraint 1 is guaranteed as a precondition
+ // Constraint 2
+ if(component->type() != GpuComponentType::Output && _components.size() >= max_fused_components)
+ {
+ return false;
+ }
+ // Constraint 3.1: Pattern: (Unfusable + Output)
+ if(!_components.empty() && get_root_component()->type() == GpuComponentType::Unfusable && component->type() != GpuComponentType::Output)
+ {
+ return false;
+ }
+ // Constraint 3.2
+ if(!_components.empty() && (component->type() != GpuComponentType::Simple && component->type() != GpuComponentType::Output))
+ {
+ return false;
+ }
+ // Constraint 3.3: Disallow multiple output components
+ if(!_components.empty() && get_last_component()->type() == GpuComponentType::Output && component->type() == GpuComponentType::Output)
+ {
+ return false;
+ }
+ // Constraint 4
+ if(component->type() != GpuComponentType::Unfusable && component->tensors().get_const_dst_tensors().size() != 1U)
+ {
+ return false;
+ }
+ // Constraint 5
+ if(!_components.empty() && !(get_root_component()->properties() == component->properties()))
+ {
+ return false;
+ }
+ // Constraint 7
+ if(!_components.empty())
+ {
+ const auto root_dst_tensors = get_root_component()->tensors().get_const_dst_tensors();
+ ARM_COMPUTE_ERROR_ON(root_dst_tensors.empty());
+ const auto first_dst_tensor = root_dst_tensors[0];
+ const auto dst_tensors = component->tensors().get_const_dst_tensors();
+ for(const auto &t : root_dst_tensors)
+ {
+ if(detail::have_different_dimensions(t->tensor_shape(), first_dst_tensor->tensor_shape(), 0))
+ {
+ return false;
+ }
+ }
+ for(const auto &t : dst_tensors)
+ {
+ if(detail::have_different_dimensions(t->tensor_shape(), first_dst_tensor->tensor_shape(), 0))
+ {
+ return false;
+ }
+ }
+ }
+ // Constraint 8
+ if(!_components.empty())
+ {
+ const auto root_dst_tensors = get_root_component()->tensors().get_const_dst_tensors();
+ ARM_COMPUTE_ERROR_ON(root_dst_tensors.empty());
+ const auto first_dst_tensor_layout = root_dst_tensors[0]->data_layout();
+ const auto dst_tensors = component->tensors().get_const_dst_tensors();
+ for(const auto &t : root_dst_tensors)
+ {
+ if(t->data_layout() != first_dst_tensor_layout)
+ {
+ return false;
+ }
+ }
+ for(const auto &t : dst_tensors)
+ {
+ if(t->data_layout() != first_dst_tensor_layout)
+ {
+ return false;
+ }
+ }
+ }
+ // Constraint 9
+ if(component->tensors().get_const_dst_tensors().size() >= max_dst_tensors)
+ {
+ return false;
+ }
+ // Constraint 9 corollary
+ if(component->type() == GpuComponentType::Output && _components.size() >= max_fused_components + max_dst_tensors)
+ {
+ return false;
+ }
+ _components.push_back(component);
+ return true;
+}
+
+std::vector<const ITensorInfo *> GpuKernelComponentGroup::get_src_tensors() const
+{
+ if(_components.empty())
+ {
+ return {};
+ }
+ auto src_tensors = _components[0]->tensors().get_const_src_tensors();
+ auto prev_dst_tensor = _components[0]->tensors().get_const_dst_tensors()[0]; // PRE: Only one dst tensor per component
+ for(unsigned int i = 1; i < _components.size(); ++i)
+ {
+ auto cur_src_tensors = _components[i]->tensors().get_const_src_tensors();
+ for(const auto src_tensor : cur_src_tensors)
+ {
+ if(src_tensor->id() == prev_dst_tensor->id())
+ {
+ continue; // Skip "intermediate" tensors. I.e. tensors that are used to link between two components
+ }
+ src_tensors.push_back(src_tensor);
+ }
+ prev_dst_tensor = _components[i]->tensors().get_const_dst_tensors()[0]; // PRE: Only one dst tensor per component
+ }
+
+ return src_tensors;
+}
+
+std::vector<const ITensorInfo *> GpuKernelComponentGroup::get_dst_tensors() const
+{
+ if(_components.empty())
+ {
+ return {};
+ }
+ const auto dst_tensor_ptrs = _components[_components.size() - 1]->tensors().get_const_dst_tensors();
+ std::vector<const ITensorInfo *> dst_tensors;
+ for(auto tensor_ptr : dst_tensor_ptrs)
+ {
+ dst_tensors.push_back(tensor_ptr);
+ }
+ return dst_tensors;
+}
+
+std::vector<const ITensorInfo *> GpuKernelComponentGroup::get_argument_tensors() const
+{
+ std::vector<const ITensorInfo *> arguments;
+ const auto src_tensors = get_src_tensors();
+ const auto dst_tensors = get_dst_tensors();
+ arguments.reserve(src_tensors.size() + dst_tensors.size());
+ arguments.insert(arguments.end(), src_tensors.begin(), src_tensors.end());
+ arguments.insert(arguments.end(), dst_tensors.begin(), dst_tensors.end());
+ return arguments;
+}
+
+GpuKernelComponentGroup::ComponentPtr GpuKernelComponentGroup::get_root_component() const
+{
+ if(empty())
+ {
+ return nullptr;
+ }
+ return _components[0];
+}
+
+GpuKernelComponentGroup::ComponentPtr GpuKernelComponentGroup::get_last_component() const
+{
+ if(empty())
+ {
+ return nullptr;
+ }
+ return _components[_components.size() - 1];
+}
+
+GpuKernelComponentGroup::ComponentPtr GpuKernelComponentGroup::get_previous_component(ComponentId id) const
+{
+ if(empty())
+ {
+ return nullptr;
+ }
+ // Get the index of the requested component
+ size_t ind = 0;
+ for(const auto c : _components)
+ {
+ if(c->id() == id)
+ {
+ break;
+ }
+ ind++;
+ }
+ if(ind == 0 || ind >= _components.size())
+ {
+ return nullptr;
+ }
+ return _components[ind - 1];
+}
+
+bool GpuKernelComponentGroup::is_intermediate_tensor(const ITensorInfo *tensor) const
+{
+ return is_tensor_in(tensor, get_interm_tensors());
+}
+
+size_t GpuKernelComponentGroup::size() const
+{
+ return _components.size();
+}
+bool GpuKernelComponentGroup::empty() const
+{
+ return _components.empty();
+}
+GpuKernelComponentGroup::ComponentPtr &GpuKernelComponentGroup::operator[](size_t index)
+{
+ return _components[index];
+}
+const GpuKernelComponentGroup::ComponentPtr &GpuKernelComponentGroup::operator[](size_t index) const
+{
+ return _components[index];
+}
+typename std::vector<GpuKernelComponentGroup::ComponentPtr>::iterator GpuKernelComponentGroup::begin()
+{
+ return _components.begin();
+}
+typename std::vector<GpuKernelComponentGroup::ComponentPtr>::iterator GpuKernelComponentGroup::end()
+{
+ return _components.end();
+}
+typename std::vector<GpuKernelComponentGroup::ComponentPtr>::const_iterator GpuKernelComponentGroup::begin() const
+{
+ return _components.cbegin();
+}
+typename std::vector<GpuKernelComponentGroup::ComponentPtr>::const_iterator GpuKernelComponentGroup::end() const
+{
+ return _components.cend();
+}
+typename std::vector<GpuKernelComponentGroup::ComponentPtr>::const_iterator GpuKernelComponentGroup::cbegin() const
+{
+ return _components.cbegin();
+}
+typename std::vector<GpuKernelComponentGroup::ComponentPtr>::const_iterator GpuKernelComponentGroup::cend() const
+{
+ return _components.cend();
+}
+
+std::vector<const ITensorInfo *> GpuKernelComponentGroup::get_interm_tensors() const
+{
+ std::vector<const ITensorInfo *> interm_tensors{};
+ for(unsigned int i = 0; i + 1 < _components.size(); ++i)
+ {
+ auto interm_tensor = _components[i]->tensors().get_const_dst_tensors()[0];
+ interm_tensors.push_back(interm_tensor); // PRE: Only one dst tensor per component
+ }
+
+ return interm_tensors;
+}
+
+bool GpuKernelComponentGroup::is_tensor_in(const ITensorInfo *tensor, const std::vector<const ITensorInfo *> tensors)
+{
+ for(auto t : tensors)
+ {
+ if(tensor->id() == t->id())
+ {
+ return true;
+ }
+ }
+ return false;
+}
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h
new file mode 100644
index 0000000000..4c9d940594
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELCOMPONENTGROUP
+#define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELCOMPONENTGROUP
+
+#include "components/Types.h"
+
+#include <cstdint>
+#include <cstdlib>
+#include <vector>
+
+namespace arm_compute
+{
+/** Forward declaration */
+class ITensorInfo;
+namespace experimental
+{
+namespace dynamic_fusion
+{
+class IGpuKernelComponent;
+/** A group of gpu kernel components to be fused together
+ * PRECONDITIONS:
+ * 1. Fusion is limited to a linear sequence of kernel components
+ * INVARIANTS:
+ * @note These preconditions and invariants are exactly the same as fusion constraints for kernel components
+ * 2. Max number of components that can be fused is @ref GpuKernelComponentGroup::max_fused_components (
+ * excluding any output or input (if any) components.
+ * The max number of output components is bounded by the maximum number of dst tensors allowed for a component / component group
+ * )
+ * 3. The fusion is subject to the pattern: (Complex + Simple * | Simple + Simple * | Un-fusable) + Output?
+ * 4. All components but unfusable, have exactly 1 dst tensor
+ * 5. All fused components share the same @ref IGpuKernelComponent::Properties ( @ref UnitWorkloadStage etc. )
+ * 6. All fused components share the same tunable parameters like tile size
+ * 7. All fused components share the same dst tensor shape
+ * 8. All fused components' tensors share the same @ref DataLayout
+ * 9. Maximum number of dst tensors allowed for a component (including unfusable) / component group is @ref GpuKernelComponentGroup::max_dst_tensors
+ * This has an impact on the total number of components supported, which = max_fused_components + max_dst_tensors
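+ *
+ * A minimal usage sketch (conv2d_comp and store_comp are illustrative component pointers):
+ * @code
+ * GpuKernelComponentGroup group{};
+ * group.add_component(conv2d_comp);    // Root component
+ * if(!group.add_component(store_comp)) // Returns false if any fusion constraint is violated
+ * {
+ *     // Start a new group for store_comp instead
+ * }
+ * @endcode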
+ */
+class GpuKernelComponentGroup
+{
+public:
+ using ComponentPtr = IGpuKernelComponent *;
+ /** Maximum number of components that can be fused into the same component group
+ */
+ static constexpr size_t max_fused_components = 64;
+ /** Maximum number of dst tensors allowed for a component / component group
+ */
+ static constexpr size_t max_dst_tensors = 8;
+
+public:
+ /** Default constructor */
+ GpuKernelComponentGroup() = default;
+ /** Allow instances of this class to be copy constructed */
+ GpuKernelComponentGroup(const GpuKernelComponentGroup &) = default;
+ /** Allow instances of this class to be copied */
+ GpuKernelComponentGroup &operator=(const GpuKernelComponentGroup &) = default;
+ /** Allow instances of this class to be move constructed */
+ GpuKernelComponentGroup(GpuKernelComponentGroup &&) = default;
+ /** Allow instances of this class to be moved */
+ GpuKernelComponentGroup &operator=(GpuKernelComponentGroup &&) = default;
+ /** Add a component pointer into the group
+ * If the operation fails, then no change is made to the group
+ *
+ * @param[in] component Pointer to the component to be added
+ *
+ * @return true If the operation is successful
+ * @return false If the operation fails
+ */
+ bool add_component(ComponentPtr component);
+ /** Get source tensors of this group */
+ std::vector<const ITensorInfo *> get_src_tensors() const;
+ /** Get destination tensors of this group */
+ std::vector<const ITensorInfo *> get_dst_tensors() const;
+ /** Get the tensor arguments of this group
+ * A tensor is an argument if it is a source or destination tensor to the group
+ */
+ std::vector<const ITensorInfo *> get_argument_tensors() const;
+ /** Get the root (first) component of this group */
+ ComponentPtr get_root_component() const;
+ /** Get the last component of this group */
+ ComponentPtr get_last_component() const;
+ /** Get the previous component to the component with id @p id
+ *
+ * @param[in] id Component id of the component whose previous component is of concern
+ *
+ * @return ComponentPtr Pointer to the previous component of the one identified by @p id
+ */
+ ComponentPtr get_previous_component(ComponentId id) const;
+ /** Check if a @ref ITensorInfo is an "intermediate" tensor of the group
+ *
+ * An intermediate tensor is any tensor that is not an argument.
+ *
+ * @param[in] tensor @ref ITensorInfo to be looked up
+ *
+ * @return true If @p tensor is an intermediate tensor
+ * @return false Otherwise
+ */
+ bool is_intermediate_tensor(const ITensorInfo *tensor) const;
+ /** Get the number of components within the group */
+ size_t size() const;
+ /** Check if the component group is empty */
+ bool empty() const;
+ ComponentPtr &operator[](size_t index);
+ const ComponentPtr &operator[](size_t index) const;
+ typename std::vector<ComponentPtr>::iterator begin();
+ typename std::vector<ComponentPtr>::iterator end();
+ typename std::vector<ComponentPtr>::const_iterator begin() const;
+ typename std::vector<ComponentPtr>::const_iterator end() const;
+ typename std::vector<ComponentPtr>::const_iterator cbegin() const;
+ typename std::vector<ComponentPtr>::const_iterator cend() const;
+
+private:
+ std::vector<const ITensorInfo *> get_interm_tensors() const;
+
+ static bool is_tensor_in(const ITensorInfo *tensor, const std::vector<const ITensorInfo *> tensors);
+
+ std::vector<ComponentPtr> _components{};
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELCOMPONENTGROUP */
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.cpp b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.cpp
new file mode 100644
index 0000000000..aac84b6c59
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.cpp
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "GpuKernelComponentStream.h"
+
+#include "src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h"
+#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+GpuKernelComponentStream::GpuKernelComponentStream(GpuComponentServices *services, const MemoryDescriptorMap &mem_map)
+ : _services{ services }, _component_groups{}, _mem_map{ mem_map }
+{
+}
+
+GpuWorkloadSourceCode GpuKernelComponentStream::write_workload_code()
+{
+ GpuWorkloadSourceCode source_code;
+ // Traverse through component groups and assemble workload together
+ for(auto && group : _component_groups)
+ {
+ // Write kernel code
+ GpuLogicalKernel logical_kernel(_services, group);
+ const GpuKernelSourceCode kernel_code = logical_kernel.write_kernel_code();
+ // The whole unit workload stage is determined by the root component
+ const auto unit_workload_stage = group.get_root_component()->properties().stage();
+ source_code.add_unit_workload(kernel_code, unit_workload_stage, _mem_map);
+ }
+ return source_code;
+}
+
+void GpuKernelComponentStream::new_component_group()
+{
+ _component_groups.emplace_back();
+}
+
+bool GpuKernelComponentStream::add_component(IGpuKernelComponent *component)
+{
+ ARM_COMPUTE_ERROR_ON(_component_groups.empty());
+ return _component_groups.back().add_component(component);
+}
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.h b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.h
new file mode 100644
index 0000000000..cbaa7c297b
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELCOMPONENTSTREAM
+#define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELCOMPONENTSTREAM
+
+#include "arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+class GpuComponentServices;
+class IGpuKernelComponent;
+
+/** A linear sequence of component groups serialized from the @ref GpuKernelComponentGraph
+ * Each component group in the stream denotes a complete kernel that may consist of multiple components
+ *
+ * The main purposes of this class are:
+ * - Facilitate component fusion algorithm by allowing insertions of new component groups into the stream
+ * - Invoke kernel writer and assemble the final @ref GpuWorkloadSourceCode
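+ *
+ * A rough usage sketch, mirroring how @ref GpuKernelComponentGraph::fuse uses the stream
+ * (services, mem_map and comp are illustrative):
+ * @code
+ * GpuKernelComponentStream stream{ &services, mem_map };
+ * stream.new_component_group();
+ * if(!stream.add_component(comp))
+ * {
+ *     stream.new_component_group(); // comp violated the current group's constraints: start a new group
+ *     stream.add_component(comp);
+ * }
+ * GpuWorkloadSourceCode source_code = stream.write_workload_code();
+ * @endcode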
+ */
+class GpuKernelComponentStream
+{
+public:
+ /** Constructor
+ *
+ * @param[in] services @ref GpuComponentServices to be used throughout the stream
+ * @param[in] mem_map @ref MemoryDescriptor map used to assemble the @ref GpuWorkloadSourceCode
+ */
+ GpuKernelComponentStream(GpuComponentServices *services, const MemoryDescriptorMap &mem_map);
+ /** Allow instances of this class to be copy constructed */
+ GpuKernelComponentStream(const GpuKernelComponentStream &stream) = default;
+ /** Allow instances of this class to be copied */
+ GpuKernelComponentStream &operator=(const GpuKernelComponentStream &stream) = default;
+ /** Allow instances of this class to be move constructed */
+ GpuKernelComponentStream(GpuKernelComponentStream &&stream) = default;
+ /** Allow instances of this class to be moved */
+ GpuKernelComponentStream &operator=(GpuKernelComponentStream &&stream) = default;
+ /** Generate and assemble @ref GpuWorkloadSourceCode from the stream */
+ GpuWorkloadSourceCode write_workload_code();
+ /** Insert a new component group in the stream.
+ * Subsequent components are added to this group until the end of the stream or until new_component_group() is called again
+ */
+ void new_component_group();
+ /** Add a component to the previously created component group
+ * Throw an error if no component group is present in the stream
+ *
+ * @param[in] component Component to be inserted
+ *
+ * @return true If the operation is successful
+ * @return false Otherwise
+ */
+ bool add_component(IGpuKernelComponent *component);
+
+private:
+ GpuComponentServices *_services;
+ std::vector<GpuKernelComponentGroup> _component_groups{};
+ MemoryDescriptorMap _mem_map{};
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELCOMPONENTSTREAM */
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h b/src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h
new file mode 100644
index 0000000000..7479328d7b
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELSOURCECODE
+#define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELSOURCECODE
+
+#include "arm_compute/core/CL/CLCompileContext.h"
+#include "arm_compute/core/Window.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
+
+#include <map>
+#include <string>
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** The argument list of a @ref GpuKernelSourceCode */
+using GpuKernelArgumentList = std::map<ITensorInfo::Id, GpuKernelArgument>;
+
+/** Container of kernel code to be compiled and run in a @ref GpuUnitWorkload
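+ *
+ * The setters follow a fluent style and can be chained when assembling the kernel code
+ * (the string and option values here are purely illustrative):
+ * @code
+ * GpuKernelSourceCode code{};
+ * code.name("fused_kernel").code(kernel_src).config_id(cfg_id).build_options(build_opts).window(win);
+ * @endcode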
+ */
+class GpuKernelSourceCode
+{
+public:
+ /** Set kernel name */
+ GpuKernelSourceCode &name(const std::string &n)
+ {
+ _name = n;
+ return *this;
+ }
+ /** Set kernel code */
+ GpuKernelSourceCode &code(const std::string &c)
+ {
+ _code = c;
+ return *this;
+ }
+ /** Set kernel config id string */
+ GpuKernelSourceCode &config_id(const std::string &c_id)
+ {
+ _config_id = c_id;
+ return *this;
+ }
+ /** Set kernel build options */
+ GpuKernelSourceCode &build_options(const CLBuildOptions &b_options)
+ {
+ _build_options = b_options;
+ return *this;
+ }
+ /** Set kernel execution window */
+ GpuKernelSourceCode &window(const Window &window)
+ {
+ _window = window;
+ return *this;
+ }
+ /** Set kernel argument list */
+ GpuKernelSourceCode &arguments(const GpuKernelArgumentList &arguments)
+ {
+ _arguments = arguments;
+ return *this;
+ }
+ /** Get kernel name */
+ std::string name() const
+ {
+ return _name;
+ }
+ /** Get kernel code */
+ std::string code() const
+ {
+ return _code;
+ }
+ /** Get kernel config id string */
+ std::string config_id() const
+ {
+ return _config_id;
+ }
+ /** Get kernel build options */
+ const CLBuildOptions &build_options() const
+ {
+ return _build_options;
+ }
+ /** Get kernel execution window */
+ const Window &window() const
+ {
+ return _window;
+ }
+ /** Get kernel argument list */
+ const GpuKernelArgumentList &arguments() const
+ {
+ return _arguments;
+ }
+
+private:
+ std::string _name{};
+ std::string _code{};
+ std::string _config_id{};
+ CLBuildOptions _build_options{};
+ Window _window{};
+ GpuKernelArgumentList _arguments{};
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELSOURCECODE */
diff --git a/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp b/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp
new file mode 100644
index 0000000000..7746f8bbf3
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "GpuLogicalKernel.h"
+
+#include "arm_compute/core/experimental/Types.h"
+
+#include "src/dynamic_fusion/sketch/ArgumentPack.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuComponentServices.h"
+#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
+#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h"
+#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+GpuLogicalKernel::GpuLogicalKernel(GpuComponentServices *services, const GpuKernelComponentGroup &components)
+ : _services{ services }, _comp_group{ components }, _store_components{}
+{
+ add_load_store();
+}
+
+GpuKernelSourceCode GpuLogicalKernel::write_kernel_code()
+{
+ GpuKernelSourceCode code;
+ ClTemplateWriter writer{ _comp_group };
+
+ code.name(writer.get_name());
+ code.code(writer.get_code());
+ code.arguments(writer.get_tensors());
+ code.build_options(writer.get_build_options());
+ code.config_id(writer.get_config_id());
+ code.window(writer.get_window());
+
+ return code;
+}
+
+void GpuLogicalKernel::add_load_store()
+{
+ const auto dst_tensors = _comp_group.get_dst_tensors();
+ // Each dst tensor from the component group requires exactly one store component
+ for(const auto &dst_tensor : dst_tensors)
+ {
+ ArgumentPack<ITensorInfo> tensors;
+ // Pass same destination tensor to both source and destination of the store component
+ // In other words, the addition of a store component does not create a new dst tensor
+        // This way we avoid the issue where the dst tensor of the component group differs from that of the logical kernel
+        // This may seem to violate the acyclicity of the component graph, but it is fine because by the time the
+        // logical kernel is constructed, a graph representation of the components is no longer needed
+        // (the graph has already been serialized)
+ tensors.add_const_tensor(ACL_SRC_0, dst_tensor);
+ tensors.add_const_tensor(ACL_DST_0, dst_tensor);
+
+ auto store = _services->component_factory().create<ClComponentStore>(
+            _comp_group.get_root_component()->properties(), // Store components share the same properties as the root component
+ tensors);
+ _store_components.push_back(std::move(store));
+ auto success = _comp_group.add_component(_store_components.back().get());
+        ARM_COMPUTE_ERROR_ON(!success); // Any load/store insertion is guaranteed to succeed
+ }
+}
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.h b/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.h
new file mode 100644
index 0000000000..4ce4443f60
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_GPULOGICALKERNEL
+#define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPULOGICALKERNEL
+
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h"
+
+#include <memory>
+#include <vector>
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** Forward declaration */
+class GpuComponentServices;
+class IGpuKernelComponent;
+
+/** A wrapper-processor of a @ref GpuKernelComponentGroup.
+ * It adds the load (if any) and store components to the component group.
+ * The resulting @ref GpuLogicalKernel represents a complete kernel and can invoke any kernel writer to generate the full kernel code
+ */
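+// Usage sketch (illustrative only; services and group are hypothetical variables):
+//   GpuLogicalKernel logical_kernel(&services, group);
+//   GpuKernelSourceCode code = logical_kernel.write_kernel_code();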
+class GpuLogicalKernel
+{
+public:
+ /** Constructor
+ *
+ * @param[in] services @ref GpuComponentServices to be used
+ * @param[in] components Component group from which this logical kernel is initialized
+ */
+ explicit GpuLogicalKernel(GpuComponentServices *services, const GpuKernelComponentGroup &components);
+ /** Allow instances of this class to be copy constructed */
+ GpuLogicalKernel(const GpuLogicalKernel &) = default;
+ /** Allow instances of this class to be copied */
+ GpuLogicalKernel &operator=(const GpuLogicalKernel &) = default;
+ /** Allow instances of this class to be move constructed */
+ GpuLogicalKernel(GpuLogicalKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ GpuLogicalKernel &operator=(GpuLogicalKernel &&) = default;
+ /** Generate a @ref GpuKernelSourceCode */
+ GpuKernelSourceCode write_kernel_code();
+
+private:
+ void add_load_store();
+
+ GpuComponentServices *_services;
+ GpuKernelComponentGroup _comp_group{};
+ std::vector<std::unique_ptr<IGpuKernelComponent>> _store_components{};
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_GPULOGICALKERNEL */
diff --git a/src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.cpp b/src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.cpp
new file mode 100644
index 0000000000..e8ef835405
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.cpp
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.h"
+
+#include "arm_compute/core/Validate.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+namespace
+{
+std::vector<DependencyGraph::TensorId> get_tensor_ids(const std::vector<const ITensorInfo *> &tensors)
+{
+ std::vector<DependencyGraph::TensorId> tensor_ids{};
+ std::transform(
+ std::begin(tensors), std::end(tensors),
+ std::back_inserter(tensor_ids),
+ [](const auto & t)
+ {
+ return t->id();
+ });
+ return tensor_ids;
+}
+
+} // namespace
+
+Operator::Operator(OperatorId id, GpuOperatorType operator_type, const ArgumentPack<ITensorInfo> &tensors)
+ : _id{ id }, _operator_type{ operator_type }, _tensors{ tensors }
+{
+}
+
+OperatorId Operator::id() const
+{
+ return _id;
+}
+
+GpuOperatorType Operator::operator_type() const
+{
+ return _operator_type;
+}
+
+ArgumentPack<ITensorInfo> Operator::tensors() const
+{
+ return _tensors;
+}
+
+bool GpuOperatorGroup::try_add_operator(const Operator &op) const
+{
+ const auto src_tensor_ids = get_tensor_ids(op.tensors().get_const_src_tensors());
+ const auto dst_tensor_ids = get_tensor_ids(op.tensors().get_const_dst_tensors());
+    // Constraint 1: Fusion is limited to a linear sequence of operators
+ if(!_graph.try_add_operator_as_linear(op.id(), src_tensor_ids, dst_tensor_ids))
+ {
+ return false;
+ }
+    // Constraint 2: The number of fused operators must not exceed max_fused_operators
+ if(_operators.size() >= max_fused_operators)
+ {
+ return false;
+ }
+    // Constraint 3.1: An Unfusable root operator cannot be fused with any further operators
+ if(_operators.size() > 0 && get_root_operator()->operator_type() == GpuOperatorType::Unfusable)
+ {
+ return false;
+ }
+    // Constraint 3.2: Only Simple operators can be fused onto an existing root (pattern: Complex + Simple* | Simple + Simple*)
+ if(_operators.size() > 0 && (op.operator_type() != GpuOperatorType::Simple))
+ {
+ return false;
+ }
+    // Constraint 4: All operators except Unfusable ones must have exactly 1 dst tensor
+ if(op.operator_type() != GpuOperatorType::Unfusable && op.tensors().get_const_dst_tensors().size() != 1U)
+ {
+ return false;
+ }
+    // Constraint 5: All fused operators must share the same dst tensor shape
+ if(_operators.size() > 0)
+ {
+ const auto root_dst_tensors = get_root_operator()->tensors().get_const_dst_tensors();
+ ARM_COMPUTE_ERROR_ON(root_dst_tensors.empty());
+ const auto first_dst_tensor = root_dst_tensors[0];
+ const auto dst_tensors = op.tensors().get_const_dst_tensors();
+ for(const auto &t : root_dst_tensors)
+ {
+ if(detail::have_different_dimensions(t->tensor_shape(), first_dst_tensor->tensor_shape(), 0))
+ {
+ return false;
+ }
+ }
+ for(const auto &t : dst_tensors)
+ {
+ if(detail::have_different_dimensions(t->tensor_shape(), first_dst_tensor->tensor_shape(), 0))
+ {
+ return false;
+ }
+ }
+ }
+    // Constraint 6: All fused operators' dst tensors must share the same data layout
+ if(_operators.size() > 0)
+ {
+ const auto root_dst_tensors = get_root_operator()->tensors().get_const_dst_tensors();
+ ARM_COMPUTE_ERROR_ON(root_dst_tensors.empty());
+ const auto first_dst_tensor_layout = root_dst_tensors[0]->data_layout();
+ const auto dst_tensors = op.tensors().get_const_dst_tensors();
+ for(const auto &t : root_dst_tensors)
+ {
+ if(t->data_layout() != first_dst_tensor_layout)
+ {
+ return false;
+ }
+ }
+ for(const auto &t : dst_tensors)
+ {
+ if(t->data_layout() != first_dst_tensor_layout)
+ {
+ return false;
+ }
+ }
+ }
+ return true;
+}
+void GpuOperatorGroup::add_operator(const Operator &op)
+{
+ ARM_COMPUTE_ERROR_ON(!try_add_operator(op));
+ const auto src_tensor_ids = get_tensor_ids(op.tensors().get_const_src_tensors());
+ const auto dst_tensor_ids = get_tensor_ids(op.tensors().get_const_dst_tensors());
+ _graph.add_operator_as_linear(op.id(), src_tensor_ids, dst_tensor_ids);
+ _operators[op.id()] = op;
+}
+Operator GpuOperatorGroup::new_operator(const GpuOperatorType &operator_type, const ArgumentPack<ITensorInfo> &tensors) const
+{
+ auto new_id = static_cast<OperatorId>(_operators.size());
+ return Operator{ new_id, operator_type, tensors };
+}
+const Operator *GpuOperatorGroup::get_root_operator() const
+{
+ const auto roots = _graph.get_root_ops();
+ ARM_COMPUTE_ERROR_ON(roots.size() > 1);
+ if(roots.empty())
+ {
+ return nullptr;
+ }
+ return &_operators.at(roots[0]);
+}
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.h b/src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.h
new file mode 100644
index 0000000000..35abe6c543
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUOPERATORGROUP
+#define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUOPERATORGROUP
+
+#include "arm_compute/core/ITensorInfo.h"
+#include "src/dynamic_fusion/sketch/ArgumentPack.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuOperatorProperties.h"
+#include "src/dynamic_fusion/sketch/utils/DependencyGraph.h"
+#include <map>
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+using OperatorId = DependencyGraph::OperatorId;
+
+/** An operator for the sole purpose of validating fusion
+ */
+class Operator
+{
+public:
+ /** Default constructor */
+ Operator() = default;
+ /** Get Operator Id */
+ OperatorId id() const;
+ /** Get operator type */
+ GpuOperatorType operator_type() const;
+ /** Get tensor arguments */
+ ArgumentPack<ITensorInfo> tensors() const;
+ friend class GpuOperatorGroup;
+
+private:
+ Operator(OperatorId id, GpuOperatorType operator_type, const ArgumentPack<ITensorInfo> &tensors);
+ OperatorId _id{};
+ GpuOperatorType _operator_type{};
+ ArgumentPack<ITensorInfo> _tensors{};
+};
+
+/** A linear sequence of operators to be fused in a workload
+ * For the time being, this class is only used for validating operator fusion
+ * INVARIANTS:
+ * @note These invariants are exactly the same as operator fusion constraints
+ * 1. Fusion is limited to a linear sequence of operators
+ * 2. Max number of operators that can be fused is @ref GpuOperatorGroup::max_fused_operators
+ * 3. The fusion is subject to the pattern: Complex + Simple * | Simple + Simple * | Un-fusable
+ * 4. All operators, except unfusable ones, have exactly 1 dst tensor
+ * 5. All fused operators share the same dst tensor shape
+ * 6. All fused operators' tensors share the same @ref DataLayout
+ */
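+// Usage sketch (illustrative only; tensors is a hypothetical ArgumentPack<ITensorInfo>):
+//   GpuOperatorGroup group;
+//   const Operator op = group.new_operator(GpuOperatorType::Complex, tensors);
+//   if(group.try_add_operator(op))
+//   {
+//       group.add_operator(op);
+//   }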
+class GpuOperatorGroup
+{
+public:
+ static constexpr size_t max_fused_operators = 32;
+ /** Try adding (without actually adding) an operator to the group
+ *
+ * @param[in] op Operator to be added
+ *
+ * @return true If @p op can be added while maintaining the invariants
+ * @return false Otherwise
+ */
+ bool try_add_operator(const Operator &op) const;
+ /** Add an operator to the group
+ *
+ * @param[in] op Operator to be added
+ */
+ void add_operator(const Operator &op);
+ /** Create a new operator
+ *
+ * @param[in] operator_type @ref GpuOperatorType of the new operator
+ * @param[in] tensors Tensor arguments to the new operator
+ *
+ * @return Operator
+ */
+ Operator new_operator(const GpuOperatorType &operator_type, const ArgumentPack<ITensorInfo> &tensors) const;
+ /** Get the "root operator" of the group, which is the first operator in a linear sequence
+ * @return const Operator* Pointer to the root operator
+ */
+ const Operator *get_root_operator() const;
+
+private:
+ DependencyGraph _graph{};
+ std::map<OperatorId, Operator> _operators{};
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUOPERATORGROUP */
diff --git a/src/dynamic_fusion/sketch/gpu/GpuOperatorProperties.h b/src/dynamic_fusion/sketch/gpu/GpuOperatorProperties.h
new file mode 100644
index 0000000000..c77697c343
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/GpuOperatorProperties.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUOPERATORPROPERTIES
+#define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUOPERATORPROPERTIES
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** Contain properties common to all operator types */
+
+/** Operator type in the context of fusion
+ */
+enum class GpuOperatorType
+{
+ /** Simple operators are operators that:
+     *  1. Have a 1-to-1 mapping between the input elements and output elements, e.g. elementwise operations
+ * 2. Have exactly 1 output
+ */
+ Simple,
+ /** Complex operators are operators that are not simple but are still fusable with simple ones
+ */
+ Complex,
+ /** Unfusable operators are operators that cannot be fused with any other types of operators
+ */
+ Unfusable
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUOPERATORPROPERTIES */
diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp b/src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp
new file mode 100644
index 0000000000..623bf351f8
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadContext.h"
+#include "arm_compute/core/CL/CLCompileContext.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+GpuWorkloadContext::GpuWorkloadContext(CLCompileContext *cl_compile_ctx)
+ : _gpu_language{ GpuLanguage::OpenCL }, _cl_compile_ctx{ cl_compile_ctx }
+{
+}
+
+GpuTarget GpuWorkloadContext::gpu_target() const
+{
+ return _cl_compile_ctx->get_gpu_target();
+}
+
+GpuLanguage GpuWorkloadContext::gpu_language() const
+{
+ return _gpu_language;
+}
+
+const CLCompileContext *GpuWorkloadContext::cl_compile_context() const
+{
+ return _cl_compile_ctx;
+}
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp
new file mode 100644
index 0000000000..ce7cf1e908
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+GpuWorkloadSketch::GpuWorkloadSketch(Context *context)
+ : _impl{ std::make_unique<Implementation>(context) }
+{
+}
+GpuWorkloadSketch::~GpuWorkloadSketch()
+{
+}
+
+const GpuWorkloadSketch::Context *GpuWorkloadSketch::gpu_context() const
+{
+ return _impl->context();
+}
+
+TensorInfo GpuWorkloadSketch::create_tensor_info(const ITensorInfo &tensor_info)
+{
+ TensorInfo tensor{ tensor_info };
+ tensor.set_id(allocate_new_tensor_id());
+ return tensor;
+}
+
+TensorInfo GpuWorkloadSketch::create_tensor_info()
+{
+ TensorInfo tensor{};
+ tensor.set_id(allocate_new_tensor_id());
+ return tensor;
+}
+
+ITensorInfo::Id GpuWorkloadSketch::allocate_new_tensor_id()
+{
+ return _impl->allocate_new_tensor_id();
+}
+
+GpuWorkloadSketch::Implementation &GpuWorkloadSketch::implementation()
+{
+ return *_impl;
+}
+const GpuWorkloadSketch::Implementation &GpuWorkloadSketch::implementation() const
+{
+ return *_impl;
+}
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h
new file mode 100644
index 0000000000..3997395c98
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSKETCHIMPL
+#define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSKETCHIMPL
+
+#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuComponentServices.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** Internal implementation of @ref GpuWorkloadSketch */
+class GpuWorkloadSketch::Implementation
+{
+public:
+ /** Constructor
+ *
+ * @param[in] context global workload creation context
+ */
+ explicit Implementation(
+ Context *context)
+ : _context{ context },
+ _comp_services{},
+ _component_graph{ &_comp_services },
+ _operator_group{}
+ {
+ }
+ /** Prevent instances of this class from being copy constructed */
+ Implementation(const Implementation &impl) = delete;
+ /** Prevent instances of this class from being copied */
+ Implementation &operator=(const Implementation &impl) = delete;
+ /** Allow instances of this class to be move constructed */
+ Implementation(Implementation &&impl) = default;
+ /** Allow instances of this class to be moved */
+ Implementation &operator=(Implementation &&impl) = default;
+ /** Get workload context */
+ const Context *context() const
+ {
+ return _context;
+ }
+ /** Get component graph */
+ const GpuKernelComponentGraph &component_graph() const
+ {
+ return _component_graph;
+ }
+ /** Get component graph */
+ GpuKernelComponentGraph &component_graph()
+ {
+ return _component_graph;
+ }
+ /** Get operator group */
+ const GpuOperatorGroup &operator_group() const
+ {
+ return _operator_group;
+ }
+ /** Get operator group */
+ GpuOperatorGroup &operator_group()
+ {
+ return _operator_group;
+ }
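+    /** Allocate a new tensor id that is unique within this sketch */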
+ ITensorInfo::Id allocate_new_tensor_id()
+ {
+ return ++_next_id;
+ }
+ /** Generate @ref GpuWorkloadSourceCode from the workload sketch
+     * @note The sketch must be valid. Any error encountered while building the code is thrown as an exception.
+ *
+ * @return GpuWorkloadSourceCode The generated workload code
+ */
+ GpuWorkloadSourceCode generate_source_code() const
+ {
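+        // Fuse the kernel component graph into component groups, then write the source code for the entire workload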
+ return component_graph().fuse().write_workload_code();
+ }
+
+private:
+ Context *_context;
+ GpuComponentServices _comp_services;
+ GpuKernelComponentGraph _component_graph;
+ GpuOperatorGroup _operator_group;
+ ITensorInfo::Id _next_id{ ITensorInfo::invalid_tensor_id };
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSKETCHIMPL */
diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h
new file mode 100644
index 0000000000..2375f5c6c6
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h
@@ -0,0 +1,252 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSOURCECODE
+#define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSOURCECODE
+
+#include "arm_compute/core/experimental/Types.h"
+#include "arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** Uniquely identifies a @ref GpuUnitWorkload within a @ref GpuWorkloadSourceCode */
+using UnitWorkloadId = int32_t;
+
+/** Describes all the info related to a workload argument, so that it can:
+ *  - be used by the runtime to configure the gpu kernel arguments
+ *  - be used by memory managers to allocate the required memory
+ */
+class GpuWorkloadArgument
+{
+public:
+ /** Default constructor */
+ GpuWorkloadArgument() = default;
+ /** Constructor
+ *
+ * @param[in] tensor_info @ref ITensorInfo of the workload argument
+ * @param[in] mem_desc @ref MemoryDescriptor of the workload argument
+ * @param[in] kernel_arg_info @ref GpuKernelArgumentInfo of the workload argument
+ */
+ GpuWorkloadArgument(const ITensorInfo &tensor_info,
+ const MemoryDescriptor &mem_desc,
+ const GpuKernelArgumentInfo &kernel_arg_info)
+ : _tensor_info{ tensor_info },
+ _mem_desc{ mem_desc },
+ _kernel_arg_info{ kernel_arg_info }
+ {
+ }
+ /** Get tensor id within workload */
+ ITensorInfo::Id id() const
+ {
+ return _tensor_info.id();
+ }
+ /** Get @ref ITensorInfo of the argument */
+ ITensorInfo *tensor_info()
+ {
+ return &_tensor_info;
+ }
+ /** Get @ref ITensorInfo of the argument */
+ const ITensorInfo *tensor_info() const
+ {
+ return &_tensor_info;
+ }
+ /** Get @ref MemoryDescriptor of the argument */
+ MemoryDescriptor *memory_descriptor()
+ {
+ return &_mem_desc;
+ }
+ /** Get @ref MemoryDescriptor of the argument */
+ const MemoryDescriptor *memory_descriptor() const
+ {
+ return &_mem_desc;
+ }
+ /** Get @ref GpuKernelArgumentInfo of the argument */
+ GpuKernelArgumentInfo *kernel_argument_info()
+ {
+ return &_kernel_arg_info;
+ }
+ /** Get @ref GpuKernelArgumentInfo of the argument */
+ const GpuKernelArgumentInfo *kernel_argument_info() const
+ {
+ return &_kernel_arg_info;
+ }
+ /** Check if the workload argument has valid id
+ *
+ * @return true If has valid id
+ * @return false Otherwise
+ */
+ bool has_valid_id() const
+ {
+ return _tensor_info.has_valid_id();
+ }
+
+private:
+ TensorInfo _tensor_info{};
+ MemoryDescriptor _mem_desc{};
+ GpuKernelArgumentInfo _kernel_arg_info{};
+};
+
+/** Describes when a unit workload is run.
+ */
+struct UnitWorkloadStage
+{
+ enum class Stage
+ {
+ Prepare, /**< Only run once at the beginning. */
+ Run, /**< Run every time after the first time. */
+ };
+ Stage stage{ Stage::Run };
+};
+
+inline bool operator==(const UnitWorkloadStage &stage0, const UnitWorkloadStage &stage1)
+{
+ return stage0.stage == stage1.stage;
+}
+
+/** The atomic unit in a Gpu workload. It contains exactly one kernel to run.
+ */
+class GpuUnitWorkload
+{
+public:
+ /** Default constructor */
+ GpuUnitWorkload() = default;
+ /** Constructor
+ *
+ * @param[in] id Id that uniquely identifies this unit workload in a workload
+ * @param[in] kernel_code @ref GpuKernelSourceCode contained within
+ * @param[in] stage Stage of the unit workload
+ */
+ GpuUnitWorkload(UnitWorkloadId id, const GpuKernelSourceCode &kernel_code, const UnitWorkloadStage &stage)
+ : _id{ id }, _kernel_code{ kernel_code }, _stage{ stage }
+ {
+ }
+ /** Get the id of the unit workload */
+ UnitWorkloadId id() const
+ {
+ return _id;
+ }
+ /** Get reference to the underlying @ref GpuKernelSourceCode */
+ const GpuKernelSourceCode &code() const
+ {
+ return _kernel_code;
+ }
+ /** Get the stage of the unit workload */
+ UnitWorkloadStage stage() const
+ {
+ return _stage;
+ }
+
+private:
+ UnitWorkloadId _id{};
+ GpuKernelSourceCode _kernel_code{};
+ UnitWorkloadStage _stage{};
+};
+
+/** Hold the generated kernel source code and other information required to compile and run the workload.
+ */
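+// Usage sketch (illustrative only; kernel_code and mem_map are hypothetical variables):
+//   GpuWorkloadSourceCode workload_code;
+//   const auto id = workload_code.add_unit_workload(kernel_code, UnitWorkloadStage{ UnitWorkloadStage::Stage::Run }, mem_map);
+//   const GpuUnitWorkload &uwk = workload_code.query_unit_workload(id);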
+class GpuWorkloadSourceCode
+{
+public:
+ /** Default constructor */
+ GpuWorkloadSourceCode() = default;
+ /** Add a unit workload to the workload code
+ *
+ * @param[in] kernel_code @ref GpuKernelSourceCode to be contained within the unit workload
+ * @param[in] stage Stage of the unit workload
+ * @param[in] mem_map @ref MemoryDescriptor map for all tensors within the unit workload
+ *
+ * @return UnitWorkloadId Allocated unit workload id
+ */
+ UnitWorkloadId add_unit_workload(const GpuKernelSourceCode &kernel_code, const UnitWorkloadStage &stage, const MemoryDescriptorMap &mem_map)
+ {
+        // Use the number of unit workloads added so far as the new Id
+ const auto uwk_id = static_cast<UnitWorkloadId>(_unit_workloads.size());
+ const auto unit_work = GpuUnitWorkload(uwk_id, kernel_code, stage);
+ _unit_workloads.push_back(unit_work);
+ // Assemble kernel argument with memory descriptor to form workload argument
+ for(const auto &id_arg : kernel_code.arguments())
+ {
+ const auto arg_id = id_arg.first;
+ const auto arg = id_arg.second;
+ _workload_arguments[arg_id] = GpuWorkloadArgument{ *arg.tensor_info(), mem_map.at(arg_id), *arg.kernel_argument_info() };
+ if(_tensor_uwork_map.find(arg_id) == _tensor_uwork_map.end())
+ {
+ _tensor_uwork_map[arg_id] = std::set<UnitWorkloadId>();
+ }
+ _tensor_uwork_map[arg_id].insert(uwk_id);
+ }
+ return uwk_id;
+ }
+ /** Get a unit workload from its id */
+ const GpuUnitWorkload &query_unit_workload(UnitWorkloadId id) const
+ {
+ ARM_COMPUTE_ERROR_ON(id < 0);
+ return _unit_workloads.at(id);
+ }
+ /** Get all unit workloads sorted in topological order */
+ std::vector<UnitWorkloadId> unit_workloads() const
+ {
+ std::vector<UnitWorkloadId> ids{};
+
+ for(const auto &uwk : _unit_workloads)
+ {
+ ids.push_back(uwk.id());
+ }
+ return ids;
+ }
+ /** Get a @ref GpuWorkloadArgument from its associated tensor id */
+ const GpuWorkloadArgument *query_tensor(ITensorInfo::Id t_id) const
+ {
+ return &_workload_arguments.at(t_id);
+ }
+ /** Get all tensors in the entire workload */
+ std::vector<ITensorInfo::Id> tensors() const
+ {
+ std::vector<ITensorInfo::Id> ids{};
+ for(const auto &id_tensor : _workload_arguments)
+ {
+ ids.push_back(id_tensor.first);
+ }
+ return ids;
+ }
+ /** Get all unit workloads connected to the tensor with @p t_id */
+ std::vector<UnitWorkloadId> get_unit_workloads_from_tensor(ITensorInfo::Id t_id) const
+ {
+ const auto unit_work_set = _tensor_uwork_map.at(t_id);
+ return std::vector<UnitWorkloadId>(unit_work_set.begin(), unit_work_set.end());
+ }
+
+private:
+ std::vector<GpuUnitWorkload> _unit_workloads{};
+ std::map<ITensorInfo::Id, GpuWorkloadArgument> _workload_arguments{};
+ std::map<ITensorInfo::Id, std::set<UnitWorkloadId>> _tensor_uwork_map{};
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSOURCECODE */
diff --git a/src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h b/src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h
new file mode 100644
index 0000000000..ae67790b4b
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_IGPUKERNELWRITER
+#define SRC_DYNAMIC_FUSION_SKETCH_GPU_IGPUKERNELWRITER
+
+#include "arm_compute/core/CL/CLCompileContext.h"
+#include "arm_compute/core/Window.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
+
+#include <map>
+#include <string>
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** An interface that can write a gpu kernel
+ */
+class IGpuKernelWriter
+{
+public:
+ /** Destructor */
+ virtual ~IGpuKernelWriter()
+ {
+ }
+ /** Generate kernel name */
+ virtual std::string get_name() = 0;
+ /** Generate kernel code */
+ virtual std::string get_code() = 0;
+ /** Generate build options */
+ virtual CLBuildOptions get_build_options() = 0;
+ /** Generate config id string of the entire kernel. This is used for tuning */
+ virtual std::string get_config_id() = 0;
+ /** Generate execution window */
+ virtual Window get_window() const = 0;
+    /** Get the kernel argument list of the kernel */
+ virtual std::map<ITensorInfo::Id, GpuKernelArgument> get_tensors() = 0;
+};
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_IGPUKERNELWRITER */
diff --git a/src/dynamic_fusion/sketch/gpu/components/GpuKernelComponentFactory.h b/src/dynamic_fusion/sketch/gpu/components/GpuKernelComponentFactory.h
new file mode 100644
index 0000000000..f7f0029618
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/components/GpuKernelComponentFactory.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_GPUKERNELCOMPONENTFACTORY
+#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_GPUKERNELCOMPONENTFACTORY
+
+#include "Types.h"
+#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
+#include <memory>
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** Factory class that creates new instances of @ref IGpuKernelComponent by assigning new component ids
+ */
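+// Usage sketch (illustrative only; properties and tensors are hypothetical arguments, and ClComponentStore is assumed to be included):
+//   GpuKernelComponentFactory factory;
+//   std::unique_ptr<IGpuKernelComponent> store = factory.create<ClComponentStore>(properties, tensors);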
+class GpuKernelComponentFactory
+{
+public:
+ /** Create a new kernel component
+ *
+ * @tparam T Any polymorphic type descending from @ref IGpuKernelComponent
+ * @tparam Args Argument types to construct the kernel component
+ *
+ * @param[in] args Arguments to construct the kernel component
+ *
+ * @return std::unique_ptr<IGpuKernelComponent>
+ */
+ template <typename T, typename... Args>
+ std::unique_ptr<IGpuKernelComponent> create(Args &&... args)
+ {
+ return std::make_unique<T>(_count++, std::forward<Args>(args)...);
+ }
+
+private:
+ ComponentId _count{ 0 };
+};
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_GPUKERNELCOMPONENTFACTORY */
diff --git a/src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h b/src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h
new file mode 100644
index 0000000000..8bb19155a2
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_IGPUKERNELCOMPONENT
+#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_IGPUKERNELCOMPONENT
+
+#include "Types.h"
+
+#include "src/dynamic_fusion/sketch/ArgumentPack.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** Properties common to all kernel component types */
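+// e.g. (illustrative): KernelProperties{}.stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run })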
+class KernelProperties
+{
+public:
+ KernelProperties &stage(const UnitWorkloadStage &stage)
+ {
+ _stage = stage;
+ return *this;
+ }
+ UnitWorkloadStage stage() const
+ {
+ return _stage;
+ }
+
+private:
+ UnitWorkloadStage _stage{};
+};
+
+inline bool operator==(const KernelProperties &config0, const KernelProperties &config1)
+{
+ return config0.stage() == config1.stage();
+}
+
+/** Forward declaration */
+class IGpuTemplateComponentWriter;
+
+/** An abstract interface of a kernel component. It allows the component graph to manipulate components for purposes such as fusion
+ */
+class IGpuKernelComponent
+{
+public:
+ using Properties = KernelProperties;
+
+public:
+ /** Constructor
+ *
+ * @param[in] id Component id
+ * @param[in] properties Kernel component properties
+ * @param[in] tensors Tensor arguments to the components
+ */
+ IGpuKernelComponent(
+ ComponentId id,
+ const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors)
+ : _id{ id },
+ _properties{ properties },
+ _tensors{ tensors }
+ {
+ }
+ /** Destructor */
+ virtual ~IGpuKernelComponent()
+ {
+ }
+ /** Get component id */
+ ComponentId id() const
+ {
+ return _id;
+ }
+ /** Get tensor arguments */
+ ArgumentPack<ITensorInfo> tensors() const
+ {
+ return _tensors;
+ }
+ /** Get properties */
+ Properties properties() const
+ {
+ return _properties;
+ }
+ /** Get template writer for the component */
+ virtual const IGpuTemplateComponentWriter *template_writer() const = 0;
+ /** Get component type */
+ virtual GpuComponentType type() const = 0;
+
+private:
+ ComponentId _id{ -1 };
+ Properties _properties{};
+ ArgumentPack<ITensorInfo> _tensors{};
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_IGPUKERNELCOMPONENT */
diff --git a/src/dynamic_fusion/sketch/gpu/components/Types.h b/src/dynamic_fusion/sketch/gpu/components/Types.h
new file mode 100644
index 0000000000..54b3a69057
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/components/Types.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_TYPES
+#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_TYPES
+
+#include <cstdint>
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** Uniquely identifies a kernel component within a workload
+ */
+using ComponentId = int32_t;
+
+/** Component type in the context of fusion
+ * Its main purpose is to inform the optimizer how to perform fusion.
+ */
+enum class GpuComponentType
+{
+    Complex,   /**< Complex component that is not Simple but can still be fused with Simple ones, e.g. direct convolution */
+    Simple,    /**< Simple component with a 1-to-1 mapping between input and output elements, e.g. elementwise */
+    Unfusable, /**< Component that cannot be fused with any other component */
+    Output     /**< Output component that writes the final result, e.g. a store component */
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_TYPES */
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp
new file mode 100644
index 0000000000..e94cfd1581
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "ClComponentDirectConv2d.h"
+
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/dynamic_fusion/sketch/OperatorAttributes.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+using Settings = ClComponentDirectConv2dSettings;
+
+Settings &Settings::export_to_cl_image(bool cl_image)
+{
+ _export_to_cl_image = cl_image;
+ return *this;
+}
+
+bool Settings::export_to_cl_image() const
+{
+ return _export_to_cl_image;
+}
+
+Settings &Settings::fast_relaxed_math(bool fast_relaxed_math)
+{
+ _fast_relaxed_math = fast_relaxed_math;
+ return *this;
+}
+
+bool Settings::fast_relaxed_math() const
+{
+ return _fast_relaxed_math;
+}
+
+Status ClComponentDirectConv2d::validate(
+ const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes,
+ const Settings &settings)
+{
+ ARM_COMPUTE_UNUSED(properties, settings);
+ const auto src = tensors.get_const_tensor(TensorType::ACL_SRC_0);
+ const auto wei = tensors.get_const_tensor(TensorType::ACL_SRC_1);
+ const auto bia = tensors.get_const_tensor(TensorType::ACL_SRC_2);
+ const auto dst = tensors.get_const_tensor(TensorType::ACL_DST_0);
+
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, wei, dst);
+
+ // 1. Check validity
+ // Matching data type
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, wei);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst);
+ if(bia != nullptr)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, bia);
+ }
+
+ // Matching data layout
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(src, wei);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(src, dst);
+ if(bia != nullptr)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(src, bia);
+ }
+
+ // All tensor infos are initialized
+ ARM_COMPUTE_RETURN_ERROR_ON(src->tensor_shape().total_size() == 0);
+ ARM_COMPUTE_RETURN_ERROR_ON(wei->tensor_shape().total_size() == 0);
+ ARM_COMPUTE_RETURN_ERROR_ON(dst->tensor_shape().total_size() == 0);
+ if(bia != nullptr)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON(bia->tensor_shape().total_size() == 0);
+ }
+ // Device requirements are met
+ ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src);
+ // wei shape is correct
+ const DataLayout data_layout = src->data_layout();
+ const int channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(wei->dimension(channel_idx) != src->dimension(channel_idx), "Weights feature map dimension should match the respective src's one");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(wei->num_dimensions() > 4, "Weights can be at most 4 dimensional");
+
+ // dst shape is correct
+ PadStrideInfo legacy_pad_stride(attributes.stride().x(), attributes.stride().y(), attributes.pad().left, attributes.pad().right, attributes.pad().top,
+ attributes.pad().bottom, DimensionRoundingType{});
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(dst->tensor_shape(),
+ misc::shape_calculator::compute_deep_convolution_shape(*src, *wei, legacy_pad_stride));
+
+ // bia shape is correct
+ if(bia != nullptr)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(bia->dimension(0) != wei->dimension(3),
+ "Biases size and number of dst feature maps should match");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(bia->num_dimensions() > 1,
+ "Biases should be one dimensional");
+ }
+
+ // 2. Check support level
+ // Data type
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F16, DataType::F32);
+ // Data layout
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(src, DataLayout::NHWC);
+
+ return Status{};
+}
+
+ClComponentDirectConv2d::ClComponentDirectConv2d(
+ ComponentId id,
+ const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes,
+ const Settings &settings)
+ : IGpuKernelComponent{ id, properties, tensors },
+ _component_writer{ std::make_unique<ClTemplateDirectConv2d>(id, tensors, attributes, settings) }
+{
+}
+ClComponentDirectConv2d::~ClComponentDirectConv2d()
+{
+}
+const IGpuTemplateComponentWriter *ClComponentDirectConv2d::template_writer() const
+{
+ return _component_writer.get();
+}
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h
new file mode 100644
index 0000000000..fec22b84a5
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTDIRECTCONV2D
+#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTDIRECTCONV2D
+
+#include "arm_compute/core/Error.h"
+#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
+#include <memory>
+
+namespace arm_compute
+{
+/** Forward declaration */
+class ITensorInfo;
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** Forward declaration */
+template <typename T>
+class ArgumentPack;
+class Conv2dAttributes;
+
+/** Component specific settings
+ */
+class ClComponentDirectConv2dSettings
+{
+public:
+ /** Set export_to_cl_image flag */
+ ClComponentDirectConv2dSettings &export_to_cl_image(bool cl_image);
+ /** Get export_to_cl_image flag */
+ bool export_to_cl_image() const;
+
+ /** Set fast_relaxed_math flag */
+ ClComponentDirectConv2dSettings &fast_relaxed_math(bool fast_relaxed_math);
+ /** Get fast_relaxed_math flag */
+ bool fast_relaxed_math() const;
+
+private:
+ bool _export_to_cl_image{ false };
+ bool _fast_relaxed_math{ true };
+};
+
+/** Forward declaration */
+class ClTemplateDirectConv2d;
+
+class ClComponentDirectConv2d final : public IGpuKernelComponent
+{
+public:
+ /** Attributes are a set of backend-agnostic parameters that define what a component does */
+ using Attributes = Conv2dAttributes;
+ /** Settings are a set of backend-specific parameters that influence the implementation of a component */
+ using Settings = ClComponentDirectConv2dSettings;
+
+public:
+ /** Validate the component
+ *
+ * @param[in] properties Component properties
+     * @param[in]     tensors    Tensor arguments to the component
+ * @param[in] attributes Component attributes
+ * @param[in] settings Component settings
+ *
+ * @return Status Validation results
+ *
+ * Tensor argument names:
+ * - ACL_SRC_0: Input
+ * - ACL_SRC_1: Weight
+ * - ACL_SRC_2: Bias (Optional)
+ * - ACL_DST_0: Output
+ *
+ * Tensor argument constness:
+ * - ACL_SRC_0: Const
+ * - ACL_SRC_1: Const
+ * - ACL_SRC_2: Const
+ * - ACL_DST_0: Const
+ *
+ * Valid data layouts:
+ * - NHWC
+ *
+ * Valid data type configurations:
+ * |ACL_SRC_0 |ACL_SRC_1 |ACL_SRC_2 |ACL_DST_0 |
+ * |:--------------|:--------------|:--------------|:--------------|
+ * |F16 |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |F32 |
+ */
+ static Status validate(
+ const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes,
+ const Settings &settings);
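+
+    // Illustrative call only (mirrors GpuConv2d::validate_op later in this patch); tensor info names are placeholders:
+    //   ArgumentPack<ITensorInfo> tensors;
+    //   tensors.add_const_tensor(ACL_SRC_0, src);
+    //   tensors.add_const_tensor(ACL_SRC_1, wei);
+    //   tensors.add_const_tensor(ACL_SRC_2, bia); // may be nullptr
+    //   tensors.add_const_tensor(ACL_DST_0, dst);
+    //   Status st = ClComponentDirectConv2d::validate(properties, tensors, attributes, settings);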
+
+ /** Constructor
+ *
+ * Similar to @ref ClComponentDirectConv2d::validate()
+ */
+ ClComponentDirectConv2d(
+ ComponentId id,
+ const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes,
+ const Settings &settings);
+
+ /** Destructor */
+ ~ClComponentDirectConv2d() override;
+ /** Prevent instances of this class from being copy constructed */
+ ClComponentDirectConv2d(const ClComponentDirectConv2d &component) = delete;
+ /** Prevent instances of this class from being copied */
+ ClComponentDirectConv2d &operator=(const ClComponentDirectConv2d &component) = delete;
+ /** Allow instances of this class to be move constructed */
+ ClComponentDirectConv2d(ClComponentDirectConv2d &&component) = default;
+ /** Allow instances of this class to be moved */
+ ClComponentDirectConv2d &operator=(ClComponentDirectConv2d &&component) = default;
+ /** Get template writer for the component */
+ const IGpuTemplateComponentWriter *template_writer() const override;
+ /** Get component type */
+ GpuComponentType type() const override
+ {
+ return GpuComponentType::Complex;
+ }
+
+private:
+ std::unique_ptr<ClTemplateDirectConv2d> _component_writer;
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTDIRECTCONV2D */
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp
new file mode 100644
index 0000000000..f49f397ec1
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "ClComponentStore.h"
+
+#include "src/dynamic_fusion/sketch/ArgumentPack.h"
+#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+Status ClComponentStore::validate(
+ const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors)
+{
+ ARM_COMPUTE_UNUSED(properties, tensors);
+ return Status{};
+}
+ClComponentStore::ClComponentStore(ComponentId id, const Properties &properties, const ArgumentPack<ITensorInfo> &tensors)
+ : IGpuKernelComponent{ id, properties, tensors }, _component_writer{ std::make_unique<ClTemplateStore>(id, tensors) }
+{
+}
+ClComponentStore::~ClComponentStore()
+{
+}
+const IGpuTemplateComponentWriter *ClComponentStore::template_writer() const
+{
+ return _component_writer.get();
+}
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h
new file mode 100644
index 0000000000..bf8c9f031e
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTSTORE
+#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTSTORE
+
+#include "arm_compute/core/Error.h"
+#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
+#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h"
+#include <memory>
+
+namespace arm_compute
+{
+/** Forward declaration */
+class ITensorInfo;
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** Forward declaration */
+template <typename T>
+class ArgumentPack;
+
+class ClComponentStore final : public IGpuKernelComponent
+{
+public:
+ /** Validate the component
+ *
+ * @param[in] properties Component properties
+ * @param[in] tensors Tensor arguments to the components
+ *
+ * @return Status Validation results
+ *
+ * Tensor argument names:
+ * - ACL_SRC_0: Input
+ * - ACL_DST_0: Output
+ *
+ * Tensor argument constness:
+ * - ACL_SRC_0: Const
+ * - ACL_DST_0: Const
+ *
+ * Valid data layouts:
+ * - NHWC
+ *
+ * Valid data type configurations:
+ * |ACL_SRC_0 |ACL_DST_0 |
+ * |:--------------|:--------------|
+ * |All |All |
+ */
+ static Status validate(
+ const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors);
+ /** Constructor
+ *
+ * Similar to @ref ClComponentStore::validate()
+ */
+ ClComponentStore(ComponentId id, const Properties &properties, const ArgumentPack<ITensorInfo> &tensors);
+ /** Destructor */
+ ~ClComponentStore() override;
+ /** Prevent instances of this class from being copy constructed */
+ ClComponentStore(const ClComponentStore &component) = delete;
+ /** Prevent instances of this class from being copied */
+ ClComponentStore &operator=(const ClComponentStore &component) = delete;
+ /** Allow instances of this class to be move constructed */
+ ClComponentStore(ClComponentStore &&component) = default;
+ /** Allow instances of this class to be moved */
+ ClComponentStore &operator=(ClComponentStore &&component) = default;
+ /** Get template writer for the component */
+ const IGpuTemplateComponentWriter *template_writer() const override;
+ /** Get component type */
+ GpuComponentType type() const override
+ {
+ return GpuComponentType::Output;
+ }
+
+private:
+ std::unique_ptr<ClTemplateStore> _component_writer;
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTSTORE */
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp
new file mode 100644
index 0000000000..98c1cc3939
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp
@@ -0,0 +1,255 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuConv2d.h"
+
+#include "arm_compute/core/CL/CLCompileContext.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/experimental/Types.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/dynamic_fusion/sketch/ArgumentPack.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h"
+#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h"
+#include "src/gpu/cl/kernels/gemm/ClGemmHelpers.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+namespace
+{
+bool export_to_cl_image_support(const ITensorInfo *tensor, GPUTarget gpu_target, const cl::Device &device, DataLayout data_layout)
+{
+ if(tensor->tensor_shape()[0] % 4 || (data_layout != DataLayout::NHWC))
+ {
+ return false;
+ }
+
+ // If not floating point
+ if(!is_data_type_float(tensor->data_type()))
+ {
+ return false;
+ }
+
+ if(gpu_target == GPUTarget::G71 || get_arch_from_target(gpu_target) == GPUTarget::MIDGARD)
+ {
+ return false;
+ }
+
+ // Check if the cl_khr_image2d_from_buffer extension is supported on the target platform
+ if(!image2d_from_buffer_supported(device))
+ {
+ return false;
+ }
+
+ // Check cl image pitch alignment
+ if(get_cl_image_pitch_alignment(device) == 0)
+ {
+ return false;
+ }
+
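+    // A cl_image pixel packs 4 elements, so the channel dimension (shape[0] in NHWC) maps to
+    // image width / 4, while W, H and N collapse into the image height.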
+ const size_t image_w = tensor->tensor_shape()[0] / 4;
+ const size_t image_h = tensor->tensor_shape()[1] * tensor->tensor_shape()[2] * tensor->tensor_shape()[3];
+ const size_t max_image_w = device.getInfo<CL_DEVICE_IMAGE2D_MAX_WIDTH>();
+ const size_t max_image_h = device.getInfo<CL_DEVICE_IMAGE2D_MAX_HEIGHT>();
+
+ if(image_w > max_image_w || image_h > max_image_h)
+ {
+ return false;
+ }
+
+ return true;
+}
+
+constexpr GpuOperatorType operator_type = GpuOperatorType::Complex;
+} // namespace
+
+Status GpuConv2d::validate_op(const GpuWorkloadSketch &sketch,
+ const ITensorInfo *src,
+ const ITensorInfo *wei,
+ const ITensorInfo *bia,
+ const ITensorInfo *dst,
+ const Conv2dAttributes &attributes)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, wei, dst);
+ ARM_COMPUTE_RETURN_ERROR_ON(
+ !src->has_valid_id() || !wei->has_valid_id() || !dst->has_valid_id());
+ if(bia != nullptr)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON(!bia->has_valid_id());
+ }
+
+ // Perform fusion test
+ // Pack tensor infos
+ ArgumentPack<ITensorInfo> tensors;
+ tensors.add_const_tensor(ACL_SRC_0, src);
+ tensors.add_const_tensor(ACL_SRC_1, wei);
+ tensors.add_const_tensor(ACL_SRC_2, bia);
+ tensors.add_const_tensor(ACL_DST_0, dst);
+ const auto op = sketch.implementation().operator_group().new_operator(operator_type, tensors);
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(!sketch.implementation().operator_group().try_add_operator(op),
+ "Operator fusion test failed. This operator cannot be fused into the workload");
+
+ // Auto initialize dst tensor info
+ TensorInfo dst_info_to_validate = *dst;
+ const auto data_layout = src->data_layout();
+
+ {
+ auto shape = misc::shape_calculator::compute_deep_convolution_shape(src->tensor_shape(), data_layout, wei->tensor_shape(),
+ PadStrideInfo(attributes.stride().x(), attributes.stride().y(), attributes.pad().left,
+ attributes.pad().right,
+ attributes.pad().top, attributes.pad().bottom, DimensionRoundingType::FLOOR)); // use the default DimensionRoundingType
+
+ auto_init_if_empty(dst_info_to_validate, src->clone()->set_tensor_shape(shape));
+ }
+
+ // Check support level
+ // Data type
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F16, DataType::F32);
+ // Data layout
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(src, DataLayout::NHWC);
+
+ const auto sketch_ctx = sketch.implementation().context();
+
+ const auto gpu_target = sketch_ctx->gpu_target();
+
+ if(sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
+ {
+ const auto cl_compile_ctx = sketch_ctx->cl_compile_context();
+ ARM_COMPUTE_RETURN_ERROR_ON(cl_compile_ctx == nullptr);
+ // Validate Direct Conv2d Component
+ {
+ const auto properties = IGpuKernelComponent::Properties().stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
+ auto settings = ClComponentDirectConv2d::Settings();
+
+ settings.export_to_cl_image(
+ export_to_cl_image_support(src, gpu_target, cl_compile_ctx->get_device(), data_layout));
+
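+            // fast_relaxed_math is only enabled on Bifrost cores other than G71, and only for FP32/FP16 outputs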
+ settings.fast_relaxed_math(
+ (gpu_target != GPUTarget::G71 && (gpu_target & GPUTarget::GPU_ARCH_MASK) == GPUTarget::BIFROST)
+ && (dst_info_to_validate.data_type() == DataType::F32 || dst_info_to_validate.data_type() == DataType::F16));
+
+ ArgumentPack<ITensorInfo> arguments;
+ arguments.add_const_tensor(ACL_SRC_0, src);
+ arguments.add_const_tensor(ACL_SRC_1, wei);
+ arguments.add_const_tensor(ACL_SRC_2, bia);
+ arguments.add_const_tensor(ACL_DST_0, &dst_info_to_validate);
+ ARM_COMPUTE_RETURN_ON_ERROR(ClComponentDirectConv2d::validate(properties, arguments, attributes, settings));
+ }
+ }
+ else
+ {
+ ARM_COMPUTE_RETURN_ERROR_MSG("Unimplemented Gpu language");
+ }
+ return Status{};
+}
+
+void GpuConv2d::create_op(GpuWorkloadSketch &sketch,
+ ITensorInfo *src,
+ ITensorInfo *wei,
+ ITensorInfo *bia,
+ ITensorInfo *dst,
+ const Conv2dAttributes &attributes)
+{
+ // Assert validation
+ ARM_COMPUTE_ERROR_THROW_ON(GpuConv2d::validate_op(sketch, src, wei, bia, dst, attributes));
+ ARM_COMPUTE_ERROR_ON_NULLPTR(src, wei, dst);
+ const auto data_layout = src->data_layout();
+
+ // Auto initialize dst tensor
+ {
+ auto shape = misc::shape_calculator::compute_deep_convolution_shape(src->tensor_shape(), data_layout, wei->tensor_shape(),
+ PadStrideInfo(attributes.stride().x(), attributes.stride().y(), attributes.pad().left,
+ attributes.pad().right,
+ attributes.pad().top, attributes.pad().bottom, DimensionRoundingType::FLOOR)); // use the default DimensionRoundingType
+
+ auto_init_if_empty(*dst, src->clone()->set_tensor_shape(shape));
+ }
+
+ // Translate into components and add to component graph
+ auto &comp_graph = sketch.implementation().component_graph();
+
+ const auto sketch_ctx = sketch.implementation().context();
+
+ const auto gpu_target = sketch_ctx->gpu_target();
+
+ if(sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
+ {
+ const auto cl_compile_ctx = sketch_ctx->cl_compile_context();
+ ARM_COMPUTE_ERROR_ON(cl_compile_ctx == nullptr);
+
+ // Add Direct Conv2d Component
+ {
+ auto properties = IGpuKernelComponent::Properties();
+ properties.stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
+
+ auto settings = ClComponentDirectConv2d::Settings();
+
+ settings.export_to_cl_image(
+ export_to_cl_image_support(src, gpu_target, cl_compile_ctx->get_device(), data_layout));
+
+ settings.fast_relaxed_math(
+ (gpu_target != GPUTarget::G71 && (gpu_target & GPUTarget::GPU_ARCH_MASK) == GPUTarget::BIFROST)
+ && (dst->data_type() == DataType::F32 || dst->data_type() == DataType::F16));
+
+ if(settings.export_to_cl_image())
+ {
+ arm_compute::opencl::kernels::gemm::update_padding_for_cl_image(wei);
+ }
+
+ ArgumentPack<ITensorInfo> arguments;
+ arguments.add_const_tensor(ACL_SRC_0, src);
+ arguments.add_const_tensor(ACL_SRC_1, wei);
+ arguments.add_const_tensor(ACL_SRC_2, bia);
+ arguments.add_const_tensor(ACL_DST_0, dst);
+ comp_graph.add_new_component<ClComponentDirectConv2d>(properties, arguments, attributes, settings);
+ }
+ }
+ else
+ {
+ ARM_COMPUTE_ERROR("Unimplemented Gpu language");
+ }
+
+ // Set up fusion test by adding to the Operator Group
+ // Note this has to be performed after all the components have been successfully added to the component graph
+
+ // Pack tensor infos
+ ArgumentPack<ITensorInfo> tensors;
+ tensors.add_const_tensor(ACL_SRC_0, src);
+ tensors.add_tensor(ACL_SRC_1, wei);
+ tensors.add_const_tensor(ACL_SRC_2, bia);
+ tensors.add_tensor(ACL_DST_0, dst);
+
+ const auto op = sketch.implementation().operator_group().new_operator(operator_type, tensors);
+ sketch.implementation().operator_group().add_operator(op);
+}
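+
+// Typical call site (illustrative only; the sketch/context setup and tensor info names are assumed):
+//   GpuWorkloadSketch sketch{ &gpu_ctx };
+//   Conv2dAttributes  conv2d_attr{};
+//   GpuConv2d::create_op(sketch, &src_info, &wei_info, &bia_info, &dst_info, conv2d_attr);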
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.cpp
new file mode 100644
index 0000000000..13c0b141a5
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.cpp
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "GpuKernelVariableTable.h"
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/ITensorInfo.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+void GpuKernelVariableTable::declare_variable(const ITensorInfo *tensor, GpuKernelArgumentInfo argument_info, bool is_interm, const std::string &alias)
+{
+ ARM_COMPUTE_ERROR_ON_MSG(!tensor->has_valid_id(), "Tensor info with valid id expected");
+ // Do not re-declare if the variable associated with the tensor has already been declared
+ if(get_variable(tensor).has_valid_id())
+ {
+ ARM_COMPUTE_ERROR_ON(!(get_variable(tensor).kernel_argument_info == argument_info));
+ return;
+ }
+ // Declare variable associated with the tensor
+ std::stringstream ss;
+ ss << alias << "_t" << tensor->id();
+ const auto uniq_name = ss.str();
+ TensorVariable var{ tensor->id(), uniq_name, argument_info };
+
+ if(is_interm)
+ {
+ _interm_var = var;
+ _interm_tensors.insert(tensor->id());
+ }
+ else
+ {
+ _vars.emplace(tensor->id(), var);
+ }
+}
+
+GpuKernelVariableTable::TensorVariable GpuKernelVariableTable::get_variable(const ITensorInfo *tensor) const
+{
+ const TensorVariable empty_var{};
+ if(_vars.find(tensor->id()) != _vars.end())
+ {
+ return _vars.at(tensor->id());
+ }
+ if(_interm_tensors.find(tensor->id()) != _interm_tensors.end())
+ {
+ return _interm_var;
+ }
+ return empty_var;
+}
+
+GpuKernelVariableTable::VariableList GpuKernelVariableTable::get_variable_list(const std::vector<const ITensorInfo *> &tensors) const
+{
+ VariableList vars{};
+ for(const auto &tensor : tensors)
+ {
+ if(!tensor->has_valid_id())
+ {
+ continue;
+ }
+ vars.push_back(get_variable(tensor));
+ }
+ return vars;
+}
+
+TagVal::TagVal(const GpuKernelVariableTable::TensorVariable &var)
+ : value{ var.uniq_name }
+{
+}
+
+TagVal::TagVal(const std::string &val)
+ : value{ val }
+{
+}
+
+TagVal::TagVal(const char *val)
+ : value{ std::string(val) }
+{
+}
+
+TagVal::TagVal(const DataType &data_type)
+ : value{ get_cl_type_from_data_type(data_type) }
+{
+}
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h b/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h
new file mode 100644
index 0000000000..4eee3963c2
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_GPUKERNELVARIABLETABLE
+#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_GPUKERNELVARIABLETABLE
+
+#include "arm_compute/core/ITensorInfo.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
+#include "support/Requires.h"
+#include "support/StringSupport.h"
+
+#include <set>
+#include <string>
+#include <type_traits>
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** A table of all the variables used in the kernel
+ * Since fusion is restricted to a linear sequence of components in a kernel, only a single "intermediate variable" (the accumulator) is allowed.
+ * Each kernel has exactly one variable table
+ */
+class GpuKernelVariableTable
+{
+public:
+ /** A tensor variable whose main purposes are:
+ * - Hold the newly assigned @ref GpuKernelArgumentInfo for the associated tensor info
+ * - Hold the generated variable name for the associated tensor info
+ */
+ struct TensorVariable
+ {
+ public:
+ TensorVariable() = default;
+ TensorVariable(const TensorVariable &) = default;
+ TensorVariable &operator=(const TensorVariable &) = default;
+ ITensorInfo::Id id{ ITensorInfo::invalid_tensor_id };
+ std::string uniq_name{ "empty" }; // Unique name, also the final variable name used in the built code
+ GpuKernelArgumentInfo kernel_argument_info{};
+ bool has_valid_id() const
+ {
+ return id != ITensorInfo::invalid_tensor_id;
+ }
+ };
+ using VariableList = std::vector<TensorVariable>;
+
+public:
+ /** Declare a @ref TensorVariable for a corresponding tensor info.
+ *
+     * @note Later re-declaration of the intermediate variable will overwrite the previous association with the @ref ITensorInfo.
+     *       Therefore, the order of declaration is important. It is assumed that the components declaring the variable are already in the correct order.
+ *
+ * @param[in] tensor Tensor info with which the new variable is associated
+ * @param[in] argument_info Kernel argument information
+ * @param[in] is_interm If the new variable is an intermediate variable
+ * @param[in] alias Alias for the variable. Will be used as part of the variable name
+ */
+ void declare_variable(const ITensorInfo *tensor, GpuKernelArgumentInfo argument_info, bool is_interm = false, const std::string &alias = "unnamed");
+ /** Get the @ref TensorVariable associated with @p tensor
+ *
+ * @param[in] tensor Tensor info to be queried
+ *
+ * @return TensorVariable
+ */
+ TensorVariable get_variable(const ITensorInfo *tensor) const;
+ /** Get the @ref TensorVariable list associated with @p tensors
+ * @note Empty tensors are skipped
+ *
+ * @param[in] tensors List of tensor infos to be queried
+ *
+ * @return VariableList
+ */
+ VariableList get_variable_list(const std::vector<const ITensorInfo *> &tensors) const;
+
+private:
+ std::map<ITensorInfo::Id, TensorVariable> _vars{}; /**< Non-intermediate (function parameter) variables*/
+ TensorVariable _interm_var{}; /**< Intermediate variable */
+ std::set<ITensorInfo::Id> _interm_tensors{}; /**< Tensors associated with the single intermediate variable */
+};
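+
+// Illustrative use (mirrors the component writers' declare_variables()/get_tag_lut() below):
+//   vtable.declare_variable(src_info, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer), false, "src");
+//   const auto var = vtable.get_variable(src_info); // var.uniq_name is e.g. "src_t<tensor id>"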
+
+/** A tag value will substitute a tag in a string template during its instantiation */
+struct TagVal
+{
+ /** Default constructor */
+ TagVal() = default;
+ /** Construct a @ref TagVal from a @ref GpuKernelVariableTable::TensorVariable */
+ TagVal(const GpuKernelVariableTable::TensorVariable &var);
+ /** Construct a @ref TagVal from an integral type */
+ template <typename T, ARM_COMPUTE_REQUIRES_TA(std::is_integral<T>::value)>
+ TagVal(T val)
+ : value{ support::cpp11::to_string(val) }
+ {
+ }
+ /** Construct a @ref TagVal from a string */
+ TagVal(const std::string &val);
+ /** Construct a @ref TagVal from a c-style string */
+ TagVal(const char *val);
+ /** Construct a @ref TagVal from a @ref DataType */
+ TagVal(const DataType &data_type);
+    /** The value of the TagVal, converted to a string */
+ std::string value{};
+};
+
+/** A tag used in a string template is a placeholder string to be substituted by real values during template instantiation */
+using Tag = std::string;
+
+/** Tag lookup table. It is used to instantiate a string template */
+using TagLUT = std::unordered_map<Tag, TagVal>;
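+
+// Illustrative population of a lookup table (mirrors the component writers' get_tag_lut() implementations):
+//   TagLUT lut{};
+//   lut["meta_kernel_id"] = 3;               // integral values are converted via support::cpp11::to_string
+//   lut["DST_DATA_TYPE"]  = DataType::F32;   // DataType values are converted to the OpenCL type string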
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_GPUKERNELVARIABLETABLE */
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h b/src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h
new file mode 100644
index 0000000000..c85ddf5a2c
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_IGPUTEMPLATECOMPONENTWRITER
+#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_IGPUTEMPLATECOMPONENTWRITER
+
+#include "arm_compute/core/CL/CLCompileContext.h"
+#include "arm_compute/core/ITensorInfo.h"
+#include "arm_compute/core/Window.h"
+#include "src/dynamic_fusion/sketch/ArgumentPack.h"
+#include "src/dynamic_fusion/sketch/gpu/components/Types.h"
+#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** Forward declaration */
+class GpuKernelComponentGroup;
+class GpuKernelVariableTable;
+
+/** An interface used by @ref ClTemplateWriter to write source code for a kernel component
+ */
+class IGpuTemplateComponentWriter
+{
+public:
+ using ComponentGroup = GpuKernelComponentGroup;
+
+public:
+ /** Constructor
+ *
+ * @param[in] id Component id
+ * @param[in] tensors Tensor arguments to the components
+ */
+ IGpuTemplateComponentWriter(ComponentId id, const ArgumentPack<ITensorInfo> tensors)
+ : _id{ id }, _tensors{ tensors }
+ {
+ }
+ /** Destructor */
+ virtual ~IGpuTemplateComponentWriter()
+ {
+ }
+ /** Generate kernel component name */
+ virtual std::string get_name() const = 0;
+ /** Generate kernel component code template
+ *
+     * @param[in] comp_group Component group of which the component is a part
+ *
+ * @return std::string Component code
+ */
+ virtual std::string get_component_code(const ComponentGroup &comp_group) const = 0;
+ /** Declare all variables used by the component in the @p vtable
+ *
+ * @param[out] vtable Variable table
+     * @param[in]  comp_group Component group of which the component is a part
+ */
+ virtual void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const = 0;
+ /** Generate the tag look-up table used to instantiate the component code.
+ *
+ * @param[in] vtable Variable table
+     * @param[in] comp_group Component group of which the component is a part
+ *
+ * @return TagLUT Tag lookup table
+ */
+ virtual TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const = 0;
+ /** Generate additional macros used in the component */
+ virtual std::string get_additional_macros() const
+ {
+ return "";
+ }
+ /** Generate the build options used in the component
+ *
+     * @param[in] comp_group Component group of which the component is a part
+ *
+ * @return CLBuildOptions Build options
+ */
+ virtual CLBuildOptions get_build_options(const ComponentGroup &comp_group) const
+ {
+ ARM_COMPUTE_UNUSED(comp_group);
+ return CLBuildOptions{};
+ }
+ /** Generate the component config id string used for tuning */
+ virtual std::string get_config_id() const
+ {
+ return "";
+ }
+ /** Generate the header list used in the component */
+ virtual std::set<std::string> get_headers_list() const
+ {
+ return std::set<std::string> {};
+ }
+ /** Generate the execution window for the component */
+ virtual Window get_window() const
+ {
+ return Window{};
+ }
+ /** Get tensor arguments */
+ ArgumentPack<ITensorInfo> tensors() const
+ {
+ return _tensors;
+ }
+ /** Get component id */
+ ComponentId id() const
+ {
+ return _id;
+ }
+
+private:
+ ComponentId _id{ -1 };
+ ArgumentPack<ITensorInfo> _tensors{};
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_IGPUTEMPLATECOMPONENTWRITER */
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp
new file mode 100644
index 0000000000..870de64eb8
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp
@@ -0,0 +1,400 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "ClTemplateDirectConv2d.h"
+
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h"
+
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/helpers/WindowHelpers.h"
+
+#include "support/StringSupport.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+ClTemplateDirectConv2d::ClTemplateDirectConv2d(ComponentId id,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes,
+ const Settings &settings)
+ : IGpuTemplateComponentWriter{ id, tensors },
+ _src{},
+ _weight{},
+ _bias{},
+ _dst{},
+ _attributes{ attributes },
+ _settings{ settings }
+{
+ _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
+ _weight = this->tensors().get_const_tensor(TensorType::ACL_SRC_1);
+ if(this->tensors().get_const_tensor(TensorType::ACL_SRC_2))
+ {
+ _bias = this->tensors().get_const_tensor(TensorType::ACL_SRC_2);
+ }
+ _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
+ ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _weight, _dst);
+}
+
+std::string ClTemplateDirectConv2d::get_name() const
+{
+ return "direct_conv2d";
+}
+
+std::string ClTemplateDirectConv2d::get_component_code(const ComponentGroup &comp_group) const
+{
+ ARM_COMPUTE_UNUSED(comp_group);
+
+ const auto channel_idx = get_data_layout_dimension_index(_src->data_layout(), DataLayoutDimension::CHANNEL);
+ const auto k0 = adjust_vec_size(is_data_type_quantized(_src->data_type()) ? 16u : 8u, _src->dimension(channel_idx));
+ const bool leftover_loop = (_src->dimension(channel_idx) % k0) != 0;
+
+ std::string code = R"_(
+//------------------ START KERNEL {{meta_kernel_id}} ---------------------
+// IN_0(src) {{src}}
+// IN_1(wei) {{weight}}
+)_";
+ if(_bias && _bias->has_valid_id())
+ {
+ code += R"_(
+// IN_1(bia) {{bias}}
+)_";
+ }
+ code += R"_(
+// OUT(dst, accum) {{dst}}
+
+// Initialize the accumulators
+TILE({{ACC_DATA_TYPE}}, M0, N0, {{dst}});
+{
+ // All the tensor dimensions are passed at compile time.
+ // In case of dynamic tensor support, the following dimensions should be passed as function argument.
+#define _IWEI_WIDTH {{WEI_WIDTH}}
+#define _IWEI_HEIGHT {{WEI_HEIGHT}}
+#define _ISRC_WIDTH {{src}}_w
+#define _ISRC_HEIGHT {{src}}_h
+#define _ISRC_CHANNELS {{src}}_c
+#define _IDST_WIDTH {{arg_dst}}_w
+#define _IDST_HEIGHT {{arg_dst}}_h
+#define _IDST_CHANNELS {{arg_dst}}_c
+#define _IY_MULTIPLIER (_IWEI_WIDTH * _IWEI_HEIGHT)
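+
+// _IY_MULTIPLIER is the number of kernel spatial positions: the weights for output channel g_ind_0
+// span _IY_MULTIPLIER consecutive rows, one per (xk, yk) position (see the T_LOAD calls below)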
+
+ // .v = access the whole vector (OpenCL vector)
+ // .s[x] = access the vector element at position x (scalar access)
+ TILE(int, M0, 1, xi);
+ TILE(int, M0, 1, yi);
+
+ // Convert the linear index to coordinate
+ LOOP_UNROLLING(int, i, 0, 1, M0,
+ {
+ xi[i].v = ((g_ind_1 + i) % _IDST_WIDTH) * {{STRIDE_X}};
+ yi[i].v = ((g_ind_1 + i) / _IDST_WIDTH) * {{STRIDE_Y}};
+ xi[i].v -= {{PAD_LEFT}};
+ yi[i].v -= {{PAD_TOP}};
+ })
+
+ LOOP_UNROLLING(int, i, 0, 1, M0,
+ {
+ {{dst}}[i].v = 0;
+ })
+
+ for(int i = 0; i < (_IWEI_WIDTH * _IWEI_HEIGHT); ++i)
+ {
+ int ck = 0;
+ int xk = i % _IWEI_WIDTH;
+ int yk = i / _IWEI_WIDTH;
+
+ int k = 0;
+ for(; k <= (_ISRC_CHANNELS - K0); k += K0)
+ {
+ TILE({{SRC_DATA_TYPE}}, M0, K0, a);
+ TILE({{WEI_DATA_TYPE}}, N0, K0, b);
+
+ // Initialize tiles
+ LOOP_UNROLLING(int, i, 0, 1, M0,
+ {
+ a[i].v = {{ZERO_VALUE}};
+ })
+
+ LOOP_UNROLLING(int, i, 0, 1, N0,
+ {
+ b[i].v = {{ZERO_VALUE}};
+ })
+
+ // Load tile from the src tensor
+ T_LOAD_NHWC_INDIRECT({{SRC_DATA_TYPE}}, M0, K0, {{SRC_TENSOR_TYPE}}, {{src}}, g_ind_2, yk, xk, ck, _ISRC_WIDTH, _ISRC_HEIGHT, {{src}}_stride_y, xi, yi, a);
+
+ // Load tile from the weights tensor
+ T_LOAD({{WEI_DATA_TYPE}}, N0, K0, {{WEI_TENSOR_TYPE}}, {{weight}}, ck, g_ind_0 * _IY_MULTIPLIER + i, _IY_MULTIPLIER, {{weight}}_stride_y, b);
+
+ // Compute the matrix multiplication between two tiles
+ T_MMUL({{SRC_DATA_TYPE}}, {{WEI_DATA_TYPE}}, {{ACC_DATA_TYPE}}, M0, N0, K0, NT, T, a, b, {{dst}});
+
+ ck += K0;
+ }
+
+    // The leftover accumulation below is only emitted when _ISRC_CHANNELS is not a multiple of K0.
+    // This compile-time specialisation should be removed in case of dynamic tensor support
+)_";
+
+ if(leftover_loop)
+ {
+ code += R"_(
+ // Left-over accumulations
+ for(; k < _ISRC_CHANNELS; ++k)
+ {
+ TILE({{SRC_DATA_TYPE}}, M0, 1, a);
+ TILE({{WEI_DATA_TYPE}}, N0, 1, b);
+
+ // Initialize tiles
+ LOOP_UNROLLING(int, i, 0, 1, M0,
+ {
+ a[i].v = {{ZERO_VALUE}};
+ })
+
+ LOOP_UNROLLING(int, i, 0, 1, N0,
+ {
+ b[i].v = {{ZERO_VALUE}};
+ })
+
+ // Load tile from the src tensor
+ T_LOAD_NHWC_INDIRECT({{SRC_DATA_TYPE}}, M0, 1, {{SRC_TENSOR_TYPE}}, {{src}}, g_ind_2, yk, xk, ck, _ISRC_WIDTH, _ISRC_HEIGHT, {{src}}_stride_y, xi, yi, a);
+
+ // Load tile from the weights tensor
+ // The T_LOAD for the left-over elements can only use BUFFER because we load one element per iteration
+ T_LOAD({{WEI_DATA_TYPE}}, N0, 1, BUFFER, {{weight}}, ck, g_ind_0 * _IY_MULTIPLIER + i, _IY_MULTIPLIER, {{weight}}_stride_y, b);
+
+ // Compute the matrix multiplication between two tiles
+ T_MMUL({{SRC_DATA_TYPE}}, {{WEI_DATA_TYPE}}, {{ACC_DATA_TYPE}}, M0, N0, 1, NT, T, a, b, {{dst}});
+
+ ++ck;
+ }
+ )_";
+    }
+
+    code += R"_(
+#undef _IWEI_WIDTH
+#undef _IWEI_HEIGHT
+#undef _ISRC_WIDTH
+#undef _ISRC_HEIGHT
+#undef _ISRC_CHANNELS
+#undef _IDST_WIDTH
+#undef _IDST_HEIGHT
+#undef _IDST_CHANNELS
+#undef _IY_MULTIPLIER
+
+ }
+)_";
+
+ if(_bias && _bias->has_valid_id())
+ {
+ code += R"_(
+ TILE({{BIA_DATA_TYPE}}, 1, N0, bias0);
+
+ T_LOAD({{BIA_DATA_TYPE}}, 1, N0, BUFFER, {{bias}}, g_ind_0, 0, 1, 0, bias0);
+
+ // c = c + bias[broadcasted]
+ T_ELTWISE_BROADCAST_ADD_X({{ACC_DATA_TYPE}}, M0, N0, {{dst}}, bias0, {{dst}});
+ )_";
+    }
+
+    code += R"_(
+}
+//------------------ END KERNEL {{meta_kernel_id}} ---------------------
+)_";
+ return code;
+}
+
+void ClTemplateDirectConv2d::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
+{
+ vtable.declare_variable(
+ _src,
+ GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
+ comp_group.is_intermediate_tensor(_src),
+ "src");
+
+ const GpuKernelArgumentInfo::Type weight_type = _settings.export_to_cl_image() ? GpuKernelArgumentInfo::Type::Tensor_4D_t_Image : GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer;
+ vtable.declare_variable(
+ _weight,
+ GpuKernelArgumentInfo(weight_type),
+ comp_group.is_intermediate_tensor(_weight),
+ "weight");
+
+ if(_bias && _bias->has_valid_id()) // optional bias
+ {
+ vtable.declare_variable(
+ _bias,
+ GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Vector),
+ comp_group.is_intermediate_tensor(_bias),
+ "bias");
+ }
+ vtable.declare_variable(
+ _dst,
+ GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
+ comp_group.is_intermediate_tensor(_dst),
+ "dst");
+}
+
+TagLUT ClTemplateDirectConv2d::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
+{
+ TagLUT lut{};
+ // Arguments and global shared variables
+ lut["src"] = vtable.get_variable(_src);
+ lut["weight"] = vtable.get_variable(_weight);
+
+ if(_bias && _bias->has_valid_id()) // optional bias
+ {
+ lut["bias"] = vtable.get_variable(_bias);
+ lut["BIA_DATA_TYPE"] = get_cl_type_from_data_type(_bias->data_type());
+ }
+ lut["dst"] = vtable.get_variable(_dst);
+
+ const auto dst_argument = vtable.get_variable(comp_group.get_dst_tensors()[0]);
+ lut["arg_dst"] = dst_argument.uniq_name;
+
+ // Local build options
+ lut["meta_kernel_id"] = id();
+ lut["ACC_DATA_TYPE"] = _src->data_type();
+ lut["SRC_DATA_TYPE"] = _src->data_type();
+ lut["WEI_DATA_TYPE"] = _weight->data_type();
+
+ lut["SRC_TENSOR_TYPE"] = "BUFFER";
+ switch(vtable.get_variable(_weight).kernel_argument_info.type)
+ {
+ case GpuKernelArgumentInfo::Type::Image_Export_To_ClImage2D:
+ case GpuKernelArgumentInfo::Type::Image_3D_Export_To_ClImage2D:
+ case GpuKernelArgumentInfo::Type::Tensor_4D_t_Image:
+ {
+ lut["WEI_TENSOR_TYPE"] = "IMAGE";
+ break;
+ }
+ default:
+ {
+ lut["WEI_TENSOR_TYPE"] = "BUFFER";
+ break;
+ }
+ }
+ const auto width_idx = 1;
+ const auto height_idx = 2;
+ lut["WEI_WIDTH"] = _weight->dimension(width_idx);
+ lut["WEI_HEIGHT"] = _weight->dimension(height_idx);
+
+ lut["STRIDE_X"] = _attributes.stride().x();
+ lut["STRIDE_Y"] = _attributes.stride().y();
+
+ lut["PAD_LEFT"] = _attributes.pad().left;
+ lut["PAD_TOP"] = _attributes.pad().top;
+
+ lut["ZERO_VALUE"] = 0;
+
+ return lut;
+}
+
+CLBuildOptions ClTemplateDirectConv2d::get_build_options(const ComponentGroup &comp_group) const
+{
+ const unsigned int channel_idx = get_data_layout_dimension_index(_src->data_layout(), DataLayoutDimension::CHANNEL);
+ const DataType data_type = _src->data_type();
+
+    /// NOTE: For now the tile sizes (n0, m0) are set by the execution window. This may change in the future
+ const auto root_window = comp_group.get_root_component()->template_writer()->get_window();
+ const unsigned int n0 = root_window.x().step();
+ const unsigned int m0 = root_window.y().step();
+ const unsigned int k0 = adjust_vec_size(is_data_type_quantized(data_type) ? 16u : 8u, _src->dimension(channel_idx));
+ const unsigned int partial_store_n0 = _dst->dimension(0) % n0;
+
+ CLBuildOptions build_opts{};
+ if(_settings.fast_relaxed_math())
+ {
+ build_opts.add_option("-cl-fast-relaxed-math");
+ }
+ else
+ {
+ // -cl-fast-relaxed-math also sets -cl-finite-math-only and -cl-unsafe-math-optimizations
+ // to disable -cl-finite-math-only, we only include -cl-unsafe-math-optimizations
+ build_opts.add_option("-cl-unsafe-math-optimizations");
+ }
+ build_opts.add_option("-DIS_TILED");
+ build_opts.add_option("-DN0=" + support::cpp11::to_string(n0));
+ build_opts.add_option("-DM0=" + support::cpp11::to_string(m0));
+ build_opts.add_option("-DK0=" + support::cpp11::to_string(k0));
+ build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0));
+
+ return build_opts;
+}
+
+std::string ClTemplateDirectConv2d::get_config_id() const
+{
+ const DataType data_type = _src->data_type();
+ const DataLayout data_layout = _src->data_layout();
+
+ const unsigned int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+ const unsigned int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+
+ const unsigned int kernel_size = _weight->dimension(width_idx);
+
+ std::string config_id{};
+ config_id += lower_string(string_from_data_type(data_type));
+ config_id += "_";
+ config_id += support::cpp11::to_string(kernel_size);
+ config_id += "_";
+ config_id += support::cpp11::to_string(_attributes.stride().x());
+ config_id += "_";
+ config_id += support::cpp11::to_string(_attributes.stride().y());
+ config_id += "_";
+ config_id += support::cpp11::to_string(_dst->dimension(width_idx));
+ config_id += "_";
+ config_id += support::cpp11::to_string(_dst->dimension(height_idx));
+ config_id += "_";
+ config_id += lower_string(string_from_data_layout(data_layout));
+ return config_id;
+}
+
+std::set<std::string> ClTemplateDirectConv2d::get_headers_list() const
+{
+ return std::set<std::string>{ "helpers.h", "tile_helpers.h" };
+}
+
+Window ClTemplateDirectConv2d::get_window() const
+{
+ ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized");
+
+ const auto output_shape = _dst->tensor_shape();
+
+ const unsigned int vec_size = std::min(static_cast<unsigned int>(output_shape[0]), 4u);
+ const unsigned int num_rows = (_dst->tensor_shape()[0] > 16) ? ((_src->data_type() == DataType::F32) ? 2U : 4U) : 1U;
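+    // vec_size is the N0 tile size along the channel dimension; num_rows is the M0 tile size along the collapsed W*H plane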
+
+ // Create and configure kernel window
+ Window win = calculate_max_window(output_shape, Steps(vec_size, num_rows));
+
+ const size_t dim_y_collapsed = ceil_to_multiple(output_shape[1] * output_shape[2], num_rows);
+ win.set(Window::DimY, Window::Dimension(0, dim_y_collapsed, num_rows));
+ win.set(Window::DimZ, Window::Dimension(0, output_shape.total_size_upper(3), 1));
+
+ return win;
+}
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h
new file mode 100644
index 0000000000..48027a9b8d
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEDIRECTCONV2D
+#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEDIRECTCONV2D
+
+#include "arm_compute/core/experimental/Types.h"
+#include "arm_compute/dynamic_fusion/sketch/OperatorAttributes.h"
+#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h"
+#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h"
+#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+class ClTemplateDirectConv2d final : public IGpuTemplateComponentWriter
+{
+public:
+ using Attributes = ClComponentDirectConv2d::Attributes;
+ using Settings = ClComponentDirectConv2d::Settings;
+ /** Constructor
+ *
+ * Similar to @ref ClComponentDirectConv2d::validate()
+ *
+ * @param[in] id Component id
+ * @param[in] tensors Tensor arguments to the components
+ * @param[in] attributes Component attributes
+ * @param[in] settings Component settings
+ */
+ ClTemplateDirectConv2d(ComponentId id,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes,
+ const Settings &settings);
+ /** Prevent instances of this class from being copy constructed */
+ ClTemplateDirectConv2d(const ClTemplateDirectConv2d &direct_conv2d) = delete;
+ /** Prevent instances of this class from being copied */
+ ClTemplateDirectConv2d &operator=(const ClTemplateDirectConv2d &direct_conv2d) = delete;
+ /** Allow instances of this class to be move constructed */
+ ClTemplateDirectConv2d(ClTemplateDirectConv2d &&direct_conv2d) = default;
+ /** Allow instances of this class to be moved */
+ ClTemplateDirectConv2d &operator=(ClTemplateDirectConv2d &&direct_conv2d) = default;
+ /** Generate kernel component name */
+ std::string get_name() const override;
+ /** Generate kernel component code template
+ *
+     * @param[in] comp_group Component group of which the component is a part
+ *
+ * @return std::string Component code
+ */
+ std::string get_component_code(const ComponentGroup &comp_group) const override;
+ /** Declare all variables used by the component in the @p vtable
+ *
+ * @param[out] vtable Variable table
+     * @param[in]  comp_group Component group of which the component is a part
+ */
+ void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
+ /** Generate the tag look-up table used to instantiate the component code.
+ *
+ * @param[in] vtable Variable table
+     * @param[in] comp_group Component group of which the component is a part
+ *
+ * @return TagLUT Tag lookup table
+ */
+ TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
+ /** Generate the build options used in the component
+ *
+     * @param[in] comp_group Component group of which the component is a part
+ *
+ * @return CLBuildOptions Build options
+ */
+ CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override;
+ /** Generate the component config id string used for tuning */
+ std::string get_config_id() const override;
+ /** Generate the header list used in the component */
+ std::set<std::string> get_headers_list() const override;
+ /** Generate the execution window for the component */
+ Window get_window() const override;
+
+private:
+ const ITensorInfo *_src;
+ const ITensorInfo *_weight;
+ const ITensorInfo *_bias;
+ const ITensorInfo *_dst;
+ Attributes _attributes;
+ Settings _settings;
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEDIRECTCONV2D */
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp
new file mode 100644
index 0000000000..6c4b8f52f2
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "ClTemplateStore.h"
+
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+ClTemplateStore::ClTemplateStore(ComponentId id, const ArgumentPack<ITensorInfo> &tensors)
+ : IGpuTemplateComponentWriter{ id, tensors }, _src{}, _dst{}
+{
+ _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
+ _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
+}
+
+std::string ClTemplateStore::get_name() const
+{
+ return "store";
+}
+
+std::string ClTemplateStore::get_component_code(const ComponentGroup &comp_group) const
+{
+ ARM_COMPUTE_UNUSED(comp_group);
+ return R"_(
+//------------------ START KERNEL {{meta_kernel_id}} STORE ---------------------
+{
+// This also follows NHWC layout
+// g_ind_0 maps to global_id(0), which maps to Channel
+// g_ind_1 maps to global_id(1), which maps to Height and Width (Collapsed Window)
+// g_ind_2 maps to global_id(2), which maps to N / Batch
+#define _IDST_WIDTH {{dst}}_w
+#define _IDST_HEIGHT {{dst}}_h
+ TILE(uint, M0, 1, dst_indirect_y);
+
+ // Calculate the destination indirect Y
+ LOOP_UNROLLING(int, i, 0, 1, M0,
+ {
+ dst_indirect_y[i].v = (uint)min(g_ind_1 + i, (int)(_IDST_WIDTH * _IDST_HEIGHT) - 1);
+ dst_indirect_y[i].v += g_ind_2 * (int)(_IDST_WIDTH * _IDST_HEIGHT);
+ })
+
+ bool x_cond = PARTIAL_N0 != 0 && get_global_id(0) == 0;
+
+ T_STORE_INDIRECT_WIDTH_SELECT({{DST_DATA_TYPE}}, M0, N0, PARTIAL_N0, {{DST_TENSOR_TYPE}}, {{dst}}, g_ind_0, {{dst}}_stride_y, x_cond, {{src}}, dst_indirect_y);
+
+#undef _IDST_WIDTH
+#undef _IDST_HEIGHT
+ //------------------ END KERNEL {{meta_kernel_id}} STORE ---------------------
+}
+
+)_";
+}
+
+void ClTemplateStore::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
+{
+ vtable.declare_variable(
+ _src,
+ GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
+ comp_group.is_intermediate_tensor(_src),
+ "src");
+ vtable.declare_variable(
+ _dst,
+ GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
+ comp_group.is_intermediate_tensor(_dst),
+ "dst");
+}
+
+TagLUT ClTemplateStore::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
+{
+ TagLUT lut{};
+
+ // Arguments and global shared variables
+ lut["src"] = vtable.get_variable(_src);
+ lut["dst"] = vtable.get_variable(_dst);
+
+ // Local build options
+ lut["meta_kernel_id"] = id();
+ lut["DST_TENSOR_TYPE"] = "BUFFER";
+ const auto dst_info = comp_group.get_dst_tensors()[0];
+ lut["DST_DATA_TYPE"] = dst_info->data_type();
+
+ return lut;
+}
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h
new file mode 100644
index 0000000000..3f97a82204
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATESTORE
+#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATESTORE
+
+#include "arm_compute/core/experimental/Types.h"
+#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h"
+#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+class ClTemplateStore final : public IGpuTemplateComponentWriter
+{
+public:
+ /** Constructor
+ *
+ * @param[in] id Component id
+ * @param[in] tensors Tensor arguments to the components
+ */
+ ClTemplateStore(ComponentId id, const ArgumentPack<ITensorInfo> &tensors);
+ /** Prevent instances of this class from being copy constructed */
+ ClTemplateStore(const ClTemplateStore &store) = delete;
+ /** Prevent instances of this class from being copied */
+ ClTemplateStore &operator=(const ClTemplateStore &store) = delete;
+ /** Allow instances of this class to be move constructed */
+ ClTemplateStore(ClTemplateStore &&store) = default;
+ /** Allow instances of this class to be moved */
+ ClTemplateStore &operator=(ClTemplateStore &&store) = default;
+ /** Generate kernel component name */
+ std::string get_name() const override;
+ /** Generate kernel component code template
+ *
+     * @param[in] comp_group Component group of which the component is a part
+ *
+ * @return std::string Component code
+ */
+ std::string get_component_code(const ComponentGroup &comp_group) const override;
+ /** Declare all variables used by the component in the @p vtable
+ *
+ * @param[out] vtable Variable table
+     * @param[in]  comp_group Component group of which the component is a part
+ */
+ void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
+ /** Generate the tag look-up table used to instantiate the component code.
+ *
+ * @param[in] vtable Variable table
+     * @param[in] comp_group Component group of which the component is a part
+ *
+ * @return TagLUT Tag lookup table
+ */
+ TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
+
+private:
+ const ITensorInfo *_src;
+ const ITensorInfo *_dst;
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATESTORE */
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp
new file mode 100644
index 0000000000..cb643a741d
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp
@@ -0,0 +1,297 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "ClTemplateWriter.h"
+
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
+#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/// @note Some tags may be unused because they are only needed by the additional macros or only by the component code
+std::string ClTemplateWriter::replace_tags(const std::string &code_template, const TagLUT &tags)
+{
+ std::string replaced_code = "";
+ bool scanning_pattern = false;
+ std::string pattern_found = "";
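+    // Single-pass scanner: outside a pattern, characters are copied through verbatim; "{{" starts collecting a tag name and "}}" ends it,
+    // at which point the tag is looked up in the LUT and its value is emitted instead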
+    for(size_t i = 0; i < code_template.size(); ++i)
+ {
+ if(!scanning_pattern)
+ {
+            if(i + 1 < code_template.size() && code_template[i] == '{' && code_template[i + 1] == '{')
+ {
+ i += 1;
+ scanning_pattern = true;
+ pattern_found = "";
+ }
+ else
+ {
+ replaced_code += code_template[i];
+ }
+ }
+ else
+ {
+            if(i + 1 < code_template.size() && code_template[i] == '}' && code_template[i + 1] == '}')
+ {
+ i += 1;
+ scanning_pattern = false;
+ std::string err = "Pattern " + pattern_found + " not found in tags";
+ ARM_COMPUTE_ERROR_ON_MSG(tags.find(pattern_found) == tags.end(), err.c_str());
+ replaced_code += tags.find(pattern_found)->second.value;
+ }
+ else
+ {
+ pattern_found += code_template[i];
+ }
+ }
+ }
+
+ return replaced_code;
+}
+ClTemplateWriter::~ClTemplateWriter()
+{
+}
+ClTemplateWriter::ClTemplateWriter(const GpuKernelComponentGroup &components)
+ : _components{ components }
+{
+}
+std::string ClTemplateWriter::get_name()
+{
+ return write_kernel_name();
+}
+std::string ClTemplateWriter::get_code()
+{
+ return write_code();
+}
+std::string ClTemplateWriter::get_config_id()
+{
+ std::string config_id = get_name();
+ for(const auto &comp : _components)
+ {
+ config_id += "--" + comp->template_writer()->get_config_id() + "--";
+ }
+
+ return config_id;
+}
+
+CLBuildOptions ClTemplateWriter::get_build_options()
+{
+ CLBuildOptions build_opts{};
+
+ for(const auto &comp : _components)
+ {
+ build_opts.add_options(comp->template_writer()->get_build_options(_components).options());
+ }
+
+ return build_opts;
+}
+
+Window ClTemplateWriter::get_window() const
+{
+ const auto root_comp = _components.get_root_component();
+ ARM_COMPUTE_ERROR_ON_MSG(root_comp == nullptr, "No root component found");
+ return root_comp->template_writer()->get_window();
+}
+
+std::map<ITensorInfo::Id, GpuKernelArgument> ClTemplateWriter::get_tensors()
+{
+ // Assemble GpuKernelArguments
+ std::map<ITensorInfo::Id, GpuKernelArgument> tensors;
+ for(const auto t : _components.get_argument_tensors())
+ {
+ tensors.emplace(
+ t->id(),
+ GpuKernelArgument{ *t, _vtable.get_variable(t).kernel_argument_info });
+ }
+ return tensors;
+}
+
+std::string ClTemplateWriter::write_code()
+{
+ ARM_COMPUTE_ERROR_ON_MSG(_components.empty(), "No components found");
+
+ // These data structures will hold the data from all the components in the blueprint
+ std::set<std::string> headers_list{};
+ std::set<std::string> additional_macros{};
+ std::vector<std::string> component_codes{}; // vector because order matters
+
+ // Pass 1: Declare all kernel variables
+ for(auto &component : _components)
+ {
+ component->template_writer()->declare_variables(_vtable, _components);
+ }
+ // Pass 2: Generate component codes
+ for(auto &component : _components)
+ {
+ const auto component_writer = component->template_writer();
+ auto curr_headers_list = component_writer->get_headers_list();
+ auto curr_additional_macros = component_writer->get_additional_macros();
+ auto curr_component_code = component_writer->get_component_code(_components);
+ const auto var_lut = component_writer->get_tag_lut(_vtable, _components); // Ideally can be merged with get_component_code once we have finer-grained code generation technique
+ component_codes.push_back(replace_tags(curr_component_code, var_lut));
+
+ headers_list.insert(curr_headers_list.begin(), curr_headers_list.end());
+        if(!curr_additional_macros.empty()) // Some components might not have any
+ {
+ additional_macros.insert(replace_tags(curr_additional_macros, var_lut));
+ }
+ }
+
+    // Pass 3: Assemble the data gathered by traversing the graph into the string "code"
+ std::string code = "";
+
+ for(auto &header : headers_list)
+ {
+#if defined(EMBEDDED_KERNELS)
+ code += CLKernelLibrary::get().get_program(header).first;
+#else // defined(EMBEDDED_KERNELS)
+ code += "#include \"" + header + "\"\n";
+#endif // defined(EMBEDDED_KERNELS)
+ }
+
+ for(auto &macros : additional_macros)
+ {
+ code += macros;
+ }
+
+ code += write_kernel_signature(_vtable.get_variable_list(_components.get_argument_tensors()));
+
+ code += "\n{\n\n";
+
+ code += " //------------------ START KERNEL_BUILDER_COORDINATE ---------------------\n\n";
+ code += write_global_section();
+ code += " //------------------ END KERNEL_BUILDER_COORDINATE ---------------------\n";
+
+ for(const auto &component_code : component_codes)
+ {
+ code += component_code;
+ }
+
+ code += "}\n";
+
+ return code;
+}
+std::string ClTemplateWriter::write_global_section() const
+{
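+    // Emit the global section shared by all fused components: spatial indices g_ind_0/1/2 derived from the execution window tile steps
+    // (with the leftover along the width), plus the boundary condition flags used by the components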
+ const auto dst_tensors = _components.get_dst_tensors();
+ ARM_COMPUTE_ERROR_ON_MSG(dst_tensors.size() != 1, "Only one destination tensor per kernel is allowed");
+ const auto dst_info = dst_tensors[0];
+ const auto dst_w = dst_info->dimension(0);
+ const auto tile_w = std::max(1, get_window().x().step());
+ const auto tile_h = std::max(1, get_window().y().step());
+ auto leftover_w = dst_w % tile_w;
+
+ std::string code = "";
+ code += std::string(" int g_ind_0 = GET_SPATIAL_IDX(0, ") + std::to_string(tile_w) + ", " + std::to_string(leftover_w) + ");\n";
+ code += std::string(" int g_ind_1 = GET_SPATIAL_IDX(1, ") + std::to_string(tile_h) + ", " + "0);\n";
+ code += std::string(" int g_ind_2 = GET_SPATIAL_IDX(2, 1, 0);\n\n");
+
+ code += " const bool g_cond_x = (g_ind_0 == 0);\n";
+ code += " const bool g_cond_y = (g_ind_1 == 0);\n";
+
+ return code;
+}
+std::string ClTemplateWriter::write_argument_declaration(const GpuKernelVariableTable::TensorVariable &var) const
+{
+ std::string code;
+ switch(var.kernel_argument_info.type)
+ {
+ case GpuKernelArgumentInfo::Type::Vector:
+ {
+ code += "\n VECTOR_DECLARATION(" + var.uniq_name + ")";
+ break;
+ }
+ case GpuKernelArgumentInfo::Type::Image:
+ {
+ code += "\n IMAGE_DECLARATION(" + var.uniq_name + ")";
+ break;
+ }
+ case GpuKernelArgumentInfo::Type::Image_3D:
+ {
+ code += "\n IMAGE_DECLARATION(" + var.uniq_name + "),";
+ code += "\n unsigned int " + var.uniq_name + "_stride_z";
+ break;
+ }
+ case GpuKernelArgumentInfo::Type::Image_3D_Export_To_ClImage2D:
+ {
+ code += "\n __read_only image2d_t " + var.uniq_name + "_img,";
+ code += "\n unsigned int " + var.uniq_name + "_stride_z";
+ break;
+ }
+ case GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer:
+ {
+ code += "\n TENSOR4D_T(" + var.uniq_name + ", BUFFER)";
+ break;
+ }
+ case GpuKernelArgumentInfo::Type::Tensor_4D_t_Image:
+ {
+ code += "\n TENSOR4D_T(" + var.uniq_name + ", IMAGE)";
+ break;
+ }
+ default:
+ {
+ ARM_COMPUTE_ERROR("Unsupported declaration generation for GpuKernelArgumentInfo::Type");
+ }
+ }
+ return code;
+}
+std::string ClTemplateWriter::write_kernel_signature(const GpuKernelVariableTable::VariableList &argument_list) const
+{
+ std::string code = "\n__kernel void " + write_kernel_name() + "(";
+
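+    // Emit one declaration per argument, comma-separated; the last argument is written without a trailing comma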
+ for(int i = 0; i < static_cast<int>(argument_list.size()) - 1; ++i)
+ {
+ code += write_argument_declaration(argument_list[i]) + ",";
+ }
+ if(static_cast<int>(argument_list.size()) - 1 >= 0)
+ {
+ code += write_argument_declaration(argument_list[argument_list.size() - 1]);
+ }
+
+ code += ')';
+
+ return code;
+}
+std::string ClTemplateWriter::write_kernel_name() const
+{
+ if(_components.empty())
+ {
+ return "empty_kernel";
+ }
+    std::string name = _components[0]->template_writer()->get_name();
+ for(size_t i = 1; i < _components.size(); ++i)
+ {
+ name += "___";
+ name += _components[i]->template_writer()->get_name();
+ }
+
+ return name;
+}
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.h
new file mode 100644
index 0000000000..83f617b6c6
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEWRITER
+#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEWRITER
+
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+#include "src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h"
+#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h"
+
+#include <map>
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** Use a templated-string-based method to write kernel code
+ * It stitches the component code templates together based on the valid fusion configuration.
+ * It then instantiates the actual kernel code from the template and the generated tag lookup table.
+ */
+class ClTemplateWriter : public IGpuKernelWriter
+{
+public:
+ /** Instantiates a kernel code string from the kernel code template
+     * @note Some tags may be unused because they are only needed by the additional macros or only by the component code
+ *
+ * @param[in] code_template Kernel code template
+ * @param[in] tags Tag lookup table
+ *
+ * @return std::string Instantiated kernel string
+ */
+ static std::string replace_tags(const std::string &code_template, const TagLUT &tags);
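+    //
+    // A minimal illustrative sketch (the tag name and value below are hypothetical):
+    //   TagLUT lut{};
+    //   lut["dst"] = "tensor0";
+    //   replace_tags("TILE({{dst}});\n", lut); // every "{{dst}}" occurrence is replaced with "tensor0"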
+ /** Default constructor */
+ ClTemplateWriter() = default;
+ /** Constructor
+ *
+ * @param[in] components Kernel component group from which the kernel will be generated
+ */
+ ClTemplateWriter(const GpuKernelComponentGroup &components);
+ /** Destructor */
+ ~ClTemplateWriter() override;
+ /** Generate kernel name */
+ std::string get_name() override;
+ /** Generate kernel code */
+ std::string get_code() override;
+ /** Generate build options */
+ CLBuildOptions get_build_options() override;
+ /** Generate config id string of the entire kernel. This is used for tuning */
+ std::string get_config_id() override;
+ /** Generate execution window */
+ Window get_window() const override;
+    /** Get the kernel argument list of the kernel */
+ std::map<ITensorInfo::Id, GpuKernelArgument> get_tensors() override;
+
+private:
+ std::string write_kernel_name() const;
+ std::string write_code();
+ std::string write_global_section() const;
+ std::string write_argument_declaration(const GpuKernelVariableTable::TensorVariable &var) const;
+ std::string write_kernel_signature(const GpuKernelVariableTable::VariableList &argument_list) const;
+
+private:
+ GpuKernelComponentGroup _components{};
+ GpuKernelVariableTable _vtable{};
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEWRITER */
diff --git a/src/dynamic_fusion/sketch/utils/DependencyGraph.h b/src/dynamic_fusion/sketch/utils/DependencyGraph.h
new file mode 100644
index 0000000000..55eb4c5c77
--- /dev/null
+++ b/src/dynamic_fusion/sketch/utils/DependencyGraph.h
@@ -0,0 +1,658 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_UTILS_DEPENDENCYGRAPH
+#define SRC_DYNAMIC_FUSION_SKETCH_UTILS_DEPENDENCYGRAPH
+
+#include "arm_compute/core/Error.h"
+#include <algorithm>
+#include <cstdint>
+#include <deque>
+#include <map>
+#include <set>
+#include <tuple>
+#include <vector>
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+namespace
+{
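+/** Return true if @p v is contained in @p vec */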
+template <typename T>
+bool is_in(const T &v, const std::vector<T> &vec)
+{
+ return std::find(std::begin(vec), std::end(vec), v) != std::end(vec);
+}
+} // namespace
+
+/** A multi-input (tensors), multi-output (tensors) directed acyclic graph
+ * Represented as doubly-linked adjacency lists that distinguish between source and destination
+ */
+class DependencyGraph
+{
+public:
+ using Id = int32_t;
+ using TensorId = Id;
+ using OperatorId = Id;
+ /** Adjacency list
+ *
+ */
+ using AdjList = std::map<Id, std::vector<Id>>;
+
+    /** A pack consisting of an operator and its input and output tensors, used when traversing the graph in topological order
+ *
+ */
+ struct OpPack
+ {
+ OperatorId op{};
+ std::vector<TensorId> inputs{};
+ std::vector<TensorId> outputs{};
+ friend bool operator==(const OpPack &opp0, const OpPack &opp1)
+ {
+ return std::make_tuple(
+ opp0.op, opp0.inputs, opp0.outputs)
+ == std::make_tuple(
+ opp1.op, opp1.inputs, opp1.outputs);
+ }
+ };
+
+public:
+ DependencyGraph() = default;
+ friend std::ostream &operator<<(std::ostream &os, const DependencyGraph &);
+
+ /** Try adding an operator (without actually adding it), while keeping the graph as a "linear sequence" / list
+ * @note The list is expected to only grow from head to tail
+ *
+ * PRECONDITION: The current graph is already linear
+ *
+ * @return true If the operator can be added while keeping the graph as a linear sequence
+ * @return false Otherwise
+ */
+ bool try_add_operator_as_linear(OperatorId op, const std::vector<TensorId> &inputs, const std::vector<TensorId> &outputs) const
+ {
+ ARM_COMPUTE_UNUSED(op, outputs);
+ if(all_ops().empty())
+ {
+ return true;
+ }
+ std::vector<TensorId> common_tensors{};
+ auto existing_tensors = all_tensors();
+ std::sort(existing_tensors.begin(), existing_tensors.end()); // To use std::set_intersection, both input sets must be sorted
+ std::vector<TensorId> sorted_inputs{ inputs };
+ std::sort(sorted_inputs.begin(), sorted_inputs.end());
+ std::set_intersection(existing_tensors.begin(), existing_tensors.end(),
+ sorted_inputs.begin(), sorted_inputs.end(), std::back_inserter(common_tensors));
+ if(common_tensors.size() != 1U)
+ {
+ return false;
+ }
+ const auto linked_tensor = common_tensors[0];
+ const auto tail_ops = get_dst_ops();
+ ARM_COMPUTE_ERROR_ON(tail_ops.size() != 1U); // PRECONDITION
+ const auto tail = tail_ops[0];
+
+ if(!is_in(linked_tensor, dst_tensors(tail)))
+ {
+ return false;
+ }
+ return true;
+ }
+ /** Add an operator, while keeping the graph as a "linear sequence"
+ *
+ * PRECONDITION: The current graph is already linear
+ * INVARIANT: The list can only grow from head to tail
+     * POSTCONDITION: The graph is linear
+ */
+ void add_operator_as_linear(OperatorId op, const std::vector<TensorId> &inputs, const std::vector<TensorId> &outputs)
+ {
+ ARM_COMPUTE_ERROR_ON(!try_add_operator_as_linear(op, inputs, outputs));
+ auto success = add_operator(op, inputs, outputs);
+ ARM_COMPUTE_ERROR_ON(!success);
+ }
+    /** Add a new operator
+     * Returns false if adding it would violate the DAG invariant; an invalid operation leaves the graph unchanged
+     *
+     * @param[in] op      Operator to add
+     * @param[in] inputs  Input tensors to the operator
+     * @param[in] outputs Output tensors to the operator
+     *
+     * @return true  If the operator was added successfully
+     * @return false Otherwise
+     */
+ bool add_operator(OperatorId op, const std::vector<TensorId> &inputs, const std::vector<TensorId> &outputs)
+ {
+ if(operator_exists(op))
+ {
+ return false;
+ }
+ _adj_src_tensors[op] = {};
+ _adj_dst_tensors[op] = {};
+ for(auto in_tensor : inputs)
+ {
+ // Linking input tensor to operator node will never create a cycle / loop because we guarantee
+ // each op is newly created, so every <input, op> pair / edge is new
+ link_input(op, in_tensor);
+ }
+ for(auto out_tensor : outputs)
+ {
+ // If there exists a back path from op's output tensor to op already, then linking the two will create a loop / cycle
+ if(path_exists_from_tensor_to_op(out_tensor, op))
+ {
+ remove_operator(op);
+ return false;
+ }
+ else
+ {
+ link_output(op, out_tensor);
+ }
+ }
+
+ return true;
+ }
+
+    /** Sort the graph in a topological order
+     *
+     * @return std::vector<OpPack> Operator packs in topological order
+     */
+ std::vector<OpPack> topological_sort() const
+ {
+ // Incident degree (number of source operators to an op)
+ std::map<OperatorId, unsigned int> in_degree{};
+ std::set<OperatorId> visited_ops{};
+ std::deque<OperatorId> zero_in_degree_ops{};
+ std::vector<OpPack> sorted_op_packs{};
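+        // Kahn's algorithm: start from operators with no unresolved dependencies, repeatedly emit one and decrement the in-degree of its consumers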
+ for(auto op : all_ops())
+ {
+ const auto degree = src_ops(op).size();
+ in_degree[op] = degree;
+ if(degree == 0)
+ {
+ zero_in_degree_ops.push_back(op);
+ visited_ops.insert(op);
+ }
+ }
+
+ while(!zero_in_degree_ops.empty())
+ {
+ const OperatorId op = zero_in_degree_ops.front();
+ zero_in_degree_ops.pop_front();
+ sorted_op_packs.push_back(OpPack{ op, src_tensors(op), dst_tensors(op) });
+
+ for(const auto next_op : dst_ops(op))
+ {
+ if(in_degree[next_op] > 0)
+ {
+ in_degree[next_op]--;
+ }
+ if(in_degree[next_op] == 0 && visited_ops.find(next_op) == visited_ops.end())
+ {
+ zero_in_degree_ops.push_back(next_op);
+                visited_ops.insert(next_op);
+ }
+ }
+ }
+
+ return sorted_op_packs;
+ }
+
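+    /** Recursive helper for @ref find_independent_paths: extends @p cur_path with @p op and recurses into its children,
+     *  recording the current path when a leaf operator is reached, or stopping before an operator that still has more than one unresolved dependency
+     */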
+ void find_independent_paths_util(OperatorId op, std::vector<std::vector<OperatorId>> &paths, std::vector<OperatorId> cur_path,
+ const std::map<OperatorId, unsigned int> &in_degree) const
+ {
+ // We have found an unresolved dependency
+ if(in_degree.at(op) > 1)
+ {
+ paths.push_back(cur_path);
+ return;
+ }
+ const auto child_ops = dst_ops(op);
+
+ cur_path.push_back(op);
+ // Hit the leaf op
+ if(child_ops.empty())
+ {
+ paths.push_back(cur_path);
+ return;
+ }
+ for(const auto child_op : child_ops)
+ {
+ find_independent_paths_util(child_op, paths, cur_path, in_degree);
+ }
+ }
+    /** Find all independent linear paths starting from @p op, i.e. paths whose operators do not depend on any other unresolved operator
+     *
+     * @return std::vector<std::vector<OperatorId>> List of paths, each a list of operators
+     */
+ std::vector<std::vector<OperatorId>> find_independent_paths(OperatorId op,
+ const std::map<OperatorId, unsigned int> &in_degree) const
+ {
+ std::vector<std::vector<OperatorId>> paths;
+ std::vector<OperatorId> cur_path;
+ find_independent_paths_util(op, paths, cur_path, in_degree);
+ return paths;
+ }
+    /** Find a longest linear path starting from @p op that does not depend on any other unresolved operator
+     *
+     * @return std::vector<OperatorId> The longest such path
+     */
+ std::vector<OperatorId> find_longest_independent_path(OperatorId op,
+ const std::map<OperatorId, unsigned int> &in_degree) const
+ {
+ const auto &paths = find_independent_paths(op, in_degree);
+ ARM_COMPUTE_ERROR_ON(paths.empty());
+ size_t max_len = 0;
+ const std::vector<OperatorId> *max_path = nullptr;
+
+ for(const auto &path : paths)
+ {
+ if(path.size() >= max_len)
+ {
+ max_path = &path;
+ max_len = path.size();
+ }
+ }
+ return *max_path;
+ }
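+    /** Among @p candidate_ops, pick the operator with the longest independent linear path, remove it from the candidates and return that path */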
+ std::vector<OperatorId> propose_next_path(std::set<OperatorId> &candidate_ops,
+ const std::map<OperatorId, unsigned int> &in_degree) const
+ {
+ if(candidate_ops.empty())
+ {
+ return {};
+ }
+ size_t max_len = 0;
+ std::vector<OperatorId> max_path;
+ OperatorId chosen_op{};
+ for(auto op : candidate_ops)
+ {
+ const auto path = find_longest_independent_path(op, in_degree);
+ if(path.size() >= max_len)
+ {
+ chosen_op = op;
+ max_path = path;
+ max_len = path.size();
+ }
+ }
+ candidate_ops.erase(chosen_op);
+ return max_path;
+ }
+ /** Partition the graph into a list of linear sub-"graphs", while preserving the topological order, and trying to minimize
+ * the number of partitions
+ */
+ std::vector<std::vector<OpPack>> topological_partition() const
+ {
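+        // Greedy partitioning: repeatedly take the longest independent linear path among the current candidates (operators with no unresolved dependencies) and emit it as one partition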
+ // Initialize zero incident degree and zero in degree ops
+ std::map<OperatorId, unsigned int> in_degree{};
+ std::set<OperatorId> candidate_ops{};
+ for(auto op : all_ops())
+ {
+ const auto degree = src_ops(op).size();
+ in_degree[op] = degree;
+ if(degree == 0)
+ {
+ candidate_ops.insert(op);
+ }
+ }
+
+ std::vector<std::vector<OpPack>> sorted_partitions{};
+ while(!candidate_ops.empty())
+ {
+ // generate_longest_path_from_zero_indegree_ops(in_degree, visited_ops, candidate_ops)
+ const auto path = propose_next_path(candidate_ops, in_degree);
+
+ // Append to sorted_partitions
+ std::vector<OpPack> path_op_pack{};
+ for(auto op : path)
+ {
+ path_op_pack.push_back(OpPack{ op, src_tensors(op), dst_tensors(op) });
+ }
+ sorted_partitions.push_back(path_op_pack);
+ // Remove whole path (Update in_degree, visited_ops, candidate_ops)
+ for(auto op : path)
+ {
+ for(const auto next_op_child : dst_ops(op))
+ {
+ if(in_degree[next_op_child] > 0)
+ {
+ in_degree[next_op_child]--;
+ }
+ if(in_degree[next_op_child] == 0 && !is_in(next_op_child, path)) // We do not want to put the proposed path back into candidates
+ {
+ candidate_ops.insert(next_op_child);
+ }
+ }
+ }
+ }
+ return sorted_partitions;
+ }
+
+ /** Strict equality comparison (all internal ids and order of insertion matter).
+ * In the future this may be replaced with a topological comparison, allowing equivalent graphs with different internal ids to be equal
+ *
+ *
+     * @param[in] g0 First graph to compare
+     * @param[in] g1 Second graph to compare
+ * @return true If the same
+ * @return false Otherwise
+ */
+ friend bool operator==(const DependencyGraph &g0, const DependencyGraph &g1)
+ {
+ // Do not compare id allocators
+ return std::make_tuple(
+ g0._adj_src_tensors, g0._adj_dst_tensors, g0._adj_src_ops, g0._adj_dst_ops)
+ == std::make_tuple(
+ g1._adj_src_tensors, g1._adj_dst_tensors, g1._adj_src_ops, g1._adj_dst_ops);
+ }
+ std::vector<OperatorId> src_ops_from_tensor(TensorId tensor) const
+ {
+ return _adj_src_ops.at(tensor);
+ }
+ std::vector<OperatorId> dst_ops_from_tensor(TensorId tensor) const
+ {
+ return _adj_dst_ops.at(tensor);
+ }
+ /** Get all tensors
+ *
+ * @return std::vector<TensorId>
+ */
+ std::vector<TensorId> all_tensors() const
+ {
+ std::vector<TensorId> tensors{};
+ std::transform(std::begin(_adj_src_ops), std::end(_adj_src_ops), std::back_inserter(tensors), [](const auto & it)
+ {
+ return it.first;
+ });
+ return tensors;
+ }
+ /** Get source tensors of the whole graph
+ *
+ * @return std::vector<TensorId>
+ */
+ std::vector<TensorId> global_src_tensors() const
+ {
+ std::vector<TensorId> tensors;
+ for(auto tensor_src_ops : _adj_src_ops)
+ {
+ if(tensor_src_ops.second.empty())
+ {
+ tensors.push_back(tensor_src_ops.first);
+ }
+ }
+ return tensors;
+ }
+ /** Get destination tensors of the whole graph
+ *
+ * @return std::vector<TensorId>
+ */
+ std::vector<TensorId> global_dst_tensors() const
+ {
+ std::vector<TensorId> tensors;
+ for(auto tensor_dst_ops : _adj_dst_ops)
+ {
+ if(tensor_dst_ops.second.empty())
+ {
+ tensors.push_back(tensor_dst_ops.first);
+ }
+ }
+ return tensors;
+ }
+ /** Get all root ops. Root ops can also be referred to as "src ops" of the whole graph
+ *
+ * @return std::vector<OperatorId>
+ */
+ std::vector<OperatorId> get_root_ops() const
+ {
+ std::vector<OperatorId> ops{};
+ const auto op_list = all_ops();
+
+ for(auto op : op_list)
+ {
+ if(src_ops(op).empty())
+ {
+ ops.emplace_back(op);
+ }
+ }
+ return ops;
+ }
+
+private:
+ void link_input(OperatorId op, TensorId in_tensor)
+ {
+ ARM_COMPUTE_ERROR_ON(!operator_exists(op));
+ if(!tensor_exists(in_tensor))
+ {
+ insert_new_tensor(in_tensor);
+ }
+ ARM_COMPUTE_ERROR_ON(are_connected(op, in_tensor)); // Prevent repetitive linking
+ _adj_src_tensors[op].push_back(in_tensor);
+ _adj_dst_ops[in_tensor].push_back(op);
+ }
+ void link_output(OperatorId op, TensorId out_tensor)
+ {
+ ARM_COMPUTE_ERROR_ON(!operator_exists(op));
+ if(!tensor_exists(out_tensor))
+ {
+ insert_new_tensor(out_tensor);
+ }
+ ARM_COMPUTE_ERROR_ON(are_connected(op, out_tensor)); // Prevent repetitive linking
+ _adj_dst_tensors[op].push_back(out_tensor);
+ _adj_src_ops[out_tensor].push_back(op);
+ }
+
+ std::vector<OperatorId> src_ops(OperatorId op) const
+ {
+ ARM_COMPUTE_ERROR_ON(!operator_exists(op));
+ std::vector<OperatorId> ops{};
+ for(TensorId src_tensor : src_tensors(op))
+ {
+ ops.insert(ops.end(), std::begin(_adj_src_ops.at(src_tensor)), std::end(_adj_src_ops.at(src_tensor)));
+ }
+ return ops;
+ }
+ std::vector<OperatorId> dst_ops(OperatorId op) const
+ {
+ ARM_COMPUTE_ERROR_ON(!operator_exists(op));
+ std::vector<OperatorId> ops{};
+ for(TensorId dst_tensor : _adj_dst_tensors.at(op))
+ {
+ ops.insert(ops.end(), std::begin(_adj_dst_ops.at(dst_tensor)), std::end(_adj_dst_ops.at(dst_tensor)));
+ }
+ return ops;
+ }
+
+ /** Get source tensors to an operator
+ *
+ * @param[in] op
+ * @return std::vector<TensorId>
+ */
+ std::vector<TensorId> src_tensors(OperatorId op) const
+ {
+ ARM_COMPUTE_ERROR_ON(!operator_exists(op));
+ return _adj_src_tensors.at(op);
+ }
+ /** Get destination tensors to an operator
+ *
+ * @param[in] op
+ * @return std::vector<TensorId>
+ */
+ std::vector<TensorId> dst_tensors(OperatorId op) const
+ {
+ ARM_COMPUTE_ERROR_ON(!operator_exists(op));
+ return _adj_dst_tensors.at(op);
+ }
+ /** Get all operators
+ *
+ * @return std::vector<OperatorId>
+ */
+ std::vector<OperatorId> all_ops() const
+ {
+ std::vector<OperatorId> ops{};
+ std::transform(std::begin(_adj_src_tensors), std::end(_adj_src_tensors), std::back_inserter(ops), [](const auto & it)
+ {
+ return it.first;
+ });
+ return ops;
+ }
+    /** Remove an operator from the graph.
+ *
+ * @param[in] op
+ */
+ void remove_operator(OperatorId op)
+ {
+ for(auto src_tensor : _adj_src_tensors.at(op))
+ {
+ auto &dst_ops = _adj_dst_ops.at(src_tensor);
+ dst_ops.erase(
+ std::remove(std::begin(dst_ops), std::end(dst_ops), op),
+ std::end(dst_ops));
+ }
+ for(auto dst_tensor : _adj_dst_tensors.at(op))
+ {
+ auto &src_ops = _adj_src_ops.at(dst_tensor);
+ src_ops.erase(
+ std::remove(std::begin(src_ops), std::end(src_ops), op),
+ std::end(src_ops));
+ }
+ // Remove any isolated tensors
+ // An isolated tensor is one where both its _adj_src_ops and _adj_dst_ops are empty
+ for(auto t : all_tensors())
+ {
+ if(_adj_src_ops.at(t).empty() && _adj_dst_ops.at(t).empty())
+ {
+ _adj_src_ops.erase(t);
+ _adj_dst_ops.erase(t);
+ }
+ }
+ _adj_src_tensors.erase(op);
+ _adj_dst_tensors.erase(op);
+ }
+ void insert_new_tensor(TensorId tensor)
+ {
+ _adj_src_ops[tensor] = {};
+ _adj_dst_ops[tensor] = {};
+ }
+ bool tensor_exists(TensorId tensor) const
+ {
+ return _adj_src_ops.find(tensor) != _adj_src_ops.end() && _adj_dst_ops.find(tensor) != _adj_dst_ops.end();
+ }
+ bool operator_exists(OperatorId op) const
+ {
+ return _adj_src_tensors.find(op) != _adj_src_tensors.end() && _adj_dst_tensors.find(op) != _adj_dst_tensors.end();
+ }
+ bool is_src_tensor_of(OperatorId op, TensorId tensor) const
+ {
+ if(!operator_exists(op) || !tensor_exists(tensor))
+ {
+ return false;
+ }
+ const auto op_inputs = src_tensors(op);
+ return std::find(op_inputs.begin(), op_inputs.end(), tensor) != op_inputs.end();
+ }
+ bool is_dst_tensor_of(OperatorId op, TensorId tensor) const
+ {
+ if(!operator_exists(op) || !tensor_exists(tensor))
+ {
+ return false;
+ }
+ const auto op_outputs = dst_tensors(op);
+ return std::find(op_outputs.begin(), op_outputs.end(), tensor) != op_outputs.end();
+ }
+ bool are_connected(OperatorId op, TensorId tensor) const
+ {
+ return is_src_tensor_of(op, tensor) || is_dst_tensor_of(op, tensor);
+ }
+    /** Check if @p op is a destination / leaf operator of the whole graph
+     *
+     * @param[in] op Operator to check
+     *
+     * @return true  If @p op is a leaf operator
+     * @return false Otherwise
+     */
+ bool is_dst_op(OperatorId op) const
+ {
+ return dst_ops(op).empty();
+ }
+ std::vector<OperatorId> get_dst_ops() const
+ {
+ std::vector<OperatorId> ops{};
+ const auto op_list = all_ops();
+
+ for(auto op : op_list)
+ {
+ if(is_dst_op(op))
+ {
+ ops.emplace_back(op);
+ }
+ }
+ return ops;
+ }
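+    /** Check if there is a directed path from @p src_tensor to @p dst_op, via a depth-first search over the mutually recursive tensor / operator adjacency lists */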
+ bool path_exists_from_tensor_to_op(TensorId src_tensor, OperatorId dst_op) const
+ {
+ if(!tensor_exists(src_tensor) || !operator_exists(dst_op))
+ {
+ return false;
+ }
+ for(auto child_op : dst_ops_from_tensor(src_tensor))
+ {
+ if(path_exists_from_op_to_op(child_op, dst_op))
+ {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ bool path_exists_from_op_to_op(OperatorId src_op, OperatorId dst_op) const
+ {
+ if(!operator_exists(src_op) || !operator_exists(dst_op))
+ {
+ return false;
+ }
+ if(src_op == dst_op)
+ {
+ return true;
+ }
+ if(is_in(src_op, get_dst_ops()))
+ {
+ return false;
+ }
+ for(auto child_tensor : dst_tensors(src_op))
+ {
+ if(path_exists_from_tensor_to_op(child_tensor, dst_op))
+ {
+ return true;
+ }
+ }
+ return false;
+ }
+
+private:
+ AdjList _adj_src_tensors{};
+ AdjList _adj_dst_tensors{};
+ AdjList _adj_src_ops{};
+ AdjList _adj_dst_ops{};
+};
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_UTILS_DEPENDENCYGRAPH */