aboutsummaryrefslogtreecommitdiff
path: root/src/dynamic_fusion/sketch/gpu/template_writer/cl
diff options
context:
space:
mode:
Diffstat (limited to 'src/dynamic_fusion/sketch/gpu/template_writer/cl')
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp400
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h113
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp113
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h85
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp297
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.h92
6 files changed, 1100 insertions, 0 deletions
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp
new file mode 100644
index 0000000000..870de64eb8
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp
@@ -0,0 +1,400 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "ClTemplateDirectConv2d.h"
+
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h"
+
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/helpers/WindowHelpers.h"
+
+#include "support/StringSupport.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+// Resolve all tensor arguments once at construction time. The bias is an
+// optional argument: when ACL_SRC_2 was not provided it stays nullptr.
+ClTemplateDirectConv2d::ClTemplateDirectConv2d(ComponentId id,
+                                               const ArgumentPack<ITensorInfo> &tensors,
+                                               const Attributes &attributes,
+                                               const Settings &settings)
+    : IGpuTemplateComponentWriter{ id, tensors },
+      _src{ tensors.get_const_tensor(TensorType::ACL_SRC_0) },
+      _weight{ tensors.get_const_tensor(TensorType::ACL_SRC_1) },
+      _bias{ tensors.get_const_tensor(TensorType::ACL_SRC_2) },
+      _dst{ tensors.get_const_tensor(TensorType::ACL_DST_0) },
+      _attributes{ attributes },
+      _settings{ settings }
+{
+    // src, weights and dst are mandatory; only the bias may be absent.
+    ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _weight, _dst);
+}
+
+/** Component name used when assembling the name of the fused kernel. */
+std::string ClTemplateDirectConv2d::get_name() const
+{
+    return "direct_conv2d";
+}
+
+/** Assemble the OpenCL code template of the direct conv2d component.
+ *
+ * The returned string still contains {{tag}} placeholders; they are
+ * substituted later using the table produced by get_tag_lut().
+ */
+std::string ClTemplateDirectConv2d::get_component_code(const ComponentGroup &comp_group) const
+{
+    ARM_COMPUTE_UNUSED(comp_group);
+
+    // K0 is the number of channels processed per inner iteration; a scalar
+    // tail loop is only emitted when the channel count is not a multiple of K0.
+    const auto channel_idx   = get_data_layout_dimension_index(_src->data_layout(), DataLayoutDimension::CHANNEL);
+    const auto k0            = adjust_vec_size(is_data_type_quantized(_src->data_type()) ? 16u : 8u, _src->dimension(channel_idx));
+    const bool leftover_loop = (_src->dimension(channel_idx) % k0) != 0;
+
+    std::string code = R"_(
+//------------------ START KERNEL {{meta_kernel_id}} ---------------------
+// IN_0(src) {{src}}
+// IN_1(wei) {{weight}}
+)_";
+    if(_bias && _bias->has_valid_id())
+    {
+        code += R"_(
+// IN_1(bia) {{bias}}
+)_";
+    }
+    code += R"_(
+// OUT(dst, accum) {{dst}}
+
+// Initialize the accumulators
+TILE({{ACC_DATA_TYPE}}, M0, N0, {{dst}});
+{
+    // All the tensor dimensions are passed at compile time.
+    // In case of dynamic tensor support, the following dimensions should be passed as function argument.
+#define _IWEI_WIDTH {{WEI_WIDTH}}
+#define _IWEI_HEIGHT {{WEI_HEIGHT}}
+#define _ISRC_WIDTH {{src}}_w
+#define _ISRC_HEIGHT {{src}}_h
+#define _ISRC_CHANNELS {{src}}_c
+#define _IDST_WIDTH {{arg_dst}}_w
+#define _IDST_HEIGHT {{arg_dst}}_h
+#define _IDST_CHANNELS {{arg_dst}}_c
+#define _IY_MULTIPLIER (_IWEI_WIDTH * _IWEI_HEIGHT)
+
+    // .v = access the whole vector (OpenCL vector)
+    // .s[x] = access the vector element at position x (scalar access)
+    TILE(int, M0, 1, xi);
+    TILE(int, M0, 1, yi);
+
+    // Convert the linear index to coordinate
+    LOOP_UNROLLING(int, i, 0, 1, M0,
+    {
+        xi[i].v = ((g_ind_1 + i) % _IDST_WIDTH) * {{STRIDE_X}};
+        yi[i].v = ((g_ind_1 + i) / _IDST_WIDTH) * {{STRIDE_Y}};
+        xi[i].v -= {{PAD_LEFT}};
+        yi[i].v -= {{PAD_TOP}};
+    })
+
+    LOOP_UNROLLING(int, i, 0, 1, M0,
+    {
+        {{dst}}[i].v = 0;
+    })
+
+    for(int i = 0; i < (_IWEI_WIDTH * _IWEI_HEIGHT); ++i)
+    {
+        int ck = 0;
+        int xk = i % _IWEI_WIDTH;
+        int yk = i / _IWEI_WIDTH;
+
+        int k = 0;
+        for(; k <= (_ISRC_CHANNELS - K0); k += K0)
+        {
+            TILE({{SRC_DATA_TYPE}}, M0, K0, a);
+            TILE({{WEI_DATA_TYPE}}, N0, K0, b);
+
+            // Initialize tiles
+            LOOP_UNROLLING(int, i, 0, 1, M0,
+            {
+                a[i].v = {{ZERO_VALUE}};
+            })
+
+            LOOP_UNROLLING(int, i, 0, 1, N0,
+            {
+                b[i].v = {{ZERO_VALUE}};
+            })
+
+            // Load tile from the src tensor
+            T_LOAD_NHWC_INDIRECT({{SRC_DATA_TYPE}}, M0, K0, {{SRC_TENSOR_TYPE}}, {{src}}, g_ind_2, yk, xk, ck, _ISRC_WIDTH, _ISRC_HEIGHT, {{src}}_stride_y, xi, yi, a);
+
+            // Load tile from the weights tensor
+            T_LOAD({{WEI_DATA_TYPE}}, N0, K0, {{WEI_TENSOR_TYPE}}, {{weight}}, ck, g_ind_0 * _IY_MULTIPLIER + i, _IY_MULTIPLIER, {{weight}}_stride_y, b);
+
+            // Compute the matrix multiplication between two tiles
+            T_MMUL({{SRC_DATA_TYPE}}, {{WEI_DATA_TYPE}}, {{ACC_DATA_TYPE}}, M0, N0, K0, NT, T, a, b, {{dst}});
+
+            ck += K0;
+        }
+
+        // We voluntarily use SRC_CHANNELS rather than _DSRC_CHANNELS
+        // This #if directive should be removed in case of dynamic tensor support
+)_";
+
+    // Scalar tail: only emitted when the channel count is not a multiple of K0.
+    if(leftover_loop)
+    {
+        code += R"_(
+    // Left-over accumulations
+    for(; k < _ISRC_CHANNELS; ++k)
+    {
+        TILE({{SRC_DATA_TYPE}}, M0, 1, a);
+        TILE({{WEI_DATA_TYPE}}, N0, 1, b);
+
+        // Initialize tiles
+        LOOP_UNROLLING(int, i, 0, 1, M0,
+        {
+            a[i].v = {{ZERO_VALUE}};
+        })
+
+        LOOP_UNROLLING(int, i, 0, 1, N0,
+        {
+            b[i].v = {{ZERO_VALUE}};
+        })
+
+        // Load tile from the src tensor
+        T_LOAD_NHWC_INDIRECT({{SRC_DATA_TYPE}}, M0, 1, {{SRC_TENSOR_TYPE}}, {{src}}, g_ind_2, yk, xk, ck, _ISRC_WIDTH, _ISRC_HEIGHT, {{src}}_stride_y, xi, yi, a);
+
+        // Load tile from the weights tensor
+        // The T_LOAD for the left-over elements can only use BUFFER because we load one element per iteration
+        T_LOAD({{WEI_DATA_TYPE}}, N0, 1, BUFFER, {{weight}}, ck, g_ind_0 * _IY_MULTIPLIER + i, _IY_MULTIPLIER, {{weight}}_stride_y, b);
+
+        // Compute the matrix multiplication between two tiles
+        T_MMUL({{SRC_DATA_TYPE}}, {{WEI_DATA_TYPE}}, {{ACC_DATA_TYPE}}, M0, N0, 1, NT, T, a, b, {{dst}});
+
+        ++ck;
+    }
+    )_";
+    }
+
+    // NOTE: the #undef names must mirror the #define block above exactly,
+    // otherwise the macros leak into subsequently fused components
+    // (previously _I_WEI_WIDTH/_I_WEI_HEIGHT were undefined instead of
+    // _IWEI_WIDTH/_IWEI_HEIGHT, leaving the latter defined).
+    code += R"_(
+#undef _IWEI_WIDTH
+#undef _IWEI_HEIGHT
+#undef _ISRC_WIDTH
+#undef _ISRC_HEIGHT
+#undef _ISRC_CHANNELS
+#undef _IDST_WIDTH
+#undef _IDST_HEIGHT
+#undef _IDST_CHANNELS
+#undef _IY_MULTIPLIER
+
+    }
+)_";
+
+    if(_bias && _bias->has_valid_id())
+    {
+        code += R"_(
+    TILE({{BIA_DATA_TYPE}}, 1, N0, bias0);
+
+    T_LOAD({{BIA_DATA_TYPE}}, 1, N0, BUFFER, {{bias}}, g_ind_0, 0, 1, 0, bias0);
+
+    // c = c + bias[broadcasted]
+    T_ELTWISE_BROADCAST_ADD_X({{ACC_DATA_TYPE}}, M0, N0, {{dst}}, bias0, {{dst}});
+    )_";
+    }
+
+    code += R"_(
+}
+//------------------ END KERNEL {{meta_kernel_id}} ---------------------
+)_";
+    return code;
+}
+
+/** Register the component's tensor arguments in the variable table.
+ *
+ * src and dst are always declared as 4D buffer tensors; the weights are
+ * declared as an image tensor when the settings request the cl_image export.
+ */
+void ClTemplateDirectConv2d::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
+{
+    vtable.declare_variable(
+        _src,
+        GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
+        comp_group.is_intermediate_tensor(_src),
+        "src");
+
+    const GpuKernelArgumentInfo::Type weight_type = _settings.export_to_cl_image() ? GpuKernelArgumentInfo::Type::Tensor_4D_t_Image : GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer;
+    vtable.declare_variable(
+        _weight,
+        GpuKernelArgumentInfo(weight_type),
+        comp_group.is_intermediate_tensor(_weight),
+        "weight");
+
+    if(_bias && _bias->has_valid_id()) // optional bias
+    {
+        vtable.declare_variable(
+            _bias,
+            GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Vector),
+            comp_group.is_intermediate_tensor(_bias),
+            "bias");
+    }
+    vtable.declare_variable(
+        _dst,
+        GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
+        comp_group.is_intermediate_tensor(_dst),
+        "dst");
+}
+
+/** Build the tag -> value table used to instantiate the code template
+ *  returned by get_component_code().
+ */
+TagLUT ClTemplateDirectConv2d::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
+{
+    TagLUT lut{};
+    // Arguments and global shared variables
+    lut["src"]    = vtable.get_variable(_src);
+    lut["weight"] = vtable.get_variable(_weight);
+
+    if(_bias && _bias->has_valid_id()) // optional bias
+    {
+        lut["bias"]          = vtable.get_variable(_bias);
+        lut["BIA_DATA_TYPE"] = get_cl_type_from_data_type(_bias->data_type());
+    }
+    lut["dst"] = vtable.get_variable(_dst);
+
+    // {{arg_dst}} refers to the final destination of the whole component group,
+    // whose shape provides the output W/H/C used by the index math.
+    const auto dst_argument = vtable.get_variable(comp_group.get_dst_tensors()[0]);
+    lut["arg_dst"]          = dst_argument.uniq_name;
+
+    // Local build options
+    lut["meta_kernel_id"] = id();
+    // Accumulation is done in the source data type (no widening here).
+    lut["ACC_DATA_TYPE"]  = _src->data_type();
+    lut["SRC_DATA_TYPE"]  = _src->data_type();
+    lut["WEI_DATA_TYPE"]  = _weight->data_type();
+
+    // The tensor type drives which T_LOAD flavour the template macros expand to.
+    lut["SRC_TENSOR_TYPE"] = "BUFFER";
+    switch(vtable.get_variable(_weight).kernel_argument_info.type)
+    {
+        case GpuKernelArgumentInfo::Type::Image_Export_To_ClImage2D:
+        case GpuKernelArgumentInfo::Type::Image_3D_Export_To_ClImage2D:
+        case GpuKernelArgumentInfo::Type::Tensor_4D_t_Image:
+        {
+            lut["WEI_TENSOR_TYPE"] = "IMAGE";
+            break;
+        }
+        default:
+        {
+            lut["WEI_TENSOR_TYPE"] = "BUFFER";
+            break;
+        }
+    }
+    // NOTE(review): dims 1/2 of the weight tensor are taken as kernel W/H;
+    // this presumes the weight layout fixed by ClComponentDirectConv2d — confirm there.
+    const auto width_idx  = 1;
+    const auto height_idx = 2;
+    lut["WEI_WIDTH"]      = _weight->dimension(width_idx);
+    lut["WEI_HEIGHT"]     = _weight->dimension(height_idx);
+
+    lut["STRIDE_X"] = _attributes.stride().x();
+    lut["STRIDE_Y"] = _attributes.stride().y();
+
+    lut["PAD_LEFT"] = _attributes.pad().left;
+    lut["PAD_TOP"]  = _attributes.pad().top;
+
+    lut["ZERO_VALUE"] = 0;
+
+    return lut;
+}
+
+/** Produce the OpenCL build options (tile sizes and math flags) for the kernel. */
+CLBuildOptions ClTemplateDirectConv2d::get_build_options(const ComponentGroup &comp_group) const
+{
+    const unsigned int channel_idx = get_data_layout_dimension_index(_src->data_layout(), DataLayoutDimension::CHANNEL);
+    const DataType     data_type   = _src->data_type();
+
+    /// NOTE: For now tile sizes (n0, m0, n0) are set by the execution window. This may change in the future
+    const auto         root_window      = comp_group.get_root_component()->template_writer()->get_window();
+    const unsigned int n0               = root_window.x().step();
+    const unsigned int m0               = root_window.y().step();
+    const unsigned int k0               = adjust_vec_size(is_data_type_quantized(data_type) ? 16u : 8u, _src->dimension(channel_idx));
+    // Remainder of output channels that does not fill a whole N0 vector.
+    const unsigned int partial_store_n0 = _dst->dimension(0) % n0;
+
+    CLBuildOptions build_opts{};
+    if(_settings.fast_relaxed_math())
+    {
+        build_opts.add_option("-cl-fast-relaxed-math");
+    }
+    else
+    {
+        // -cl-fast-relaxed-math also sets -cl-finite-math-only and -cl-unsafe-math-optimizations
+        // to disable -cl-finite-math-only, we only include -cl-unsafe-math-optimizations
+        build_opts.add_option("-cl-unsafe-math-optimizations");
+    }
+    build_opts.add_option("-DIS_TILED");
+    build_opts.add_option("-DN0=" + support::cpp11::to_string(n0));
+    build_opts.add_option("-DM0=" + support::cpp11::to_string(m0));
+    build_opts.add_option("-DK0=" + support::cpp11::to_string(k0));
+    build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0));
+
+    return build_opts;
+}
+
+/** Build the tuner configuration id:
+ *  <dtype>_<kernel_size>_<stride_x>_<stride_y>_<out_w>_<out_h>_<layout>
+ */
+std::string ClTemplateDirectConv2d::get_config_id() const
+{
+    const DataType   data_type   = _src->data_type();
+    const DataLayout data_layout = _src->data_layout();
+
+    const unsigned int width_idx  = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+    const unsigned int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+
+    const unsigned int kernel_size = _weight->dimension(width_idx);
+
+    return lower_string(string_from_data_type(data_type)) + "_"
+           + support::cpp11::to_string(kernel_size) + "_"
+           + support::cpp11::to_string(_attributes.stride().x()) + "_"
+           + support::cpp11::to_string(_attributes.stride().y()) + "_"
+           + support::cpp11::to_string(_dst->dimension(width_idx)) + "_"
+           + support::cpp11::to_string(_dst->dimension(height_idx)) + "_"
+           + lower_string(string_from_data_layout(data_layout));
+}
+
+/** OpenCL helper headers required by the generated tile code. */
+std::set<std::string> ClTemplateDirectConv2d::get_headers_list() const
+{
+    return { "helpers.h", "tile_helpers.h" };
+}
+
+/** Compute the execution window; its X/Y steps become the N0/M0 tile sizes. */
+Window ClTemplateDirectConv2d::get_window() const
+{
+    ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized");
+
+    const auto dst_shape = _dst->tensor_shape();
+
+    // Up to 4 output channels per work-item; more rows per work-item only
+    // when there are enough channels to keep the work-items busy.
+    const unsigned int vec_size = std::min(static_cast<unsigned int>(dst_shape[0]), 4u);
+    unsigned int       num_rows = 1U;
+    if(dst_shape[0] > 16)
+    {
+        num_rows = (_src->data_type() == DataType::F32) ? 2U : 4U;
+    }
+
+    // Create and configure kernel window: W and H are collapsed into DimY,
+    // batches run over DimZ.
+    Window win = calculate_max_window(dst_shape, Steps(vec_size, num_rows));
+
+    const size_t dim_y_collapsed = ceil_to_multiple(dst_shape[1] * dst_shape[2], num_rows);
+    win.set(Window::DimY, Window::Dimension(0, dim_y_collapsed, num_rows));
+    win.set(Window::DimZ, Window::Dimension(0, dst_shape.total_size_upper(3), 1));
+
+    return win;
+}
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h
new file mode 100644
index 0000000000..48027a9b8d
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEDIRECTCONV2D
+#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEDIRECTCONV2D
+
+#include "arm_compute/core/experimental/Types.h"
+#include "arm_compute/dynamic_fusion/sketch/OperatorAttributes.h"
+#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h"
+#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h"
+#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** Template writer that emits the OpenCL code, tag table, build options and
+ *  execution window for a fused direct conv2d component.
+ */
+class ClTemplateDirectConv2d final : public IGpuTemplateComponentWriter
+{
+public:
+    using Attributes = ClComponentDirectConv2d::Attributes;
+    using Settings   = ClComponentDirectConv2d::Settings;
+    /** Constructor
+     *
+     * Similar to @ref ClComponentDirectConv2d::validate()
+     *
+     * @param[in] id         Component id
+     * @param[in] tensors    Tensor arguments to the components
+     * @param[in] attributes Component attributes
+     * @param[in] settings   Component settings
+     */
+    ClTemplateDirectConv2d(ComponentId                      id,
+                           const ArgumentPack<ITensorInfo> &tensors,
+                           const Attributes                &attributes,
+                           const Settings                  &settings);
+    /** Prevent instances of this class from being copy constructed */
+    ClTemplateDirectConv2d(const ClTemplateDirectConv2d &direct_conv2d) = delete;
+    /** Prevent instances of this class from being copied */
+    ClTemplateDirectConv2d &operator=(const ClTemplateDirectConv2d &direct_conv2d) = delete;
+    /** Allow instances of this class to be move constructed */
+    ClTemplateDirectConv2d(ClTemplateDirectConv2d &&direct_conv2d) = default;
+    /** Allow instances of this class to be moved */
+    ClTemplateDirectConv2d &operator=(ClTemplateDirectConv2d &&direct_conv2d) = default;
+    /** Generate kernel component name */
+    std::string get_name() const override;
+    /** Generate kernel component code template
+     *
+     * @param[in] comp_group Component group of which the component is a part of
+     *
+     * @return std::string Component code
+     */
+    std::string get_component_code(const ComponentGroup &comp_group) const override;
+    /** Declare all variables used by the component in the @p vtable
+     *
+     * @param[out] vtable     Variable table
+     * @param[in]  comp_group Component group of which the component is a part of
+     */
+    void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
+    /** Generate the tag look-up table used to instantiate the component code.
+     *
+     * @param[in] vtable     Variable table
+     * @param[in] comp_group Component group of which the component is a part of
+     *
+     * @return TagLUT Tag lookup table
+     */
+    TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
+    /** Generate the build options used in the component
+     *
+     * @param[in] comp_group Component group of which the component is a part of
+     *
+     * @return CLBuildOptions Build options
+     */
+    CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override;
+    /** Generate the component config id string used for tuning */
+    std::string get_config_id() const override;
+    /** Generate the header list used in the component */
+    std::set<std::string> get_headers_list() const override;
+    /** Generate the execution window for the component */
+    Window get_window() const override;
+
+private:
+    const ITensorInfo *_src;    // mandatory input feature map
+    const ITensorInfo *_weight; // mandatory weights
+    const ITensorInfo *_bias;   // optional; nullptr when not provided
+    const ITensorInfo *_dst;    // mandatory output
+    Attributes         _attributes;
+    Settings           _settings;
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEDIRECTCONV2D */
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp
new file mode 100644
index 0000000000..6c4b8f52f2
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "ClTemplateStore.h"
+
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+// Resolve the store component's single input/output pair at construction time.
+ClTemplateStore::ClTemplateStore(ComponentId id, const ArgumentPack<ITensorInfo> &tensors)
+    : IGpuTemplateComponentWriter{ id, tensors },
+      _src{ tensors.get_const_tensor(TensorType::ACL_SRC_0) },
+      _dst{ tensors.get_const_tensor(TensorType::ACL_DST_0) }
+{
+}
+
+/** Component name used when assembling the name of the fused kernel. */
+std::string ClTemplateStore::get_name() const
+{
+    return "store";
+}
+
+/** Emit the OpenCL code template that writes the accumulated tile back to the
+ *  destination tensor, handling the partially-filled N0 vector and the
+ *  collapsed H*W window via an indirect-Y store.
+ */
+std::string ClTemplateStore::get_component_code(const ComponentGroup &comp_group) const
+{
+    ARM_COMPUTE_UNUSED(comp_group);
+    return R"_(
+//------------------ START KERNEL {{meta_kernel_id}} STORE ---------------------
+{
+// This also follows NHWC layout
+// g_ind_0 maps to global_id(0) maps to Channel
+// g_ind_1 maps to global_id(1) maps to Height and Weight (Collapsed Window)
+// g_ind_2 maps to global_id(2) maps to N / Batch
+#define _IDST_WIDTH {{dst}}_w
+#define _IDST_HEIGHT {{dst}}_h
+    TILE(uint, M0, 1, dst_indirect_y);
+
+    // Calculate the destination indirect Y
+    LOOP_UNROLLING(int, i, 0, 1, M0,
+    {
+        dst_indirect_y[i].v = (uint)min(g_ind_1 + i, (int)(_IDST_WIDTH * _IDST_HEIGHT) - 1);
+        dst_indirect_y[i].v += g_ind_2 * (int)(_IDST_WIDTH * _IDST_HEIGHT);
+    })
+
+    bool x_cond = PARTIAL_N0 != 0 && get_global_id(0) == 0;
+
+    T_STORE_INDIRECT_WIDTH_SELECT({{DST_DATA_TYPE}}, M0, N0, PARTIAL_N0, {{DST_TENSOR_TYPE}}, {{dst}}, g_ind_0, {{dst}}_stride_y, x_cond, {{src}}, dst_indirect_y);
+
+#undef _IDST_WIDTH
+#undef _IDST_HEIGHT
+    //------------------ END KERNEL {{meta_kernel_id}} STORE ---------------------
+}
+
+)_";
+}
+
+/** Register the store component's source and destination tensors in the
+ *  variable table; both are 4D buffer tensors.
+ */
+void ClTemplateStore::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
+{
+    vtable.declare_variable(
+        _src,
+        GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
+        comp_group.is_intermediate_tensor(_src),
+        "src");
+    vtable.declare_variable(
+        _dst,
+        GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
+        comp_group.is_intermediate_tensor(_dst),
+        "dst");
+}
+
+/** Build the tag -> value table used to instantiate the store code template. */
+TagLUT ClTemplateStore::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
+{
+    TagLUT lut{};
+
+    // Arguments and global shared variables
+    lut["src"] = vtable.get_variable(_src);
+    lut["dst"] = vtable.get_variable(_dst);
+
+    // Local build options
+    lut["meta_kernel_id"]  = id();
+    lut["DST_TENSOR_TYPE"] = "BUFFER";
+    // The data type comes from the group's final destination tensor.
+    const auto dst_info    = comp_group.get_dst_tensors()[0];
+    lut["DST_DATA_TYPE"]   = dst_info->data_type();
+
+    return lut;
+}
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h
new file mode 100644
index 0000000000..3f97a82204
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATESTORE
+#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATESTORE
+
+#include "arm_compute/core/experimental/Types.h"
+#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h"
+#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** Template writer that emits the final store-to-destination code of a fused
+ *  kernel (always appended as the last component of a group).
+ */
+class ClTemplateStore final : public IGpuTemplateComponentWriter
+{
+public:
+    /** Constructor
+     *
+     * @param[in] id      Component id
+     * @param[in] tensors Tensor arguments to the components
+     */
+    ClTemplateStore(ComponentId id, const ArgumentPack<ITensorInfo> &tensors);
+    /** Prevent instances of this class from being copy constructed */
+    ClTemplateStore(const ClTemplateStore &store) = delete;
+    /** Prevent instances of this class from being copied */
+    ClTemplateStore &operator=(const ClTemplateStore &store) = delete;
+    /** Allow instances of this class to be move constructed */
+    ClTemplateStore(ClTemplateStore &&store) = default;
+    /** Allow instances of this class to be moved */
+    ClTemplateStore &operator=(ClTemplateStore &&store) = default;
+    /** Generate kernel component name */
+    std::string get_name() const override;
+    /** Generate kernel component code template
+     *
+     * @param[in] comp_group Component group of which the component is a part of
+     *
+     * @return std::string Component code
+     */
+    std::string get_component_code(const ComponentGroup &comp_group) const override;
+    /** Declare all variables used by the component in the @p vtable
+     *
+     * @param[out] vtable     Variable table
+     * @param[in]  comp_group Component group of which the component is a part of
+     */
+    void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
+    /** Generate the tag look-up table used to instantiate the component code.
+     *
+     * @param[in] vtable     Variable table
+     * @param[in] comp_group Component group of which the component is a part of
+     *
+     * @return TagLUT Tag lookup table
+     */
+    TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
+
+private:
+    const ITensorInfo *_src; // tile to be written out
+    const ITensorInfo *_dst; // destination tensor
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATESTORE */
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp
new file mode 100644
index 0000000000..cb643a741d
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp
@@ -0,0 +1,297 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "ClTemplateWriter.h"
+
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
+#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/// @note: some tags can be unused since they could be used only for the macros, or only for the component code
+std::string ClTemplateWriter::replace_tags(const std::string &code_template, const TagLUT &tags)
+{
+    // Single pass over the template: plain characters are copied through and
+    // every {{pattern}} occurrence is substituted with its value from @p tags.
+    std::string replaced_code{};
+    bool        scanning_pattern = false;
+    std::string pattern_found{};
+    const size_t len = code_template.size();
+    for(size_t i = 0; i < len; ++i)
+    {
+        if(!scanning_pattern)
+        {
+            // Bounds-checked one-character lookahead for the "{{" opener. The
+            // previous loop bound "i < size() - 1" underflowed for an empty
+            // template (size_t wrap-around -> out-of-bounds reads) and silently
+            // dropped the last character of every template.
+            if(i + 1 < len && code_template[i] == '{' && code_template[i + 1] == '{')
+            {
+                i += 1; // Consume the second '{'
+                scanning_pattern = true;
+                pattern_found    = "";
+            }
+            else
+            {
+                replaced_code += code_template[i];
+            }
+        }
+        else
+        {
+            if(i + 1 < len && code_template[i] == '}' && code_template[i + 1] == '}')
+            {
+                i += 1; // Consume the second '}'
+                scanning_pattern = false;
+                std::string err = "Pattern " + pattern_found + " not found in tags";
+                ARM_COMPUTE_ERROR_ON_MSG(tags.find(pattern_found) == tags.end(), err.c_str());
+                replaced_code += tags.find(pattern_found)->second.value;
+            }
+            else
+            {
+                pattern_found += code_template[i];
+            }
+        }
+    }
+
+    return replaced_code;
+}
+ClTemplateWriter::~ClTemplateWriter() = default;
+
+ClTemplateWriter::ClTemplateWriter(const GpuKernelComponentGroup &components)
+    : _components{ components }
+{
+}
+
+// The kernel name is derived from the names of the fused components.
+std::string ClTemplateWriter::get_name()
+{
+    return write_kernel_name();
+}
+
+// The complete kernel source is assembled on demand.
+std::string ClTemplateWriter::get_code()
+{
+    return write_code();
+}
+// Build a tuning id: kernel name followed by each component's own config id.
+std::string ClTemplateWriter::get_config_id()
+{
+    auto config_id = get_name();
+    for(const auto &component : _components)
+    {
+        config_id += "--" + component->template_writer()->get_config_id() + "--";
+    }
+    return config_id;
+}
+
+// Union of the build options requested by every fused component.
+CLBuildOptions ClTemplateWriter::get_build_options()
+{
+    CLBuildOptions merged_opts{};
+    for(const auto &component : _components)
+    {
+        merged_opts.add_options(component->template_writer()->get_build_options(_components).options());
+    }
+    return merged_opts;
+}
+
+// The execution window is dictated by the root component of the group.
+Window ClTemplateWriter::get_window() const
+{
+    const auto root = _components.get_root_component();
+    ARM_COMPUTE_ERROR_ON_MSG(root == nullptr, "No root component found");
+    return root->template_writer()->get_window();
+}
+
+// Assemble a GpuKernelArgument, keyed by tensor id, for every tensor that
+// crosses the kernel boundary.
+std::map<ITensorInfo::Id, GpuKernelArgument> ClTemplateWriter::get_tensors()
+{
+    std::map<ITensorInfo::Id, GpuKernelArgument> tensors{};
+    for(const auto tensor : _components.get_argument_tensors())
+    {
+        const auto &variable = _vtable.get_variable(tensor);
+        tensors.emplace(tensor->id(), GpuKernelArgument{ *tensor, variable.kernel_argument_info });
+    }
+    return tensors;
+}
+
+std::string ClTemplateWriter::write_code()
+{
+    ARM_COMPUTE_ERROR_ON_MSG(_components.empty(), "No components found");
+
+    // These data structures will hold the data from all the components in the blueprint
+    std::set<std::string>    headers_list{};
+    std::set<std::string>    additional_macros{};
+    std::vector<std::string> component_codes{}; // vector because order matters
+
+    // Pass 1: Declare all kernel variables
+    for(auto &component : _components)
+    {
+        component->template_writer()->declare_variables(_vtable, _components);
+    }
+    // Pass 2: Generate component codes
+    for(auto &component : _components)
+    {
+        const auto component_writer       = component->template_writer();
+        auto       curr_headers_list      = component_writer->get_headers_list();
+        auto       curr_additional_macros = component_writer->get_additional_macros();
+        auto       curr_component_code    = component_writer->get_component_code(_components);
+        const auto var_lut                = component_writer->get_tag_lut(_vtable, _components); // Ideally can be merged with get_component_code once we have finer-grained code generation technique
+        component_codes.push_back(replace_tags(curr_component_code, var_lut));
+
+        headers_list.insert(curr_headers_list.begin(), curr_headers_list.end());
+        // Some components might not have any additional macros. Bug fix: this
+        // must test the component's own macros (curr_additional_macros), not
+        // the accumulator set -- the accumulator starts empty, so the old check
+        // was always false and no macro was ever collected.
+        if(!curr_additional_macros.empty())
+        {
+            additional_macros.insert(replace_tags(curr_additional_macros, var_lut));
+        }
+    }
+
+    // Step 3: Assemble the data gathered by traversing the graph into the string "code"
+    std::string code = "";
+
+    for(auto &header : headers_list)
+    {
+#if defined(EMBEDDED_KERNELS)
+        code += CLKernelLibrary::get().get_program(header).first;
+#else  // defined(EMBEDDED_KERNELS)
+        code += "#include \"" + header + "\"\n";
+#endif // defined(EMBEDDED_KERNELS)
+    }
+
+    for(auto &macros : additional_macros)
+    {
+        code += macros;
+    }
+
+    // Kernel signature over all argument tensors, then the global coordinate
+    // section, then each component's instantiated code in fusion order.
+    code += write_kernel_signature(_vtable.get_variable_list(_components.get_argument_tensors()));
+
+    code += "\n{\n\n";
+
+    code += " //------------------ START KERNEL_BUILDER_COORDINATE ---------------------\n\n";
+    code += write_global_section();
+    code += " //------------------ END KERNEL_BUILDER_COORDINATE ---------------------\n";
+
+    for(const auto &component_code : component_codes)
+    {
+        code += component_code;
+    }
+
+    code += "}\n";
+
+    return code;
+}
+std::string ClTemplateWriter::write_global_section() const
+{
+    // A single destination tensor defines the problem shape for the whole kernel.
+    const auto dst_tensors = _components.get_dst_tensors();
+    ARM_COMPUTE_ERROR_ON_MSG(dst_tensors.size() != 1, "Only one destination tensor per kernel is allowed");
+    const auto dst_info = dst_tensors[0];
+    const auto dst_w    = dst_info->dimension(0);
+
+    // Window steps give the tile sizes; clamp to at least 1 to avoid a zero divisor.
+    const auto window     = get_window();
+    const auto tile_w     = std::max(1, window.x().step());
+    const auto tile_h     = std::max(1, window.y().step());
+    const auto leftover_w = dst_w % tile_w;
+
+    std::string code{};
+    code += " int g_ind_0 = GET_SPATIAL_IDX(0, " + std::to_string(tile_w) + ", " + std::to_string(leftover_w) + ");\n";
+    code += " int g_ind_1 = GET_SPATIAL_IDX(1, " + std::to_string(tile_h) + ", 0);\n";
+    code += " int g_ind_2 = GET_SPATIAL_IDX(2, 1, 0);\n\n";
+    code += " const bool g_cond_x = (g_ind_0 == 0);\n";
+    code += " const bool g_cond_y = (g_ind_1 == 0);\n";
+
+    return code;
+}
+// Emit the kernel-parameter declaration text matching the variable's argument type.
+std::string ClTemplateWriter::write_argument_declaration(const GpuKernelVariableTable::TensorVariable &var) const
+{
+    std::string decl{};
+    switch(var.kernel_argument_info.type)
+    {
+        case GpuKernelArgumentInfo::Type::Vector:
+            decl += "\n VECTOR_DECLARATION(" + var.uniq_name + ")";
+            break;
+        case GpuKernelArgumentInfo::Type::Image:
+            decl += "\n IMAGE_DECLARATION(" + var.uniq_name + ")";
+            break;
+        case GpuKernelArgumentInfo::Type::Image_3D:
+            // A 3D image is passed as a 2D image declaration plus an explicit z-stride.
+            decl += "\n IMAGE_DECLARATION(" + var.uniq_name + "),";
+            decl += "\n unsigned int " + var.uniq_name + "_stride_z";
+            break;
+        case GpuKernelArgumentInfo::Type::Image_3D_Export_To_ClImage2D:
+            decl += "\n __read_only image2d_t " + var.uniq_name + "_img,";
+            decl += "\n unsigned int " + var.uniq_name + "_stride_z";
+            break;
+        case GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer:
+            decl += "\n TENSOR4D_T(" + var.uniq_name + ", BUFFER)";
+            break;
+        case GpuKernelArgumentInfo::Type::Tensor_4D_t_Image:
+            decl += "\n TENSOR4D_T(" + var.uniq_name + ", IMAGE)";
+            break;
+        default:
+            ARM_COMPUTE_ERROR("Unsupported declaration generation for GpuKernelArgumentInfo::Type");
+    }
+    return decl;
+}
+std::string ClTemplateWriter::write_kernel_signature(const GpuKernelVariableTable::VariableList &argument_list) const
+{
+    std::string code = "\n__kernel void " + write_kernel_name() + "(";
+
+    // Comma-separate every declaration; the final argument gets no trailing comma.
+    const size_t num_args = argument_list.size();
+    for(size_t i = 0; i + 1 < num_args; ++i)
+    {
+        code += write_argument_declaration(argument_list[i]) + ",";
+    }
+    if(num_args > 0)
+    {
+        code += write_argument_declaration(argument_list[num_args - 1]);
+    }
+
+    code += ')';
+
+    return code;
+}
+std::string ClTemplateWriter::write_kernel_name() const
+{
+    if(_components.empty())
+    {
+        return "empty_kernel";
+    }
+    // Join the component names with a triple-underscore separator. (The empty
+    // case was already handled above, so the first component always exists.)
+    std::string name = _components[0]->template_writer()->get_name();
+    for(size_t i = 1; i < _components.size(); ++i)
+    {
+        name += "___" + _components[i]->template_writer()->get_name();
+    }
+
+    return name;
+}
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.h
new file mode 100644
index 0000000000..83f617b6c6
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEWRITER
+#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEWRITER
+
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+#include "src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h"
+#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h"
+
+#include <map>
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** Use a templated-string-based method to write kernel code
+ * It stitches the component code templates together based on the valid fusion configuration.
+ * It then instantiates the actual kernel code from the template and the generated tag lookup table.
+ */
+class ClTemplateWriter : public IGpuKernelWriter
+{
+public:
+    /** Instantiates a kernel code string from the kernel code template
+     * @note: some tags can be unused since they could be used only for the macros, or only for the component code
+     *
+     * @param[in] code_template Kernel code template
+     * @param[in] tags          Tag lookup table
+     *
+     * @return std::string Instantiated kernel string
+     */
+    static std::string replace_tags(const std::string &code_template, const TagLUT &tags);
+    /** Default constructor */
+    ClTemplateWriter() = default;
+    /** Constructor
+     *
+     * @param[in] components Kernel component group from which the kernel will be generated
+     */
+    ClTemplateWriter(const GpuKernelComponentGroup &components);
+    /** Destructor */
+    ~ClTemplateWriter() override;
+    /** Generate kernel name (the component names joined together) */
+    std::string get_name() override;
+    /** Generate the complete kernel source code */
+    std::string get_code() override;
+    /** Generate build options (merged from every component in the group) */
+    CLBuildOptions get_build_options() override;
+    /** Generate config id string of the entire kernel. This is used for tuning */
+    std::string get_config_id() override;
+    /** Generate execution window (taken from the root component of the group) */
+    Window get_window() const override;
+    /** Get the kernel argument lists of the kernel, keyed by tensor id */
+    std::map<ITensorInfo::Id, GpuKernelArgument> get_tensors() override;
+
+private:
+    /** Derive the kernel name from the names of the components */
+    std::string write_kernel_name() const;
+    /** Assemble headers, macros, signature, global section and component codes into one kernel string */
+    std::string write_code();
+    /** Write the global coordinate/condition setup section emitted at the top of the kernel body */
+    std::string write_global_section() const;
+    /** Write the kernel-parameter declaration for a single tensor variable */
+    std::string write_argument_declaration(const GpuKernelVariableTable::TensorVariable &var) const;
+    /** Write the kernel signature over the given argument list */
+    std::string write_kernel_signature(const GpuKernelVariableTable::VariableList &argument_list) const;
+
+private:
+    GpuKernelComponentGroup _components{}; // Component group the kernel is generated from
+    GpuKernelVariableTable  _vtable{};     // Table of tensor variables declared by the components
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEWRITER */