aboutsummaryrefslogtreecommitdiff
path: root/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp')
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp297
1 files changed, 297 insertions, 0 deletions
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp
new file mode 100644
index 0000000000..cb643a741d
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp
@@ -0,0 +1,297 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "ClTemplateWriter.h"
+
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
+#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/// @note: some tags can be unused since they could be used only for the macros, or only for the component code
+std::string ClTemplateWriter::replace_tags(const std::string &code_template, const TagLUT &tags)
+{
+ std::string replaced_code = "";
+ bool scanning_pattern = false;
+ std::string pattern_found = "";
+ for(size_t i = 0; i < code_template.size() - 1; ++i)
+ {
+ if(!scanning_pattern)
+ {
+ if(code_template[i] == '{' && code_template[i + 1] == '{')
+ {
+ i += 1;
+ scanning_pattern = true;
+ pattern_found = "";
+ }
+ else
+ {
+ replaced_code += code_template[i];
+ }
+ }
+ else
+ {
+ if(code_template[i] == '}' && code_template[i + 1] == '}')
+ {
+ i += 1;
+ scanning_pattern = false;
+ std::string err = "Pattern " + pattern_found + " not found in tags";
+ ARM_COMPUTE_ERROR_ON_MSG(tags.find(pattern_found) == tags.end(), err.c_str());
+ replaced_code += tags.find(pattern_found)->second.value;
+ }
+ else
+ {
+ pattern_found += code_template[i];
+ }
+ }
+ }
+
+ return replaced_code;
+}
+ClTemplateWriter::~ClTemplateWriter()
+{
+}
+ClTemplateWriter::ClTemplateWriter(const GpuKernelComponentGroup &components)
+ : _components{ components }
+{
+}
+std::string ClTemplateWriter::get_name()
+{
+ return write_kernel_name();
+}
+std::string ClTemplateWriter::get_code()
+{
+ return write_code();
+}
+std::string ClTemplateWriter::get_config_id()
+{
+ std::string config_id = get_name();
+ for(const auto &comp : _components)
+ {
+ config_id += "--" + comp->template_writer()->get_config_id() + "--";
+ }
+
+ return config_id;
+}
+
+CLBuildOptions ClTemplateWriter::get_build_options()
+{
+ CLBuildOptions build_opts{};
+
+ for(const auto &comp : _components)
+ {
+ build_opts.add_options(comp->template_writer()->get_build_options(_components).options());
+ }
+
+ return build_opts;
+}
+
+Window ClTemplateWriter::get_window() const
+{
+ const auto root_comp = _components.get_root_component();
+ ARM_COMPUTE_ERROR_ON_MSG(root_comp == nullptr, "No root component found");
+ return root_comp->template_writer()->get_window();
+}
+
+std::map<ITensorInfo::Id, GpuKernelArgument> ClTemplateWriter::get_tensors()
+{
+ // Assemble GpuKernelArguments
+ std::map<ITensorInfo::Id, GpuKernelArgument> tensors;
+ for(const auto t : _components.get_argument_tensors())
+ {
+ tensors.emplace(
+ t->id(),
+ GpuKernelArgument{ *t, _vtable.get_variable(t).kernel_argument_info });
+ }
+ return tensors;
+}
+
+std::string ClTemplateWriter::write_code()
+{
+ ARM_COMPUTE_ERROR_ON_MSG(_components.empty(), "No components found");
+
+ // These data structures will hold the data from all the components in the blueprint
+ std::set<std::string> headers_list{};
+ std::set<std::string> additional_macros{};
+ std::vector<std::string> component_codes{}; // vector because order matters
+
+ // Pass 1: Declare all kernel variables
+ for(auto &component : _components)
+ {
+ component->template_writer()->declare_variables(_vtable, _components);
+ }
+ // Pass 2: Generate component codes
+ for(auto &component : _components)
+ {
+ const auto component_writer = component->template_writer();
+ auto curr_headers_list = component_writer->get_headers_list();
+ auto curr_additional_macros = component_writer->get_additional_macros();
+ auto curr_component_code = component_writer->get_component_code(_components);
+ const auto var_lut = component_writer->get_tag_lut(_vtable, _components); // Ideally can be merged with get_component_code once we have finer-grained code generation technique
+ component_codes.push_back(replace_tags(curr_component_code, var_lut));
+
+ headers_list.insert(curr_headers_list.begin(), curr_headers_list.end());
+ if(!additional_macros.empty()) // Some components might not have any
+ {
+ additional_macros.insert(replace_tags(curr_additional_macros, var_lut));
+ }
+ }
+
+ // Step 3: Assemble the data gathered by traversing the graph into the string "code"
+ std::string code = "";
+
+ for(auto &header : headers_list)
+ {
+#if defined(EMBEDDED_KERNELS)
+ code += CLKernelLibrary::get().get_program(header).first;
+#else // defined(EMBEDDED_KERNELS)
+ code += "#include \"" + header + "\"\n";
+#endif // defined(EMBEDDED_KERNELS)
+ }
+
+ for(auto &macros : additional_macros)
+ {
+ code += macros;
+ }
+
+ code += write_kernel_signature(_vtable.get_variable_list(_components.get_argument_tensors()));
+
+ code += "\n{\n\n";
+
+ code += " //------------------ START KERNEL_BUILDER_COORDINATE ---------------------\n\n";
+ code += write_global_section();
+ code += " //------------------ END KERNEL_BUILDER_COORDINATE ---------------------\n";
+
+ for(const auto &component_code : component_codes)
+ {
+ code += component_code;
+ }
+
+ code += "}\n";
+
+ return code;
+}
+std::string ClTemplateWriter::write_global_section() const
+{
+ const auto dst_tensors = _components.get_dst_tensors();
+ ARM_COMPUTE_ERROR_ON_MSG(dst_tensors.size() != 1, "Only one destination tensor per kernel is allowed");
+ const auto dst_info = dst_tensors[0];
+ const auto dst_w = dst_info->dimension(0);
+ const auto tile_w = std::max(1, get_window().x().step());
+ const auto tile_h = std::max(1, get_window().y().step());
+ auto leftover_w = dst_w % tile_w;
+
+ std::string code = "";
+ code += std::string(" int g_ind_0 = GET_SPATIAL_IDX(0, ") + std::to_string(tile_w) + ", " + std::to_string(leftover_w) + ");\n";
+ code += std::string(" int g_ind_1 = GET_SPATIAL_IDX(1, ") + std::to_string(tile_h) + ", " + "0);\n";
+ code += std::string(" int g_ind_2 = GET_SPATIAL_IDX(2, 1, 0);\n\n");
+
+ code += " const bool g_cond_x = (g_ind_0 == 0);\n";
+ code += " const bool g_cond_y = (g_ind_1 == 0);\n";
+
+ return code;
+}
+std::string ClTemplateWriter::write_argument_declaration(const GpuKernelVariableTable::TensorVariable &var) const
+{
+ std::string code;
+ switch(var.kernel_argument_info.type)
+ {
+ case GpuKernelArgumentInfo::Type::Vector:
+ {
+ code += "\n VECTOR_DECLARATION(" + var.uniq_name + ")";
+ break;
+ }
+ case GpuKernelArgumentInfo::Type::Image:
+ {
+ code += "\n IMAGE_DECLARATION(" + var.uniq_name + ")";
+ break;
+ }
+ case GpuKernelArgumentInfo::Type::Image_3D:
+ {
+ code += "\n IMAGE_DECLARATION(" + var.uniq_name + "),";
+ code += "\n unsigned int " + var.uniq_name + "_stride_z";
+ break;
+ }
+ case GpuKernelArgumentInfo::Type::Image_3D_Export_To_ClImage2D:
+ {
+ code += "\n __read_only image2d_t " + var.uniq_name + "_img,";
+ code += "\n unsigned int " + var.uniq_name + "_stride_z";
+ break;
+ }
+ case GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer:
+ {
+ code += "\n TENSOR4D_T(" + var.uniq_name + ", BUFFER)";
+ break;
+ }
+ case GpuKernelArgumentInfo::Type::Tensor_4D_t_Image:
+ {
+ code += "\n TENSOR4D_T(" + var.uniq_name + ", IMAGE)";
+ break;
+ }
+ default:
+ {
+ ARM_COMPUTE_ERROR("Unsupported declaration generation for GpuKernelArgumentInfo::Type");
+ }
+ }
+ return code;
+}
+std::string ClTemplateWriter::write_kernel_signature(const GpuKernelVariableTable::VariableList &argument_list) const
+{
+ std::string code = "\n__kernel void " + write_kernel_name() + "(";
+
+ for(int i = 0; i < static_cast<int>(argument_list.size()) - 1; ++i)
+ {
+ code += write_argument_declaration(argument_list[i]) + ",";
+ }
+ if(static_cast<int>(argument_list.size()) - 1 >= 0)
+ {
+ code += write_argument_declaration(argument_list[argument_list.size() - 1]);
+ }
+
+ code += ')';
+
+ return code;
+}
+std::string ClTemplateWriter::write_kernel_name() const
+{
+ if(_components.empty())
+ {
+ return "empty_kernel";
+ }
+ std::string name = _components.empty() ? "" : _components[0]->template_writer()->get_name();
+ for(size_t i = 1; i < _components.size(); ++i)
+ {
+ name += "___";
+ name += _components[i]->template_writer()->get_name();
+ }
+
+ return name;
+}
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute