/* * Copyright (c) 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include "ClTemplateWriter.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h" #include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h" namespace arm_compute { namespace experimental { namespace dynamic_fusion { /// @note: some tags can be unused since they could be used only for the macros, or only for the component code std::string ClTemplateWriter::replace_tags(const std::string &code_template, const TagLUT &tags) { std::string replaced_code = ""; bool scanning_pattern = false; std::string pattern_found = ""; for (size_t i = 0; i < code_template.size() - 1; ++i) { if (!scanning_pattern) { if (code_template[i] == '{' && code_template[i + 1] == '{') { i += 1; scanning_pattern = true; pattern_found = ""; } else { replaced_code += code_template[i]; } } else { if (code_template[i] == '}' && code_template[i + 1] == '}') { i += 1; scanning_pattern = false; std::string err = "Pattern " + pattern_found + " not found in tags"; ARM_COMPUTE_ERROR_ON_MSG(tags.find(pattern_found) == tags.end(), err.c_str()); replaced_code += tags.find(pattern_found)->second.value; } else { pattern_found += code_template[i]; } } } return replaced_code; } ClTemplateWriter::~ClTemplateWriter() { } ClTemplateWriter::ClTemplateWriter(const GpuKernelComponentGroup &components) : _components{components} { } std::string ClTemplateWriter::get_name() { return write_kernel_name(); } std::string ClTemplateWriter::get_code() { return write_code(); } std::string ClTemplateWriter::get_config_id() { std::string config_id = get_name(); for (const auto &comp : _components) { config_id += "--" + comp->template_writer()->get_config_id() + "--"; } return config_id; } CLBuildOptions ClTemplateWriter::get_build_options() { CLBuildOptions build_opts{}; for (const auto &comp : _components) { build_opts.add_options(comp->template_writer()->get_build_options(_components).options()); } return build_opts; } Window ClTemplateWriter::get_window() const { const auto root_comp = _components.get_root_component(); ARM_COMPUTE_ERROR_ON_MSG(root_comp == nullptr, "No root component found"); return root_comp->template_writer()->get_window(); } std::map ClTemplateWriter::get_tensors() { // Assemble GpuKernelArguments std::map tensors; for (const auto t : _components.get_argument_tensors()) { tensors.emplace(t->id(), GpuKernelArgument{*t, _vtable.get_variable(t).kernel_argument_info}); } return tensors; } std::string ClTemplateWriter::write_code() { ARM_COMPUTE_ERROR_ON_MSG(_components.empty(), "No components found"); // These data structures will hold the data from all the components in the blueprint std::set headers_list{}; std::set additional_macros{}; std::vector component_codes{}; // vector because order matters // Pass 1: Declare all kernel variables for (auto &component : _components) { component->template_writer()->declare_variables(_vtable, _components); } // Pass 2: Generate component codes for (auto &component : _components) { const auto component_writer = component->template_writer(); auto curr_headers_list = component_writer->get_headers_list(); auto curr_additional_macros = component_writer->get_additional_macros(); auto curr_component_code = component_writer->get_component_code(_components); const auto var_lut = component_writer->get_tag_lut( _vtable, _components); // Ideally can be merged with get_component_code once we have finer-grained code generation technique component_codes.push_back(replace_tags(curr_component_code, var_lut)); headers_list.insert(curr_headers_list.begin(), curr_headers_list.end()); if (!additional_macros.empty()) // Some components might not have any { additional_macros.insert(replace_tags(curr_additional_macros, var_lut)); } } // Step 3: Assemble the data gathered by traversing the graph into the string "code" std::string code = ""; for (auto &header : headers_list) { #if defined(EMBEDDED_KERNELS) code += CLKernelLibrary::get().get_program(header).first; #else // defined(EMBEDDED_KERNELS) code += "#include \"" + header + "\"\n"; #endif // defined(EMBEDDED_KERNELS) } for (auto ¯os : additional_macros) { code += macros; } auto arguments = _components.get_argument_tensors(); std::sort(arguments.begin(), arguments.end(), [](const ITensorInfo *l, const ITensorInfo *r) { return l->id() < r->id(); }); code += write_kernel_signature(_vtable.get_variable_list(arguments)); code += "\n{\n\n"; code += " //------------------ START KERNEL_BUILDER_COORDINATE ---------------------\n\n"; code += write_global_section(); code += " //------------------ END KERNEL_BUILDER_COORDINATE ---------------------\n"; { const auto tiles = _components.get_tiles(); std::stringstream tiles_ss; tiles_ss << " //------------------ START TILE DECLARATION ---------------------\n"; for (auto tile : tiles) { const auto var = _vtable.get_variable(tile); const auto data_type = get_cl_type_from_data_type(tile->data_type()); const auto var_name = var.uniq_name; tiles_ss << " TILE(" << data_type << ", M0, N0, " << var_name << ");\n"; } tiles_ss << " //------------------ END TILE DECLARATION ---------------------\n"; code += tiles_ss.str(); } for (const auto &component_code : component_codes) { code += component_code; code += "\n"; } code += "}\n"; return code; } std::string ClTemplateWriter::write_global_section() const { const auto dst_info = _components.get_any_dst_tensor(); const auto dst_w = dst_info->dimension(0); const auto tile_w = std::max(1, get_window().x().step()); const auto tile_h = std::max(1, get_window().y().step()); auto leftover_w = dst_w % tile_w; std::string code = ""; code += std::string(" int g_ind_0 = GET_SPATIAL_IDX(0, ") + std::to_string(tile_w) + ", " + std::to_string(leftover_w) + ");\n"; code += std::string(" int g_ind_1 = GET_SPATIAL_IDX(1, ") + std::to_string(tile_h) + ", " + "0);\n"; code += std::string(" int g_ind_2 = GET_SPATIAL_IDX(2, 1, 0);\n\n"); code += " const bool g_cond_x = (g_ind_0 == 0);\n"; code += " const bool g_cond_y = (g_ind_1 == 0);\n"; return code; } std::string ClTemplateWriter::write_argument_declaration(const GpuKernelVariableTable::TensorVariable &var) const { std::string code; switch (var.kernel_argument_info.type) { case GpuKernelArgumentInfo::Type::Vector: { code += "\n VECTOR_DECLARATION(" + var.uniq_name + ")"; break; } case GpuKernelArgumentInfo::Type::Image: { code += "\n IMAGE_DECLARATION(" + var.uniq_name + ")"; break; } case GpuKernelArgumentInfo::Type::Image_3D: { code += "\n IMAGE_DECLARATION(" + var.uniq_name + "),"; code += "\n unsigned int " + var.uniq_name + "_stride_z"; break; } case GpuKernelArgumentInfo::Type::Image_3D_Export_To_ClImage2D: { code += "\n __read_only image2d_t " + var.uniq_name + "_img,"; code += "\n unsigned int " + var.uniq_name + "_stride_z"; break; } case GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer: { code += "\n TENSOR4D_T(" + var.uniq_name + ", BUFFER)"; break; } case GpuKernelArgumentInfo::Type::Tensor_4D_t_Image: { code += "\n TENSOR4D_T(" + var.uniq_name + ", IMAGE)"; break; } case GpuKernelArgumentInfo::Type::Tensor_3D: { code += "\n TENSOR3D_DECLARATION(" + var.uniq_name + ")"; break; } default: { ARM_COMPUTE_ERROR("Unsupported declaration generation for GpuKernelArgumentInfo::Type"); } } return code; } std::string ClTemplateWriter::write_kernel_signature(const GpuKernelVariableTable::VariableList &argument_list) const { std::string code = "\n__kernel void " + write_kernel_name() + "("; for (int i = 0; i < static_cast(argument_list.size()) - 1; ++i) { code += write_argument_declaration(argument_list[i]) + ","; } if (static_cast(argument_list.size()) - 1 >= 0) { code += write_argument_declaration(argument_list[argument_list.size() - 1]); } code += ')'; return code; } std::string ClTemplateWriter::write_kernel_name() const { if (_components.empty()) { return "empty_kernel"; } std::string name = _components.empty() ? "" : _components[0]->template_writer()->get_name(); for (size_t i = 1; i < _components.size(); ++i) { name += "___"; name += _components[i]->template_writer()->get_name(); } return name; } } // namespace dynamic_fusion } // namespace experimental } // namespace arm_compute