diff options
Diffstat (limited to 'src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp')
-rw-r--r-- | src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp | 297 |
1 files changed, 297 insertions, 0 deletions
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp new file mode 100644 index 0000000000..cb643a741d --- /dev/null +++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp @@ -0,0 +1,297 @@ +/* + * Copyright (c) 2022 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "ClTemplateWriter.h" + +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h" +#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h" + +namespace arm_compute +{ +namespace experimental +{ +namespace dynamic_fusion +{ +/// @note: some tags can be unused since they could be used only for the macros, or only for the component code +std::string ClTemplateWriter::replace_tags(const std::string &code_template, const TagLUT &tags) +{ + std::string replaced_code = ""; + bool scanning_pattern = false; + std::string pattern_found = ""; + for(size_t i = 0; i < code_template.size() - 1; ++i) + { + if(!scanning_pattern) + { + if(code_template[i] == '{' && code_template[i + 1] == '{') + { + i += 1; + scanning_pattern = true; + pattern_found = ""; + } + else + { + replaced_code += code_template[i]; + } + } + else + { + if(code_template[i] == '}' && code_template[i + 1] == '}') + { + i += 1; + scanning_pattern = false; + std::string err = "Pattern " + pattern_found + " not found in tags"; + ARM_COMPUTE_ERROR_ON_MSG(tags.find(pattern_found) == tags.end(), err.c_str()); + replaced_code += tags.find(pattern_found)->second.value; + } + else + { + pattern_found += code_template[i]; + } + } + } + + return replaced_code; +} +ClTemplateWriter::~ClTemplateWriter() +{ +} +ClTemplateWriter::ClTemplateWriter(const GpuKernelComponentGroup &components) + : _components{ components } +{ +} +std::string ClTemplateWriter::get_name() +{ + return write_kernel_name(); +} +std::string ClTemplateWriter::get_code() +{ + return write_code(); +} +std::string ClTemplateWriter::get_config_id() +{ + std::string config_id = get_name(); + for(const auto &comp : _components) + { + config_id += "--" + comp->template_writer()->get_config_id() + "--"; + } + + return config_id; +} + +CLBuildOptions ClTemplateWriter::get_build_options() +{ + CLBuildOptions build_opts{}; + + for(const auto &comp : _components) + { + build_opts.add_options(comp->template_writer()->get_build_options(_components).options()); + } + + return build_opts; +} + +Window ClTemplateWriter::get_window() const +{ + const auto root_comp = _components.get_root_component(); + ARM_COMPUTE_ERROR_ON_MSG(root_comp == nullptr, "No root component found"); + return root_comp->template_writer()->get_window(); +} + +std::map<ITensorInfo::Id, GpuKernelArgument> ClTemplateWriter::get_tensors() +{ + // Assemble GpuKernelArguments + std::map<ITensorInfo::Id, GpuKernelArgument> tensors; + for(const auto t : _components.get_argument_tensors()) + { + tensors.emplace( + t->id(), + GpuKernelArgument{ *t, _vtable.get_variable(t).kernel_argument_info }); + } + return tensors; +} + +std::string ClTemplateWriter::write_code() +{ + ARM_COMPUTE_ERROR_ON_MSG(_components.empty(), "No components found"); + + // These data structures will hold the data from all the components in the blueprint + std::set<std::string> headers_list{}; + std::set<std::string> additional_macros{}; + std::vector<std::string> component_codes{}; // vector because order matters + + // Pass 1: Declare all kernel variables + for(auto &component : _components) + { + component->template_writer()->declare_variables(_vtable, _components); + } + // Pass 2: Generate component codes + for(auto &component : _components) + { + const auto component_writer = component->template_writer(); + auto curr_headers_list = component_writer->get_headers_list(); + auto curr_additional_macros = component_writer->get_additional_macros(); + auto curr_component_code = component_writer->get_component_code(_components); + const auto var_lut = component_writer->get_tag_lut(_vtable, _components); // Ideally can be merged with get_component_code once we have finer-grained code generation technique + component_codes.push_back(replace_tags(curr_component_code, var_lut)); + + headers_list.insert(curr_headers_list.begin(), curr_headers_list.end()); + if(!additional_macros.empty()) // Some components might not have any + { + additional_macros.insert(replace_tags(curr_additional_macros, var_lut)); + } + } + + // Step 3: Assemble the data gathered by traversing the graph into the string "code" + std::string code = ""; + + for(auto &header : headers_list) + { +#if defined(EMBEDDED_KERNELS) + code += CLKernelLibrary::get().get_program(header).first; +#else // defined(EMBEDDED_KERNELS) + code += "#include \"" + header + "\"\n"; +#endif // defined(EMBEDDED_KERNELS) + } + + for(auto ¯os : additional_macros) + { + code += macros; + } + + code += write_kernel_signature(_vtable.get_variable_list(_components.get_argument_tensors())); + + code += "\n{\n\n"; + + code += " //------------------ START KERNEL_BUILDER_COORDINATE ---------------------\n\n"; + code += write_global_section(); + code += " //------------------ END KERNEL_BUILDER_COORDINATE ---------------------\n"; + + for(const auto &component_code : component_codes) + { + code += component_code; + } + + code += "}\n"; + + return code; +} +std::string ClTemplateWriter::write_global_section() const +{ + const auto dst_tensors = _components.get_dst_tensors(); + ARM_COMPUTE_ERROR_ON_MSG(dst_tensors.size() != 1, "Only one destination tensor per kernel is allowed"); + const auto dst_info = dst_tensors[0]; + const auto dst_w = dst_info->dimension(0); + const auto tile_w = std::max(1, get_window().x().step()); + const auto tile_h = std::max(1, get_window().y().step()); + auto leftover_w = dst_w % tile_w; + + std::string code = ""; + code += std::string(" int g_ind_0 = GET_SPATIAL_IDX(0, ") + std::to_string(tile_w) + ", " + std::to_string(leftover_w) + ");\n"; + code += std::string(" int g_ind_1 = GET_SPATIAL_IDX(1, ") + std::to_string(tile_h) + ", " + "0);\n"; + code += std::string(" int g_ind_2 = GET_SPATIAL_IDX(2, 1, 0);\n\n"); + + code += " const bool g_cond_x = (g_ind_0 == 0);\n"; + code += " const bool g_cond_y = (g_ind_1 == 0);\n"; + + return code; +} +std::string ClTemplateWriter::write_argument_declaration(const GpuKernelVariableTable::TensorVariable &var) const +{ + std::string code; + switch(var.kernel_argument_info.type) + { + case GpuKernelArgumentInfo::Type::Vector: + { + code += "\n VECTOR_DECLARATION(" + var.uniq_name + ")"; + break; + } + case GpuKernelArgumentInfo::Type::Image: + { + code += "\n IMAGE_DECLARATION(" + var.uniq_name + ")"; + break; + } + case GpuKernelArgumentInfo::Type::Image_3D: + { + code += "\n IMAGE_DECLARATION(" + var.uniq_name + "),"; + code += "\n unsigned int " + var.uniq_name + "_stride_z"; + break; + } + case GpuKernelArgumentInfo::Type::Image_3D_Export_To_ClImage2D: + { + code += "\n __read_only image2d_t " + var.uniq_name + "_img,"; + code += "\n unsigned int " + var.uniq_name + "_stride_z"; + break; + } + case GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer: + { + code += "\n TENSOR4D_T(" + var.uniq_name + ", BUFFER)"; + break; + } + case GpuKernelArgumentInfo::Type::Tensor_4D_t_Image: + { + code += "\n TENSOR4D_T(" + var.uniq_name + ", IMAGE)"; + break; + } + default: + { + ARM_COMPUTE_ERROR("Unsupported declaration generation for GpuKernelArgumentInfo::Type"); + } + } + return code; +} +std::string ClTemplateWriter::write_kernel_signature(const GpuKernelVariableTable::VariableList &argument_list) const +{ + std::string code = "\n__kernel void " + write_kernel_name() + "("; + + for(int i = 0; i < static_cast<int>(argument_list.size()) - 1; ++i) + { + code += write_argument_declaration(argument_list[i]) + ","; + } + if(static_cast<int>(argument_list.size()) - 1 >= 0) + { + code += write_argument_declaration(argument_list[argument_list.size() - 1]); + } + + code += ')'; + + return code; +} +std::string ClTemplateWriter::write_kernel_name() const +{ + if(_components.empty()) + { + return "empty_kernel"; + } + std::string name = _components.empty() ? "" : _components[0]->template_writer()->get_name(); + for(size_t i = 1; i < _components.size(); ++i) + { + name += "___"; + name += _components[i]->template_writer()->get_name(); + } + + return name; +} +} // namespace dynamic_fusion +} // namespace experimental +} // namespace arm_compute |