aboutsummaryrefslogtreecommitdiff
path: root/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h')
-rw-r--r--src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h930
1 files changed, 0 insertions, 930 deletions
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h
deleted file mode 100644
index 04919acb83..0000000000
--- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h
+++ /dev/null
@@ -1,930 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
-
-#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMMON_H
-#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMMON_H
-
-#include "arm_compute/core/CL/CLCompileContext.h"
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/GPUTarget.h"
-#include "src/core/common/Macros.h"
-#include "support/Requires.h"
-#include "support/StringSupport.h"
-
-#include "src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h"
-
-#include <iostream>
-#include <queue>
-#include <stack>
-#include <string>
-#include <unordered_set>
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-/** We introduce the concept of *Shared Variables* in the context of kernel building.
- * They are variables that can be accessed / shared among all the kernel components within a single kernel.
- * For now we consider 2 groups of shared variables:
- * Argument: The argument variables (parameters) of a kernel
- * Automatic: The automatic variables declared inside a kernel
- * All Shared Variables have the same kernel scope, and are thus visible to all kernel components
-*/
-
-enum class SharedVarIO
-{
- Input,
- Output
-};
-
-enum class SharedVarGroup
-{
- Argument, // Parameters to a kernel function == dst or src tensors of the whole blueprint graph
- Automatic // Automatic variables declared within the kernel body == intermediate tensors of the whole blueprint graph
-};
-
-/** Specifies a shared variable link for a component.
- * It describes all the information that's available when a component is constructed / added:
- * e.g. its linkage (via ArgumentID and io) and its group
- * This is not shared variable on its own, but is used for instantiating a SharedVar when building the code
- */
-struct SharedVarLink
-{
- ArgumentID arg_id{ g_arg_placeholder };
- SharedVarIO io{ SharedVarIO::Input };
- bool is_empty() const
- {
- return arg_id == g_arg_placeholder;
- }
-};
-
-/** A table of all the variables used in the kernel / blueprint
- * Because we limit the DependencyGraph in the blueprint to a Linear Sequence for now, we only allow ** a single global variable (the accumulator) **
- *
- * NOTE: the order they appear in the table is the order of their "declaration" in the component code, and is also their ID
- * NOTE: the variables all have the scope of the full kernel function
- */
-class SharedVarTable
-{
-public:
- /** A fully realized SharedVarLink
- */
- struct SharedVar
- {
- ArgumentID arg_id{ g_arg_placeholder };
- SharedVarIO io{ SharedVarIO::Input };
- SharedVarGroup group{ SharedVarGroup::Argument };
- std::string uniq_name{}; // Unique name, also the final variable name used in the built code
- ClKernelArgDescriptor desc{}; // Automatic variables can and should still be described using this struct
- bool is_empty() const
- {
- return arg_id == g_arg_placeholder;
- }
- };
-
- class Arguments
- {
- public:
- Arguments() = default;
- void add_var(const SharedVar &var)
- {
- ARM_COMPUTE_ERROR_ON(var.group != SharedVarGroup::Argument);
- _vars.push_back(var);
- }
- std::vector<SharedVar> get_all_vars() const
- {
- return _vars;
- }
- std::vector<SharedVar> get_src_vars() const
- {
- std::vector<SharedVar> src_vars;
- std::copy_if(_vars.begin(), _vars.end(), std::back_inserter(src_vars), [](const SharedVar & var)
- {
- return var.io == SharedVarIO::Input;
- });
- return src_vars;
- }
- SharedVar get_dst_var() const
- {
- std::vector<SharedVar> dst_vars;
- std::copy_if(_vars.begin(), _vars.end(), std::back_inserter(dst_vars), [](const SharedVar & var)
- {
- return var.io == SharedVarIO::Output;
- });
- ARM_COMPUTE_ERROR_ON(dst_vars.size() != 1);
- return dst_vars.at(0);
- }
-
- private:
- std::vector<SharedVar> _vars{};
- };
-
- /** Create a SharedVar for a corresponding SharedVarLink (contains ArgumentID). If one has already been created for the SharedVarLink, simply return it instead of creating a new one
- *
- * @note: The order of insertion is important. There is one precondition:
- * PRECOND: The components have been sorted topologically / is being traversed in topological order
- * This ensures that all the consumer var links (Output, Automatic Links) can consume (return) the producer var links when they're referred
- */
- void add(SharedVarLink var_link, SharedVarGroup group, ClKernelArgDescriptor runtime_desc, const std::string &name = "unnamed")
- {
- ARM_COMPUTE_ERROR_ON_MSG(var_link.is_empty(), "Non-empty SharedVarLink expected");
- if(!get(var_link).is_empty())
- {
- return;
- }
-
- auto var_id = _num_var;
- std::stringstream ss;
- ss << name << "_" << var_id;
- const auto uniq_name = ss.str();
- SharedVar var{ var_link.arg_id, var_link.io, group, uniq_name, runtime_desc };
-
- if(group == SharedVarGroup::Argument)
- {
- _arguments.emplace(var_id, var);
- _arg_id_map.emplace(var_link.arg_id, var_id);
- _num_var++;
- }
- else if(group == SharedVarGroup::Automatic)
- {
- if(_global_vars.empty())
- {
- if(var_link.io == SharedVarIO::Output)
- {
- _global_vars.emplace(var_id, var);
- _arg_id_map.emplace(var_link.arg_id, var_id);
- _num_var++;
- }
- else
- {
- ARM_COMPUTE_ERROR("Component likely not traversed in topological order");
- }
- }
- else
- {
- // Associate additional SharedVarLinks with the single global shared variable
- const auto global_var_id = _global_vars.begin()->first;
- _arg_id_map[var_link.arg_id] = global_var_id;
- }
- }
- else
- {
- ARM_COMPUTE_ERROR("Unrecognised SharedVarGroup");
- }
- }
-
- /** Get the SharedVar associated with @p var_link
- *
- * @param var_link
- * @return SharedVar
- */
- SharedVar get(const SharedVarLink &var_link) const
- {
- const SharedVar empty_var{};
- if(_arg_id_map.find(var_link.arg_id) != _arg_id_map.end())
- {
- const auto var_id = _arg_id_map.at(var_link.arg_id);
- const auto arg_var = _arguments.find(var_id);
- if(arg_var != _arguments.end())
- {
- return arg_var->second;
- }
- else
- {
- return _global_vars.at(var_id);
- }
- }
- return empty_var;
- }
-
- /** @note The arguments are returned in the order they are added
- */
- Arguments get_kernel_arguments() const
- {
- Arguments args{};
- for(const auto &a : _arguments)
- {
- args.add_var(a.second);
- }
- return args;
- }
-
-private:
- using VarID = int32_t;
-
-private:
- std::map<VarID, SharedVar> _global_vars{}; // Shared, global variable
- std::map<VarID, SharedVar> _arguments{};
- std::map<ArgumentID, VarID> _arg_id_map{}; // Track ArgumentIDs that have already been added
- VarID _num_var{ 0 };
-};
-
-enum class ComponentType
-{
- Simple,
- Complex,
- Store
-};
-
-using ComponentID = DependencyGraph::Id;
-using ComponentList = std::vector<ComponentID>;
-class IClKernelComponent
-{
-public:
- using Link = SharedVarLink;
- using Tag = std::string;
- struct TagVal
- {
- TagVal() = default;
- TagVal(const SharedVarTable::SharedVar &var)
- : value{ var.uniq_name }
- {
- }
-
- template <typename T, ARM_COMPUTE_REQUIRES_TA(std::is_integral<T>::value)>
- TagVal(T val)
- : value{ support::cpp11::to_string(val) }
- {
- }
-
- TagVal(const std::string &val)
- : value{ val }
- {
- }
-
- TagVal(const char *val)
- : value{ std::string(val) }
- {
- }
-
- TagVal(const DataType &data_type)
- : value{ get_cl_type_from_data_type(data_type) }
- {
- }
-
- std::string value{};
- };
- using TagLUT = std::unordered_map<Tag, TagVal>; // Used to instantiating a code template / replacing tags
-public:
- IClKernelComponent(ClKernelBlueprint *blueprint)
- : _blueprint(blueprint)
- {
- }
-
- ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(IClKernelComponent);
-
- virtual ~IClKernelComponent() = default;
- virtual ComponentType get_component_type() const = 0;
- virtual std::vector<Link> get_links() const = 0;
- virtual std::string name() const = 0;
-
- // @note: some tags can be unused since they could be used only for the macros, or only for the component code
- static std::string replace_tags(const std::string &code_template, const TagLUT &tags)
- {
- std::string replaced_code = "";
- bool scanning_pattern = false;
- std::string pattern_found = "";
- for(size_t i = 0; i < code_template.size() - 1; ++i)
- {
- if(!scanning_pattern)
- {
- if(code_template[i] == '{' && code_template[i + 1] == '{')
- {
- i += 1;
- scanning_pattern = true;
- pattern_found = "";
- }
- else
- {
- replaced_code += code_template[i];
- }
- }
- else
- {
- if(code_template[i] == '}' && code_template[i + 1] == '}')
- {
- i += 1;
- scanning_pattern = false;
- std::string err = "Pattern " + pattern_found + " not found in tags";
- ARM_COMPUTE_ERROR_ON_MSG(tags.find(pattern_found) == tags.end(), err.c_str());
- replaced_code += tags.find(pattern_found)->second.value;
- }
- else
- {
- pattern_found += code_template[i];
- }
- }
- }
-
- return replaced_code;
- }
- ComponentID id() const
- {
- return _id;
- }
- void set_id(ComponentID id)
- {
- _id = id;
- }
-
- virtual std::set<std::string> get_headers_list() const
- {
- return std::set<std::string> {};
- }
-
- virtual std::string get_additional_macros() const
- {
- return "";
- }
-
- virtual std::string get_component_code() const
- {
- return "";
- }
-
- virtual Window get_window() const
- {
- return Window{};
- }
-
- /** Get the tag look-up table used to instantiate the component code.
- *
- * @param vtable
- * @return TagLUT
- */
- virtual TagLUT get_tag_lut(const SharedVarTable &vtable) const = 0;
-
- /** Allocate all shared variables used by the component in the @p vtable
- *
- * @param vtable
- */
- virtual void allocate_shared_vars(SharedVarTable &vtable) const = 0;
-
- virtual std::string get_dst_addr_calculation() const
- {
- return "";
- }
-
- /** Generate config id of the component
- *
- * @return std::string
- */
- virtual std::string generate_config_id() const
- {
- return "";
- }
-
- virtual CLBuildOptions generate_build_options() const
- {
- return CLBuildOptions{};
- }
-
-protected:
- ClKernelBlueprint *_blueprint;
-
-private:
- ComponentID _id{};
-};
-
-using ComponentUniquePtr = std::unique_ptr<IClKernelComponent>;
-
-/** Intermediate representation of the final, complete kernel source.
- */
-struct ClKernelBlueprint::Implementation
-{
-public:
- Implementation() = default;
- ~Implementation() = default;
-
-public:
- Status update_merge_point(ArgumentID t_id, ArgumentID merge_point)
- {
- return _graph.update_merge_point(t_id, merge_point);
- }
-
- ArgumentID add_kernel_tensor(ITensorInfo *tensor_info, ArgumentID merge_point = DependencyGraph::empty_id())
- {
- const auto id = _graph.add_tensor(merge_point);
- if(_kernel_tensors.find(id) == _kernel_tensors.end())
- {
- _kernel_tensors.insert(std::make_pair(id, tensor_info));
- }
- return id;
- }
-
- void set_tile_info(const TileDescriptor &tile_info)
- {
- _tile_info = tile_info;
- }
-
- SharedVarGroup group(ArgumentID arg_id) const
- {
- if(arg_id == g_arg_placeholder)
- {
- // In case of placeholder, don't care what we return;
- return SharedVarGroup::Argument;
- }
- return _shared_var_group_lut.at(arg_id);
- }
-
- void validate_arg_ids(std::initializer_list<ArgumentID> args) const
- {
- for(const auto arg_id : args)
- {
- ARM_COMPUTE_UNUSED(arg_id);
- ARM_COMPUTE_ERROR_ON_MSG(_kernel_tensors.find(arg_id) == _kernel_tensors.end() && arg_id != g_arg_placeholder,
- "Trying to use an argument that hasn't been added to the blueprint");
- }
- }
-
- void add_component(ComponentUniquePtr component)
- {
- if(component->get_component_type() == ComponentType::Complex)
- {
- ++_num_complex_components;
- ARM_COMPUTE_ERROR_ON_MSG(_num_complex_components > 1, "Only one complex component per blueprint is supported.");
- }
-
- // Get an unique ID for the component that's being added
- std::vector<ArgumentID> src_tensors;
- std::vector<ArgumentID> dst_tensors;
- for(const auto &link : component->get_links())
- {
- if(link.is_empty())
- {
- continue;
- }
- if(link.io == SharedVarIO::Input)
- {
- src_tensors.push_back(link.arg_id);
- }
- else
- {
- dst_tensors.push_back(link.arg_id);
- }
- }
- const ComponentID component_id = _graph.add_operator(src_tensors, dst_tensors).second;
- component->set_id(component_id);
-
- // Add this component to the component graph. Don't connect it to anything yet
- _component_graph.emplace(component_id, ComponentList{});
-
- // For every { arg_id, arg_io } passed along with this component...
- for(const auto &link : component->get_links())
- {
- const ArgumentID &arg_id = link.arg_id;
- const SharedVarIO &arg_io = link.io;
-
- // Add the arg_id to the map describing the input/output relationship between an argument and the components that use it, if it doesn't yet exist there
- if(_outgoing_components.find(arg_id) == _outgoing_components.end())
- {
- _outgoing_components.emplace(arg_id, ComponentList{});
- _incoming_components.emplace(arg_id, ComponentList{});
- }
-
- // If it's an input argument, connect any other component that has it as output with this component
- // Additionally, set this component as one that treats this argument as "Input" (append to index 0)
- // This is used so that we keep track of whether two components use the same argument, one as input and one as output
- if(arg_io == SharedVarIO::Input)
- {
- for(const auto &prev_component : _incoming_components[arg_id])
- {
- _component_graph[prev_component].push_back(component_id);
- }
-
- _outgoing_components[arg_id].push_back(component_id);
- }
- // If it's an output argument, connect this component with any other component that has it as input
- // Additionally, set this component as one that treats this argument as "Output" (append to index 1)
- else
- {
- if(component->get_component_type() == ComponentType::Store)
- {
- ARM_COMPUTE_ERROR_ON_MSG(_dst_id >= 0, "Trying to add more than one dst argument to the graph");
- _dst_id = arg_id;
- }
-
- for(const auto &subseq_component : _outgoing_components[arg_id])
- {
- _component_graph[component_id].push_back(subseq_component);
- }
-
- _incoming_components[arg_id].push_back(component_id);
- }
- }
-
- ARM_COMPUTE_ERROR_ON_MSG(_graph.get_root_ops().size() != 1, "Trying to add more than one root to the graph");
-
- // Finally, add this component to the dictionary of components
- _components.insert(std::make_pair(component_id, std::move(component)));
- }
-
- std::string build_kernel_name() const
- {
- std::string name = "";
-
- traverse([&](std::stack<ComponentID> stack)
- {
- name += _components.find(stack.top())->second->name() + (stack.size() > 2 ? "___" : "");
- });
-
- return name;
- }
-
- std::string build_code()
- {
- ARM_COMPUTE_ERROR_ON_MSG(_graph_root == -1, "No root found in the component graph");
-
- // These data structures will hold the data from all the components in the blueprint
- std::set<std::string> headers_list{};
- std::set<std::string> additional_macros{};
- std::vector<std::string> component_codes{}; // vector because order matters
-
- // Step 1: Allocate all kernel argument shared variables before generating the component code
- auto stack = topological_sort();
- while(!stack.empty())
- {
- auto curr_component_id = stack.top();
- auto &curr_component = _components.find(curr_component_id)->second;
-
- curr_component->allocate_shared_vars(_vtable);
-
- stack.pop();
- }
- // Step 2: Generate component codes
- stack = topological_sort();
- while(!stack.empty())
- {
- auto curr_component_id = stack.top();
- auto &curr_component = _components.find(curr_component_id)->second;
-
- auto curr_headers_list = curr_component->get_headers_list();
- auto curr_additional_macros = curr_component->get_additional_macros();
- auto curr_component_code = curr_component->get_component_code();
- const auto var_lut = curr_component->get_tag_lut(_vtable); // Ideally can be merged with get_component_code once we have finer-grained code generation technique
- component_codes.push_back(IClKernelComponent::replace_tags(curr_component_code, var_lut));
-
- headers_list.insert(curr_headers_list.begin(), curr_headers_list.end());
- if(!curr_additional_macros.empty()) // Some components might not have any
- {
- additional_macros.insert(IClKernelComponent::replace_tags(curr_additional_macros, var_lut));
- }
-
- stack.pop();
- }
-
- // Step 3: Assemble the data gathered by traversing the graph into the string "code"
- std::string code = "";
-
- for(auto &header : headers_list)
- {
-#if defined(EMBEDDED_KERNELS)
- code += CLKernelLibrary::get().get_program(header).first;
-#else // defined(EMBEDDED_KERNELS)
- code += "#include \"" + header + "\"\n";
-#endif // defined(EMBEDDED_KERNELS)
- }
-
- for(auto &macros : additional_macros)
- {
- code += macros;
- }
-
- code += generate_kernel_signature(_vtable.get_kernel_arguments());
-
- code += "\n{\n\n";
-
- code += " //------------------ START KERNEL_BUILDER_COORDINATE ---------------------\n\n";
- code += generate_global_section();
- code += " //------------------ END KERNEL_BUILDER_COORDINATE ---------------------\n";
-
- for(auto &component_code : component_codes)
- {
- code += component_code;
- }
-
- code += "}\n";
-
- return code;
- }
-
- /** Generate config id of the entire kernel
- *
- * Format: kernel_name--comp0_config_id--comp1_config_id--...
- *
- * @return std::string
- */
- std::string build_config_id() const
- {
- std::string config_id = build_kernel_name();
- traverse([&](std::stack<ComponentID> stack)
- {
- config_id += "--" + _components.find(stack.top())->second->generate_config_id() + "--";
- });
-
- return config_id;
- }
-
- CLBuildOptions build_options() const
- {
- CLBuildOptions build_opts{};
-
- traverse([&](std::stack<ComponentID> stack)
- {
- build_opts.add_options(_components.find(stack.top())->second->generate_build_options().options());
- });
-
- return build_opts;
- }
-
- TileDescriptor get_tile_info() const
- {
- return _tile_info;
- }
-
- // Get the global execution window, i.e. that of the root component
- Window get_execution_window() const
- {
- ARM_COMPUTE_ERROR_ON_MSG(_graph_root == -1, "No root found in the component graph");
- ARM_COMPUTE_ERROR_ON_MSG(_dst_id == -1, "Destination Tensor Id should be ready before calling get_execution_window()");
-
- return _components.find(_graph_root)->second->get_window();
- }
-
- ArgumentID get_dst_id() const
- {
- return _dst_id;
- }
-
- ClKernelArgList get_arguments() const
- {
- ClKernelArgList arg_list{};
- for(const auto &arg_var : _vtable.get_kernel_arguments().get_all_vars())
- {
- arg_list[arg_var.desc.arg_id] = arg_var.desc;
- }
- return arg_list;
- }
-
- /** Get the arguments as shared vars from the vtable
- *
- * @return SharedVarTable::Arguments
- */
- SharedVarTable::Arguments get_argument_shared_vars() const
- {
- return _vtable.get_kernel_arguments();
- }
-
- const ITensorInfo *get_kernel_argument_info(const ArgumentID id) const
- {
- auto it = _kernel_tensors.find(id);
- if(it != _kernel_tensors.end())
- {
- return it->second;
- }
- return nullptr;
- }
-
- ITensorInfo *get_kernel_argument_info(const ArgumentID id)
- {
- auto it = _kernel_tensors.find(id);
- if(it != _kernel_tensors.end())
- {
- return it->second;
- }
- return nullptr;
- }
- /** Finalize graph construction. Graph is expected to not mutate after being finalized
- */
- void finalize()
- {
- cache_root_component();
- assign_shared_var_group();
- }
-
- DependencyGraph get_graph() const
- {
- return _graph;
- }
-
-private:
- void cache_root_component()
- {
- const auto roots = _graph.get_root_ops();
- ARM_COMPUTE_ERROR_ON_MSG(roots.size() != 1, "Trying to add more than one root to the graph");
- _graph_root = roots.at(0);
- }
- /** Assign the group for each shared var. Can only be performed at the end of the graph construction, before building
- */
- void assign_shared_var_group()
- {
- for(const auto &tensor : _kernel_tensors)
- {
- const auto tensor_id = tensor.first;
- if(_graph.is_src_tensor(tensor_id) || _graph.is_dst_tensor(tensor_id))
- {
- _shared_var_group_lut[tensor_id] = SharedVarGroup::Argument;
- }
- else
- {
- _shared_var_group_lut[tensor_id] = SharedVarGroup::Automatic;
- }
- }
- }
-
- void topological_sort_utility(ComponentID component_id, std::unordered_set<ComponentID> &visited, std::stack<ComponentID> &stack) const
- {
- visited.insert(component_id);
-
- for(auto connected_component : _component_graph.find(component_id)->second)
- {
- if(visited.find(connected_component) == visited.end())
- {
- topological_sort_utility(connected_component, visited, stack);
- }
- }
-
- stack.push(component_id);
- }
-
- std::stack<ComponentID> topological_sort() const
- {
- std::stack<ComponentID> stack{};
- std::unordered_set<ComponentID> visited{};
-
- topological_sort_utility(_graph_root, visited, stack);
-
- return stack;
- }
-
- void traverse(const std::function<void(std::stack<ComponentID>)> &func) const
- {
- std::stack<ComponentID> stack = topological_sort();
-
- while(!stack.empty())
- {
- func(stack);
- stack.pop();
- }
- }
-
- std::string generate_argument_declaration(const SharedVarTable::SharedVar &var) const
- {
- ARM_COMPUTE_ERROR_ON_MSG(var.group != SharedVarGroup::Argument, "An argument declaration can only be generated from a kernel argument");
- std::string code;
- switch(var.desc.tensor_arg_type)
- {
- case ClKernelTensorArgType::Vector:
- {
- code += "\n VECTOR_DECLARATION(" + var.uniq_name + ")";
- break;
- }
- case ClKernelTensorArgType::Image:
- {
- code += "\n IMAGE_DECLARATION(" + var.uniq_name + ")";
- break;
- }
- case ClKernelTensorArgType::Image_3D:
- {
- code += "\n IMAGE_DECLARATION(" + var.uniq_name + "),";
- code += "\n uint " + var.uniq_name + "_stride_z";
- break;
- }
- case ClKernelTensorArgType::Image_3D_Export_To_ClImage2D:
- {
- code += "\n __read_only image2d_t " + var.uniq_name + "_img,";
- code += "\n uint " + var.uniq_name + "_stride_z";
- break;
- }
- case ClKernelTensorArgType::Tensor_4D_t_Buffer:
- {
- code += "\n TENSOR4D_T(" + var.uniq_name + ", BUFFER)";
- break;
- }
- case ClKernelTensorArgType::Tensor_4D_t_Image:
- {
- code += "\n TENSOR4D_T(" + var.uniq_name + ", IMAGE)";
- break;
- }
- default:
- {
- ARM_COMPUTE_ERROR("Unsupported declaration generation for ClKernelTensorArgType");
- }
- }
- return code;
- }
-
- std::string generate_kernel_signature(const SharedVarTable::Arguments &argument_list) const
- {
- std::string code = "\n__kernel void " + build_kernel_name() + "(";
-
- for(const auto &arg : argument_list.get_all_vars())
- {
- code += generate_argument_declaration(arg) + ",";
- }
-
- code[code.length() - 1] = ')';
-
- return code;
- }
-
- std::string generate_global_section() const
- {
- auto dst_info = get_kernel_argument_info(_dst_id);
- auto dst_w = dst_info->dimension(0);
- const auto tile_w = std::max(1, get_execution_window().x().step());
- const auto tile_h = std::max(1, get_execution_window().y().step());
- auto leftover_w = dst_w % tile_w;
-
- std::string code = "";
- code += std::string(" int cout = GET_SPATIAL_IDX(0, ") + std::to_string(tile_w) + ", " + std::to_string(leftover_w) + ");\n";
- code += std::string(" int mout = GET_SPATIAL_IDX(1, ") + std::to_string(tile_h) + ", " + "0);\n";
- code += std::string(" int bout = GET_SPATIAL_IDX(2, 1, 0);\n\n");
-
- switch(_tile_info.clipping)
- {
- case ClippingStrategy::TOP_LEFT:
- code += " const bool g_cond_x = (cout == 0);\n";
- code += " const bool g_cond_y = (mout == 0);\n";
- break;
- case ClippingStrategy::TOP_RIGHT:
- code += " const bool g_cond_x = ((cout + 1) * " + std::to_string(tile_w) + " >= " + std::to_string(_tile_info.boundaries.x()) + ");\n";
- code += " const bool g_cond_y = (mout == 0);\n";
- break;
- case ClippingStrategy::BOTTOM_LEFT:
- code += " const bool g_cond_x = (cout == 0);\n";
- code += " const bool g_cond_y = ((mout + 1) * " + std::to_string(tile_h) + " >= " + std::to_string(_tile_info.boundaries.y()) + ");\n";
- break;
- case ClippingStrategy::BOTTOM_RIGHT:
- code += " const bool g_cond_x = ((cout + 1) * " + std::to_string(tile_w) + " >= " + std::to_string(_tile_info.boundaries.x()) + ");\n";
- code += " const bool g_cond_y = ((mout + 1) * " + std::to_string(tile_h) + " >= " + std::to_string(_tile_info.boundaries.y()) + ");\n";
- break;
- default:
- ARM_COMPUTE_ERROR("Unsupported clipping strategy");
- }
-
- return code;
- }
-
- TileDescriptor _tile_info{};
-
- int32_t _num_complex_components{};
-
- ArgumentID _dst_id{ -1 }; // Initially set to -1, which means the graph has no dst yet, since node IDs are positive numbers
-
- DependencyGraph _graph{};
-
- // Tensors, components and IDs with corresponding ptrs (except intermediate)
- std::unordered_map<ComponentID, ComponentUniquePtr> _components{};
- std::unordered_map<ArgumentID, ITensorInfo *> _kernel_tensors{};
- // Argument group lookup. Can be replaced by extending the ArgumentID type to include group info
- std::unordered_map<ArgumentID, SharedVarGroup> _shared_var_group_lut{};
-
- // Tracks all variables (e.g.: kernel arguments, kernel "global variables")
- SharedVarTable _vtable{};
-
- // Component directed graph (represented by an adjecency list of Component IDs)
- // This is used to understand the ordering and bindings between components when generating the kernel
- // It's initially set to -1 which means the graph has no root yet, since node IDs are positive numbers
- ComponentID _graph_root{ -1 };
- std::unordered_map<ComponentID, ComponentList> _component_graph{};
-
- // Additional data structures used to define the relationships between components and arguments
- // For each argument, it contains the list of components that consider it as an incoming or an outgoing argument
- // E.g. tensor0 -> component0 -> tensor1
- // _outgoing_components[tensor0] == {component0} (component0 is the outgoing component of tensor0. Component0 treats tensor0 as an input tensor)
- // _incoming_components[tensor1] == {component0} (component0 is the incoming component of tensor1. Component1 treats tensor1 as an output tensor)
- std::unordered_map<ArgumentID, ComponentList> _outgoing_components{};
- std::unordered_map<ArgumentID, ComponentList> _incoming_components{};
-};
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif //ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMMON_H
-#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */