aboutsummaryrefslogtreecommitdiff
path: root/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/experimental/dynamic_fusion/ClKernelBuildingImpl')
-rw-r--r--src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h930
-rw-r--r--src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Utils.h76
-rw-r--r--src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.cpp409
-rw-r--r--src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.h81
-rw-r--r--src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.cpp266
-rw-r--r--src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.h90
-rw-r--r--src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.cpp153
-rw-r--r--src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.h85
-rw-r--r--src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClKernelComponents.h35
-rw-r--r--src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClStoreKernelComponents.cpp171
-rw-r--r--src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClStoreKernelComponents.h97
11 files changed, 0 insertions, 2393 deletions
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h
deleted file mode 100644
index 04919acb83..0000000000
--- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h
+++ /dev/null
@@ -1,930 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
-
-#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMMON_H
-#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMMON_H
-
-#include "arm_compute/core/CL/CLCompileContext.h"
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/GPUTarget.h"
-#include "src/core/common/Macros.h"
-#include "support/Requires.h"
-#include "support/StringSupport.h"
-
-#include "src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h"
-
-#include <iostream>
-#include <queue>
-#include <stack>
-#include <string>
-#include <unordered_set>
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-/** We introduce the concept of *Shared Variables* in the context of kernel building.
- * They are variables that can be accessed / shared among all the kernel components within a single kernel.
- * For now we consider 2 groups of shared variables:
- * Argument: The argument variables (parameters) of a kernel
- * Automatic: The automatic variables declared inside a kernel
- * All Shared Variables have the same kernel scope, and are thus visible to all kernel components
-*/
-
-enum class SharedVarIO
-{
- Input,
- Output
-};
-
-enum class SharedVarGroup
-{
- Argument, // Parameters to a kernel function == dst or src tensors of the whole blueprint graph
- Automatic // Automatic variables declared within the kernel body == intermediate tensors of the whole blueprint graph
-};
-
-/** Specifies a shared variable link for a component.
- * It describes all the information that's available when a component is constructed / added:
- * e.g. its linkage (via ArgumentID and io) and its group
- * This is not shared variable on its own, but is used for instantiating a SharedVar when building the code
- */
-struct SharedVarLink
-{
- ArgumentID arg_id{ g_arg_placeholder };
- SharedVarIO io{ SharedVarIO::Input };
- bool is_empty() const
- {
- return arg_id == g_arg_placeholder;
- }
-};
-
-/** A table of all the variables used in the kernel / blueprint
- * Because we limit the DependencyGraph in the blueprint to a Linear Sequence for now, we only allow ** a single global variable (the accumulator) **
- *
- * NOTE: the order they appear in the table is the order of their "declaration" in the component code, and is also their ID
- * NOTE: the variables all have the scope of the full kernel function
- */
-class SharedVarTable
-{
-public:
- /** A fully realized SharedVarLink
- */
- struct SharedVar
- {
- ArgumentID arg_id{ g_arg_placeholder };
- SharedVarIO io{ SharedVarIO::Input };
- SharedVarGroup group{ SharedVarGroup::Argument };
- std::string uniq_name{}; // Unique name, also the final variable name used in the built code
- ClKernelArgDescriptor desc{}; // Automatic variables can and should still be described using this struct
- bool is_empty() const
- {
- return arg_id == g_arg_placeholder;
- }
- };
-
- class Arguments
- {
- public:
- Arguments() = default;
- void add_var(const SharedVar &var)
- {
- ARM_COMPUTE_ERROR_ON(var.group != SharedVarGroup::Argument);
- _vars.push_back(var);
- }
- std::vector<SharedVar> get_all_vars() const
- {
- return _vars;
- }
- std::vector<SharedVar> get_src_vars() const
- {
- std::vector<SharedVar> src_vars;
- std::copy_if(_vars.begin(), _vars.end(), std::back_inserter(src_vars), [](const SharedVar & var)
- {
- return var.io == SharedVarIO::Input;
- });
- return src_vars;
- }
- SharedVar get_dst_var() const
- {
- std::vector<SharedVar> dst_vars;
- std::copy_if(_vars.begin(), _vars.end(), std::back_inserter(dst_vars), [](const SharedVar & var)
- {
- return var.io == SharedVarIO::Output;
- });
- ARM_COMPUTE_ERROR_ON(dst_vars.size() != 1);
- return dst_vars.at(0);
- }
-
- private:
- std::vector<SharedVar> _vars{};
- };
-
- /** Create a SharedVar for a corresponding SharedVarLink (contains ArgumentID). If one has already been created for the SharedVarLink, simply return it instead of creating a new one
- *
- * @note: The order of insertion is important. There is one precondition:
- * PRECOND: The components have been sorted topologically / is being traversed in topological order
- * This ensures that all the consumer var links (Output, Automatic Links) can consume (return) the producer var links when they're referred
- */
- void add(SharedVarLink var_link, SharedVarGroup group, ClKernelArgDescriptor runtime_desc, const std::string &name = "unnamed")
- {
- ARM_COMPUTE_ERROR_ON_MSG(var_link.is_empty(), "Non-empty SharedVarLink expected");
- if(!get(var_link).is_empty())
- {
- return;
- }
-
- auto var_id = _num_var;
- std::stringstream ss;
- ss << name << "_" << var_id;
- const auto uniq_name = ss.str();
- SharedVar var{ var_link.arg_id, var_link.io, group, uniq_name, runtime_desc };
-
- if(group == SharedVarGroup::Argument)
- {
- _arguments.emplace(var_id, var);
- _arg_id_map.emplace(var_link.arg_id, var_id);
- _num_var++;
- }
- else if(group == SharedVarGroup::Automatic)
- {
- if(_global_vars.empty())
- {
- if(var_link.io == SharedVarIO::Output)
- {
- _global_vars.emplace(var_id, var);
- _arg_id_map.emplace(var_link.arg_id, var_id);
- _num_var++;
- }
- else
- {
- ARM_COMPUTE_ERROR("Component likely not traversed in topological order");
- }
- }
- else
- {
- // Associate additional SharedVarLinks with the single global shared variable
- const auto global_var_id = _global_vars.begin()->first;
- _arg_id_map[var_link.arg_id] = global_var_id;
- }
- }
- else
- {
- ARM_COMPUTE_ERROR("Unrecognised SharedVarGroup");
- }
- }
-
- /** Get the SharedVar associated with @p var_link
- *
- * @param var_link
- * @return SharedVar
- */
- SharedVar get(const SharedVarLink &var_link) const
- {
- const SharedVar empty_var{};
- if(_arg_id_map.find(var_link.arg_id) != _arg_id_map.end())
- {
- const auto var_id = _arg_id_map.at(var_link.arg_id);
- const auto arg_var = _arguments.find(var_id);
- if(arg_var != _arguments.end())
- {
- return arg_var->second;
- }
- else
- {
- return _global_vars.at(var_id);
- }
- }
- return empty_var;
- }
-
- /** @note The arguments are returned in the order they are added
- */
- Arguments get_kernel_arguments() const
- {
- Arguments args{};
- for(const auto &a : _arguments)
- {
- args.add_var(a.second);
- }
- return args;
- }
-
-private:
- using VarID = int32_t;
-
-private:
- std::map<VarID, SharedVar> _global_vars{}; // Shared, global variable
- std::map<VarID, SharedVar> _arguments{};
- std::map<ArgumentID, VarID> _arg_id_map{}; // Track ArgumentIDs that have already been added
- VarID _num_var{ 0 };
-};
-
-enum class ComponentType
-{
- Simple,
- Complex,
- Store
-};
-
-using ComponentID = DependencyGraph::Id;
-using ComponentList = std::vector<ComponentID>;
-class IClKernelComponent
-{
-public:
- using Link = SharedVarLink;
- using Tag = std::string;
- struct TagVal
- {
- TagVal() = default;
- TagVal(const SharedVarTable::SharedVar &var)
- : value{ var.uniq_name }
- {
- }
-
- template <typename T, ARM_COMPUTE_REQUIRES_TA(std::is_integral<T>::value)>
- TagVal(T val)
- : value{ support::cpp11::to_string(val) }
- {
- }
-
- TagVal(const std::string &val)
- : value{ val }
- {
- }
-
- TagVal(const char *val)
- : value{ std::string(val) }
- {
- }
-
- TagVal(const DataType &data_type)
- : value{ get_cl_type_from_data_type(data_type) }
- {
- }
-
- std::string value{};
- };
- using TagLUT = std::unordered_map<Tag, TagVal>; // Used to instantiating a code template / replacing tags
-public:
- IClKernelComponent(ClKernelBlueprint *blueprint)
- : _blueprint(blueprint)
- {
- }
-
- ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(IClKernelComponent);
-
- virtual ~IClKernelComponent() = default;
- virtual ComponentType get_component_type() const = 0;
- virtual std::vector<Link> get_links() const = 0;
- virtual std::string name() const = 0;
-
- // @note: some tags can be unused since they could be used only for the macros, or only for the component code
- static std::string replace_tags(const std::string &code_template, const TagLUT &tags)
- {
- std::string replaced_code = "";
- bool scanning_pattern = false;
- std::string pattern_found = "";
- for(size_t i = 0; i < code_template.size() - 1; ++i)
- {
- if(!scanning_pattern)
- {
- if(code_template[i] == '{' && code_template[i + 1] == '{')
- {
- i += 1;
- scanning_pattern = true;
- pattern_found = "";
- }
- else
- {
- replaced_code += code_template[i];
- }
- }
- else
- {
- if(code_template[i] == '}' && code_template[i + 1] == '}')
- {
- i += 1;
- scanning_pattern = false;
- std::string err = "Pattern " + pattern_found + " not found in tags";
- ARM_COMPUTE_ERROR_ON_MSG(tags.find(pattern_found) == tags.end(), err.c_str());
- replaced_code += tags.find(pattern_found)->second.value;
- }
- else
- {
- pattern_found += code_template[i];
- }
- }
- }
-
- return replaced_code;
- }
- ComponentID id() const
- {
- return _id;
- }
- void set_id(ComponentID id)
- {
- _id = id;
- }
-
- virtual std::set<std::string> get_headers_list() const
- {
- return std::set<std::string> {};
- }
-
- virtual std::string get_additional_macros() const
- {
- return "";
- }
-
- virtual std::string get_component_code() const
- {
- return "";
- }
-
- virtual Window get_window() const
- {
- return Window{};
- }
-
- /** Get the tag look-up table used to instantiate the component code.
- *
- * @param vtable
- * @return TagLUT
- */
- virtual TagLUT get_tag_lut(const SharedVarTable &vtable) const = 0;
-
- /** Allocate all shared variables used by the component in the @p vtable
- *
- * @param vtable
- */
- virtual void allocate_shared_vars(SharedVarTable &vtable) const = 0;
-
- virtual std::string get_dst_addr_calculation() const
- {
- return "";
- }
-
- /** Generate config id of the component
- *
- * @return std::string
- */
- virtual std::string generate_config_id() const
- {
- return "";
- }
-
- virtual CLBuildOptions generate_build_options() const
- {
- return CLBuildOptions{};
- }
-
-protected:
- ClKernelBlueprint *_blueprint;
-
-private:
- ComponentID _id{};
-};
-
-using ComponentUniquePtr = std::unique_ptr<IClKernelComponent>;
-
-/** Intermediate representation of the final, complete kernel source.
- */
-struct ClKernelBlueprint::Implementation
-{
-public:
- Implementation() = default;
- ~Implementation() = default;
-
-public:
- Status update_merge_point(ArgumentID t_id, ArgumentID merge_point)
- {
- return _graph.update_merge_point(t_id, merge_point);
- }
-
- ArgumentID add_kernel_tensor(ITensorInfo *tensor_info, ArgumentID merge_point = DependencyGraph::empty_id())
- {
- const auto id = _graph.add_tensor(merge_point);
- if(_kernel_tensors.find(id) == _kernel_tensors.end())
- {
- _kernel_tensors.insert(std::make_pair(id, tensor_info));
- }
- return id;
- }
-
- void set_tile_info(const TileDescriptor &tile_info)
- {
- _tile_info = tile_info;
- }
-
- SharedVarGroup group(ArgumentID arg_id) const
- {
- if(arg_id == g_arg_placeholder)
- {
- // In case of placeholder, don't care what we return;
- return SharedVarGroup::Argument;
- }
- return _shared_var_group_lut.at(arg_id);
- }
-
- void validate_arg_ids(std::initializer_list<ArgumentID> args) const
- {
- for(const auto arg_id : args)
- {
- ARM_COMPUTE_UNUSED(arg_id);
- ARM_COMPUTE_ERROR_ON_MSG(_kernel_tensors.find(arg_id) == _kernel_tensors.end() && arg_id != g_arg_placeholder,
- "Trying to use an argument that hasn't been added to the blueprint");
- }
- }
-
- void add_component(ComponentUniquePtr component)
- {
- if(component->get_component_type() == ComponentType::Complex)
- {
- ++_num_complex_components;
- ARM_COMPUTE_ERROR_ON_MSG(_num_complex_components > 1, "Only one complex component per blueprint is supported.");
- }
-
- // Get an unique ID for the component that's being added
- std::vector<ArgumentID> src_tensors;
- std::vector<ArgumentID> dst_tensors;
- for(const auto &link : component->get_links())
- {
- if(link.is_empty())
- {
- continue;
- }
- if(link.io == SharedVarIO::Input)
- {
- src_tensors.push_back(link.arg_id);
- }
- else
- {
- dst_tensors.push_back(link.arg_id);
- }
- }
- const ComponentID component_id = _graph.add_operator(src_tensors, dst_tensors).second;
- component->set_id(component_id);
-
- // Add this component to the component graph. Don't connect it to anything yet
- _component_graph.emplace(component_id, ComponentList{});
-
- // For every { arg_id, arg_io } passed along with this component...
- for(const auto &link : component->get_links())
- {
- const ArgumentID &arg_id = link.arg_id;
- const SharedVarIO &arg_io = link.io;
-
- // Add the arg_id to the map describing the input/output relationship between an argument and the components that use it, if it doesn't yet exist there
- if(_outgoing_components.find(arg_id) == _outgoing_components.end())
- {
- _outgoing_components.emplace(arg_id, ComponentList{});
- _incoming_components.emplace(arg_id, ComponentList{});
- }
-
- // If it's an input argument, connect any other component that has it as output with this component
- // Additionally, set this component as one that treats this argument as "Input" (append to index 0)
- // This is used so that we keep track of whether two components use the same argument, one as input and one as output
- if(arg_io == SharedVarIO::Input)
- {
- for(const auto &prev_component : _incoming_components[arg_id])
- {
- _component_graph[prev_component].push_back(component_id);
- }
-
- _outgoing_components[arg_id].push_back(component_id);
- }
- // If it's an output argument, connect this component with any other component that has it as input
- // Additionally, set this component as one that treats this argument as "Output" (append to index 1)
- else
- {
- if(component->get_component_type() == ComponentType::Store)
- {
- ARM_COMPUTE_ERROR_ON_MSG(_dst_id >= 0, "Trying to add more than one dst argument to the graph");
- _dst_id = arg_id;
- }
-
- for(const auto &subseq_component : _outgoing_components[arg_id])
- {
- _component_graph[component_id].push_back(subseq_component);
- }
-
- _incoming_components[arg_id].push_back(component_id);
- }
- }
-
- ARM_COMPUTE_ERROR_ON_MSG(_graph.get_root_ops().size() != 1, "Trying to add more than one root to the graph");
-
- // Finally, add this component to the dictionary of components
- _components.insert(std::make_pair(component_id, std::move(component)));
- }
-
- std::string build_kernel_name() const
- {
- std::string name = "";
-
- traverse([&](std::stack<ComponentID> stack)
- {
- name += _components.find(stack.top())->second->name() + (stack.size() > 2 ? "___" : "");
- });
-
- return name;
- }
-
- std::string build_code()
- {
- ARM_COMPUTE_ERROR_ON_MSG(_graph_root == -1, "No root found in the component graph");
-
- // These data structures will hold the data from all the components in the blueprint
- std::set<std::string> headers_list{};
- std::set<std::string> additional_macros{};
- std::vector<std::string> component_codes{}; // vector because order matters
-
- // Step 1: Allocate all kernel argument shared variables before generating the component code
- auto stack = topological_sort();
- while(!stack.empty())
- {
- auto curr_component_id = stack.top();
- auto &curr_component = _components.find(curr_component_id)->second;
-
- curr_component->allocate_shared_vars(_vtable);
-
- stack.pop();
- }
- // Step 2: Generate component codes
- stack = topological_sort();
- while(!stack.empty())
- {
- auto curr_component_id = stack.top();
- auto &curr_component = _components.find(curr_component_id)->second;
-
- auto curr_headers_list = curr_component->get_headers_list();
- auto curr_additional_macros = curr_component->get_additional_macros();
- auto curr_component_code = curr_component->get_component_code();
- const auto var_lut = curr_component->get_tag_lut(_vtable); // Ideally can be merged with get_component_code once we have finer-grained code generation technique
- component_codes.push_back(IClKernelComponent::replace_tags(curr_component_code, var_lut));
-
- headers_list.insert(curr_headers_list.begin(), curr_headers_list.end());
- if(!curr_additional_macros.empty()) // Some components might not have any
- {
- additional_macros.insert(IClKernelComponent::replace_tags(curr_additional_macros, var_lut));
- }
-
- stack.pop();
- }
-
- // Step 3: Assemble the data gathered by traversing the graph into the string "code"
- std::string code = "";
-
- for(auto &header : headers_list)
- {
-#if defined(EMBEDDED_KERNELS)
- code += CLKernelLibrary::get().get_program(header).first;
-#else // defined(EMBEDDED_KERNELS)
- code += "#include \"" + header + "\"\n";
-#endif // defined(EMBEDDED_KERNELS)
- }
-
- for(auto &macros : additional_macros)
- {
- code += macros;
- }
-
- code += generate_kernel_signature(_vtable.get_kernel_arguments());
-
- code += "\n{\n\n";
-
- code += " //------------------ START KERNEL_BUILDER_COORDINATE ---------------------\n\n";
- code += generate_global_section();
- code += " //------------------ END KERNEL_BUILDER_COORDINATE ---------------------\n";
-
- for(auto &component_code : component_codes)
- {
- code += component_code;
- }
-
- code += "}\n";
-
- return code;
- }
-
- /** Generate config id of the entire kernel
- *
- * Format: kernel_name--comp0_config_id--comp1_config_id--...
- *
- * @return std::string
- */
- std::string build_config_id() const
- {
- std::string config_id = build_kernel_name();
- traverse([&](std::stack<ComponentID> stack)
- {
- config_id += "--" + _components.find(stack.top())->second->generate_config_id() + "--";
- });
-
- return config_id;
- }
-
- CLBuildOptions build_options() const
- {
- CLBuildOptions build_opts{};
-
- traverse([&](std::stack<ComponentID> stack)
- {
- build_opts.add_options(_components.find(stack.top())->second->generate_build_options().options());
- });
-
- return build_opts;
- }
-
- TileDescriptor get_tile_info() const
- {
- return _tile_info;
- }
-
- // Get the global execution window, i.e. that of the root component
- Window get_execution_window() const
- {
- ARM_COMPUTE_ERROR_ON_MSG(_graph_root == -1, "No root found in the component graph");
- ARM_COMPUTE_ERROR_ON_MSG(_dst_id == -1, "Destination Tensor Id should be ready before calling get_execution_window()");
-
- return _components.find(_graph_root)->second->get_window();
- }
-
- ArgumentID get_dst_id() const
- {
- return _dst_id;
- }
-
- ClKernelArgList get_arguments() const
- {
- ClKernelArgList arg_list{};
- for(const auto &arg_var : _vtable.get_kernel_arguments().get_all_vars())
- {
- arg_list[arg_var.desc.arg_id] = arg_var.desc;
- }
- return arg_list;
- }
-
- /** Get the arguments as shared vars from the vtable
- *
- * @return SharedVarTable::Arguments
- */
- SharedVarTable::Arguments get_argument_shared_vars() const
- {
- return _vtable.get_kernel_arguments();
- }
-
- const ITensorInfo *get_kernel_argument_info(const ArgumentID id) const
- {
- auto it = _kernel_tensors.find(id);
- if(it != _kernel_tensors.end())
- {
- return it->second;
- }
- return nullptr;
- }
-
- ITensorInfo *get_kernel_argument_info(const ArgumentID id)
- {
- auto it = _kernel_tensors.find(id);
- if(it != _kernel_tensors.end())
- {
- return it->second;
- }
- return nullptr;
- }
- /** Finalize graph construction. Graph is expected to not mutate after being finalized
- */
- void finalize()
- {
- cache_root_component();
- assign_shared_var_group();
- }
-
- DependencyGraph get_graph() const
- {
- return _graph;
- }
-
-private:
- void cache_root_component()
- {
- const auto roots = _graph.get_root_ops();
- ARM_COMPUTE_ERROR_ON_MSG(roots.size() != 1, "Trying to add more than one root to the graph");
- _graph_root = roots.at(0);
- }
- /** Assign the group for each shared var. Can only be performed at the end of the graph construction, before building
- */
- void assign_shared_var_group()
- {
- for(const auto &tensor : _kernel_tensors)
- {
- const auto tensor_id = tensor.first;
- if(_graph.is_src_tensor(tensor_id) || _graph.is_dst_tensor(tensor_id))
- {
- _shared_var_group_lut[tensor_id] = SharedVarGroup::Argument;
- }
- else
- {
- _shared_var_group_lut[tensor_id] = SharedVarGroup::Automatic;
- }
- }
- }
-
- void topological_sort_utility(ComponentID component_id, std::unordered_set<ComponentID> &visited, std::stack<ComponentID> &stack) const
- {
- visited.insert(component_id);
-
- for(auto connected_component : _component_graph.find(component_id)->second)
- {
- if(visited.find(connected_component) == visited.end())
- {
- topological_sort_utility(connected_component, visited, stack);
- }
- }
-
- stack.push(component_id);
- }
-
- std::stack<ComponentID> topological_sort() const
- {
- std::stack<ComponentID> stack{};
- std::unordered_set<ComponentID> visited{};
-
- topological_sort_utility(_graph_root, visited, stack);
-
- return stack;
- }
-
- void traverse(const std::function<void(std::stack<ComponentID>)> &func) const
- {
- std::stack<ComponentID> stack = topological_sort();
-
- while(!stack.empty())
- {
- func(stack);
- stack.pop();
- }
- }
-
- std::string generate_argument_declaration(const SharedVarTable::SharedVar &var) const
- {
- ARM_COMPUTE_ERROR_ON_MSG(var.group != SharedVarGroup::Argument, "An argument declaration can only be generated from a kernel argument");
- std::string code;
- switch(var.desc.tensor_arg_type)
- {
- case ClKernelTensorArgType::Vector:
- {
- code += "\n VECTOR_DECLARATION(" + var.uniq_name + ")";
- break;
- }
- case ClKernelTensorArgType::Image:
- {
- code += "\n IMAGE_DECLARATION(" + var.uniq_name + ")";
- break;
- }
- case ClKernelTensorArgType::Image_3D:
- {
- code += "\n IMAGE_DECLARATION(" + var.uniq_name + "),";
- code += "\n uint " + var.uniq_name + "_stride_z";
- break;
- }
- case ClKernelTensorArgType::Image_3D_Export_To_ClImage2D:
- {
- code += "\n __read_only image2d_t " + var.uniq_name + "_img,";
- code += "\n uint " + var.uniq_name + "_stride_z";
- break;
- }
- case ClKernelTensorArgType::Tensor_4D_t_Buffer:
- {
- code += "\n TENSOR4D_T(" + var.uniq_name + ", BUFFER)";
- break;
- }
- case ClKernelTensorArgType::Tensor_4D_t_Image:
- {
- code += "\n TENSOR4D_T(" + var.uniq_name + ", IMAGE)";
- break;
- }
- default:
- {
- ARM_COMPUTE_ERROR("Unsupported declaration generation for ClKernelTensorArgType");
- }
- }
- return code;
- }
-
- std::string generate_kernel_signature(const SharedVarTable::Arguments &argument_list) const
- {
- std::string code = "\n__kernel void " + build_kernel_name() + "(";
-
- for(const auto &arg : argument_list.get_all_vars())
- {
- code += generate_argument_declaration(arg) + ",";
- }
-
- code[code.length() - 1] = ')';
-
- return code;
- }
-
- std::string generate_global_section() const
- {
- auto dst_info = get_kernel_argument_info(_dst_id);
- auto dst_w = dst_info->dimension(0);
- const auto tile_w = std::max(1, get_execution_window().x().step());
- const auto tile_h = std::max(1, get_execution_window().y().step());
- auto leftover_w = dst_w % tile_w;
-
- std::string code = "";
- code += std::string(" int cout = GET_SPATIAL_IDX(0, ") + std::to_string(tile_w) + ", " + std::to_string(leftover_w) + ");\n";
- code += std::string(" int mout = GET_SPATIAL_IDX(1, ") + std::to_string(tile_h) + ", " + "0);\n";
- code += std::string(" int bout = GET_SPATIAL_IDX(2, 1, 0);\n\n");
-
- switch(_tile_info.clipping)
- {
- case ClippingStrategy::TOP_LEFT:
- code += " const bool g_cond_x = (cout == 0);\n";
- code += " const bool g_cond_y = (mout == 0);\n";
- break;
- case ClippingStrategy::TOP_RIGHT:
- code += " const bool g_cond_x = ((cout + 1) * " + std::to_string(tile_w) + " >= " + std::to_string(_tile_info.boundaries.x()) + ");\n";
- code += " const bool g_cond_y = (mout == 0);\n";
- break;
- case ClippingStrategy::BOTTOM_LEFT:
- code += " const bool g_cond_x = (cout == 0);\n";
- code += " const bool g_cond_y = ((mout + 1) * " + std::to_string(tile_h) + " >= " + std::to_string(_tile_info.boundaries.y()) + ");\n";
- break;
- case ClippingStrategy::BOTTOM_RIGHT:
- code += " const bool g_cond_x = ((cout + 1) * " + std::to_string(tile_w) + " >= " + std::to_string(_tile_info.boundaries.x()) + ");\n";
- code += " const bool g_cond_y = ((mout + 1) * " + std::to_string(tile_h) + " >= " + std::to_string(_tile_info.boundaries.y()) + ");\n";
- break;
- default:
- ARM_COMPUTE_ERROR("Unsupported clipping strategy");
- }
-
- return code;
- }
-
- TileDescriptor _tile_info{};
-
- int32_t _num_complex_components{};
-
- ArgumentID _dst_id{ -1 }; // Initially set to -1, which means the graph has no dst yet, since node IDs are positive numbers
-
- DependencyGraph _graph{};
-
- // Tensors, components and IDs with corresponding ptrs (except intermediate)
- std::unordered_map<ComponentID, ComponentUniquePtr> _components{};
- std::unordered_map<ArgumentID, ITensorInfo *> _kernel_tensors{};
- // Argument group lookup. Can be replaced by extending the ArgumentID type to include group info
- std::unordered_map<ArgumentID, SharedVarGroup> _shared_var_group_lut{};
-
- // Tracks all variables (e.g.: kernel arguments, kernel "global variables")
- SharedVarTable _vtable{};
-
- // Component directed graph (represented by an adjecency list of Component IDs)
- // This is used to understand the ordering and bindings between components when generating the kernel
- // It's initially set to -1 which means the graph has no root yet, since node IDs are positive numbers
- ComponentID _graph_root{ -1 };
- std::unordered_map<ComponentID, ComponentList> _component_graph{};
-
- // Additional data structures used to define the relationships between components and arguments
- // For each argument, it contains the list of components that consider it as an incoming or an outgoing argument
- // E.g. tensor0 -> component0 -> tensor1
- // _outgoing_components[tensor0] == {component0} (component0 is the outgoing component of tensor0. Component0 treats tensor0 as an input tensor)
- // _incoming_components[tensor1] == {component0} (component0 is the incoming component of tensor1. Component1 treats tensor1 as an output tensor)
- std::unordered_map<ArgumentID, ComponentList> _outgoing_components{};
- std::unordered_map<ArgumentID, ComponentList> _incoming_components{};
-};
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif //ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMMON_H
-#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Utils.h b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Utils.h
deleted file mode 100644
index 1b10050559..0000000000
--- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Utils.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
-
-#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_UTILS
-#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_UTILS
-
-#include "src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-inline ::std::ostream &operator<<(::std::ostream &os, const CLBuildOptions::StringSet &build_opts)
-{
- for(const auto &opt : build_opts)
- {
- os << opt << ",";
- }
- return os;
-}
-inline ::std::ostream &operator<<(::std::ostream &os, const CLBuildOptions &cl_build_opts)
-{
- os << cl_build_opts.options();
- return os;
-}
-
-inline std::string to_string(const CLBuildOptions &cl_build_opts)
-{
- std::stringstream str;
- str << cl_build_opts;
- return str.str();
-}
-inline ::std::ostream &operator<<(::std::ostream &os, const ClKernelCode &code)
-{
- os << "name: " << code.name << std::endl;
- os << "code: " << code.code << std::endl;
- os << "build_opts: " << code.build_options << std::endl;
- return os;
-}
-inline std::string to_string(const ClKernelCode &code)
-{
- std::stringstream str;
- str << code;
- return str.str();
-}
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-
-#endif //ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_UTILS
-#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */ \ No newline at end of file
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.cpp b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.cpp
deleted file mode 100644
index 811cd79811..0000000000
--- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.cpp
+++ /dev/null
@@ -1,409 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
-
-#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.h"
-
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "src/core/CL/ICLKernel.h"
-#include "src/core/helpers/AutoConfiguration.h"
-#include "src/core/helpers/WindowHelpers.h"
-#include "src/gpu/cl/kernels/gemm/ClGemmHelpers.h"
-
-#include "arm_compute/runtime/CL/CLScheduler.h"
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-ComponentType ClDirectConvolutionKernelComponent::get_component_type() const
-{
- return ComponentType::Complex;
-}
-
-std::set<std::string> ClDirectConvolutionKernelComponent::get_headers_list() const
-{
- return std::set<std::string> { "helpers.h", "tile_helpers.h" };
-}
-
-Window ClDirectConvolutionKernelComponent::get_window() const
-{
- const auto src_info = _blueprint->impl().get_kernel_argument_info(_src.arg_id);
- const auto weight_info = _blueprint->impl().get_kernel_argument_info(_weight.arg_id);
- auto dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
-
- // Get dst shape
- PadStrideInfo pad_stride_info
- {
- static_cast<unsigned int>(_desc.conv2d.stride.x()),
- static_cast<unsigned int>(_desc.conv2d.stride.y()),
- static_cast<unsigned int>(_desc.conv2d.pad.left),
- static_cast<unsigned int>(_desc.conv2d.pad.right),
- static_cast<unsigned int>(_desc.conv2d.pad.top),
- static_cast<unsigned int>(_desc.conv2d.pad.bottom),
- DimensionRoundingType::FLOOR /*default rounding type*/
- };
- TensorShape output_shape = misc::shape_calculator::compute_deep_convolution_shape(*src_info, *weight_info, pad_stride_info);
-
- // Output auto initialization if not yet initialized
- auto_init_if_empty(*dst_info, output_shape,
- 1,
- src_info->data_type(),
- src_info->quantization_info());
-
- const unsigned int vec_size = std::min(static_cast<unsigned int>(dst_info->tensor_shape()[0]), 4u);
- const unsigned int num_rows = (dst_info->tensor_shape()[0] > 16) ? ((src_info->data_type() == DataType::F32) ? 2U : 4U) : 1U;
- // const unsigned int num_rows = 1;
- // const unsigned int vec_size = tile_info.tile_dims.x();
- // const unsigned int num_rows = tile_info.tile_dims.y();
-
- // Create and configure kernel window
- Window win = calculate_max_window(output_shape, Steps(vec_size, num_rows));
-
- const size_t dim_y_collapsed = ceil_to_multiple(output_shape[1] * output_shape[2], num_rows);
- win.set(Window::DimY, Window::Dimension(0, dim_y_collapsed, num_rows));
- win.set(Window::DimZ, Window::Dimension(0, output_shape.total_size_upper(3), 1));
-
- return win;
-}
-
-std::string ClDirectConvolutionKernelComponent::get_additional_macros() const
-{
- return R"_()_"; // no macros
-}
-
-std::string ClDirectConvolutionKernelComponent::get_component_code() const
-{
- const auto src_info = _blueprint->impl().get_kernel_argument_info(_src.arg_id);
- const auto bias_info = _blueprint->impl().get_kernel_argument_info(_bias.arg_id);
-
- ARM_COMPUTE_ERROR_ON_MSG(src_info->data_layout() != DataLayout::NHWC, "Only NHWC data layout is supported by this component.");
-
- const auto channel_idx = get_data_layout_dimension_index(src_info->data_layout(), DataLayoutDimension::CHANNEL);
- const auto k0 = adjust_vec_size(is_data_type_quantized(src_info->data_type()) ? 16u : 8u, src_info->dimension(channel_idx));
- const bool leftover_loop = (src_info->dimension(channel_idx) % k0) != 0;
-
- std::string code = R"_(
- //------------------ START KERNEL {{meta_kernel_id}} ---------------------
- // IN_0(src) {{src}}
- // IN_1(wei) {{weight}}
- )_";
- if(bias_info != nullptr)
- {
- code += R"_(
- // IN_1(bia) {{bias}}
- )_";
- }
- code += R"_(
- // OUT(dst, accum) {{dst}}
-
- // Initialize the accumulators
- TILE({{ACC_DATA_TYPE}}, M0, N0, {{dst}});
- {
- // All the tensor dimensions are passed at compile time.
- // In case of dynamic tensor support, the following dimensions should be passed as function argument.
- #define _IWEI_WIDTH {{WEI_WIDTH}}
- #define _IWEI_HEIGHT {{WEI_HEIGHT}}
- #define _ISRC_WIDTH {{src}}_w
- #define _ISRC_HEIGHT {{src}}_h
- #define _ISRC_CHANNELS {{src}}_c
- #define _IDST_WIDTH {{arg_dst}}_w
- #define _IDST_HEIGHT {{arg_dst}}_h
- #define _IDST_CHANNELS {{arg_dst}}_c
- #define _IY_MULTIPLIER (_IWEI_WIDTH * _IWEI_HEIGHT)
-
- // .v = access the whole vector (OpenCL vector)
- // .s[x] = access the vector element at position x (scalar access)
- TILE(int, M0, 1, xi);
- TILE(int, M0, 1, yi);
-
- // Convert the linear index to coordinate
- LOOP_UNROLLING(int, i, 0, 1, M0,
- {
- xi[i].v = ((mout + i) % _IDST_WIDTH) * {{STRIDE_X}};
- yi[i].v = ((mout + i) / _IDST_WIDTH) * {{STRIDE_Y}};
- xi[i].v -= {{PAD_LEFT}};
- yi[i].v -= {{PAD_TOP}};
- })
-
- LOOP_UNROLLING(int, i, 0, 1, M0,
- {
- {{dst}}[i].v = 0;
- })
-
- for(int i = 0; i < (_IWEI_WIDTH * _IWEI_HEIGHT); ++i)
- {
- int ck = 0;
- int xk = i % _IWEI_WIDTH;
- int yk = i / _IWEI_HEIGHT;
-
- int k = 0;
- for(; k <= (_ISRC_CHANNELS - K0); k += K0)
- {
- TILE({{SRC_DATA_TYPE}}, M0, K0, a);
- TILE({{WEI_DATA_TYPE}}, N0, K0, b);
-
- LOOP_UNROLLING(int, i, 0, 1, M0,
- {
- a[i].v = {{ZERO_VALUE}};
- })
-
- // Load tile from the src tensor
- T_LOAD_NHWC_INDIRECT({{SRC_DATA_TYPE}}, M0, K0, {{SRC_TENSOR_TYPE}}, {{src}}, bout, yk, xk, ck, _ISRC_WIDTH, _ISRC_HEIGHT, {{src}}_stride_y, xi, yi, a);
-
- // Load tile from the weights tensor
- T_LOAD({{WEI_DATA_TYPE}}, N0, K0, {{WEI_TENSOR_TYPE}}, {{weight}}, ck, cout * _IY_MULTIPLIER + i, _IY_MULTIPLIER, {{weight}}_stride_y, b);
-
- // Compute the matrix multiplication between two tiles
- T_MMUL({{SRC_DATA_TYPE}}, {{WEI_DATA_TYPE}}, {{ACC_DATA_TYPE}}, M0, N0, K0, NT, T, a, b, {{dst}});
-
- ck += K0;
- }
-
- // We voluntarily use SRC_CHANNELS rather than _DSRC_CHANNELS
- // This #if directive should be removed in case of dynamic tensor support
- )_";
-
- if(leftover_loop)
- {
- code += R"_(
- // Left-over accumulations
- for(; k < _ISRC_CHANNELS; ++k)
- {
- TILE({{SRC_DATA_TYPE}}, M0, 1, a);
- TILE({{WEI_DATA_TYPE}}, N0, 1, b);
-
- LOOP_UNROLLING(int, i, 0, 1, M0,
- {
- a[i].v = {{ZERO_VALUE}};
- })
-
- // Load tile from the src tensor
- T_LOAD_NHWC_INDIRECT({{SRC_DATA_TYPE}}, M0, 1, {{SRC_TENSOR_TYPE}}, {{src}}, bout, yk, xk, ck, _ISRC_WIDTH, _ISRC_HEIGHT, {{src}}_stride_y, xi, yi, a);
-
- // Load tile from the weights tensor
- // The T_LOAD for the left-over elements can only use BUFFER because we load one element per iteration
- T_LOAD({{WEI_DATA_TYPE}}, N0, 1, BUFFER, {{weight}}, ck, cout * _IY_MULTIPLIER + i, _IY_MULTIPLIER, {{weight}}_stride_y, b);
-
- // Compute the matrix multiplication between two tiles
- T_MMUL({{SRC_DATA_TYPE}}, {{WEI_DATA_TYPE}}, {{ACC_DATA_TYPE}}, M0, N0, 1, NT, T, a, b, {{dst}});
-
- ++ck;
- }
- )_";
- }
-
- code += R"_(
- #undef _I_WEI_WIDTH
- #undef _I_WEI_HEIGHT
- #undef _ISRC_WIDTH
- #undef _ISRC_HEIGHT
- #undef _ISRC_CHANNELS
- #undef _IDST_WIDTH
- #undef _IDST_HEIGHT
- #undef _IDST_CHANNELS
- #undef _IY_MULTIPLIER
-
- }
- )_";
-
- if(bias_info != nullptr)
- {
- code += R"_(
- TILE({{BIA_DATA_TYPE}}, 1, N0, bias0);
-
- T_LOAD({{BIA_DATA_TYPE}}, 1, N0, BUFFER, {{bias}}, cout, 0, 1, 0, bias0);
-
- // c = c + bias[broadcasted]
- T_ELTWISE_BROADCAST_ADD_X({{ACC_DATA_TYPE}}, M0, N0, {{dst}}, bias0, {{dst}});
- )_";
- }
-
- code += R"_(
- }
-//------------------ END KERNEL {{meta_kernel_id}} ---------------------
- )_";
- return code.c_str();
-}
-
-bool export_to_cl_image_support(const ITensorInfo *tensor, GPUTarget gpu_target, DataLayout data_layout)
-{
- if(tensor->tensor_shape()[0] % 4 || (data_layout != DataLayout::NHWC))
- {
- return false;
- }
-
- // If not floating point
- if(!is_data_type_float(tensor->data_type()))
- {
- return false;
- }
-
- if(gpu_target == GPUTarget::G71 || get_arch_from_target(gpu_target) == GPUTarget::MIDGARD)
- {
- return false;
- }
-
- // Check if the cl_khr_image2d_from_buffer extension is supported on the target platform
- if(!image2d_from_buffer_supported(CLKernelLibrary::get().get_device()))
- {
- return false;
- }
-
- // Check cl image pitch alignment
- if(get_cl_image_pitch_alignment(CLKernelLibrary::get().get_device()) == 0)
- {
- return false;
- }
-
- const size_t image_w = tensor->tensor_shape()[0] / 4;
- const size_t image_h = tensor->tensor_shape()[1] * tensor->tensor_shape()[2] * tensor->tensor_shape()[3];
- const size_t max_image_w = CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_IMAGE2D_MAX_WIDTH>();
- const size_t max_image_h = CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_IMAGE2D_MAX_HEIGHT>();
-
- if(image_w > max_image_w || image_h > max_image_h)
- {
- return false;
- }
-
- return true;
-}
-
-CLBuildOptions ClDirectConvolutionKernelComponent::generate_build_options() const
-{
- const auto src_info = _blueprint->impl().get_kernel_argument_info(_src.arg_id);
- auto weight_info = _blueprint->impl().get_kernel_argument_info(_weight.arg_id);
- const auto dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
- // const auto tile_info = _blueprint->impl().get_tile_info();
-
- const unsigned int channel_idx = get_data_layout_dimension_index(src_info->data_layout(), DataLayoutDimension::CHANNEL);
- const DataType data_type = src_info->data_type();
- const GPUTarget gpu_target = CLScheduler::get().target();
-
- const unsigned int n0 = _blueprint->impl().get_execution_window().x().step();
- const unsigned int m0 = _blueprint->impl().get_execution_window().y().step();
- const unsigned int k0 = adjust_vec_size(is_data_type_quantized(data_type) ? 16u : 8u, src_info->dimension(channel_idx));
- const unsigned int partial_store_n0 = dst_info->dimension(0) % n0;
- const bool export_to_cl_image = export_to_cl_image_support(weight_info, gpu_target, src_info->data_layout());
-
- // Update the padding for the weights tensor if we can export to cl_image
- if(export_to_cl_image)
- {
- arm_compute::opencl::kernels::gemm::update_padding_for_cl_image(weight_info);
- }
-
- CLBuildOptions build_opts{};
- build_opts.add_option("-cl-fast-relaxed-math");
- build_opts.add_option("-DIS_TILED");
- build_opts.add_option("-DN0=" + support::cpp11::to_string(n0));
- build_opts.add_option("-DM0=" + support::cpp11::to_string(m0));
- build_opts.add_option("-DK0=" + support::cpp11::to_string(k0));
- build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0));
-
- return build_opts;
-}
-
-void ClDirectConvolutionKernelComponent::allocate_shared_vars(SharedVarTable &vtable) const
-{
- const auto src_info = _blueprint->impl().get_kernel_argument_info(_src.arg_id);
- const auto weight_info = _blueprint->impl().get_kernel_argument_info(_weight.arg_id);
-
- vtable.add(_src, _blueprint->impl().group(_src.arg_id), ClKernelArgDescriptor(_src.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "src");
-
- const GPUTarget gpu_target = CLScheduler::get().target();
- const bool export_to_cl_image = export_to_cl_image_support(weight_info, gpu_target, src_info->data_layout());
- const ClKernelTensorArgType weight_type = export_to_cl_image ? ClKernelTensorArgType::Tensor_4D_t_Image : ClKernelTensorArgType::Tensor_4D_t_Buffer;
- vtable.add(_weight, _blueprint->impl().group(_weight.arg_id), ClKernelArgDescriptor(_weight.arg_id, weight_type), "weight");
-
- if(!_bias.is_empty()) // optional bias
- {
- vtable.add(_bias, _blueprint->impl().group(_bias.arg_id), ClKernelArgDescriptor(_bias.arg_id, ClKernelTensorArgType::Vector), "bias");
- }
- vtable.add(_dst, _blueprint->impl().group(_dst.arg_id), ClKernelArgDescriptor(_dst.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "dst");
-}
-
-ClDirectConvolutionKernelComponent::TagLUT ClDirectConvolutionKernelComponent::get_tag_lut(const SharedVarTable &vtable) const
-{
- TagLUT lut{};
-
- const auto src_info = _blueprint->impl().get_kernel_argument_info(_src.arg_id);
- const auto weight_info = _blueprint->impl().get_kernel_argument_info(_weight.arg_id);
- const auto bias_info = _blueprint->impl().get_kernel_argument_info(_bias.arg_id);
-
- // Arguments and global shared variables
- lut["src"] = vtable.get(_src);
- lut["weight"] = vtable.get(_weight);
-
- if(!_bias.is_empty()) // optional bias
- {
- lut["bias"] = vtable.get(_bias);
- lut["BIA_DATA_TYPE"] = get_cl_type_from_data_type(bias_info->data_type());
- }
- lut["dst"] = vtable.get(_dst);
-
- const auto dst_argument = _blueprint->impl().get_argument_shared_vars().get_dst_var();
- lut["arg_dst"] = dst_argument.uniq_name;
-
- // Local build options
- lut["meta_kernel_id"] = id();
- lut["ACC_DATA_TYPE"] = src_info->data_type();
- lut["SRC_DATA_TYPE"] = src_info->data_type();
- lut["WEI_DATA_TYPE"] = weight_info->data_type();
-
- lut["SRC_TENSOR_TYPE"] = "BUFFER";
- switch(vtable.get(_weight).desc.tensor_arg_type)
- {
- case ClKernelTensorArgType::Image_Export_To_ClImage2D:
- case ClKernelTensorArgType::Image_3D_Export_To_ClImage2D:
- case ClKernelTensorArgType::Tensor_4D_t_Image:
- {
- lut["WEI_TENSOR_TYPE"] = "IMAGE";
- break;
- }
- default:
- {
- lut["WEI_TENSOR_TYPE"] = "BUFFER";
- break;
- }
- }
- const auto width_idx = get_data_layout_dimension_index(src_info->data_layout(), DataLayoutDimension::WIDTH);
- const auto height_idx = get_data_layout_dimension_index(src_info->data_layout(), DataLayoutDimension::HEIGHT);
- lut["WEI_WIDTH"] = weight_info->dimension(width_idx);
- lut["WEI_HEIGHT"] = weight_info->dimension(height_idx);
-
- lut["STRIDE_X"] = _desc.conv2d.stride.x();
- lut["STRIDE_Y"] = _desc.conv2d.stride.y();
-
- lut["PAD_LEFT"] = _desc.conv2d.pad.left;
- lut["PAD_TOP"] = _desc.conv2d.pad.top;
-
- lut["ZERO_VALUE"] = 0;
-
- return lut;
-}
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */ \ No newline at end of file
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.h b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.h
deleted file mode 100644
index 5babdbab51..0000000000
--- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
-
-#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLDIRECTCONVOLUTIONKERNELCOMPONENT_H
-#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLDIRECTCONVOLUTIONKERNELCOMPONENT_H
-
-#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h"
-
-#include "utils/TypePrinter.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-class ClDirectConvolutionKernelComponent : public IClKernelComponent
-{
-public:
- ClDirectConvolutionKernelComponent(ClKernelBlueprint *blueprint, const ClDirectConv2dKernelDescriptor &desc,
- const Link &src, const Link &weight, const Link &dst, const Link &bias = Link{})
- : IClKernelComponent(blueprint), _desc{ desc }, _src{ src }, _weight{ weight }, _bias{ bias }, _dst{ dst }
- {
- }
-
- ComponentType get_component_type() const override;
- std::set<std::string> get_headers_list() const override;
- std::string get_additional_macros() const override;
- std::string get_component_code() const override;
- Window get_window() const override;
- ClKernelArgList get_args();
- CLBuildOptions generate_build_options() const override;
-
- virtual std::vector<Link> get_links() const override
- {
- return { _src, _weight, _bias, _dst };
- }
-
- virtual TagLUT get_tag_lut(const SharedVarTable &vtable) const override;
- virtual void allocate_shared_vars(SharedVarTable &vtable) const override;
-
- virtual std::string name() const override
- {
- return "direct_convolution_" + to_string(_blueprint->impl().get_kernel_argument_info(_src.arg_id)->data_layout()) + "_" + std::to_string(id());
- }
-
-private:
- ClDirectConv2dKernelDescriptor _desc{};
- Link _src{};
- Link _weight{};
- Link _bias{};
- Link _dst{};
-};
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif // ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLDIRECTCONVOLUTIONKERNELCOMPONENT_H
-#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */ \ No newline at end of file
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.cpp b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.cpp
deleted file mode 100644
index e2eba68a63..0000000000
--- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.cpp
+++ /dev/null
@@ -1,266 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
-
-#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Validate.h"
-#include "src/core/helpers/AutoConfiguration.h"
-#include "src/core/helpers/WindowHelpers.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-ComponentType ClElementwiseKernelComponent::get_component_type() const
-{
- return ComponentType::Simple;
-}
-
-std::set<std::string> ClElementwiseKernelComponent::get_headers_list() const
-{
- return std::set<std::string> { "common/experimental/gemm_fused_post_ops/fp_mixed_precision_helpers.h", "tile_helpers.h" };
-}
-
-Window ClElementwiseKernelComponent::get_window() const
-{
- const ITensorInfo *lhs_info = _blueprint->impl().get_kernel_argument_info(_lhs.arg_id);
- const ITensorInfo *rhs_info = _blueprint->impl().get_kernel_argument_info(_rhs.arg_id);
- ITensorInfo *dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
-
- ARM_COMPUTE_ERROR_ON_NULLPTR(lhs_info, rhs_info, dst_info);
-
- const std::pair<TensorShape, ValidRegion> broadcast_pair = ITensorInfo::broadcast_shape_and_valid_region(*lhs_info, *rhs_info);
- const TensorShape &out_shape = broadcast_pair.first;
-
- auto_init_if_empty(*dst_info, out_shape, 1, lhs_info->data_type());
-
- TensorShape output_shape = dst_info->tensor_shape();
- // Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) and upper dimensions unchanged
- // This is in line with the collapsing convention used by Conv2d
- output_shape.collapse(2U, 1U);
- const unsigned int vector_size_byte_opencl = 16;
- const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / dst_info->element_size(), dst_info->dimension(0));
- Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
-
- return win;
-}
-
-std::string ClElementwiseKernelComponent::get_component_code() const
-{
- std::string code;
- const bool is_root = _blueprint->impl().group(_lhs.arg_id) == SharedVarGroup::Argument && _blueprint->impl().group(_rhs.arg_id) == SharedVarGroup::Argument;
-
- if(is_root)
- {
- return R"_(
- //------------------ START KERNEL {{meta_kernel_id}} ELTWISE_OP ---------------------
- // IN_0(LHS) {{lhs}}
- // IN_1(RHS) {{rhs}}
- // OUT(dst, accum) {{dst}}
-
- // dst = lhs + rhs (mix-precision, broadcast, boundary aware)
- TILE({{DATA_TYPE}}, M0, N0, {{dst}});
- {
- TILE({{DATA_TYPE}}, M0, N0, lhs_tile);
- TILE({{DATA_TYPE}}, M0, N0, rhs_tile);
-
- // Since mout maps to dimensions 1 (y) and dimension 2 (z) of the input tensor because of the collapsed window, bout maps to dimension 3 (w)
- {{lhs}}_offset_first_element_in_bytes += bout * {{lhs}}_stride_w;
- {{rhs}}_offset_first_element_in_bytes += bout * {{rhs}}_stride_w;
-
- T_LOAD({{DATA_TYPE}}, M0, N0, BUFFER, {{lhs}}, cout, mout, 1, {{lhs}}_stride_y, lhs_tile);
- T_LOAD({{DATA_TYPE}}, {{rhs_m0}}, {{rhs_n0}}, BUFFER, {{rhs}}, {{rhs_start_x}}, {{rhs_start_y}}, 1, {{rhs}}_stride_y, rhs_tile);
-
-#if defined(IS_BROADCAST)
- T_ELTWISE_BROADCAST_{{ELTWISE_OP}}_X({{DATA_TYPE}}, M0, N0, lhs_tile, rhs_tile, {{dst}});
-#else // !defined(IS_BROADCAST)
- T_ELTWISE_{{ELTWISE_OP}}({{DATA_TYPE}}, M0, N0, lhs_tile, rhs_tile, {{dst}});
-#endif // defined(IS_BROADCAST)
-
- }
- //------------------ END KERNEL {{meta_kernel_id}} ELTWISE_OP ---------------------
-)_";
- }
- else
- {
- return R"_(
- //------------------ START KERNEL {{meta_kernel_id}} ELTWISE_OP ---------------------
- // IN_0/Out(Accumulator) {{acc}}
- // IN_1(Addend) {{addend}}
-
- // acc = addend + acc (mix-precision, broadcast, boundary aware)
- {
- TILE({{DATA_TYPE}}, M0, N0, addend_tile);
-
- T_LOAD({{DATA_TYPE}}, {{rhs_m0}}, {{rhs_n0}}, BUFFER, {{addend}}, {{rhs_start_x}}, {{rhs_start_y}}, 1, {{addend}}_stride_y, addend_tile);
-
-#if defined(IS_BROADCAST)
- T_ELTWISE_BROADCAST_{{ELTWISE_OP}}_X({{DATA_TYPE}}, M0, N0, {{acc}}, addend_tile, {{acc}});
-#else // !defined(IS_BROADCAST)
- T_ELTWISE_{{ELTWISE_OP}}({{DATA_TYPE}}, M0, N0, {{acc}}, addend_tile, {{acc}});
-#endif // defined(IS_BROADCAST)
- }
- //------------------ END KERNEL {{meta_kernel_id}} ELTWISE_OP ---------------------
-)_";
- }
-}
-
-CLBuildOptions ClElementwiseKernelComponent::generate_build_options() const
-{
- const auto t_rhs_info = _blueprint->impl().get_kernel_argument_info(_rhs.arg_id);
- const auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
-
- CLBuildOptions build_opts{};
- const auto n0 = _blueprint->impl().get_execution_window().x().step();
- const auto m0 = _blueprint->impl().get_execution_window().y().step();
- const unsigned int partial_store_n0 = t_dst_info->dimension(0) % n0;
- const bool is_broadcast = t_rhs_info->tensor_shape() != t_dst_info->tensor_shape();
-
- build_opts.add_option("-DM0=" + support::cpp11::to_string(m0));
- build_opts.add_option("-DN0=" + support::cpp11::to_string(n0));
- build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0));
- build_opts.add_option_if(is_broadcast, "-DIS_BROADCAST");
-
- return build_opts;
-}
-
-std::string ClElementwiseKernelComponent::generate_config_id() const
-{
- auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
- std::string config_id{};
- config_id += lower_string(string_from_data_type(t_dst_info->data_type()));
- config_id += "_";
- config_id += support::cpp11::to_string(t_dst_info->dimension(0));
- config_id += "_";
- config_id += support::cpp11::to_string(t_dst_info->dimension(1));
- config_id += "_";
- config_id += lower_string(string_from_data_layout(t_dst_info->data_layout()));
- return config_id;
-}
-
-void ClElementwiseKernelComponent::allocate_shared_vars(SharedVarTable &vtable) const
-{
- const bool is_root = _blueprint->impl().group(_lhs.arg_id) == SharedVarGroup::Argument && _blueprint->impl().group(_rhs.arg_id) == SharedVarGroup::Argument;
- vtable.add(_lhs, _blueprint->impl().group(_lhs.arg_id), ClKernelArgDescriptor(_lhs.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "lhs");
- vtable.add(_rhs, _blueprint->impl().group(_rhs.arg_id), ClKernelArgDescriptor(_rhs.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "rhs");
- if(is_root)
- {
- vtable.add(_dst, _blueprint->impl().group(_dst.arg_id), ClKernelArgDescriptor(_dst.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "dst");
- }
-}
-
-ClElementwiseKernelComponent::TagLUT ClElementwiseKernelComponent::get_tag_lut(const SharedVarTable &vtable) const
-{
- TagLUT lut{};
- const auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
- ITensorInfo *t_addend_info = nullptr;
- // Arguments and global shared variables
- const bool is_root = _blueprint->impl().group(_lhs.arg_id) == SharedVarGroup::Argument && _blueprint->impl().group(_rhs.arg_id) == SharedVarGroup::Argument;
- if(is_root)
- {
- lut["lhs"] = vtable.get(_lhs);
- lut["rhs"] = vtable.get(_rhs);
- lut["dst"] = vtable.get(_dst);
- t_addend_info = _blueprint->impl().get_kernel_argument_info(_rhs.arg_id);
- }
- else
- {
- // Determine which link is the accumulator
- Link accumulator;
- Link addend;
- if(_blueprint->impl().group(_lhs.arg_id) == SharedVarGroup::Automatic)
- {
- accumulator = _lhs;
- addend = _rhs;
- }
- else if(_blueprint->impl().group(_rhs.arg_id) == SharedVarGroup::Automatic)
- {
- accumulator = _rhs;
- addend = _lhs;
- }
- else
- {
- ARM_COMPUTE_ERROR("Invalid elementwise component linking");
- }
- lut["acc"] = vtable.get(accumulator);
- lut["addend"] = vtable.get(addend);
- t_addend_info = _blueprint->impl().get_kernel_argument_info(addend.arg_id);
- }
- // Local build options
- lut["meta_kernel_id"] = id();
- lut["DATA_TYPE"] = get_cl_type_from_data_type(t_dst_info->data_type());
-
- switch(_desc.eltwise.op)
- {
- case ArithmeticOperation::DIV:
- lut["ELTWISE_OP"] = "DIV";
- break;
- case ArithmeticOperation::ADD:
- lut["ELTWISE_OP"] = "ADD";
- break;
- default:
- ARM_COMPUTE_ERROR("Arithmetic Operation not supported");
- }
-
- // Set broadcast parameters
- // PRE: All tensors are broadcast-compatible
- const bool is_broadcast = t_addend_info->tensor_shape() != t_dst_info->tensor_shape();
- if(is_broadcast)
- {
- // Note that n0 maps to input tensor dimension 0, m0 maps to input dimensions 1 and 2 because of our collapse strategy
- if(t_addend_info->dimension(0) == 1U && t_addend_info->dimension(1) == 1U && t_addend_info->dimension(2) == 1U) // Broadcast in X, Y, Z: collapsed rhs win [M0xN0] = [1x1]
- {
- lut["rhs_m0"] = "1";
- lut["rhs_n0"] = "1";
- lut["rhs_start_y"] = "0";
- lut["rhs_start_x"] = "0";
- }
- else if(t_addend_info->dimension(1) == 1U && t_addend_info->dimension(2) == 1U) // Broadcast in Y and Z: collapsed rhs win [M0xN0] = [1xN]
- {
- lut["rhs_m0"] = "1";
- lut["rhs_n0"] = "N0";
- lut["rhs_start_y"] = "0";
- lut["rhs_start_x"] = "cout";
- }
- else
- {
- ARM_COMPUTE_ERROR("Only support rhs broadcasting in all X, Y, Z dimensions, or just in Y and Z dimensions");
- }
- }
- else
- {
- lut["rhs_m0"] = "M0";
- lut["rhs_n0"] = "N0";
- lut["rhs_start_y"] = "mout";
- lut["rhs_start_x"] = "cout";
- }
- return lut;
-}
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */ \ No newline at end of file
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.h b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.h
deleted file mode 100644
index f8377457d3..0000000000
--- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
-
-#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLELEMENTWISEADDKERNELCOMPONENT_H
-#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLELEMENTWISEADDKERNELCOMPONENT_H
-
-#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-class ClElementwiseKernelComponent : public IClKernelComponent
-{
-public:
- /** Construct a new Cl Elementwise Kernel Component object
- *
- * @param[in] blueprint Blueprint to which this component is added
- * @param[in] desc Component descriptor
- * @param[in] lhs Link to LHS tensor
- * @param[in] rhs Link to RHS tensor
- * @param[out] dst Link to DST tensor
- *
- * Support Level
- * Data Type: F16, F32
- * Tensor Shape: Any shape of arbitrary dimension >= 1 and <= 4
- * Value Range: All
- * Broadcasting: Only RHS tensor can be broadcasted into LHS. Only support broadcasting in dimension 1 and dimension 2 or all dimension 0, 1 and 2
- */
- ClElementwiseKernelComponent(ClKernelBlueprint *blueprint, const ClElementwiseKernelDescriptor &desc, const Link &lhs, const Link &rhs, const Link &dst)
- : IClKernelComponent(blueprint), _desc{ desc }, _lhs{ lhs }, _rhs{ rhs }, _dst{ dst }
- {
- }
-
- ComponentType get_component_type() const override;
- std::set<std::string> get_headers_list() const override;
- std::string get_component_code() const override;
- Window get_window() const override;
- CLBuildOptions generate_build_options() const override;
- std::string generate_config_id() const override;
-
- virtual std::vector<Link> get_links() const override
- {
- return { _lhs, _rhs, _dst };
- }
-
- virtual TagLUT get_tag_lut(const SharedVarTable &vtable) const override;
- virtual void allocate_shared_vars(SharedVarTable &vtable) const override;
-
- virtual std::string name() const override
- {
- return "eltwise_add_" + std::to_string(id());
- }
-
-private:
- ClElementwiseKernelDescriptor _desc{};
- Link _lhs{};
- Link _rhs{};
- Link _dst{};
-};
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif // ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLELEMENTWISEADDKERNELCOMPONENT_H
-#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */ \ No newline at end of file
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.cpp b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.cpp
deleted file mode 100644
index 0a20a8f600..0000000000
--- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.cpp
+++ /dev/null
@@ -1,153 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
-#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Validate.h"
-#include "src/core/helpers/AutoConfiguration.h"
-#include "src/core/helpers/WindowHelpers.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-ComponentType ClFloorKernelComponent::get_component_type() const
-{
- return ComponentType::Simple;
-}
-std::set<std::string> ClFloorKernelComponent::get_headers_list() const
-{
- return std::set<std::string> { "common/experimental/gemm_fused_post_ops/fp_mixed_precision_helpers.h", "tile_helpers.h" };
-}
-Window ClFloorKernelComponent::get_window() const
-{
- const ITensorInfo *src_info = _blueprint->impl().get_kernel_argument_info(_src.arg_id);
- ITensorInfo *dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
-
- ARM_COMPUTE_ERROR_ON_NULLPTR(src_info, dst_info);
- auto_init_if_empty(*dst_info, src_info->tensor_shape(), 1, src_info->data_type());
-
- TensorShape output_shape = dst_info->tensor_shape();
- // Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) and upper dimensions unchanged
- // This is in line with the collapsing convention used by Conv2d
- output_shape.collapse(2U, 1U);
- const unsigned int vector_size_byte_opencl = 16;
- const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / dst_info->element_size(), dst_info->dimension(0));
- Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
-
- return win;
-}
-std::string ClFloorKernelComponent::get_component_code() const
-{
- bool is_root = _blueprint->impl().group(_src.arg_id) == SharedVarGroup::Argument;
- if(is_root)
- {
- return R"_(
- //------------------ START KERNEL {{meta_kernel_id}} FLOOR ---------------------
- // IN_0(src) {{src}}
- // OUT(dst, accum) {{dst}}
- TILE({{DATA_TYPE}}, M0, N0, {{dst}});
- {
- TILE({{DATA_TYPE}}, M0, N0, src_tile);
-
- // Since mout maps to dimensions 1 (y) and dimension 2 (z) of the input tensor because of the collapsed window, bout maps to dimension 3 (w)
- {{src}}_offset_first_element_in_bytes += bout * {{src}}_stride_w;
- T_LOAD({{DATA_TYPE}}, M0, N0, BUFFER, {{src}}, cout, mout, 1, {{src}}_stride_y, src_tile);
-
- T_FLOOR({{DATA_TYPE}}, M0, N0, src_tile, {{dst}});
- }
- //------------------ END KERNEL {{meta_kernel_id}} FLOOR ---------------------
-)_";
- }
- else
- {
- return R"_(
- //------------------ START KERNEL {{meta_kernel_id}} FLOOR ---------------------
- // IN_0/Out(Accumulator) {{acc}}
- // output = floor(input)
- {
- T_FLOOR({{DATA_TYPE}}, M0, N0, {{acc}}, {{acc}});
- }
- //------------------ END KERNEL {{meta_kernel_id}} FLOOR ---------------------
-)_";
- }
-}
-CLBuildOptions ClFloorKernelComponent::generate_build_options() const
-{
- CLBuildOptions build_opts{};
- const auto n0 = _blueprint->impl().get_execution_window().x().step();
- const auto m0 = _blueprint->impl().get_execution_window().y().step();
- const auto dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
- const unsigned int partial_store_n0 = dst_info->dimension(0) % n0;
- build_opts.add_option("-DM0=" + support::cpp11::to_string(m0));
- build_opts.add_option("-DN0=" + support::cpp11::to_string(n0));
- build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0));
- return build_opts;
-}
-std::string ClFloorKernelComponent::generate_config_id() const
-{
- auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
- std::string config_id{};
- config_id += lower_string(string_from_data_type(t_dst_info->data_type()));
- config_id += "_";
- config_id += support::cpp11::to_string(t_dst_info->dimension(0));
- config_id += "_";
- config_id += support::cpp11::to_string(t_dst_info->dimension(1));
- config_id += "_";
- config_id += lower_string(string_from_data_layout(t_dst_info->data_layout()));
- return config_id;
-}
-void ClFloorKernelComponent::allocate_shared_vars(SharedVarTable &vtable) const
-{
- vtable.add(_src, _blueprint->impl().group(_src.arg_id), ClKernelArgDescriptor(_src.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "src");
- vtable.add(_dst, _blueprint->impl().group(_dst.arg_id), ClKernelArgDescriptor(_dst.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "dst");
-}
-ClFloorKernelComponent::TagLUT ClFloorKernelComponent::get_tag_lut(const SharedVarTable &vtable) const
-{
- TagLUT lut{};
- const auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
- // Arguments and global shared variables
- const bool is_root = _blueprint->impl().group(_src.arg_id) == SharedVarGroup::Argument;
-
- if(is_root)
- {
- lut["src"] = vtable.get(_src);
- lut["dst"] = vtable.get(_dst);
- }
- else
- {
- lut["acc"] = vtable.get(_src);
- }
-
- lut["meta_kernel_id"] = id();
- lut["DATA_TYPE"] = get_cl_type_from_data_type(t_dst_info->data_type());
- return lut;
-}
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */ \ No newline at end of file
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.h b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.h
deleted file mode 100644
index e791b36382..0000000000
--- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
-
-#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLFLOORKERNELCOMPONENT_H
-#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLFLOORKERNELCOMPONENT_H
-
-#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-class ClFloorKernelComponent : public IClKernelComponent
-{
-public:
- /** Construct a new Cl Floor Kernel Component object
- *
- * @param blueprint Blueprint to which this component is added
- * @param src Link to SRC tensor
- * @param dst Link to DST tensor
- *
- * Support Level
- * Data Type: F16, F32
- * Tensor Shape: Any shape of arbitrary dimension >= 1 and <= 4
- * Value Range: All
- */
- ClFloorKernelComponent(ClKernelBlueprint *blueprint, const Link &src, const Link &dst)
- : IClKernelComponent(blueprint), _src{ src }, _dst{ dst }
- {
- }
-
- ComponentType get_component_type() const override;
- std::set<std::string> get_headers_list() const override;
- std::string get_component_code() const override;
- Window get_window() const override;
- CLBuildOptions generate_build_options() const override;
- std::string generate_config_id() const override;
-
- virtual std::vector<Link> get_links() const override
- {
- return { _src, _dst };
- }
-
- virtual TagLUT get_tag_lut(const SharedVarTable &vtable) const override;
- virtual void allocate_shared_vars(SharedVarTable &vtable) const override;
-
- virtual std::string name() const override
- {
- return "floor_" + std::to_string(id());
- }
-
-private:
- Link _src{};
- Link _dst{};
-};
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif // ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLFLOORKERNELCOMPONENT_H
-#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClKernelComponents.h b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClKernelComponents.h
deleted file mode 100644
index 3f99dd5553..0000000000
--- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClKernelComponents.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
-
-#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_CLKERNELCOMPONENTS_H
-#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_CLKERNELCOMPONENTS_H
-
-#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.h"
-#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.h"
-#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.h"
-#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClStoreKernelComponents.h"
-
-#endif //ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_CLKERNELCOMPONENTS_H
-#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */ \ No newline at end of file
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClStoreKernelComponents.cpp b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClStoreKernelComponents.cpp
deleted file mode 100644
index 7c805d5368..0000000000
--- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClStoreKernelComponents.cpp
+++ /dev/null
@@ -1,171 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
-
-#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClStoreKernelComponents.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-ComponentType ClStoreBlockBoundaryAwareKernelComponent::get_component_type() const
-{
- return ComponentType::Store;
-}
-
-std::string ClStoreBlockBoundaryAwareKernelComponent::get_component_code() const
-{
- return R"_(
- //------------------ START KERNEL {{meta_kernel_id}} STORE ---------------------
-
- __global uchar *dst_addr = {{dst}}_ptr + {{dst}}_offset_first_element_in_bytes + (g_x * (uint)N0 * sizeof(DATA_TYPE)) + (COMPUTE_M0_START_ROW(g_y, M0, PARTIAL_STORE_M0) * {{dst}}_stride_y);
-
-#if defined(REINTERPRET_OUTPUT_AS_3D)
- // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
- // multiply dst_stride_z by DEPTH_GEMM3D
- dst_addr += g_z * {{dst}}_stride_z * DEPTH_GEMM3D;
-
-#else // defined(REINTERPRET_OUTPUT_AS_3D)
-
- // Add offset for batched GEMM
- dst_addr += g_z * {{dst}}_stride_z;
-
-#endif // defined(REINTERPRET_OUTPUT_AS_3D)
-
- STORE_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, {{src}}, dst_addr, {{dst}}_stride_y, g_zout, PARTIAL_STORE_M0, PARTIAL_STORE_N0, g_cond_y, g_cond_x);
-
- //------------------ END KERNEL {{meta_kernel_id}} STORE ---------------------
-
-)_";
-}
-
-CLBuildOptions ClStoreBlockBoundaryAwareKernelComponent::generate_build_options() const
-{
- auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
- // auto tile_info = _blueprint->impl().get_tile_info();
-
- CLBuildOptions build_opts{};
-
- const auto n0 = _blueprint->impl().get_execution_window().x().step();
- const auto m0 = _blueprint->impl().get_execution_window().y().step();
- const auto partial_m0 = t_dst_info->dimension(0) % m0;
- const auto partial_n0 = t_dst_info->dimension(1) % n0;
-
- build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(t_dst_info->data_type()));
- build_opts.add_option("-DM0=" + support::cpp11::to_string(m0));
- build_opts.add_option("-DN0=" + support::cpp11::to_string(n0));
- build_opts.add_option("-DPARTIAL_STORE_M0=" + support::cpp11::to_string(partial_m0));
- build_opts.add_option("-DPARTIAL_STORE_N0=" + support::cpp11::to_string(partial_n0));
-
- return build_opts;
-}
-
-void ClStoreBlockBoundaryAwareKernelComponent::allocate_shared_vars(SharedVarTable &vtable) const
-{
- vtable.add(_src, _blueprint->impl().group(_src.arg_id), ClKernelArgDescriptor(_src.arg_id, ClKernelTensorArgType::Image_3D), "src");
- vtable.add(_dst, _blueprint->impl().group(_dst.arg_id), ClKernelArgDescriptor(_dst.arg_id, ClKernelTensorArgType::Image_3D), "dst");
-}
-
-ClStoreBlockBoundaryAwareKernelComponent::TagLUT ClStoreBlockBoundaryAwareKernelComponent::get_tag_lut(const SharedVarTable &vtable) const
-{
- return {
- { "meta_kernel_id", id() },
- { "src", vtable.get(_src) },
- { "dst", vtable.get(_dst) },
- };
-}
-
-ComponentType ClStoreIndirectWidthSelectKernelComponent::get_component_type() const
-{
- return ComponentType::Store;
-}
-
-std::string ClStoreIndirectWidthSelectKernelComponent::get_component_code() const
-{
- return R"_(
- //------------------ START KERNEL {{meta_kernel_id}} STORE ---------------------
- {
- // This also follows NHWC layout
- // cout maps to global_id(0) maps to Channel
- // mout maps to global_id(1) maps to Height and Weight (Collapsed Window)
- // bout maps to global_id(3) maps to N / Batch
- #define _IDST_WIDTH {{dst}}_w
- #define _IDST_HEIGHT {{dst}}_h
- TILE(uint, M0, 1, dst_indirect_y);
-
- // Calculate the destination indirect Y
- LOOP_UNROLLING(int, i, 0, 1, M0,
- {
- dst_indirect_y[i].v = (uint)min(mout + i, (int)(_IDST_WIDTH * _IDST_HEIGHT) - 1);
- dst_indirect_y[i].v += bout * (int)(_IDST_WIDTH * _IDST_HEIGHT);
- })
-
- bool x_cond = PARTIAL_N0 != 0 && get_global_id(0) == 0;
-
- T_STORE_INDIRECT_WIDTH_SELECT({{DST_DATA_TYPE}}, M0, N0, PARTIAL_N0, {{DST_TENSOR_TYPE}}, {{dst}}, cout, {{dst}}_stride_y, x_cond, {{src}}, dst_indirect_y);
-
- #undef _IDST_WIDTH
- #undef _IDST_HEIGHT
- //------------------ END KERNEL {{meta_kernel_id}} STORE ---------------------
- }
-
-)_";
-}
-
-CLBuildOptions ClStoreIndirectWidthSelectKernelComponent::generate_build_options() const
-{
- CLBuildOptions build_opts{};
-
- return build_opts;
-}
-
-void ClStoreIndirectWidthSelectKernelComponent::allocate_shared_vars(SharedVarTable &vtable) const
-{
- vtable.add(_src, _blueprint->impl().group(_src.arg_id), ClKernelArgDescriptor(_src.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "src");
- vtable.add(_dst, _blueprint->impl().group(_dst.arg_id), ClKernelArgDescriptor(_dst.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "dst");
-}
-
-ClStoreIndirectWidthSelectKernelComponent::TagLUT ClStoreIndirectWidthSelectKernelComponent::get_tag_lut(const SharedVarTable &vtable) const
-{
- TagLUT lut{};
-
- // Arguments and global shared variables
- lut["src"] = vtable.get(_src);
- lut["dst"] = vtable.get(_dst);
-
- // Local build options
- lut["meta_kernel_id"] = id();
- lut["DST_TENSOR_TYPE"] = "BUFFER";
- const auto dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
- lut["DST_DATA_TYPE"] = dst_info->data_type();
-
- return lut;
-}
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */ \ No newline at end of file
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClStoreKernelComponents.h b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClStoreKernelComponents.h
deleted file mode 100644
index e0b188dc8d..0000000000
--- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClStoreKernelComponents.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
-
-#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLSTOREKERNELCOMPONENTS_H
-#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLSTOREKERNELCOMPONENTS_H
-
-#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-class ClStoreBlockBoundaryAwareKernelComponent : public IClKernelComponent
-{
-public:
- ClStoreBlockBoundaryAwareKernelComponent(ClKernelBlueprint *blueprint, const Link &src, const Link &dst)
- : IClKernelComponent(blueprint), _src{ src }, _dst{ dst }
- {
- }
- ComponentType get_component_type() const override;
- std::string get_component_code() const override;
- CLBuildOptions generate_build_options() const override;
- TagLUT get_tag_lut(const SharedVarTable &vtable) const override;
- void allocate_shared_vars(SharedVarTable &vtable) const override;
-
- virtual std::vector<Link> get_links() const override
- {
- return { _src, _dst };
- }
-
- virtual std::string name() const override
- {
- return "";
- }
-
-private:
- Link _src{};
- Link _dst{};
-};
-
-class ClStoreIndirectWidthSelectKernelComponent : public IClKernelComponent
-{
-public:
- ClStoreIndirectWidthSelectKernelComponent(ClKernelBlueprint *blueprint, const Link &src, const Link &dst)
- : IClKernelComponent(blueprint), _src{ src }, _dst{ dst }
- {
- }
- ComponentType get_component_type() const override;
- std::string get_component_code() const override;
- CLBuildOptions generate_build_options() const override;
- virtual TagLUT get_tag_lut(const SharedVarTable &vtable) const override;
- void allocate_shared_vars(SharedVarTable &vtable) const override;
-
- virtual std::vector<Link> get_links() const override
- {
- return { _src, _dst };
- }
-
- virtual std::string name() const override
- {
- return "";
- }
-
-private:
- Link _src{};
- Link _dst{};
-};
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif // ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLSTOREKERNELCOMPONENTS_H
-#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */ \ No newline at end of file