From 31df05a1870662a7288fbaeb6fbc7fc458bb5a73 Mon Sep 17 00:00:00 2001 From: SiCong Li Date: Wed, 9 Nov 2022 15:57:48 +0000 Subject: Remove dynamic fusion prototype with tests and examples Public headers of the new experimental dynamic fusion can be found in arm_compute/dynamic_fusion/ New examples on how to use the interface can be found in tests/validation/dynamic_fusion/gpu/Integration.cpp Resolves COMPMID-5683 Change-Id: I7ccb902a227fb487562df15fc3c30118d1d95bbd Signed-off-by: SiCong Li Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8671 Reviewed-by: Jakub Sujak Reviewed-by: Gunes Bayir Comments-Addressed: Arm Jenkins Benchmark: Arm Jenkins Tested-by: Arm Jenkins --- .../dynamic_fusion/ClKernelBuildingImpl/Common.h | 930 --------------------- .../dynamic_fusion/ClKernelBuildingImpl/Utils.h | 76 -- .../ClDirectConvolutionKernelComponent.cpp | 409 --------- .../ClDirectConvolutionKernelComponent.h | 81 -- .../components/ClElementwiseKernelComponent.cpp | 266 ------ .../components/ClElementwiseKernelComponent.h | 90 -- .../components/ClFloorKernelComponent.cpp | 153 ---- .../components/ClFloorKernelComponent.h | 85 -- .../components/ClKernelComponents.h | 35 - .../components/ClStoreKernelComponents.cpp | 171 ---- .../components/ClStoreKernelComponents.h | 97 --- 11 files changed, 2393 deletions(-) delete mode 100644 src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h delete mode 100644 src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Utils.h delete mode 100644 src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.cpp delete mode 100644 src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.h delete mode 100644 src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.cpp delete mode 100644 src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.h delete mode 100644 src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.cpp delete mode 100644 src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.h delete mode 100644 src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClKernelComponents.h delete mode 100644 src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClStoreKernelComponents.cpp delete mode 100644 src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClStoreKernelComponents.h (limited to 'src/core/experimental/dynamic_fusion/ClKernelBuildingImpl') diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h deleted file mode 100644 index 04919acb83..0000000000 --- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h +++ /dev/null @@ -1,930 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION - -#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMMON_H -#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMMON_H - -#include "arm_compute/core/CL/CLCompileContext.h" -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/GPUTarget.h" -#include "src/core/common/Macros.h" -#include "support/Requires.h" -#include "support/StringSupport.h" - -#include "src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h" - -#include -#include -#include -#include -#include - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -/** We introduce the concept of *Shared Variables* in the context of kernel building. - * They are variables that can be accessed / shared among all the kernel components within a single kernel. - * For now we consider 2 groups of shared variables: - * Argument: The argument variables (parameters) of a kernel - * Automatic: The automatic variables declared inside a kernel - * All Shared Variables have the same kernel scope, and are thus visible to all kernel components -*/ - -enum class SharedVarIO -{ - Input, - Output -}; - -enum class SharedVarGroup -{ - Argument, // Parameters to a kernel function == dst or src tensors of the whole blueprint graph - Automatic // Automatic variables declared within the kernel body == intermediate tensors of the whole blueprint graph -}; - -/** Specifies a shared variable link for a component. - * It describes all the information that's available when a component is constructed / added: - * e.g. its linkage (via ArgumentID and io) and its group - * This is not shared variable on its own, but is used for instantiating a SharedVar when building the code - */ -struct SharedVarLink -{ - ArgumentID arg_id{ g_arg_placeholder }; - SharedVarIO io{ SharedVarIO::Input }; - bool is_empty() const - { - return arg_id == g_arg_placeholder; - } -}; - -/** A table of all the variables used in the kernel / blueprint - * Because we limit the DependencyGraph in the blueprint to a Linear Sequence for now, we only allow ** a single global variable (the accumulator) ** - * - * NOTE: the order they appear in the table is the order of their "declaration" in the component code, and is also their ID - * NOTE: the variables all have the scope of the full kernel function - */ -class SharedVarTable -{ -public: - /** A fully realized SharedVarLink - */ - struct SharedVar - { - ArgumentID arg_id{ g_arg_placeholder }; - SharedVarIO io{ SharedVarIO::Input }; - SharedVarGroup group{ SharedVarGroup::Argument }; - std::string uniq_name{}; // Unique name, also the final variable name used in the built code - ClKernelArgDescriptor desc{}; // Automatic variables can and should still be described using this struct - bool is_empty() const - { - return arg_id == g_arg_placeholder; - } - }; - - class Arguments - { - public: - Arguments() = default; - void add_var(const SharedVar &var) - { - ARM_COMPUTE_ERROR_ON(var.group != SharedVarGroup::Argument); - _vars.push_back(var); - } - std::vector get_all_vars() const - { - return _vars; - } - std::vector get_src_vars() const - { - std::vector src_vars; - std::copy_if(_vars.begin(), _vars.end(), std::back_inserter(src_vars), [](const SharedVar & var) - { - return var.io == SharedVarIO::Input; - }); - return src_vars; - } - SharedVar get_dst_var() const - { - std::vector dst_vars; - std::copy_if(_vars.begin(), _vars.end(), std::back_inserter(dst_vars), [](const SharedVar & var) - { - return var.io == SharedVarIO::Output; - }); - ARM_COMPUTE_ERROR_ON(dst_vars.size() != 1); - return dst_vars.at(0); - } - - private: - std::vector _vars{}; - }; - - /** Create a SharedVar for a corresponding SharedVarLink (contains ArgumentID). If one has already been created for the SharedVarLink, simply return it instead of creating a new one - * - * @note: The order of insertion is important. There is one precondition: - * PRECOND: The components have been sorted topologically / is being traversed in topological order - * This ensures that all the consumer var links (Output, Automatic Links) can consume (return) the producer var links when they're referred - */ - void add(SharedVarLink var_link, SharedVarGroup group, ClKernelArgDescriptor runtime_desc, const std::string &name = "unnamed") - { - ARM_COMPUTE_ERROR_ON_MSG(var_link.is_empty(), "Non-empty SharedVarLink expected"); - if(!get(var_link).is_empty()) - { - return; - } - - auto var_id = _num_var; - std::stringstream ss; - ss << name << "_" << var_id; - const auto uniq_name = ss.str(); - SharedVar var{ var_link.arg_id, var_link.io, group, uniq_name, runtime_desc }; - - if(group == SharedVarGroup::Argument) - { - _arguments.emplace(var_id, var); - _arg_id_map.emplace(var_link.arg_id, var_id); - _num_var++; - } - else if(group == SharedVarGroup::Automatic) - { - if(_global_vars.empty()) - { - if(var_link.io == SharedVarIO::Output) - { - _global_vars.emplace(var_id, var); - _arg_id_map.emplace(var_link.arg_id, var_id); - _num_var++; - } - else - { - ARM_COMPUTE_ERROR("Component likely not traversed in topological order"); - } - } - else - { - // Associate additional SharedVarLinks with the single global shared variable - const auto global_var_id = _global_vars.begin()->first; - _arg_id_map[var_link.arg_id] = global_var_id; - } - } - else - { - ARM_COMPUTE_ERROR("Unrecognised SharedVarGroup"); - } - } - - /** Get the SharedVar associated with @p var_link - * - * @param var_link - * @return SharedVar - */ - SharedVar get(const SharedVarLink &var_link) const - { - const SharedVar empty_var{}; - if(_arg_id_map.find(var_link.arg_id) != _arg_id_map.end()) - { - const auto var_id = _arg_id_map.at(var_link.arg_id); - const auto arg_var = _arguments.find(var_id); - if(arg_var != _arguments.end()) - { - return arg_var->second; - } - else - { - return _global_vars.at(var_id); - } - } - return empty_var; - } - - /** @note The arguments are returned in the order they are added - */ - Arguments get_kernel_arguments() const - { - Arguments args{}; - for(const auto &a : _arguments) - { - args.add_var(a.second); - } - return args; - } - -private: - using VarID = int32_t; - -private: - std::map _global_vars{}; // Shared, global variable - std::map _arguments{}; - std::map _arg_id_map{}; // Track ArgumentIDs that have already been added - VarID _num_var{ 0 }; -}; - -enum class ComponentType -{ - Simple, - Complex, - Store -}; - -using ComponentID = DependencyGraph::Id; -using ComponentList = std::vector; -class IClKernelComponent -{ -public: - using Link = SharedVarLink; - using Tag = std::string; - struct TagVal - { - TagVal() = default; - TagVal(const SharedVarTable::SharedVar &var) - : value{ var.uniq_name } - { - } - - template ::value)> - TagVal(T val) - : value{ support::cpp11::to_string(val) } - { - } - - TagVal(const std::string &val) - : value{ val } - { - } - - TagVal(const char *val) - : value{ std::string(val) } - { - } - - TagVal(const DataType &data_type) - : value{ get_cl_type_from_data_type(data_type) } - { - } - - std::string value{}; - }; - using TagLUT = std::unordered_map; // Used to instantiating a code template / replacing tags -public: - IClKernelComponent(ClKernelBlueprint *blueprint) - : _blueprint(blueprint) - { - } - - ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(IClKernelComponent); - - virtual ~IClKernelComponent() = default; - virtual ComponentType get_component_type() const = 0; - virtual std::vector get_links() const = 0; - virtual std::string name() const = 0; - - // @note: some tags can be unused since they could be used only for the macros, or only for the component code - static std::string replace_tags(const std::string &code_template, const TagLUT &tags) - { - std::string replaced_code = ""; - bool scanning_pattern = false; - std::string pattern_found = ""; - for(size_t i = 0; i < code_template.size() - 1; ++i) - { - if(!scanning_pattern) - { - if(code_template[i] == '{' && code_template[i + 1] == '{') - { - i += 1; - scanning_pattern = true; - pattern_found = ""; - } - else - { - replaced_code += code_template[i]; - } - } - else - { - if(code_template[i] == '}' && code_template[i + 1] == '}') - { - i += 1; - scanning_pattern = false; - std::string err = "Pattern " + pattern_found + " not found in tags"; - ARM_COMPUTE_ERROR_ON_MSG(tags.find(pattern_found) == tags.end(), err.c_str()); - replaced_code += tags.find(pattern_found)->second.value; - } - else - { - pattern_found += code_template[i]; - } - } - } - - return replaced_code; - } - ComponentID id() const - { - return _id; - } - void set_id(ComponentID id) - { - _id = id; - } - - virtual std::set get_headers_list() const - { - return std::set {}; - } - - virtual std::string get_additional_macros() const - { - return ""; - } - - virtual std::string get_component_code() const - { - return ""; - } - - virtual Window get_window() const - { - return Window{}; - } - - /** Get the tag look-up table used to instantiate the component code. - * - * @param vtable - * @return TagLUT - */ - virtual TagLUT get_tag_lut(const SharedVarTable &vtable) const = 0; - - /** Allocate all shared variables used by the component in the @p vtable - * - * @param vtable - */ - virtual void allocate_shared_vars(SharedVarTable &vtable) const = 0; - - virtual std::string get_dst_addr_calculation() const - { - return ""; - } - - /** Generate config id of the component - * - * @return std::string - */ - virtual std::string generate_config_id() const - { - return ""; - } - - virtual CLBuildOptions generate_build_options() const - { - return CLBuildOptions{}; - } - -protected: - ClKernelBlueprint *_blueprint; - -private: - ComponentID _id{}; -}; - -using ComponentUniquePtr = std::unique_ptr; - -/** Intermediate representation of the final, complete kernel source. - */ -struct ClKernelBlueprint::Implementation -{ -public: - Implementation() = default; - ~Implementation() = default; - -public: - Status update_merge_point(ArgumentID t_id, ArgumentID merge_point) - { - return _graph.update_merge_point(t_id, merge_point); - } - - ArgumentID add_kernel_tensor(ITensorInfo *tensor_info, ArgumentID merge_point = DependencyGraph::empty_id()) - { - const auto id = _graph.add_tensor(merge_point); - if(_kernel_tensors.find(id) == _kernel_tensors.end()) - { - _kernel_tensors.insert(std::make_pair(id, tensor_info)); - } - return id; - } - - void set_tile_info(const TileDescriptor &tile_info) - { - _tile_info = tile_info; - } - - SharedVarGroup group(ArgumentID arg_id) const - { - if(arg_id == g_arg_placeholder) - { - // In case of placeholder, don't care what we return; - return SharedVarGroup::Argument; - } - return _shared_var_group_lut.at(arg_id); - } - - void validate_arg_ids(std::initializer_list args) const - { - for(const auto arg_id : args) - { - ARM_COMPUTE_UNUSED(arg_id); - ARM_COMPUTE_ERROR_ON_MSG(_kernel_tensors.find(arg_id) == _kernel_tensors.end() && arg_id != g_arg_placeholder, - "Trying to use an argument that hasn't been added to the blueprint"); - } - } - - void add_component(ComponentUniquePtr component) - { - if(component->get_component_type() == ComponentType::Complex) - { - ++_num_complex_components; - ARM_COMPUTE_ERROR_ON_MSG(_num_complex_components > 1, "Only one complex component per blueprint is supported."); - } - - // Get an unique ID for the component that's being added - std::vector src_tensors; - std::vector dst_tensors; - for(const auto &link : component->get_links()) - { - if(link.is_empty()) - { - continue; - } - if(link.io == SharedVarIO::Input) - { - src_tensors.push_back(link.arg_id); - } - else - { - dst_tensors.push_back(link.arg_id); - } - } - const ComponentID component_id = _graph.add_operator(src_tensors, dst_tensors).second; - component->set_id(component_id); - - // Add this component to the component graph. Don't connect it to anything yet - _component_graph.emplace(component_id, ComponentList{}); - - // For every { arg_id, arg_io } passed along with this component... - for(const auto &link : component->get_links()) - { - const ArgumentID &arg_id = link.arg_id; - const SharedVarIO &arg_io = link.io; - - // Add the arg_id to the map describing the input/output relationship between an argument and the components that use it, if it doesn't yet exist there - if(_outgoing_components.find(arg_id) == _outgoing_components.end()) - { - _outgoing_components.emplace(arg_id, ComponentList{}); - _incoming_components.emplace(arg_id, ComponentList{}); - } - - // If it's an input argument, connect any other component that has it as output with this component - // Additionally, set this component as one that treats this argument as "Input" (append to index 0) - // This is used so that we keep track of whether two components use the same argument, one as input and one as output - if(arg_io == SharedVarIO::Input) - { - for(const auto &prev_component : _incoming_components[arg_id]) - { - _component_graph[prev_component].push_back(component_id); - } - - _outgoing_components[arg_id].push_back(component_id); - } - // If it's an output argument, connect this component with any other component that has it as input - // Additionally, set this component as one that treats this argument as "Output" (append to index 1) - else - { - if(component->get_component_type() == ComponentType::Store) - { - ARM_COMPUTE_ERROR_ON_MSG(_dst_id >= 0, "Trying to add more than one dst argument to the graph"); - _dst_id = arg_id; - } - - for(const auto &subseq_component : _outgoing_components[arg_id]) - { - _component_graph[component_id].push_back(subseq_component); - } - - _incoming_components[arg_id].push_back(component_id); - } - } - - ARM_COMPUTE_ERROR_ON_MSG(_graph.get_root_ops().size() != 1, "Trying to add more than one root to the graph"); - - // Finally, add this component to the dictionary of components - _components.insert(std::make_pair(component_id, std::move(component))); - } - - std::string build_kernel_name() const - { - std::string name = ""; - - traverse([&](std::stack stack) - { - name += _components.find(stack.top())->second->name() + (stack.size() > 2 ? "___" : ""); - }); - - return name; - } - - std::string build_code() - { - ARM_COMPUTE_ERROR_ON_MSG(_graph_root == -1, "No root found in the component graph"); - - // These data structures will hold the data from all the components in the blueprint - std::set headers_list{}; - std::set additional_macros{}; - std::vector component_codes{}; // vector because order matters - - // Step 1: Allocate all kernel argument shared variables before generating the component code - auto stack = topological_sort(); - while(!stack.empty()) - { - auto curr_component_id = stack.top(); - auto &curr_component = _components.find(curr_component_id)->second; - - curr_component->allocate_shared_vars(_vtable); - - stack.pop(); - } - // Step 2: Generate component codes - stack = topological_sort(); - while(!stack.empty()) - { - auto curr_component_id = stack.top(); - auto &curr_component = _components.find(curr_component_id)->second; - - auto curr_headers_list = curr_component->get_headers_list(); - auto curr_additional_macros = curr_component->get_additional_macros(); - auto curr_component_code = curr_component->get_component_code(); - const auto var_lut = curr_component->get_tag_lut(_vtable); // Ideally can be merged with get_component_code once we have finer-grained code generation technique - component_codes.push_back(IClKernelComponent::replace_tags(curr_component_code, var_lut)); - - headers_list.insert(curr_headers_list.begin(), curr_headers_list.end()); - if(!curr_additional_macros.empty()) // Some components might not have any - { - additional_macros.insert(IClKernelComponent::replace_tags(curr_additional_macros, var_lut)); - } - - stack.pop(); - } - - // Step 3: Assemble the data gathered by traversing the graph into the string "code" - std::string code = ""; - - for(auto &header : headers_list) - { -#if defined(EMBEDDED_KERNELS) - code += CLKernelLibrary::get().get_program(header).first; -#else // defined(EMBEDDED_KERNELS) - code += "#include \"" + header + "\"\n"; -#endif // defined(EMBEDDED_KERNELS) - } - - for(auto ¯os : additional_macros) - { - code += macros; - } - - code += generate_kernel_signature(_vtable.get_kernel_arguments()); - - code += "\n{\n\n"; - - code += " //------------------ START KERNEL_BUILDER_COORDINATE ---------------------\n\n"; - code += generate_global_section(); - code += " //------------------ END KERNEL_BUILDER_COORDINATE ---------------------\n"; - - for(auto &component_code : component_codes) - { - code += component_code; - } - - code += "}\n"; - - return code; - } - - /** Generate config id of the entire kernel - * - * Format: kernel_name--comp0_config_id--comp1_config_id--... - * - * @return std::string - */ - std::string build_config_id() const - { - std::string config_id = build_kernel_name(); - traverse([&](std::stack stack) - { - config_id += "--" + _components.find(stack.top())->second->generate_config_id() + "--"; - }); - - return config_id; - } - - CLBuildOptions build_options() const - { - CLBuildOptions build_opts{}; - - traverse([&](std::stack stack) - { - build_opts.add_options(_components.find(stack.top())->second->generate_build_options().options()); - }); - - return build_opts; - } - - TileDescriptor get_tile_info() const - { - return _tile_info; - } - - // Get the global execution window, i.e. that of the root component - Window get_execution_window() const - { - ARM_COMPUTE_ERROR_ON_MSG(_graph_root == -1, "No root found in the component graph"); - ARM_COMPUTE_ERROR_ON_MSG(_dst_id == -1, "Destination Tensor Id should be ready before calling get_execution_window()"); - - return _components.find(_graph_root)->second->get_window(); - } - - ArgumentID get_dst_id() const - { - return _dst_id; - } - - ClKernelArgList get_arguments() const - { - ClKernelArgList arg_list{}; - for(const auto &arg_var : _vtable.get_kernel_arguments().get_all_vars()) - { - arg_list[arg_var.desc.arg_id] = arg_var.desc; - } - return arg_list; - } - - /** Get the arguments as shared vars from the vtable - * - * @return SharedVarTable::Arguments - */ - SharedVarTable::Arguments get_argument_shared_vars() const - { - return _vtable.get_kernel_arguments(); - } - - const ITensorInfo *get_kernel_argument_info(const ArgumentID id) const - { - auto it = _kernel_tensors.find(id); - if(it != _kernel_tensors.end()) - { - return it->second; - } - return nullptr; - } - - ITensorInfo *get_kernel_argument_info(const ArgumentID id) - { - auto it = _kernel_tensors.find(id); - if(it != _kernel_tensors.end()) - { - return it->second; - } - return nullptr; - } - /** Finalize graph construction. Graph is expected to not mutate after being finalized - */ - void finalize() - { - cache_root_component(); - assign_shared_var_group(); - } - - DependencyGraph get_graph() const - { - return _graph; - } - -private: - void cache_root_component() - { - const auto roots = _graph.get_root_ops(); - ARM_COMPUTE_ERROR_ON_MSG(roots.size() != 1, "Trying to add more than one root to the graph"); - _graph_root = roots.at(0); - } - /** Assign the group for each shared var. Can only be performed at the end of the graph construction, before building - */ - void assign_shared_var_group() - { - for(const auto &tensor : _kernel_tensors) - { - const auto tensor_id = tensor.first; - if(_graph.is_src_tensor(tensor_id) || _graph.is_dst_tensor(tensor_id)) - { - _shared_var_group_lut[tensor_id] = SharedVarGroup::Argument; - } - else - { - _shared_var_group_lut[tensor_id] = SharedVarGroup::Automatic; - } - } - } - - void topological_sort_utility(ComponentID component_id, std::unordered_set &visited, std::stack &stack) const - { - visited.insert(component_id); - - for(auto connected_component : _component_graph.find(component_id)->second) - { - if(visited.find(connected_component) == visited.end()) - { - topological_sort_utility(connected_component, visited, stack); - } - } - - stack.push(component_id); - } - - std::stack topological_sort() const - { - std::stack stack{}; - std::unordered_set visited{}; - - topological_sort_utility(_graph_root, visited, stack); - - return stack; - } - - void traverse(const std::function)> &func) const - { - std::stack stack = topological_sort(); - - while(!stack.empty()) - { - func(stack); - stack.pop(); - } - } - - std::string generate_argument_declaration(const SharedVarTable::SharedVar &var) const - { - ARM_COMPUTE_ERROR_ON_MSG(var.group != SharedVarGroup::Argument, "An argument declaration can only be generated from a kernel argument"); - std::string code; - switch(var.desc.tensor_arg_type) - { - case ClKernelTensorArgType::Vector: - { - code += "\n VECTOR_DECLARATION(" + var.uniq_name + ")"; - break; - } - case ClKernelTensorArgType::Image: - { - code += "\n IMAGE_DECLARATION(" + var.uniq_name + ")"; - break; - } - case ClKernelTensorArgType::Image_3D: - { - code += "\n IMAGE_DECLARATION(" + var.uniq_name + "),"; - code += "\n uint " + var.uniq_name + "_stride_z"; - break; - } - case ClKernelTensorArgType::Image_3D_Export_To_ClImage2D: - { - code += "\n __read_only image2d_t " + var.uniq_name + "_img,"; - code += "\n uint " + var.uniq_name + "_stride_z"; - break; - } - case ClKernelTensorArgType::Tensor_4D_t_Buffer: - { - code += "\n TENSOR4D_T(" + var.uniq_name + ", BUFFER)"; - break; - } - case ClKernelTensorArgType::Tensor_4D_t_Image: - { - code += "\n TENSOR4D_T(" + var.uniq_name + ", IMAGE)"; - break; - } - default: - { - ARM_COMPUTE_ERROR("Unsupported declaration generation for ClKernelTensorArgType"); - } - } - return code; - } - - std::string generate_kernel_signature(const SharedVarTable::Arguments &argument_list) const - { - std::string code = "\n__kernel void " + build_kernel_name() + "("; - - for(const auto &arg : argument_list.get_all_vars()) - { - code += generate_argument_declaration(arg) + ","; - } - - code[code.length() - 1] = ')'; - - return code; - } - - std::string generate_global_section() const - { - auto dst_info = get_kernel_argument_info(_dst_id); - auto dst_w = dst_info->dimension(0); - const auto tile_w = std::max(1, get_execution_window().x().step()); - const auto tile_h = std::max(1, get_execution_window().y().step()); - auto leftover_w = dst_w % tile_w; - - std::string code = ""; - code += std::string(" int cout = GET_SPATIAL_IDX(0, ") + std::to_string(tile_w) + ", " + std::to_string(leftover_w) + ");\n"; - code += std::string(" int mout = GET_SPATIAL_IDX(1, ") + std::to_string(tile_h) + ", " + "0);\n"; - code += std::string(" int bout = GET_SPATIAL_IDX(2, 1, 0);\n\n"); - - switch(_tile_info.clipping) - { - case ClippingStrategy::TOP_LEFT: - code += " const bool g_cond_x = (cout == 0);\n"; - code += " const bool g_cond_y = (mout == 0);\n"; - break; - case ClippingStrategy::TOP_RIGHT: - code += " const bool g_cond_x = ((cout + 1) * " + std::to_string(tile_w) + " >= " + std::to_string(_tile_info.boundaries.x()) + ");\n"; - code += " const bool g_cond_y = (mout == 0);\n"; - break; - case ClippingStrategy::BOTTOM_LEFT: - code += " const bool g_cond_x = (cout == 0);\n"; - code += " const bool g_cond_y = ((mout + 1) * " + std::to_string(tile_h) + " >= " + std::to_string(_tile_info.boundaries.y()) + ");\n"; - break; - case ClippingStrategy::BOTTOM_RIGHT: - code += " const bool g_cond_x = ((cout + 1) * " + std::to_string(tile_w) + " >= " + std::to_string(_tile_info.boundaries.x()) + ");\n"; - code += " const bool g_cond_y = ((mout + 1) * " + std::to_string(tile_h) + " >= " + std::to_string(_tile_info.boundaries.y()) + ");\n"; - break; - default: - ARM_COMPUTE_ERROR("Unsupported clipping strategy"); - } - - return code; - } - - TileDescriptor _tile_info{}; - - int32_t _num_complex_components{}; - - ArgumentID _dst_id{ -1 }; // Initially set to -1, which means the graph has no dst yet, since node IDs are positive numbers - - DependencyGraph _graph{}; - - // Tensors, components and IDs with corresponding ptrs (except intermediate) - std::unordered_map _components{}; - std::unordered_map _kernel_tensors{}; - // Argument group lookup. Can be replaced by extending the ArgumentID type to include group info - std::unordered_map _shared_var_group_lut{}; - - // Tracks all variables (e.g.: kernel arguments, kernel "global variables") - SharedVarTable _vtable{}; - - // Component directed graph (represented by an adjecency list of Component IDs) - // This is used to understand the ordering and bindings between components when generating the kernel - // It's initially set to -1 which means the graph has no root yet, since node IDs are positive numbers - ComponentID _graph_root{ -1 }; - std::unordered_map _component_graph{}; - - // Additional data structures used to define the relationships between components and arguments - // For each argument, it contains the list of components that consider it as an incoming or an outgoing argument - // E.g. tensor0 -> component0 -> tensor1 - // _outgoing_components[tensor0] == {component0} (component0 is the outgoing component of tensor0. Component0 treats tensor0 as an input tensor) - // _incoming_components[tensor1] == {component0} (component0 is the incoming component of tensor1. Component1 treats tensor1 as an output tensor) - std::unordered_map _outgoing_components{}; - std::unordered_map _incoming_components{}; -}; - -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute -#endif //ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMMON_H -#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */ diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Utils.h b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Utils.h deleted file mode 100644 index 1b10050559..0000000000 --- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Utils.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION - -#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_UTILS -#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_UTILS - -#include "src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -inline ::std::ostream &operator<<(::std::ostream &os, const CLBuildOptions::StringSet &build_opts) -{ - for(const auto &opt : build_opts) - { - os << opt << ","; - } - return os; -} -inline ::std::ostream &operator<<(::std::ostream &os, const CLBuildOptions &cl_build_opts) -{ - os << cl_build_opts.options(); - return os; -} - -inline std::string to_string(const CLBuildOptions &cl_build_opts) -{ - std::stringstream str; - str << cl_build_opts; - return str.str(); -} -inline ::std::ostream &operator<<(::std::ostream &os, const ClKernelCode &code) -{ - os << "name: " << code.name << std::endl; - os << "code: " << code.code << std::endl; - os << "build_opts: " << code.build_options << std::endl; - return os; -} -inline std::string to_string(const ClKernelCode &code) -{ - std::stringstream str; - str << code; - return str.str(); -} - -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute - -#endif //ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_UTILS -#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */ \ No newline at end of file diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.cpp b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.cpp deleted file mode 100644 index 811cd79811..0000000000 --- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.cpp +++ /dev/null @@ -1,409 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION - -#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.h" - -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "src/core/CL/ICLKernel.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" -#include "src/gpu/cl/kernels/gemm/ClGemmHelpers.h" - -#include "arm_compute/runtime/CL/CLScheduler.h" -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -ComponentType ClDirectConvolutionKernelComponent::get_component_type() const -{ - return ComponentType::Complex; -} - -std::set ClDirectConvolutionKernelComponent::get_headers_list() const -{ - return std::set { "helpers.h", "tile_helpers.h" }; -} - -Window ClDirectConvolutionKernelComponent::get_window() const -{ - const auto src_info = _blueprint->impl().get_kernel_argument_info(_src.arg_id); - const auto weight_info = _blueprint->impl().get_kernel_argument_info(_weight.arg_id); - auto dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id()); - - // Get dst shape - PadStrideInfo pad_stride_info - { - static_cast(_desc.conv2d.stride.x()), - static_cast(_desc.conv2d.stride.y()), - static_cast(_desc.conv2d.pad.left), - static_cast(_desc.conv2d.pad.right), - static_cast(_desc.conv2d.pad.top), - static_cast(_desc.conv2d.pad.bottom), - DimensionRoundingType::FLOOR /*default rounding type*/ - }; - TensorShape output_shape = misc::shape_calculator::compute_deep_convolution_shape(*src_info, *weight_info, pad_stride_info); - - // Output auto initialization if not yet initialized - auto_init_if_empty(*dst_info, output_shape, - 1, - src_info->data_type(), - src_info->quantization_info()); - - const unsigned int vec_size = std::min(static_cast(dst_info->tensor_shape()[0]), 4u); - const unsigned int num_rows = (dst_info->tensor_shape()[0] > 16) ? ((src_info->data_type() == DataType::F32) ? 2U : 4U) : 1U; - // const unsigned int num_rows = 1; - // const unsigned int vec_size = tile_info.tile_dims.x(); - // const unsigned int num_rows = tile_info.tile_dims.y(); - - // Create and configure kernel window - Window win = calculate_max_window(output_shape, Steps(vec_size, num_rows)); - - const size_t dim_y_collapsed = ceil_to_multiple(output_shape[1] * output_shape[2], num_rows); - win.set(Window::DimY, Window::Dimension(0, dim_y_collapsed, num_rows)); - win.set(Window::DimZ, Window::Dimension(0, output_shape.total_size_upper(3), 1)); - - return win; -} - -std::string ClDirectConvolutionKernelComponent::get_additional_macros() const -{ - return R"_()_"; // no macros -} - -std::string ClDirectConvolutionKernelComponent::get_component_code() const -{ - const auto src_info = _blueprint->impl().get_kernel_argument_info(_src.arg_id); - const auto bias_info = _blueprint->impl().get_kernel_argument_info(_bias.arg_id); - - ARM_COMPUTE_ERROR_ON_MSG(src_info->data_layout() != DataLayout::NHWC, "Only NHWC data layout is supported by this component."); - - const auto channel_idx = get_data_layout_dimension_index(src_info->data_layout(), DataLayoutDimension::CHANNEL); - const auto k0 = adjust_vec_size(is_data_type_quantized(src_info->data_type()) ? 16u : 8u, src_info->dimension(channel_idx)); - const bool leftover_loop = (src_info->dimension(channel_idx) % k0) != 0; - - std::string code = R"_( - //------------------ START KERNEL {{meta_kernel_id}} --------------------- - // IN_0(src) {{src}} - // IN_1(wei) {{weight}} - )_"; - if(bias_info != nullptr) - { - code += R"_( - // IN_1(bia) {{bias}} - )_"; - } - code += R"_( - // OUT(dst, accum) {{dst}} - - // Initialize the accumulators - TILE({{ACC_DATA_TYPE}}, M0, N0, {{dst}}); - { - // All the tensor dimensions are passed at compile time. - // In case of dynamic tensor support, the following dimensions should be passed as function argument. - #define _IWEI_WIDTH {{WEI_WIDTH}} - #define _IWEI_HEIGHT {{WEI_HEIGHT}} - #define _ISRC_WIDTH {{src}}_w - #define _ISRC_HEIGHT {{src}}_h - #define _ISRC_CHANNELS {{src}}_c - #define _IDST_WIDTH {{arg_dst}}_w - #define _IDST_HEIGHT {{arg_dst}}_h - #define _IDST_CHANNELS {{arg_dst}}_c - #define _IY_MULTIPLIER (_IWEI_WIDTH * _IWEI_HEIGHT) - - // .v = access the whole vector (OpenCL vector) - // .s[x] = access the vector element at position x (scalar access) - TILE(int, M0, 1, xi); - TILE(int, M0, 1, yi); - - // Convert the linear index to coordinate - LOOP_UNROLLING(int, i, 0, 1, M0, - { - xi[i].v = ((mout + i) % _IDST_WIDTH) * {{STRIDE_X}}; - yi[i].v = ((mout + i) / _IDST_WIDTH) * {{STRIDE_Y}}; - xi[i].v -= {{PAD_LEFT}}; - yi[i].v -= {{PAD_TOP}}; - }) - - LOOP_UNROLLING(int, i, 0, 1, M0, - { - {{dst}}[i].v = 0; - }) - - for(int i = 0; i < (_IWEI_WIDTH * _IWEI_HEIGHT); ++i) - { - int ck = 0; - int xk = i % _IWEI_WIDTH; - int yk = i / _IWEI_HEIGHT; - - int k = 0; - for(; k <= (_ISRC_CHANNELS - K0); k += K0) - { - TILE({{SRC_DATA_TYPE}}, M0, K0, a); - TILE({{WEI_DATA_TYPE}}, N0, K0, b); - - LOOP_UNROLLING(int, i, 0, 1, M0, - { - a[i].v = {{ZERO_VALUE}}; - }) - - // Load tile from the src tensor - T_LOAD_NHWC_INDIRECT({{SRC_DATA_TYPE}}, M0, K0, {{SRC_TENSOR_TYPE}}, {{src}}, bout, yk, xk, ck, _ISRC_WIDTH, _ISRC_HEIGHT, {{src}}_stride_y, xi, yi, a); - - // Load tile from the weights tensor - T_LOAD({{WEI_DATA_TYPE}}, N0, K0, {{WEI_TENSOR_TYPE}}, {{weight}}, ck, cout * _IY_MULTIPLIER + i, _IY_MULTIPLIER, {{weight}}_stride_y, b); - - // Compute the matrix multiplication between two tiles - T_MMUL({{SRC_DATA_TYPE}}, {{WEI_DATA_TYPE}}, {{ACC_DATA_TYPE}}, M0, N0, K0, NT, T, a, b, {{dst}}); - - ck += K0; - } - - // We voluntarily use SRC_CHANNELS rather than _DSRC_CHANNELS - // This #if directive should be removed in case of dynamic tensor support - )_"; - - if(leftover_loop) - { - code += R"_( - // Left-over accumulations - for(; k < _ISRC_CHANNELS; ++k) - { - TILE({{SRC_DATA_TYPE}}, M0, 1, a); - TILE({{WEI_DATA_TYPE}}, N0, 1, b); - - LOOP_UNROLLING(int, i, 0, 1, M0, - { - a[i].v = {{ZERO_VALUE}}; - }) - - // Load tile from the src tensor - T_LOAD_NHWC_INDIRECT({{SRC_DATA_TYPE}}, M0, 1, {{SRC_TENSOR_TYPE}}, {{src}}, bout, yk, xk, ck, _ISRC_WIDTH, _ISRC_HEIGHT, {{src}}_stride_y, xi, yi, a); - - // Load tile from the weights tensor - // The T_LOAD for the left-over elements can only use BUFFER because we load one element per iteration - T_LOAD({{WEI_DATA_TYPE}}, N0, 1, BUFFER, {{weight}}, ck, cout * _IY_MULTIPLIER + i, _IY_MULTIPLIER, {{weight}}_stride_y, b); - - // Compute the matrix multiplication between two tiles - T_MMUL({{SRC_DATA_TYPE}}, {{WEI_DATA_TYPE}}, {{ACC_DATA_TYPE}}, M0, N0, 1, NT, T, a, b, {{dst}}); - - ++ck; - } - )_"; - } - - code += R"_( - #undef _I_WEI_WIDTH - #undef _I_WEI_HEIGHT - #undef _ISRC_WIDTH - #undef _ISRC_HEIGHT - #undef _ISRC_CHANNELS - #undef _IDST_WIDTH - #undef _IDST_HEIGHT - #undef _IDST_CHANNELS - #undef _IY_MULTIPLIER - - } - )_"; - - if(bias_info != nullptr) - { - code += R"_( - TILE({{BIA_DATA_TYPE}}, 1, N0, bias0); - - T_LOAD({{BIA_DATA_TYPE}}, 1, N0, BUFFER, {{bias}}, cout, 0, 1, 0, bias0); - - // c = c + bias[broadcasted] - T_ELTWISE_BROADCAST_ADD_X({{ACC_DATA_TYPE}}, M0, N0, {{dst}}, bias0, {{dst}}); - )_"; - } - - code += R"_( - } -//------------------ END KERNEL {{meta_kernel_id}} --------------------- - )_"; - return code.c_str(); -} - -bool export_to_cl_image_support(const ITensorInfo *tensor, GPUTarget gpu_target, DataLayout data_layout) -{ - if(tensor->tensor_shape()[0] % 4 || (data_layout != DataLayout::NHWC)) - { - return false; - } - - // If not floating point - if(!is_data_type_float(tensor->data_type())) - { - return false; - } - - if(gpu_target == GPUTarget::G71 || get_arch_from_target(gpu_target) == GPUTarget::MIDGARD) - { - return false; - } - - // Check if the cl_khr_image2d_from_buffer extension is supported on the target platform - if(!image2d_from_buffer_supported(CLKernelLibrary::get().get_device())) - { - return false; - } - - // Check cl image pitch alignment - if(get_cl_image_pitch_alignment(CLKernelLibrary::get().get_device()) == 0) - { - return false; - } - - const size_t image_w = tensor->tensor_shape()[0] / 4; - const size_t image_h = tensor->tensor_shape()[1] * tensor->tensor_shape()[2] * tensor->tensor_shape()[3]; - const size_t max_image_w = CLKernelLibrary::get().get_device().getInfo(); - const size_t max_image_h = CLKernelLibrary::get().get_device().getInfo(); - - if(image_w > max_image_w || image_h > max_image_h) - { - return false; - } - - return true; -} - -CLBuildOptions ClDirectConvolutionKernelComponent::generate_build_options() const -{ - const auto src_info = _blueprint->impl().get_kernel_argument_info(_src.arg_id); - auto weight_info = _blueprint->impl().get_kernel_argument_info(_weight.arg_id); - const auto dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id()); - // const auto tile_info = _blueprint->impl().get_tile_info(); - - const unsigned int channel_idx = get_data_layout_dimension_index(src_info->data_layout(), DataLayoutDimension::CHANNEL); - const DataType data_type = src_info->data_type(); - const GPUTarget gpu_target = CLScheduler::get().target(); - - const unsigned int n0 = _blueprint->impl().get_execution_window().x().step(); - const unsigned int m0 = _blueprint->impl().get_execution_window().y().step(); - const unsigned int k0 = adjust_vec_size(is_data_type_quantized(data_type) ? 16u : 8u, src_info->dimension(channel_idx)); - const unsigned int partial_store_n0 = dst_info->dimension(0) % n0; - const bool export_to_cl_image = export_to_cl_image_support(weight_info, gpu_target, src_info->data_layout()); - - // Update the padding for the weights tensor if we can export to cl_image - if(export_to_cl_image) - { - arm_compute::opencl::kernels::gemm::update_padding_for_cl_image(weight_info); - } - - CLBuildOptions build_opts{}; - build_opts.add_option("-cl-fast-relaxed-math"); - build_opts.add_option("-DIS_TILED"); - build_opts.add_option("-DN0=" + support::cpp11::to_string(n0)); - build_opts.add_option("-DM0=" + support::cpp11::to_string(m0)); - build_opts.add_option("-DK0=" + support::cpp11::to_string(k0)); - build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0)); - - return build_opts; -} - -void ClDirectConvolutionKernelComponent::allocate_shared_vars(SharedVarTable &vtable) const -{ - const auto src_info = _blueprint->impl().get_kernel_argument_info(_src.arg_id); - const auto weight_info = _blueprint->impl().get_kernel_argument_info(_weight.arg_id); - - vtable.add(_src, _blueprint->impl().group(_src.arg_id), ClKernelArgDescriptor(_src.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "src"); - - const GPUTarget gpu_target = CLScheduler::get().target(); - const bool export_to_cl_image = export_to_cl_image_support(weight_info, gpu_target, src_info->data_layout()); - const ClKernelTensorArgType weight_type = export_to_cl_image ? ClKernelTensorArgType::Tensor_4D_t_Image : ClKernelTensorArgType::Tensor_4D_t_Buffer; - vtable.add(_weight, _blueprint->impl().group(_weight.arg_id), ClKernelArgDescriptor(_weight.arg_id, weight_type), "weight"); - - if(!_bias.is_empty()) // optional bias - { - vtable.add(_bias, _blueprint->impl().group(_bias.arg_id), ClKernelArgDescriptor(_bias.arg_id, ClKernelTensorArgType::Vector), "bias"); - } - vtable.add(_dst, _blueprint->impl().group(_dst.arg_id), ClKernelArgDescriptor(_dst.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "dst"); -} - -ClDirectConvolutionKernelComponent::TagLUT ClDirectConvolutionKernelComponent::get_tag_lut(const SharedVarTable &vtable) const -{ - TagLUT lut{}; - - const auto src_info = _blueprint->impl().get_kernel_argument_info(_src.arg_id); - const auto weight_info = _blueprint->impl().get_kernel_argument_info(_weight.arg_id); - const auto bias_info = _blueprint->impl().get_kernel_argument_info(_bias.arg_id); - - // Arguments and global shared variables - lut["src"] = vtable.get(_src); - lut["weight"] = vtable.get(_weight); - - if(!_bias.is_empty()) // optional bias - { - lut["bias"] = vtable.get(_bias); - lut["BIA_DATA_TYPE"] = get_cl_type_from_data_type(bias_info->data_type()); - } - lut["dst"] = vtable.get(_dst); - - const auto dst_argument = _blueprint->impl().get_argument_shared_vars().get_dst_var(); - lut["arg_dst"] = dst_argument.uniq_name; - - // Local build options - lut["meta_kernel_id"] = id(); - lut["ACC_DATA_TYPE"] = src_info->data_type(); - lut["SRC_DATA_TYPE"] = src_info->data_type(); - lut["WEI_DATA_TYPE"] = weight_info->data_type(); - - lut["SRC_TENSOR_TYPE"] = "BUFFER"; - switch(vtable.get(_weight).desc.tensor_arg_type) - { - case ClKernelTensorArgType::Image_Export_To_ClImage2D: - case ClKernelTensorArgType::Image_3D_Export_To_ClImage2D: - case ClKernelTensorArgType::Tensor_4D_t_Image: - { - lut["WEI_TENSOR_TYPE"] = "IMAGE"; - break; - } - default: - { - lut["WEI_TENSOR_TYPE"] = "BUFFER"; - break; - } - } - const auto width_idx = get_data_layout_dimension_index(src_info->data_layout(), DataLayoutDimension::WIDTH); - const auto height_idx = get_data_layout_dimension_index(src_info->data_layout(), DataLayoutDimension::HEIGHT); - lut["WEI_WIDTH"] = weight_info->dimension(width_idx); - lut["WEI_HEIGHT"] = weight_info->dimension(height_idx); - - lut["STRIDE_X"] = _desc.conv2d.stride.x(); - lut["STRIDE_Y"] = _desc.conv2d.stride.y(); - - lut["PAD_LEFT"] = _desc.conv2d.pad.left; - lut["PAD_TOP"] = _desc.conv2d.pad.top; - - lut["ZERO_VALUE"] = 0; - - return lut; -} -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute -#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */ \ No newline at end of file diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.h b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.h deleted file mode 100644 index 5babdbab51..0000000000 --- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION - -#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLDIRECTCONVOLUTIONKERNELCOMPONENT_H -#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLDIRECTCONVOLUTIONKERNELCOMPONENT_H - -#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h" - -#include "utils/TypePrinter.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -class ClDirectConvolutionKernelComponent : public IClKernelComponent -{ -public: - ClDirectConvolutionKernelComponent(ClKernelBlueprint *blueprint, const ClDirectConv2dKernelDescriptor &desc, - const Link &src, const Link &weight, const Link &dst, const Link &bias = Link{}) - : IClKernelComponent(blueprint), _desc{ desc }, _src{ src }, _weight{ weight }, _bias{ bias }, _dst{ dst } - { - } - - ComponentType get_component_type() const override; - std::set get_headers_list() const override; - std::string get_additional_macros() const override; - std::string get_component_code() const override; - Window get_window() const override; - ClKernelArgList get_args(); - CLBuildOptions generate_build_options() const override; - - virtual std::vector get_links() const override - { - return { _src, _weight, _bias, _dst }; - } - - virtual TagLUT get_tag_lut(const SharedVarTable &vtable) const override; - virtual void allocate_shared_vars(SharedVarTable &vtable) const override; - - virtual std::string name() const override - { - return "direct_convolution_" + to_string(_blueprint->impl().get_kernel_argument_info(_src.arg_id)->data_layout()) + "_" + std::to_string(id()); - } - -private: - ClDirectConv2dKernelDescriptor _desc{}; - Link _src{}; - Link _weight{}; - Link _bias{}; - Link _dst{}; -}; - -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute -#endif // ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLDIRECTCONVOLUTIONKERNELCOMPONENT_H -#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */ \ No newline at end of file diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.cpp b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.cpp deleted file mode 100644 index e2eba68a63..0000000000 --- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.cpp +++ /dev/null @@ -1,266 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION - -#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Validate.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -ComponentType ClElementwiseKernelComponent::get_component_type() const -{ - return ComponentType::Simple; -} - -std::set ClElementwiseKernelComponent::get_headers_list() const -{ - return std::set { "common/experimental/gemm_fused_post_ops/fp_mixed_precision_helpers.h", "tile_helpers.h" }; -} - -Window ClElementwiseKernelComponent::get_window() const -{ - const ITensorInfo *lhs_info = _blueprint->impl().get_kernel_argument_info(_lhs.arg_id); - const ITensorInfo *rhs_info = _blueprint->impl().get_kernel_argument_info(_rhs.arg_id); - ITensorInfo *dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id()); - - ARM_COMPUTE_ERROR_ON_NULLPTR(lhs_info, rhs_info, dst_info); - - const std::pair broadcast_pair = ITensorInfo::broadcast_shape_and_valid_region(*lhs_info, *rhs_info); - const TensorShape &out_shape = broadcast_pair.first; - - auto_init_if_empty(*dst_info, out_shape, 1, lhs_info->data_type()); - - TensorShape output_shape = dst_info->tensor_shape(); - // Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) and upper dimensions unchanged - // This is in line with the collapsing convention used by Conv2d - output_shape.collapse(2U, 1U); - const unsigned int vector_size_byte_opencl = 16; - const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / dst_info->element_size(), dst_info->dimension(0)); - Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration)); - - return win; -} - -std::string ClElementwiseKernelComponent::get_component_code() const -{ - std::string code; - const bool is_root = _blueprint->impl().group(_lhs.arg_id) == SharedVarGroup::Argument && _blueprint->impl().group(_rhs.arg_id) == SharedVarGroup::Argument; - - if(is_root) - { - return R"_( - //------------------ START KERNEL {{meta_kernel_id}} ELTWISE_OP --------------------- - // IN_0(LHS) {{lhs}} - // IN_1(RHS) {{rhs}} - // OUT(dst, accum) {{dst}} - - // dst = lhs + rhs (mix-precision, broadcast, boundary aware) - TILE({{DATA_TYPE}}, M0, N0, {{dst}}); - { - TILE({{DATA_TYPE}}, M0, N0, lhs_tile); - TILE({{DATA_TYPE}}, M0, N0, rhs_tile); - - // Since mout maps to dimensions 1 (y) and dimension 2 (z) of the input tensor because of the collapsed window, bout maps to dimension 3 (w) - {{lhs}}_offset_first_element_in_bytes += bout * {{lhs}}_stride_w; - {{rhs}}_offset_first_element_in_bytes += bout * {{rhs}}_stride_w; - - T_LOAD({{DATA_TYPE}}, M0, N0, BUFFER, {{lhs}}, cout, mout, 1, {{lhs}}_stride_y, lhs_tile); - T_LOAD({{DATA_TYPE}}, {{rhs_m0}}, {{rhs_n0}}, BUFFER, {{rhs}}, {{rhs_start_x}}, {{rhs_start_y}}, 1, {{rhs}}_stride_y, rhs_tile); - -#if defined(IS_BROADCAST) - T_ELTWISE_BROADCAST_{{ELTWISE_OP}}_X({{DATA_TYPE}}, M0, N0, lhs_tile, rhs_tile, {{dst}}); -#else // !defined(IS_BROADCAST) - T_ELTWISE_{{ELTWISE_OP}}({{DATA_TYPE}}, M0, N0, lhs_tile, rhs_tile, {{dst}}); -#endif // defined(IS_BROADCAST) - - } - //------------------ END KERNEL {{meta_kernel_id}} ELTWISE_OP --------------------- -)_"; - } - else - { - return R"_( - //------------------ START KERNEL {{meta_kernel_id}} ELTWISE_OP --------------------- - // IN_0/Out(Accumulator) {{acc}} - // IN_1(Addend) {{addend}} - - // acc = addend + acc (mix-precision, broadcast, boundary aware) - { - TILE({{DATA_TYPE}}, M0, N0, addend_tile); - - T_LOAD({{DATA_TYPE}}, {{rhs_m0}}, {{rhs_n0}}, BUFFER, {{addend}}, {{rhs_start_x}}, {{rhs_start_y}}, 1, {{addend}}_stride_y, addend_tile); - -#if defined(IS_BROADCAST) - T_ELTWISE_BROADCAST_{{ELTWISE_OP}}_X({{DATA_TYPE}}, M0, N0, {{acc}}, addend_tile, {{acc}}); -#else // !defined(IS_BROADCAST) - T_ELTWISE_{{ELTWISE_OP}}({{DATA_TYPE}}, M0, N0, {{acc}}, addend_tile, {{acc}}); -#endif // defined(IS_BROADCAST) - } - //------------------ END KERNEL {{meta_kernel_id}} ELTWISE_OP --------------------- -)_"; - } -} - -CLBuildOptions ClElementwiseKernelComponent::generate_build_options() const -{ - const auto t_rhs_info = _blueprint->impl().get_kernel_argument_info(_rhs.arg_id); - const auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id()); - - CLBuildOptions build_opts{}; - const auto n0 = _blueprint->impl().get_execution_window().x().step(); - const auto m0 = _blueprint->impl().get_execution_window().y().step(); - const unsigned int partial_store_n0 = t_dst_info->dimension(0) % n0; - const bool is_broadcast = t_rhs_info->tensor_shape() != t_dst_info->tensor_shape(); - - build_opts.add_option("-DM0=" + support::cpp11::to_string(m0)); - build_opts.add_option("-DN0=" + support::cpp11::to_string(n0)); - build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0)); - build_opts.add_option_if(is_broadcast, "-DIS_BROADCAST"); - - return build_opts; -} - -std::string ClElementwiseKernelComponent::generate_config_id() const -{ - auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id()); - std::string config_id{}; - config_id += lower_string(string_from_data_type(t_dst_info->data_type())); - config_id += "_"; - config_id += support::cpp11::to_string(t_dst_info->dimension(0)); - config_id += "_"; - config_id += support::cpp11::to_string(t_dst_info->dimension(1)); - config_id += "_"; - config_id += lower_string(string_from_data_layout(t_dst_info->data_layout())); - return config_id; -} - -void ClElementwiseKernelComponent::allocate_shared_vars(SharedVarTable &vtable) const -{ - const bool is_root = _blueprint->impl().group(_lhs.arg_id) == SharedVarGroup::Argument && _blueprint->impl().group(_rhs.arg_id) == SharedVarGroup::Argument; - vtable.add(_lhs, _blueprint->impl().group(_lhs.arg_id), ClKernelArgDescriptor(_lhs.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "lhs"); - vtable.add(_rhs, _blueprint->impl().group(_rhs.arg_id), ClKernelArgDescriptor(_rhs.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "rhs"); - if(is_root) - { - vtable.add(_dst, _blueprint->impl().group(_dst.arg_id), ClKernelArgDescriptor(_dst.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "dst"); - } -} - -ClElementwiseKernelComponent::TagLUT ClElementwiseKernelComponent::get_tag_lut(const SharedVarTable &vtable) const -{ - TagLUT lut{}; - const auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id()); - ITensorInfo *t_addend_info = nullptr; - // Arguments and global shared variables - const bool is_root = _blueprint->impl().group(_lhs.arg_id) == SharedVarGroup::Argument && _blueprint->impl().group(_rhs.arg_id) == SharedVarGroup::Argument; - if(is_root) - { - lut["lhs"] = vtable.get(_lhs); - lut["rhs"] = vtable.get(_rhs); - lut["dst"] = vtable.get(_dst); - t_addend_info = _blueprint->impl().get_kernel_argument_info(_rhs.arg_id); - } - else - { - // Determine which link is the accumulator - Link accumulator; - Link addend; - if(_blueprint->impl().group(_lhs.arg_id) == SharedVarGroup::Automatic) - { - accumulator = _lhs; - addend = _rhs; - } - else if(_blueprint->impl().group(_rhs.arg_id) == SharedVarGroup::Automatic) - { - accumulator = _rhs; - addend = _lhs; - } - else - { - ARM_COMPUTE_ERROR("Invalid elementwise component linking"); - } - lut["acc"] = vtable.get(accumulator); - lut["addend"] = vtable.get(addend); - t_addend_info = _blueprint->impl().get_kernel_argument_info(addend.arg_id); - } - // Local build options - lut["meta_kernel_id"] = id(); - lut["DATA_TYPE"] = get_cl_type_from_data_type(t_dst_info->data_type()); - - switch(_desc.eltwise.op) - { - case ArithmeticOperation::DIV: - lut["ELTWISE_OP"] = "DIV"; - break; - case ArithmeticOperation::ADD: - lut["ELTWISE_OP"] = "ADD"; - break; - default: - ARM_COMPUTE_ERROR("Arithmetic Operation not supported"); - } - - // Set broadcast parameters - // PRE: All tensors are broadcast-compatible - const bool is_broadcast = t_addend_info->tensor_shape() != t_dst_info->tensor_shape(); - if(is_broadcast) - { - // Note that n0 maps to input tensor dimension 0, m0 maps to input dimensions 1 and 2 because of our collapse strategy - if(t_addend_info->dimension(0) == 1U && t_addend_info->dimension(1) == 1U && t_addend_info->dimension(2) == 1U) // Broadcast in X, Y, Z: collapsed rhs win [M0xN0] = [1x1] - { - lut["rhs_m0"] = "1"; - lut["rhs_n0"] = "1"; - lut["rhs_start_y"] = "0"; - lut["rhs_start_x"] = "0"; - } - else if(t_addend_info->dimension(1) == 1U && t_addend_info->dimension(2) == 1U) // Broadcast in Y and Z: collapsed rhs win [M0xN0] = [1xN] - { - lut["rhs_m0"] = "1"; - lut["rhs_n0"] = "N0"; - lut["rhs_start_y"] = "0"; - lut["rhs_start_x"] = "cout"; - } - else - { - ARM_COMPUTE_ERROR("Only support rhs broadcasting in all X, Y, Z dimensions, or just in Y and Z dimensions"); - } - } - else - { - lut["rhs_m0"] = "M0"; - lut["rhs_n0"] = "N0"; - lut["rhs_start_y"] = "mout"; - lut["rhs_start_x"] = "cout"; - } - return lut; -} -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute -#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */ \ No newline at end of file diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.h b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.h deleted file mode 100644 index f8377457d3..0000000000 --- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION - -#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLELEMENTWISEADDKERNELCOMPONENT_H -#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLELEMENTWISEADDKERNELCOMPONENT_H - -#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -class ClElementwiseKernelComponent : public IClKernelComponent -{ -public: - /** Construct a new Cl Elementwise Kernel Component object - * - * @param[in] blueprint Blueprint to which this component is added - * @param[in] desc Component descriptor - * @param[in] lhs Link to LHS tensor - * @param[in] rhs Link to RHS tensor - * @param[out] dst Link to DST tensor - * - * Support Level - * Data Type: F16, F32 - * Tensor Shape: Any shape of arbitrary dimension >= 1 and <= 4 - * Value Range: All - * Broadcasting: Only RHS tensor can be broadcasted into LHS. Only support broadcasting in dimension 1 and dimension 2 or all dimension 0, 1 and 2 - */ - ClElementwiseKernelComponent(ClKernelBlueprint *blueprint, const ClElementwiseKernelDescriptor &desc, const Link &lhs, const Link &rhs, const Link &dst) - : IClKernelComponent(blueprint), _desc{ desc }, _lhs{ lhs }, _rhs{ rhs }, _dst{ dst } - { - } - - ComponentType get_component_type() const override; - std::set get_headers_list() const override; - std::string get_component_code() const override; - Window get_window() const override; - CLBuildOptions generate_build_options() const override; - std::string generate_config_id() const override; - - virtual std::vector get_links() const override - { - return { _lhs, _rhs, _dst }; - } - - virtual TagLUT get_tag_lut(const SharedVarTable &vtable) const override; - virtual void allocate_shared_vars(SharedVarTable &vtable) const override; - - virtual std::string name() const override - { - return "eltwise_add_" + std::to_string(id()); - } - -private: - ClElementwiseKernelDescriptor _desc{}; - Link _lhs{}; - Link _rhs{}; - Link _dst{}; -}; - -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute -#endif // ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLELEMENTWISEADDKERNELCOMPONENT_H -#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */ \ No newline at end of file diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.cpp b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.cpp deleted file mode 100644 index 0a20a8f600..0000000000 --- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.cpp +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION -#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Validate.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -ComponentType ClFloorKernelComponent::get_component_type() const -{ - return ComponentType::Simple; -} -std::set ClFloorKernelComponent::get_headers_list() const -{ - return std::set { "common/experimental/gemm_fused_post_ops/fp_mixed_precision_helpers.h", "tile_helpers.h" }; -} -Window ClFloorKernelComponent::get_window() const -{ - const ITensorInfo *src_info = _blueprint->impl().get_kernel_argument_info(_src.arg_id); - ITensorInfo *dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id()); - - ARM_COMPUTE_ERROR_ON_NULLPTR(src_info, dst_info); - auto_init_if_empty(*dst_info, src_info->tensor_shape(), 1, src_info->data_type()); - - TensorShape output_shape = dst_info->tensor_shape(); - // Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) and upper dimensions unchanged - // This is in line with the collapsing convention used by Conv2d - output_shape.collapse(2U, 1U); - const unsigned int vector_size_byte_opencl = 16; - const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / dst_info->element_size(), dst_info->dimension(0)); - Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration)); - - return win; -} -std::string ClFloorKernelComponent::get_component_code() const -{ - bool is_root = _blueprint->impl().group(_src.arg_id) == SharedVarGroup::Argument; - if(is_root) - { - return R"_( - //------------------ START KERNEL {{meta_kernel_id}} FLOOR --------------------- - // IN_0(src) {{src}} - // OUT(dst, accum) {{dst}} - TILE({{DATA_TYPE}}, M0, N0, {{dst}}); - { - TILE({{DATA_TYPE}}, M0, N0, src_tile); - - // Since mout maps to dimensions 1 (y) and dimension 2 (z) of the input tensor because of the collapsed window, bout maps to dimension 3 (w) - {{src}}_offset_first_element_in_bytes += bout * {{src}}_stride_w; - T_LOAD({{DATA_TYPE}}, M0, N0, BUFFER, {{src}}, cout, mout, 1, {{src}}_stride_y, src_tile); - - T_FLOOR({{DATA_TYPE}}, M0, N0, src_tile, {{dst}}); - } - //------------------ END KERNEL {{meta_kernel_id}} FLOOR --------------------- -)_"; - } - else - { - return R"_( - //------------------ START KERNEL {{meta_kernel_id}} FLOOR --------------------- - // IN_0/Out(Accumulator) {{acc}} - // output = floor(input) - { - T_FLOOR({{DATA_TYPE}}, M0, N0, {{acc}}, {{acc}}); - } - //------------------ END KERNEL {{meta_kernel_id}} FLOOR --------------------- -)_"; - } -} -CLBuildOptions ClFloorKernelComponent::generate_build_options() const -{ - CLBuildOptions build_opts{}; - const auto n0 = _blueprint->impl().get_execution_window().x().step(); - const auto m0 = _blueprint->impl().get_execution_window().y().step(); - const auto dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id()); - const unsigned int partial_store_n0 = dst_info->dimension(0) % n0; - build_opts.add_option("-DM0=" + support::cpp11::to_string(m0)); - build_opts.add_option("-DN0=" + support::cpp11::to_string(n0)); - build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0)); - return build_opts; -} -std::string ClFloorKernelComponent::generate_config_id() const -{ - auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id()); - std::string config_id{}; - config_id += lower_string(string_from_data_type(t_dst_info->data_type())); - config_id += "_"; - config_id += support::cpp11::to_string(t_dst_info->dimension(0)); - config_id += "_"; - config_id += support::cpp11::to_string(t_dst_info->dimension(1)); - config_id += "_"; - config_id += lower_string(string_from_data_layout(t_dst_info->data_layout())); - return config_id; -} -void ClFloorKernelComponent::allocate_shared_vars(SharedVarTable &vtable) const -{ - vtable.add(_src, _blueprint->impl().group(_src.arg_id), ClKernelArgDescriptor(_src.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "src"); - vtable.add(_dst, _blueprint->impl().group(_dst.arg_id), ClKernelArgDescriptor(_dst.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "dst"); -} -ClFloorKernelComponent::TagLUT ClFloorKernelComponent::get_tag_lut(const SharedVarTable &vtable) const -{ - TagLUT lut{}; - const auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id()); - // Arguments and global shared variables - const bool is_root = _blueprint->impl().group(_src.arg_id) == SharedVarGroup::Argument; - - if(is_root) - { - lut["src"] = vtable.get(_src); - lut["dst"] = vtable.get(_dst); - } - else - { - lut["acc"] = vtable.get(_src); - } - - lut["meta_kernel_id"] = id(); - lut["DATA_TYPE"] = get_cl_type_from_data_type(t_dst_info->data_type()); - return lut; -} -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute -#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */ \ No newline at end of file diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.h b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.h deleted file mode 100644 index e791b36382..0000000000 --- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION - -#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLFLOORKERNELCOMPONENT_H -#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLFLOORKERNELCOMPONENT_H - -#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -class ClFloorKernelComponent : public IClKernelComponent -{ -public: - /** Construct a new Cl Floor Kernel Component object - * - * @param blueprint Blueprint to which this component is added - * @param src Link to SRC tensor - * @param dst Link to DST tensor - * - * Support Level - * Data Type: F16, F32 - * Tensor Shape: Any shape of arbitrary dimension >= 1 and <= 4 - * Value Range: All - */ - ClFloorKernelComponent(ClKernelBlueprint *blueprint, const Link &src, const Link &dst) - : IClKernelComponent(blueprint), _src{ src }, _dst{ dst } - { - } - - ComponentType get_component_type() const override; - std::set get_headers_list() const override; - std::string get_component_code() const override; - Window get_window() const override; - CLBuildOptions generate_build_options() const override; - std::string generate_config_id() const override; - - virtual std::vector get_links() const override - { - return { _src, _dst }; - } - - virtual TagLUT get_tag_lut(const SharedVarTable &vtable) const override; - virtual void allocate_shared_vars(SharedVarTable &vtable) const override; - - virtual std::string name() const override - { - return "floor_" + std::to_string(id()); - } - -private: - Link _src{}; - Link _dst{}; -}; - -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute -#endif // ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLFLOORKERNELCOMPONENT_H -#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */ diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClKernelComponents.h b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClKernelComponents.h deleted file mode 100644 index 3f99dd5553..0000000000 --- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClKernelComponents.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION - -#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_CLKERNELCOMPONENTS_H -#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_CLKERNELCOMPONENTS_H - -#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.h" -#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.h" -#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.h" -#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClStoreKernelComponents.h" - -#endif //ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_CLKERNELCOMPONENTS_H -#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */ \ No newline at end of file diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClStoreKernelComponents.cpp b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClStoreKernelComponents.cpp deleted file mode 100644 index 7c805d5368..0000000000 --- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClStoreKernelComponents.cpp +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION - -#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClStoreKernelComponents.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -ComponentType ClStoreBlockBoundaryAwareKernelComponent::get_component_type() const -{ - return ComponentType::Store; -} - -std::string ClStoreBlockBoundaryAwareKernelComponent::get_component_code() const -{ - return R"_( - //------------------ START KERNEL {{meta_kernel_id}} STORE --------------------- - - __global uchar *dst_addr = {{dst}}_ptr + {{dst}}_offset_first_element_in_bytes + (g_x * (uint)N0 * sizeof(DATA_TYPE)) + (COMPUTE_M0_START_ROW(g_y, M0, PARTIAL_STORE_M0) * {{dst}}_stride_y); - -#if defined(REINTERPRET_OUTPUT_AS_3D) - // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we - // multiply dst_stride_z by DEPTH_GEMM3D - dst_addr += g_z * {{dst}}_stride_z * DEPTH_GEMM3D; - -#else // defined(REINTERPRET_OUTPUT_AS_3D) - - // Add offset for batched GEMM - dst_addr += g_z * {{dst}}_stride_z; - -#endif // defined(REINTERPRET_OUTPUT_AS_3D) - - STORE_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, {{src}}, dst_addr, {{dst}}_stride_y, g_zout, PARTIAL_STORE_M0, PARTIAL_STORE_N0, g_cond_y, g_cond_x); - - //------------------ END KERNEL {{meta_kernel_id}} STORE --------------------- - -)_"; -} - -CLBuildOptions ClStoreBlockBoundaryAwareKernelComponent::generate_build_options() const -{ - auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id()); - // auto tile_info = _blueprint->impl().get_tile_info(); - - CLBuildOptions build_opts{}; - - const auto n0 = _blueprint->impl().get_execution_window().x().step(); - const auto m0 = _blueprint->impl().get_execution_window().y().step(); - const auto partial_m0 = t_dst_info->dimension(0) % m0; - const auto partial_n0 = t_dst_info->dimension(1) % n0; - - build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(t_dst_info->data_type())); - build_opts.add_option("-DM0=" + support::cpp11::to_string(m0)); - build_opts.add_option("-DN0=" + support::cpp11::to_string(n0)); - build_opts.add_option("-DPARTIAL_STORE_M0=" + support::cpp11::to_string(partial_m0)); - build_opts.add_option("-DPARTIAL_STORE_N0=" + support::cpp11::to_string(partial_n0)); - - return build_opts; -} - -void ClStoreBlockBoundaryAwareKernelComponent::allocate_shared_vars(SharedVarTable &vtable) const -{ - vtable.add(_src, _blueprint->impl().group(_src.arg_id), ClKernelArgDescriptor(_src.arg_id, ClKernelTensorArgType::Image_3D), "src"); - vtable.add(_dst, _blueprint->impl().group(_dst.arg_id), ClKernelArgDescriptor(_dst.arg_id, ClKernelTensorArgType::Image_3D), "dst"); -} - -ClStoreBlockBoundaryAwareKernelComponent::TagLUT ClStoreBlockBoundaryAwareKernelComponent::get_tag_lut(const SharedVarTable &vtable) const -{ - return { - { "meta_kernel_id", id() }, - { "src", vtable.get(_src) }, - { "dst", vtable.get(_dst) }, - }; -} - -ComponentType ClStoreIndirectWidthSelectKernelComponent::get_component_type() const -{ - return ComponentType::Store; -} - -std::string ClStoreIndirectWidthSelectKernelComponent::get_component_code() const -{ - return R"_( - //------------------ START KERNEL {{meta_kernel_id}} STORE --------------------- - { - // This also follows NHWC layout - // cout maps to global_id(0) maps to Channel - // mout maps to global_id(1) maps to Height and Weight (Collapsed Window) - // bout maps to global_id(3) maps to N / Batch - #define _IDST_WIDTH {{dst}}_w - #define _IDST_HEIGHT {{dst}}_h - TILE(uint, M0, 1, dst_indirect_y); - - // Calculate the destination indirect Y - LOOP_UNROLLING(int, i, 0, 1, M0, - { - dst_indirect_y[i].v = (uint)min(mout + i, (int)(_IDST_WIDTH * _IDST_HEIGHT) - 1); - dst_indirect_y[i].v += bout * (int)(_IDST_WIDTH * _IDST_HEIGHT); - }) - - bool x_cond = PARTIAL_N0 != 0 && get_global_id(0) == 0; - - T_STORE_INDIRECT_WIDTH_SELECT({{DST_DATA_TYPE}}, M0, N0, PARTIAL_N0, {{DST_TENSOR_TYPE}}, {{dst}}, cout, {{dst}}_stride_y, x_cond, {{src}}, dst_indirect_y); - - #undef _IDST_WIDTH - #undef _IDST_HEIGHT - //------------------ END KERNEL {{meta_kernel_id}} STORE --------------------- - } - -)_"; -} - -CLBuildOptions ClStoreIndirectWidthSelectKernelComponent::generate_build_options() const -{ - CLBuildOptions build_opts{}; - - return build_opts; -} - -void ClStoreIndirectWidthSelectKernelComponent::allocate_shared_vars(SharedVarTable &vtable) const -{ - vtable.add(_src, _blueprint->impl().group(_src.arg_id), ClKernelArgDescriptor(_src.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "src"); - vtable.add(_dst, _blueprint->impl().group(_dst.arg_id), ClKernelArgDescriptor(_dst.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "dst"); -} - -ClStoreIndirectWidthSelectKernelComponent::TagLUT ClStoreIndirectWidthSelectKernelComponent::get_tag_lut(const SharedVarTable &vtable) const -{ - TagLUT lut{}; - - // Arguments and global shared variables - lut["src"] = vtable.get(_src); - lut["dst"] = vtable.get(_dst); - - // Local build options - lut["meta_kernel_id"] = id(); - lut["DST_TENSOR_TYPE"] = "BUFFER"; - const auto dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id()); - lut["DST_DATA_TYPE"] = dst_info->data_type(); - - return lut; -} - -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute -#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */ \ No newline at end of file diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClStoreKernelComponents.h b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClStoreKernelComponents.h deleted file mode 100644 index e0b188dc8d..0000000000 --- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClStoreKernelComponents.h +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION - -#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLSTOREKERNELCOMPONENTS_H -#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLSTOREKERNELCOMPONENTS_H - -#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -class ClStoreBlockBoundaryAwareKernelComponent : public IClKernelComponent -{ -public: - ClStoreBlockBoundaryAwareKernelComponent(ClKernelBlueprint *blueprint, const Link &src, const Link &dst) - : IClKernelComponent(blueprint), _src{ src }, _dst{ dst } - { - } - ComponentType get_component_type() const override; - std::string get_component_code() const override; - CLBuildOptions generate_build_options() const override; - TagLUT get_tag_lut(const SharedVarTable &vtable) const override; - void allocate_shared_vars(SharedVarTable &vtable) const override; - - virtual std::vector get_links() const override - { - return { _src, _dst }; - } - - virtual std::string name() const override - { - return ""; - } - -private: - Link _src{}; - Link _dst{}; -}; - -class ClStoreIndirectWidthSelectKernelComponent : public IClKernelComponent -{ -public: - ClStoreIndirectWidthSelectKernelComponent(ClKernelBlueprint *blueprint, const Link &src, const Link &dst) - : IClKernelComponent(blueprint), _src{ src }, _dst{ dst } - { - } - ComponentType get_component_type() const override; - std::string get_component_code() const override; - CLBuildOptions generate_build_options() const override; - virtual TagLUT get_tag_lut(const SharedVarTable &vtable) const override; - void allocate_shared_vars(SharedVarTable &vtable) const override; - - virtual std::vector get_links() const override - { - return { _src, _dst }; - } - - virtual std::string name() const override - { - return ""; - } - -private: - Link _src{}; - Link _dst{}; -}; - -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute -#endif // ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLSTOREKERNELCOMPONENTS_H -#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */ \ No newline at end of file -- cgit v1.2.1