diff options
Diffstat (limited to 'src/dynamic_fusion/sketch/gpu/template_writer')
27 files changed, 0 insertions, 4891 deletions
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.cpp deleted file mode 100644 index 775b0a0c8c..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.cpp +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright (c) 2022-2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "GpuKernelVariableTable.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/ITensorInfo.h" - -#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -void GpuKernelVariableTable::declare_variable(const GpuKernelComponentGroup &comp_group, - const ITensorInfo *tensor, - GpuKernelArgumentInfo argument_info, - const std::string &alias) -{ - ARM_COMPUTE_ERROR_ON_MSG(!tensor->has_valid_id(), "Tensor info with valid id expected"); - - // Do not re-declare if the variable associated with the tensor has already been declared - auto it = _vars.find(tensor->id()); - - if (it != _vars.end()) - { - ARM_COMPUTE_ERROR_ON(!(it->second.kernel_argument_info == argument_info)); - return; - } - - const auto target = comp_group.get_tile_for_tensor(tensor); - - if (target != tensor) - { - // If the tensor uses a shared tile, don't declare another variable. - it = _vars.find(target->id()); - - ARM_COMPUTE_ERROR_ON_MSG(it == _vars.end(), "The variable used for this tensor must have been declared."); - - _vars[tensor->id()] = it->second; - } - else - { - // Declare variable associated with the tensor - std::stringstream ss; - ss << alias << "_t" << abs(tensor->id()); - const auto uniq_name = ss.str(); - TensorVariable var{tensor->id(), uniq_name, argument_info}; - - _vars.emplace(tensor->id(), var); - } -} - -GpuKernelVariableTable::TensorVariable GpuKernelVariableTable::get_variable(const ITensorInfo *tensor) const -{ - const auto var = _vars.at(tensor->id()); - return var; -} - -GpuKernelVariableTable::VariableList -GpuKernelVariableTable::get_variable_list(const std::vector<const ITensorInfo *> &tensors) const -{ - VariableList vars{}; - for (const auto &tensor : tensors) - { - if (!tensor->has_valid_id()) - { - continue; - } - vars.push_back(get_variable(tensor)); - } - return vars; -} - -TagVal::TagVal(const GpuKernelVariableTable::TensorVariable &var) : value{var.uniq_name} -{ -} - -TagVal::TagVal(const std::string &val) : value{val} -{ -} - -TagVal::TagVal(const char *val) : value{std::string(val)} -{ -} - -TagVal::TagVal(const DataType &data_type) : value{get_cl_type_from_data_type(data_type)} -{ -} -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h b/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h deleted file mode 100644 index c17f131ada..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Copyright (c) 2022-2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_GPUKERNELVARIABLETABLE -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_GPUKERNELVARIABLETABLE - -#include "arm_compute/core/ITensorInfo.h" - -#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h" -#include "support/AclRequires.h" -#include "support/StringSupport.h" - -#include <set> -#include <string> -#include <type_traits> -#include <unordered_map> - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -class GpuKernelComponentGroup; - -/** A table of all the variables used in the kernel. - * Each kernel has exactly one variable table. - */ -class GpuKernelVariableTable -{ -public: - /** A tensor variable whose main purposes are: - * - Hold the newly assigned @ref GpuKernelArgumentInfo for the associated tensor info - * - Hold the generated variable name for the associated tensor info - */ - struct TensorVariable - { - public: - TensorVariable() = default; - TensorVariable(const TensorVariable &) = default; - TensorVariable &operator=(const TensorVariable &) = default; - ITensorInfo::Id id{ITensorInfo::invalid_tensor_id}; - std::string uniq_name{"empty"}; // Unique name, also the final variable name used in the built code - GpuKernelArgumentInfo kernel_argument_info{}; - bool has_valid_id() const - { - return id != ITensorInfo::invalid_tensor_id; - } - }; - using VariableList = std::vector<TensorVariable>; - -public: - /** Declare a @ref TensorVariable for a corresponding tensor info. - * - * @param[in] comp_group Component group the tensor belongs to - * @param[in] tensor Tensor info with which the new variable is associated - * @param[in] argument_info Kernel argument information - * @param[in] alias Alias for the variable. Will be used as part of the variable name - */ - void declare_variable(const GpuKernelComponentGroup &comp_group, - const ITensorInfo *tensor, - GpuKernelArgumentInfo argument_info, - const std::string &alias = "unnamed"); - /** Get the @ref TensorVariable associated with @p tensor - * - * @param[in] tensor Tensor info to be queried - * - * @return TensorVariable - */ - TensorVariable get_variable(const ITensorInfo *tensor) const; - /** Get the @ref TensorVariable list associated with @p tensors - * @note Empty tensors are skipped - * - * @param[in] tensors List of tensor infos to be queried - * - * @return VariableList - */ - VariableList get_variable_list(const std::vector<const ITensorInfo *> &tensors) const; - -private: - std::map<ITensorInfo::Id, TensorVariable> _vars{}; -}; - -/** A tag value will substitute a tag in a string template during its instantiation */ -struct TagVal -{ - /** Default constructor */ - TagVal() = default; - /** Construct a @ref TagVal from a @ref GpuKernelVariableTable::TensorVariable */ - TagVal(const GpuKernelVariableTable::TensorVariable &var); - /** Construct a @ref TagVal from an integral type */ - template <typename T, ARM_COMPUTE_REQUIRES_TA(std::is_integral<T>::value)> - TagVal(T val) : value{support::cpp11::to_string(val)} - { - } - /** Construct a @ref TagVal from a string */ - TagVal(const std::string &val); - /** Construct a @ref TagVal from a c-style string */ - TagVal(const char *val); - /** Construct a @ref TagVal from a @ref DataType */ - TagVal(const DataType &data_type); - /** Get the value of the TagVal as a converted string */ - std::string value{}; -}; - -/** A tag used in a string template is a placeholder string to be substituted by real values during template instantiation */ -using Tag = std::string; - -/** Tag lookup table. It is used to instantiate a string template */ -using TagLUT = std::unordered_map<Tag, TagVal>; - -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_GPUKERNELVARIABLETABLE */ diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h b/src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h deleted file mode 100644 index 9d0b4f592a..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_IGPUTEMPLATECOMPONENTWRITER -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_IGPUTEMPLATECOMPONENTWRITER - -#include "arm_compute/core/CL/CLCompileContext.h" -#include "arm_compute/core/ITensorInfo.h" -#include "arm_compute/core/Window.h" - -#include "src/dynamic_fusion/sketch/ArgumentPack.h" -#include "src/dynamic_fusion/sketch/gpu/components/Types.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -/** Forward declaration */ -class GpuKernelComponentGroup; -class GpuKernelVariableTable; - -/** An interface used by @ref ClTemplateWriter to write source code for a kernel component - */ -class IGpuTemplateComponentWriter -{ -public: - using ComponentGroup = GpuKernelComponentGroup; - - /**For now all kernel intermeditate/destination tensors are expected to be of type Tensor_4D_t_Buffer*/ - static constexpr GpuKernelArgumentInfo::Type common_tensor_type = GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer; - -public: - /** Constructor - * - * @param[in] id Component id - * @param[in] tensors Tensor arguments to the components - */ - IGpuTemplateComponentWriter(ComponentId id, const ArgumentPack<ITensorInfo> &tensors) : _id{id}, _tensors{tensors} - { - } - /** Destructor */ - virtual ~IGpuTemplateComponentWriter() - { - } - /** Generate kernel component name */ - virtual std::string get_name() const = 0; - /** Generate kernel component code template - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return std::string Component code - */ - virtual std::string get_component_code(const ComponentGroup &comp_group) const = 0; - /** Declare all variables used by the component in the @p vtable - * - * @param[out] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - */ - virtual void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const = 0; - /** Generate the tag look-up table used to instantiate the component code. - * - * @param[in] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - * - * @return TagLUT Tag lookup table - */ - virtual TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const = 0; - /** Generate additional macros used in the component */ - virtual std::string get_additional_macros() const - { - return ""; - } - /** Generate the build options used in the component - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return CLBuildOptions Build options - */ - virtual CLBuildOptions get_build_options(const ComponentGroup &comp_group) const - { - ARM_COMPUTE_UNUSED(comp_group); - return CLBuildOptions{}; - } - /** Generate the component config id string used for tuning */ - virtual std::string get_config_id() const - { - return ""; - } - /** Generate the header list used in the component */ - virtual std::set<std::string> get_headers_list() const - { - return std::set<std::string>{}; - } - /** Generate the execution window for the component */ - virtual Window get_window() const - { - return Window{}; - } - /** Get tensor arguments */ - ArgumentPack<ITensorInfo> tensors() const - { - return _tensors; - } - /** Get component id */ - ComponentId id() const - { - return _id; - } - -private: - ComponentId _id{-1}; - ArgumentPack<ITensorInfo> _tensors{}; -}; -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_IGPUTEMPLATECOMPONENTWRITER */ diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp deleted file mode 100644 index c165fb5f33..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp +++ /dev/null @@ -1,181 +0,0 @@ -/* - * Copyright (c) 2022-2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "ClTemplateActivation.h" - -#include "arm_compute/core/utils/ActivationFunctionUtils.h" -#include "arm_compute/core/utils/helpers/AdjustVecSize.h" -#include "arm_compute/core/utils/StringUtils.h" - -#include "src/core/helpers/WindowHelpers.h" -#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" -#include "support/StringSupport.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -ClTemplateActivation::ClTemplateActivation(ComponentId id, - const ArgumentPack<ITensorInfo> &tensors, - const Attributes &attributes) - : IGpuTemplateComponentWriter{id, tensors}, _src{}, _dst{}, _attributes{attributes} -{ - _src = this->tensors().get_const_tensor(TensorType::ACL_SRC); - _dst = this->tensors().get_const_tensor(TensorType::ACL_DST); - ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst); -} - -std::string ClTemplateActivation::get_name() const -{ - return "activation"; -} - -std::string ClTemplateActivation::get_component_code(const ComponentGroup &comp_group) const -{ - std::string code; - const bool is_root = (comp_group.get_root_component()->id() == this->id()); - - code = R"_( -//------------------ START KERNEL {{meta_kernel_id}} --------------------- -)_"; - if (is_root) - { - code += R"_( -// IN(src) {{src}} -// OUT(dst, accum) {{dst}} - -TILE({{DATA_TYPE}}, M0, N0, {{src}}); -TILE(uint, M0, 1, g_dst_indirect_y); -{ - {{src}}_offset_first_element_in_bytes += g_ind_2 * {{src}}_stride_z; - - T_LOAD({{DATA_TYPE}}, M0, N0, {{TENSOR_TYPE}}, {{src}}, g_ind_0, g_ind_1, 1, {{src}}_stride_y, {{src}}); - - T_ACTIVATION({{DATA_TYPE}}, M0, N0, {{ACT}}, {{A_VAL}}, {{B_VAL}}, {{src}}, {{dst}}); -} - -LOOP_UNROLLING(int, i, 0, 1, M0, -{ - g_dst_indirect_y[i].v = (uint)min((int)(g_ind_1 + i), (int)({{arg_dst}}_w) - 1); - g_dst_indirect_y[i].v += (int)(g_ind_2 % {{arg_dst}}_h) * (int)({{arg_dst}}_w); - g_dst_indirect_y[i].v += (int)(g_ind_2 / {{arg_dst}}_h) * (int)({{arg_dst}}_w * {{arg_dst}}_h); -}) -)_"; - } - else - { - code += R"_( -// IN/OUT(src, accum) {{src}} - -{ - T_ACTIVATION({{DATA_TYPE}}, M0, N0, {{ACT}}, {{A_VAL}}, {{B_VAL}}, {{src}}, {{dst}}); -} -)_"; - } - code += R"_( -//------------------ END KERNEL {{meta_kernel_id}} --------------------- -)_"; - return code; -} - -void ClTemplateActivation::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const -{ - vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer), - "src"); - - vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer), - "dst"); -} - -TagLUT ClTemplateActivation::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const -{ - ARM_COMPUTE_UNUSED(comp_group); - - TagLUT lut{}; - // Arguments and global shared variables - lut["src"] = vtable.get_variable(_src); - lut["dst"] = vtable.get_variable(_dst); - - const auto dst_argument = vtable.get_variable(comp_group.get_any_dst_tensor()); - lut["arg_dst"] = dst_argument.uniq_name; - - // Local build options - lut["meta_kernel_id"] = id(); - lut["DATA_TYPE"] = get_cl_type_from_data_type(_src->data_type()); - lut["TENSOR_TYPE"] = "BUFFER"; - - const auto f_act = lower_string(string_from_activation_func(_attributes.activation())); - - lut["ACT"] = f_act; - lut["A_VAL"] = float_to_string_with_full_precision(_attributes.a()); - lut["B_VAL"] = float_to_string_with_full_precision(_attributes.b()); - - return lut; -} - -CLBuildOptions ClTemplateActivation::get_build_options(const ComponentGroup &comp_group) const -{ - /// NOTE: For now tile sizes (n0, m0) are set by the execution window. This may change in the future - const auto root_window = comp_group.get_root_component()->template_writer()->get_window(); - const unsigned int n0 = root_window.x().step(); - const unsigned int m0 = root_window.y().step(); - const unsigned int partial_store_n0 = _dst->dimension(0) % n0; - - CLBuildOptions build_opts; - build_opts.add_option("-DN0=" + support::cpp11::to_string(n0)); - build_opts.add_option("-DM0=" + support::cpp11::to_string(m0)); - build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0)); - - return build_opts; -} - -std::string ClTemplateActivation::get_config_id() const -{ - std::string config_id{}; - config_id += "activation_"; - config_id += lower_string(string_from_data_type(_src->data_type())); - config_id += "_"; - config_id += support::cpp11::to_string(_src->dimension(0)); - config_id += "_"; - config_id += support::cpp11::to_string(_src->dimension(1)); - return config_id; -} - -std::set<std::string> ClTemplateActivation::get_headers_list() const -{ - return std::set<std::string>{"helpers.h", "tile_helpers.h", "activation_float_helpers.h"}; -} - -Window ClTemplateActivation::get_window() const -{ - ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized"); - const unsigned int n0 = adjust_vec_size(16 / _dst->element_size(), _dst->dimension(0)); - Window win = calculate_max_window(*_dst, Steps(n0)); - return win.collapse(win, Window::DimZ); -} - -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h deleted file mode 100644 index 88ee370342..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Copyright (c) 2022-2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEACTIVATION -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEACTIVATION - -#include "arm_compute/core/experimental/Types.h" -#include "arm_compute/function_info/ActivationLayerInfo.h" - -#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -class ClTemplateActivation final : public IGpuTemplateComponentWriter -{ -public: - using Attributes = ClComponentActivation::Attributes; - - /** Constructor - * - * @param[in] id Component id - * @param[in] tensors Tensor arguments to the components - * @param[in] attributes Component attributes - */ - ClTemplateActivation(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes); - - /** Destructor */ - ~ClTemplateActivation() override = default; - - /** Prevent instances of this class from being copy constructed */ - ClTemplateActivation(const ClTemplateActivation &activation) = delete; - - /** Prevent instances of this class from being copied */ - ClTemplateActivation &operator=(const ClTemplateActivation &activation) = delete; - - /** Allow instances of this class to be move constructed */ - ClTemplateActivation(ClTemplateActivation &&activation) = default; - - /** Allow instances of this class to be moved */ - ClTemplateActivation &operator=(ClTemplateActivation &&activation) = default; - - /** Generate kernel component name */ - std::string get_name() const override; - - /** Generate kernel component code template - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return std::string Component code - */ - std::string get_component_code(const ComponentGroup &comp_group) const override; - - /** Declare all variables used by the component in the @p vtable - * - * @param[out] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - */ - void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - - /** Generate the tag look-up table used to instantiate the component code. - * - * @param[in] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - * - * @return TagLUT Tag lookup table - */ - TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - - /** Generate the build options used in the component - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return CLBuildOptions Build options - */ - CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override; - - /** Generate the component config id string used for tuning */ - std::string get_config_id() const override; - - /** Generate the header list used in the component */ - std::set<std::string> get_headers_list() const override; - - /** Generate the execution window for the component */ - Window get_window() const override; - -private: - const ITensorInfo *_src; - const ITensorInfo *_dst; - Attributes _attributes; -}; -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEACTIVATION */ diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.cpp deleted file mode 100644 index 0da3a73801..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.cpp +++ /dev/null @@ -1,212 +0,0 @@ -/* - * Copyright (c) 2022-2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "ClTemplateCast.h" - -#include "arm_compute/core/utils/helpers/AdjustVecSize.h" -#include "arm_compute/core/utils/StringUtils.h" - -#include "src/core/helpers/WindowHelpers.h" -#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -ClTemplateCast::ClTemplateCast(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes) - : IGpuTemplateComponentWriter{id, tensors}, _src{}, _dst{}, _attributes{attributes} -{ - _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0); - _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0); - - ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst); -} - -std::string ClTemplateCast::get_name() const -{ - const size_t src_size = data_size_from_type(_src->data_type()); - const size_t dst_size = data_size_from_type(_dst->data_type()); - - return (src_size >= dst_size) ? "cast_down" : "cast_up"; -} - -std::string ClTemplateCast::get_component_code(const ComponentGroup &comp_group) const -{ - ARM_COMPUTE_UNUSED(comp_group); - - const std::string kernel_name = get_name(); - const auto is_root = (comp_group.get_root_component()->id() == this->id()); - - std::string code = R"_( -//------------------ START KERNEL {{meta_kernel_id}} CAST --------------------- -)_"; - - if (is_root) - { - code += R"_( -// IN_0(src) {{src}} -// OUT(dst, accum) {{dst}} - -TILE(uint, M0, 1, g_dst_indirect_y); -{ - {{src}}_offset_first_element_in_bytes += get_global_id(2) * {{src}}_stride_z; - - TILE({{DATA_TYPE_IN}}, M0, N0, {{tmp}}); - T_LOAD({{DATA_TYPE_IN}}, M0, N0, BUFFER, {{src}}, g_ind_0, g_ind_1, 1, {{src}}_stride_y, {{tmp}}); -)_"; - } - - code += R"_( - LOOP_UNROLLING(int, m0, 0, 1, M0, - { -)_"; - - if (kernel_name == "cast_down" && is_data_type_quantized(_src->data_type())) - { - code += R"_( - {{tmp}}[m0].v ^= (VEC_DATA_TYPE({{DATA_TYPE_IN}}, N0))0x80; -)_"; - } - - if (kernel_name == "cast_down" && - (is_data_type_float(_src->data_type()) || _attributes.convert_policy() == ConvertPolicy::SATURATE)) - { - code += R"_( - {{dst}}[m0].v = CONVERT_SAT({{tmp}}[m0].v, VEC_DATA_TYPE({{DATA_TYPE_OUT}}, N0)); -)_"; - } - else - { - code += R"_( - {{dst}}[m0].v = CONVERT({{tmp}}[m0].v, VEC_DATA_TYPE({{DATA_TYPE_OUT}}, N0)); -)_"; - } - - code += R"_( - }) -)_"; - - if (is_root) - { - code += R"_( - LOOP_UNROLLING(int, i, 0, 1, M0, - { - g_dst_indirect_y[i].v = (uint)min((int)(g_ind_1 + i), (int)({{arg_dst}}_w) - 1); - g_dst_indirect_y[i].v += (int)(g_ind_2 % {{arg_dst}}_h) * (int)({{arg_dst}}_w); - g_dst_indirect_y[i].v += (int)(g_ind_2 / {{arg_dst}}_h) * (int)({{arg_dst}}_w * {{arg_dst}}_h); - }) -} -)_"; - } - - code += R"_( -//------------------ END KERNEL {{meta_kernel_id}} CAST --------------------- -)_"; - - return code; -} - -void ClTemplateCast::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const -{ - vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer), - "src"); - - vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer), - "dst"); -} - -TagLUT ClTemplateCast::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const -{ - const auto is_root = (comp_group.get_root_component()->id() == this->id()); - - TagLUT lut{}; - - // Arguments and global shared variables - lut["src"] = vtable.get_variable(_src); - lut["dst"] = vtable.get_variable(_dst); - lut["tmp"] = (is_root) ? lut["src"].value + "_in_data" : lut["src"]; - - const auto dst_argument = vtable.get_variable(comp_group.get_any_dst_tensor()); - lut["arg_dst"] = dst_argument.uniq_name; - - // Local build options - lut["meta_kernel_id"] = id(); - - lut["DATA_TYPE_IN"] = get_cl_type_from_data_type(_src->data_type()); - lut["DATA_TYPE_OUT"] = get_cl_type_from_data_type(_dst->data_type()); - - return lut; -} - -CLBuildOptions ClTemplateCast::get_build_options(const ComponentGroup &comp_group) const -{ - ARM_COMPUTE_UNUSED(comp_group); - - const auto root_window = comp_group.get_root_component()->template_writer()->get_window(); - const unsigned int n0 = root_window.x().step(); - const unsigned int m0 = root_window.y().step(); - - // Set build options - CLBuildOptions build_opts{}; - build_opts.add_option("-DN0=" + support::cpp11::to_string(n0)); - build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(_src->dimension(0) % n0)); - build_opts.add_option("-DM0=" + support::cpp11::to_string(m0)); - - return build_opts; -} - -std::string ClTemplateCast::get_config_id() const -{ - std::string config_id{}; - - config_id += "_"; - config_id += lower_string(string_from_data_type(_src->data_type())); - config_id += "_"; - config_id += lower_string(string_from_data_type(_dst->data_type())); - config_id += "_"; - config_id += support::cpp11::to_string(_src->dimension(0)); - config_id += "_"; - config_id += support::cpp11::to_string(_src->dimension(1)); - - return config_id; -} - -std::set<std::string> ClTemplateCast::get_headers_list() const -{ - return std::set<std::string>{"helpers.h", "tile_helpers.h"}; -} - -Window ClTemplateCast::get_window() const -{ - ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized"); - - const unsigned int n0 = adjust_vec_size(16 / _dst->element_size(), _dst->dimension(0)); - Window win = calculate_max_window(*_dst, Steps(n0)); - return win.collapse(win, Window::DimZ); -} - -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.h deleted file mode 100644 index 3adca4edc9..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.h +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATECAST -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATECAST - -#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -class ClTemplateCast final : public IGpuTemplateComponentWriter -{ -public: - using Attributes = ClComponentCast::Attributes; - - /** Constructor - * - * @param[in] id Component id - * @param[in] tensors Tensor arguments to the components - * @param[in] attributes Component attributes - */ - ClTemplateCast(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes); - /** Prevent instances of this class from being copy constructed */ - ClTemplateCast(const ClTemplateCast &cast) = delete; - /** Prevent instances of this class from being copied */ - ClTemplateCast &operator=(const ClTemplateCast &cast) = delete; - /** Allow instances of this class to be move constructed */ - ClTemplateCast(ClTemplateCast &&cast) = default; - /** Allow instances of this class to be moved */ - ClTemplateCast &operator=(ClTemplateCast &&cast) = default; - /** Generate kernel component name */ - std::string get_name() const override; - /** Generate kernel component code template - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return std::string Component code - */ - std::string get_component_code(const ComponentGroup &comp_group) const override; - /** Declare all variables used by the component in the @p vtable - * - * @param[out] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - */ - void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - /** Generate the tag look-up table used to instantiate the component code. - * - * @param[in] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - * - * @return TagLUT Tag lookup table - */ - TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - /** Generate the build options used in the component - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return CLBuildOptions Build options - */ - CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override; - /** Generate the component config id string used for tuning */ - std::string get_config_id() const override; - /** Generate the header list used in the component */ - std::set<std::string> get_headers_list() const override; - /** Generate the execution window for the component */ - Window get_window() const override; - -private: - const ITensorInfo *_src; - const ITensorInfo *_dst; - Attributes _attributes; -}; -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute - -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATECAST */ diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.cpp deleted file mode 100644 index 8380620ab2..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.cpp +++ /dev/null @@ -1,364 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "ClTemplateDepthwiseConv2d.h" - -#include "src/core/helpers/WindowHelpers.h" -#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -ClTemplateDepthwiseConv2d::ClTemplateDepthwiseConv2d(ComponentId id, - const ArgumentPack<ITensorInfo> &tensors, - const Attributes &attributes, - const Settings &settings) - : IGpuTemplateComponentWriter{id, tensors}, - _src{}, - _weight{}, - _bias{}, - _dst{}, - _attributes{attributes}, - _settings{settings} -{ - _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0); - _weight = this->tensors().get_const_tensor(TensorType::ACL_SRC_1); - if (this->tensors().get_const_tensor(TensorType::ACL_SRC_2)) - { - _bias = this->tensors().get_const_tensor(TensorType::ACL_SRC_2); - } - _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0); - ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _weight, _dst); -} - -std::string ClTemplateDepthwiseConv2d::get_name() const -{ - return "depthwise_conv2d"; -} - -std::string ClTemplateDepthwiseConv2d::get_component_code(const ComponentGroup &comp_group) const -{ - ARM_COMPUTE_UNUSED(comp_group); - - constexpr int height_idx = 2; // Data Layout is NHWC - - std::string code = R"_( -//------------------ START KERNEL {{meta_kernel_id}} --------------------- -// IN_0(src) {{src}} -// IN_1(wei) {{weight}} -)_"; - - if (_bias != nullptr && _bias->has_valid_id()) - { - code += R"_( -// IN_1(bia) {{bias}} -)_"; - } - - code += R"_( -// OUT(dst, accum) {{dst}} - -TILE(uint, M0, 1, g_dst_indirect_y); - -{ -#define _IWEI_WIDTH {{WEI_WIDTH}} -#define _IWEI_HEIGHT {{WEI_HEIGHT}} -#define _IDST_WIDTH {{arg_dst}}_w -#define _IDST_HEIGHT {{arg_dst}}_h -#define _IM0_A M0_A -#define _IN0_A N0_A -#define _IM0_B _IWEI_WIDTH -#define _IN0_B N0 -#define _IBOUNDARY_CHECK (!((_IWEI_WIDTH == 1 && _IWEI_HEIGHT == 1 && {{PAD_LEFT}} == 0 && {{PAD_TOP}} == 0 && M0 == 1))) -)_"; - - code += R"_( - const int yo = g_ind_2 % {{arg_dst}}_h; - const int bout = g_ind_2 / {{arg_dst}}_h; -)_"; - - code += R"_( - - int xi = g_ind_1 * {{STRIDE_X}}; - int yi = yo * {{STRIDE_Y}}; - xi -= {{PAD_LEFT}}; - yi -= {{PAD_TOP}}; - - LOOP_UNROLLING(int, i, 0, 1, M0, - { - {{dst}}[i].v = 0; - }) -)_"; - - if (_weight->dimension(height_idx) < 5) - { - code += R"_( - LOOP_UNROLLING(int, yk, 0, 1, _IWEI_HEIGHT, -)_"; - } - else - { - code += R"_( - for(int yk = 0; yk < _IWEI_HEIGHT; ++yk) -)_"; - } - - code += R"_( - { - TILE({{SRC_DATA_TYPE}}, _IM0_A, _IN0_A, a); - - LOOP_UNROLLING(int, i, 0, 1, _IM0_A, - { - a[i].v = 0; - }) - - T_LOAD_NHWC_WITH_DILATION({{SRC_DATA_TYPE}}, 1, _IM0_A, _IN0_A, {{SRC_TENSOR_TYPE}}, {{src}}, bout, yi + yk * {{DILATION_Y}}, xi, (g_ind_0 / {{DEPTH_MULTIPLIER}}), {{src}}_w, {{src}}_h, {{DILATION_X}}, 1, _IBOUNDARY_CHECK, a); - - TILE({{WEI_DATA_TYPE}}, _IM0_B, _IN0_B, b); - - T_LOAD({{WEI_DATA_TYPE}}, _IM0_B, _IN0_B, {{WEI_TENSOR_TYPE}}, {{weight}}, g_ind_0, yk * _IM0_B, 1, {{weight}}_stride_y, b); - - LOOP_UNROLLING(int, m0, 0, 1, M0, - { - LOOP_UNROLLING(int, xk, 0, 1, _IWEI_WIDTH, - { -)_"; - - if (!_settings.is_fma_available()) - { - code += R"_( - {{dst}}[m0].v += a[xk + m0].v * b[xk].v; -)_"; - } - else - { - code += R"_( - {{dst}}[m0].v = fma(a[xk + m0].v, b[xk].v, {{dst}}[m0].v); -)_"; - } - - code += R"_( - }) - }) - } -)_"; - - if (_weight->dimension(height_idx) < 5) - { - code += R"_( - ) -)_"; - } - - if (_bias && _bias->has_valid_id()) - { - code += R"_( - TILE({{BIA_DATA_TYPE}}, 1, N0, {{bias}}); - - T_LOAD({{BIA_DATA_TYPE}}, 1, N0, BUFFER, {{bias}}, g_ind_0, 0, 0, 0, {{bias}}); - - T_ELTWISE_BROADCAST_ADD_X({{ACC_DATA_TYPE}}, M0, N0, {{dst}}, {{bias}}, {{dst}}); -)_"; - } - - code += R"_( - LOOP_UNROLLING(int, i, 0, 1, M0, - { - g_dst_indirect_y[i].v = (uint)min((int)(g_ind_1 + i), (int)({{arg_dst}}_w) - 1); - g_dst_indirect_y[i].v += (int)(g_ind_2 % {{arg_dst}}_h) * (int)({{arg_dst}}_w); - g_dst_indirect_y[i].v += (int)(g_ind_2 / {{arg_dst}}_h) * (int)({{arg_dst}}_w * {{arg_dst}}_h); - }) -} -//------------------ END KERNEL {{meta_kernel_id}} --------------------- -)_"; - - return code; -} - -void ClTemplateDepthwiseConv2d::declare_variables(GpuKernelVariableTable &vtable, - const ComponentGroup &comp_group) const -{ - const GpuKernelArgumentInfo::Type input_type = _settings.export_input_to_cl_image() - ? GpuKernelArgumentInfo::Type::Tensor_4D_t_Image - : GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer; - - vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(input_type), "src"); - - const GpuKernelArgumentInfo::Type weight_type = _settings.export_weights_to_cl_image() - ? GpuKernelArgumentInfo::Type::Tensor_4D_t_Image - : GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer; - - vtable.declare_variable(comp_group, _weight, GpuKernelArgumentInfo(weight_type), "weight"); - - if (_bias != nullptr && _bias->has_valid_id()) // optional bias - { - vtable.declare_variable(comp_group, _bias, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Vector), "bias"); - } - vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer), - "dst"); -} - -TagLUT ClTemplateDepthwiseConv2d::get_tag_lut(const GpuKernelVariableTable &vtable, - const ComponentGroup &comp_group) const -{ - TagLUT lut{}; - - // Arguments and global shared variables - lut["src"] = vtable.get_variable(_src); - lut["weight"] = vtable.get_variable(_weight); - - if (_bias != nullptr && _bias->has_valid_id()) // optional bias - { - lut["bias"] = vtable.get_variable(_bias); - lut["BIA_DATA_TYPE"] = get_cl_type_from_data_type(_bias->data_type()); - } - lut["dst"] = vtable.get_variable(_dst); - - const auto dst_argument = vtable.get_variable(comp_group.get_any_dst_tensor()); - lut["arg_dst"] = dst_argument.uniq_name; - - // Local build options - lut["meta_kernel_id"] = id(); - lut["ACC_DATA_TYPE"] = _src->data_type(); - lut["SRC_DATA_TYPE"] = _src->data_type(); - lut["WEI_DATA_TYPE"] = _weight->data_type(); - - switch (vtable.get_variable(_src).kernel_argument_info.type) - { - case GpuKernelArgumentInfo::Type::Image_Export_To_ClImage2D: - case GpuKernelArgumentInfo::Type::Image_3D_Export_To_ClImage2D: - case GpuKernelArgumentInfo::Type::Tensor_4D_t_Image: - lut["SRC_TENSOR_TYPE"] = "IMAGE"; - break; - default: - lut["SRC_TENSOR_TYPE"] = "BUFFER"; - break; - } - - switch (vtable.get_variable(_weight).kernel_argument_info.type) - { - case GpuKernelArgumentInfo::Type::Image_Export_To_ClImage2D: - case GpuKernelArgumentInfo::Type::Image_3D_Export_To_ClImage2D: - case GpuKernelArgumentInfo::Type::Tensor_4D_t_Image: - lut["WEI_TENSOR_TYPE"] = "IMAGE"; - break; - default: - lut["WEI_TENSOR_TYPE"] = "BUFFER"; - break; - } - - // Data Layout is NHWC - constexpr int width_idx = 1; - constexpr int height_idx = 2; - - lut["WEI_WIDTH"] = _weight->dimension(width_idx); - lut["WEI_HEIGHT"] = _weight->dimension(height_idx); - - lut["STRIDE_X"] = _attributes.stride().x(); - lut["STRIDE_Y"] = _attributes.stride().y(); - - lut["PAD_LEFT"] = _attributes.pad().left; - lut["PAD_TOP"] = _attributes.pad().top; - - lut["DILATION_X"] = _attributes.dilation().x(); - lut["DILATION_Y"] = _attributes.dilation().y(); - - lut["DEPTH_MULTIPLIER"] = _attributes.depth_multiplier(); - - return lut; -} - -CLBuildOptions ClTemplateDepthwiseConv2d::get_build_options(const ComponentGroup &comp_group) const -{ - ARM_COMPUTE_UNUSED(comp_group); - - constexpr unsigned int width_idx = 1; // Data Layout is NHWC - - const unsigned int n0 = _settings.n0(); - const unsigned int m0 = _settings.m0(); - const unsigned int m0_a = _weight->dimension(width_idx) + m0 - 1; - const unsigned int n0_a = _attributes.depth_multiplier() > 1 ? 1 : n0; - const unsigned int partial_store_n0 = _dst->dimension(0) % n0; - - CLBuildOptions build_opts{}; - - if (_settings.fast_relaxed_math()) - { - build_opts.add_option("-cl-fast-relaxed-math"); - } - else - { - // -cl-fast-relaxed-math also sets -cl-finite-math-only and -cl-unsafe-math-optimizations - // to disable -cl-finite-math-only, we only include -cl-unsafe-math-optimizations - build_opts.add_option("-cl-unsafe-math-optimizations"); - } - - build_opts.add_option("-DN0=" + support::cpp11::to_string(n0)); - build_opts.add_option("-DM0=" + support::cpp11::to_string(m0)); - build_opts.add_option("-DN0_A=" + support::cpp11::to_string(n0_a)); - build_opts.add_option("-DM0_A=" + support::cpp11::to_string(m0_a)); - build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0)); - - return build_opts; -} - -std::string ClTemplateDepthwiseConv2d::get_config_id() const -{ - std::string config_id{}; - - config_id += support::cpp11::to_string(_src->dimension(0)); - config_id += "_"; - config_id += support::cpp11::to_string(_src->dimension(1)); - config_id += "_"; - config_id += support::cpp11::to_string(_src->dimension(2)); - config_id += "_"; - config_id += support::cpp11::to_string(_dst->dimension(0)); - config_id += "_"; - config_id += support::cpp11::to_string(_dst->dimension(1)); - config_id += "_"; - config_id += support::cpp11::to_string(_dst->dimension(2)); - config_id += "_"; - config_id += string_from_data_type(_src->data_type()); - - return config_id; -} - -std::set<std::string> ClTemplateDepthwiseConv2d::get_headers_list() const -{ - return std::set<std::string>{"helpers.h", "tile_helpers.h"}; -} - -Window ClTemplateDepthwiseConv2d::get_window() const -{ - ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized"); - - Window win = calculate_max_window(*_dst, Steps(_settings.n0(), _settings.m0())); - return win.collapse(win, Window::DimZ); -} - -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.h deleted file mode 100644 index 5d04c687c3..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.h +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEDEPTHWISECONV2D -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEDEPTHWISECONV2D - -#include "arm_compute/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.h" - -#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -class ClTemplateDepthwiseConv2d final : public IGpuTemplateComponentWriter -{ -public: - using Attributes = ClComponentDepthwiseConv2d::Attributes; - using Settings = ClComponentDepthwiseConv2d::Settings; - /** Constructor - * - * Similar to @ref ClComponentDepthwiseConv2d::validate() - * - * @param[in] id Component id - * @param[in] tensors Tensor arguments to the components - * @param[in] attributes Component attributes - * @param[in] settings Component settings - */ - ClTemplateDepthwiseConv2d(ComponentId id, - const ArgumentPack<ITensorInfo> &tensors, - const Attributes &attributes, - const Settings &settings); - /** Prevent instances of this class from being copy constructed */ - ClTemplateDepthwiseConv2d(const ClTemplateDepthwiseConv2d &depthwise_conv2d) = delete; - /** Prevent instances of this class from being copied */ - ClTemplateDepthwiseConv2d &operator=(const ClTemplateDepthwiseConv2d &depthwise_conv2d) = delete; - /** Allow instances of this class to be move constructed */ - ClTemplateDepthwiseConv2d(ClTemplateDepthwiseConv2d &&depthwise_conv2d) = default; - /** Allow instances of this class to be moved */ - ClTemplateDepthwiseConv2d &operator=(ClTemplateDepthwiseConv2d &&depthwise_conv2d) = default; - /** Generate kernel component name */ - std::string get_name() const override; - /** Generate kernel component code template - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return std::string Component code - */ - std::string get_component_code(const ComponentGroup &comp_group) const override; - /** Declare all variables used by the component in the @p vtable - * - * @param[out] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - */ - void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - /** Generate the tag look-up table used to instantiate the component code. - * - * @param[in] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - * - * @return TagLUT Tag lookup table - */ - TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - /** Generate the build options used in the component - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return CLBuildOptions Build options - */ - CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override; - /** Generate the component config id string used for tuning */ - std::string get_config_id() const override; - /** Generate the header list used in the component */ - std::set<std::string> get_headers_list() const override; - /** Generate the execution window for the component */ - Window get_window() const override; - -private: - const ITensorInfo *_src; - const ITensorInfo *_weight; - const ITensorInfo *_bias; - const ITensorInfo *_dst; - Attributes _attributes; - Settings _settings; -}; -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEDEPTHWISECONV2D */ diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp deleted file mode 100644 index f6a7a58d1d..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp +++ /dev/null @@ -1,393 +0,0 @@ -/* - * Copyright (c) 2022-2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "ClTemplateDirectConv2d.h" - -#include "arm_compute/core/utils/helpers/AdjustVecSize.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/core/utils/StringUtils.h" - -#include "src/core/helpers/WindowHelpers.h" -#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h" -#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" -#include "support/StringSupport.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -ClTemplateDirectConv2d::ClTemplateDirectConv2d(ComponentId id, - const ArgumentPack<ITensorInfo> &tensors, - const Attributes &attributes, - const Settings &settings) - : IGpuTemplateComponentWriter{id, tensors}, - _src{}, - _weight{}, - _bias{}, - _dst{}, - _attributes{attributes}, - _settings{settings} -{ - _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0); - _weight = this->tensors().get_const_tensor(TensorType::ACL_SRC_1); - if (this->tensors().get_const_tensor(TensorType::ACL_SRC_2)) - { - _bias = this->tensors().get_const_tensor(TensorType::ACL_SRC_2); - } - _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0); - ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _weight, _dst); -} - -std::string ClTemplateDirectConv2d::get_name() const -{ - return "direct_conv2d"; -} - -std::string ClTemplateDirectConv2d::get_component_code(const ComponentGroup &comp_group) const -{ - ARM_COMPUTE_UNUSED(comp_group); - - const auto channel_idx = get_data_layout_dimension_index(_src->data_layout(), DataLayoutDimension::CHANNEL); - const auto k0 = adjust_vec_size(_settings.direct_conv_descriptor().k0, _src->dimension(channel_idx)); - const bool leftover_loop = (_src->dimension(channel_idx) % k0) != 0; - - std::string code = R"_( -//------------------ START KERNEL {{meta_kernel_id}} --------------------- -// IN_0(src) {{src}} -// IN_1(wei) {{weight}} -)_"; - if (_bias && _bias->has_valid_id()) - { - code += R"_( -// IN_1(bia) {{bias}} -)_"; - } - code += R"_( -// OUT(dst, accum) {{dst}} - -TILE(uint, M0, 1, g_dst_indirect_y); - -{ -#define _IWEI_WIDTH {{WEI_WIDTH}} -#define _IWEI_HEIGHT {{WEI_HEIGHT}} -#define _ISRC_WIDTH {{SRC_WIDTH}} -#define _ISRC_HEIGHT {{SRC_HEIGHT}} -#define _ISRC_CHANNELS {{SRC_CHANNELS}} -#define _IDST_WIDTH {{DST_WIDTH}} -#define _IDST_HEIGHT {{DST_HEIGHT}} -#define _IDST_CHANNELS {{DST_CHANNELS}} -#define _IY_MULTIPLIER (_IWEI_WIDTH * _IWEI_HEIGHT) - - TILE(int, M0, 1, xi); - TILE(int, M0, 1, yi); - - // Convert the linear index to coordinate - LOOP_UNROLLING(int, i, 0, 1, M0, - { - xi[0].s[i] = ((g_ind_1 + i) % _IDST_WIDTH) * {{STRIDE_X}}; - yi[0].s[i] = ((g_ind_1 + i) / _IDST_WIDTH) * {{STRIDE_Y}}; - xi[0].s[i] -= {{PAD_LEFT}}; - yi[0].s[i] -= {{PAD_TOP}}; - }) - - LOOP_UNROLLING(int, i, 0, 1, M0, - { - {{dst}}[i].v = 0; - }) - - for(int i = 0; i < (_IWEI_WIDTH * _IWEI_HEIGHT); ++i) - { - int xk = i % _IWEI_WIDTH; - int yk = i / _IWEI_WIDTH; - - TILE(int, 1, M0, my); - - LOOP_UNROLLING(int, i, 0, 1, M0, - { - int x_s = xi[0].s[i] + xk; - int y_s = yi[0].s[i] + yk; - my[0].s[i] = x_s + y_s *_ISRC_WIDTH; - my[0].s[i] = my[0].s[i] + g_ind_2 * (int)(_ISRC_WIDTH * _ISRC_HEIGHT); - my[0].s[i] = select(-1, my[0].s[i], x_s >= 0); - my[0].s[i] = select(-1, my[0].s[i], x_s < _ISRC_WIDTH); - my[0].s[i] = select(-1, my[0].s[i], y_s >= 0); - my[0].s[i] = select(-1, my[0].s[i], y_s < _ISRC_HEIGHT); - }) - - int ck = 0; - for(; ck <= (_ISRC_CHANNELS - K0); ck += K0) - { - TILE({{SRC_DATA_TYPE}}, M0, K0, a); - TILE({{WEI_DATA_TYPE}}, N0, K0, b); - - LOOP_UNROLLING(int, i, 0, 1, M0, - { - a[i].v = {{ZERO_VALUE}}; - }) - - LOOP_UNROLLING(int, i, 0, 1, N0, - { - b[i].v = {{ZERO_VALUE}}; - }) - - T_LOAD2D_INDIRECT({{SRC_DATA_TYPE}}, M0, K0, {{SRC_TENSOR_TYPE}}, {{src}}, ck, {{src}}_stride_y, my, a); - - T_LOAD({{WEI_DATA_TYPE}}, N0, K0, {{WEI_TENSOR_TYPE}}, {{weight}}, ck, g_ind_0 * _IY_MULTIPLIER + i, _IY_MULTIPLIER, {{weight}}_stride_y, b); - - T_MMUL({{SRC_DATA_TYPE}}, {{WEI_DATA_TYPE}}, {{ACC_DATA_TYPE}}, M0, N0, K0, NT, T, a, b, {{dst}}); - } -)_"; - - if (leftover_loop) - { - code += R"_( - for(; ck < _ISRC_CHANNELS; ++ck) - { - TILE({{SRC_DATA_TYPE}}, M0, 1, a); - TILE({{WEI_DATA_TYPE}}, N0, 1, b); - - LOOP_UNROLLING(int, i, 0, 1, M0, - { - a[i].v = {{ZERO_VALUE}}; - }) - - LOOP_UNROLLING(int, i, 0, 1, N0, - { - b[i].v = {{ZERO_VALUE}}; - }) - - T_LOAD2D_INDIRECT({{SRC_DATA_TYPE}}, M0, 1, {{SRC_TENSOR_TYPE}}, {{src}}, ck, {{src}}_stride_y, my, a); - - T_LOAD({{WEI_DATA_TYPE}}, N0, 1, BUFFER, {{weight}}, ck, g_ind_0 * _IY_MULTIPLIER + i, _IY_MULTIPLIER, {{weight}}_stride_y, b); - - T_MMUL({{SRC_DATA_TYPE}}, {{WEI_DATA_TYPE}}, {{ACC_DATA_TYPE}}, M0, N0, 1, NT, T, a, b, {{dst}}); - } - )_"; - } - - code += R"_( -#undef _I_WEI_WIDTH -#undef _I_WEI_HEIGHT -#undef _ISRC_WIDTH -#undef _ISRC_HEIGHT -#undef _ISRC_CHANNELS -#undef _IDST_WIDTH -#undef _IDST_HEIGHT -#undef _IDST_CHANNELS -#undef _IY_MULTIPLIER - - } -)_"; - - if (_bias && _bias->has_valid_id()) - { - code += R"_( - TILE({{BIA_DATA_TYPE}}, 1, N0, bias0); - - T_LOAD({{BIA_DATA_TYPE}}, 1, N0, BUFFER, {{bias}}, g_ind_0, 0, 1, 0, bias0); - - T_ELTWISE_BROADCAST_ADD_X({{ACC_DATA_TYPE}}, M0, N0, {{dst}}, bias0, {{dst}}); - )_"; - } - - code += R"_( - LOOP_UNROLLING(int, i, 0, 1, M0, - { - g_dst_indirect_y[i].v = (uint)min(g_ind_1 + i, (int)({{DST_WIDTH}} * {{DST_HEIGHT}}) - 1); - g_dst_indirect_y[i].v += g_ind_2 * (int)({{DST_WIDTH}} * {{DST_HEIGHT}}); - }) -} -//------------------ END KERNEL {{meta_kernel_id}} --------------------- -)_"; - return code; -} - -void ClTemplateDirectConv2d::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const -{ - vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer), - "src"); - - const GpuKernelArgumentInfo::Type weight_type = _settings.export_to_cl_image() - ? GpuKernelArgumentInfo::Type::Tensor_4D_t_Image - : GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer; - vtable.declare_variable(comp_group, _weight, GpuKernelArgumentInfo(weight_type), "weight"); - - if (_bias && _bias->has_valid_id()) // optional bias - { - vtable.declare_variable(comp_group, _bias, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Vector), "bias"); - } - vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(common_tensor_type), "dst"); -} - -TagLUT ClTemplateDirectConv2d::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const -{ - TagLUT lut{}; - // Arguments and global shared variables - lut["src"] = vtable.get_variable(_src); - lut["weight"] = vtable.get_variable(_weight); - - if (_bias && _bias->has_valid_id()) // optional bias - { - lut["bias"] = vtable.get_variable(_bias); - lut["BIA_DATA_TYPE"] = get_cl_type_from_data_type(_bias->data_type()); - } - lut["dst"] = vtable.get_variable(_dst); - - const auto dst_argument = vtable.get_variable(comp_group.get_any_dst_tensor()); - lut["arg_dst"] = dst_argument.uniq_name; - - // Local build options - lut["meta_kernel_id"] = id(); - lut["ACC_DATA_TYPE"] = _src->data_type(); - lut["SRC_DATA_TYPE"] = _src->data_type(); - lut["WEI_DATA_TYPE"] = _weight->data_type(); - - lut["SRC_TENSOR_TYPE"] = "BUFFER"; - switch (vtable.get_variable(_weight).kernel_argument_info.type) - { - case GpuKernelArgumentInfo::Type::Image_Export_To_ClImage2D: - case GpuKernelArgumentInfo::Type::Image_3D_Export_To_ClImage2D: - case GpuKernelArgumentInfo::Type::Tensor_4D_t_Image: - { - lut["WEI_TENSOR_TYPE"] = "IMAGE"; - break; - } - default: - { - lut["WEI_TENSOR_TYPE"] = "BUFFER"; - break; - } - } - const auto width_idx = 1; - const auto height_idx = 2; - const auto channel_idx = 0; - - lut["SRC_WIDTH"] = _src->dimension(width_idx); - lut["SRC_HEIGHT"] = _src->dimension(height_idx); - lut["SRC_CHANNELS"] = _src->dimension(channel_idx); - - lut["WEI_WIDTH"] = _weight->dimension(width_idx); - lut["WEI_HEIGHT"] = _weight->dimension(height_idx); - - lut["DST_WIDTH"] = _dst->dimension(width_idx); - lut["DST_HEIGHT"] = _dst->dimension(height_idx); - lut["DST_CHANNELS"] = _dst->dimension(channel_idx); - - lut["STRIDE_X"] = _attributes.stride().x(); - lut["STRIDE_Y"] = _attributes.stride().y(); - - lut["PAD_LEFT"] = _attributes.pad().left; - lut["PAD_TOP"] = _attributes.pad().top; - - lut["ZERO_VALUE"] = 0; - - return lut; -} - -CLBuildOptions ClTemplateDirectConv2d::get_build_options(const ComponentGroup &comp_group) const -{ - const unsigned int channel_idx = get_data_layout_dimension_index(_src->data_layout(), DataLayoutDimension::CHANNEL); - - const auto root_window = comp_group.get_root_component()->template_writer()->get_window(); - const unsigned int n0 = root_window.x().step(); - const unsigned int m0 = root_window.y().step(); - const unsigned int k0 = adjust_vec_size(_settings.direct_conv_descriptor().k0, _src->dimension(channel_idx)); - const unsigned int partial_store_n0 = _dst->dimension(0) % n0; - - CLBuildOptions build_opts{}; - if (_settings.fast_relaxed_math()) - { - build_opts.add_option("-cl-fast-relaxed-math"); - } - else - { - // -cl-fast-relaxed-math also sets -cl-finite-math-only and -cl-unsafe-math-optimizations - // to disable -cl-finite-math-only, we only include -cl-unsafe-math-optimizations - build_opts.add_option("-cl-unsafe-math-optimizations"); - } - - build_opts.add_option("-DN0=" + support::cpp11::to_string(n0)); - build_opts.add_option("-DM0=" + support::cpp11::to_string(m0)); - build_opts.add_option("-DK0=" + support::cpp11::to_string(k0)); - build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0)); - - return build_opts; -} - -std::string ClTemplateDirectConv2d::get_config_id() const -{ - const DataType data_type = _src->data_type(); - const DataLayout data_layout = _src->data_layout(); - - const unsigned int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); - const unsigned int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); - - const unsigned int kernel_size = _weight->dimension(width_idx); - - std::string config_id{}; - config_id += lower_string(string_from_data_type(data_type)); - config_id += "_"; - config_id += support::cpp11::to_string(kernel_size); - config_id += "_"; - config_id += support::cpp11::to_string(_attributes.stride().x()); - config_id += "_"; - config_id += support::cpp11::to_string(_attributes.stride().y()); - config_id += "_"; - config_id += support::cpp11::to_string(_dst->dimension(width_idx)); - config_id += "_"; - config_id += support::cpp11::to_string(_dst->dimension(height_idx)); - config_id += "_"; - config_id += lower_string(string_from_data_layout(data_layout)); - return config_id; -} - -std::set<std::string> ClTemplateDirectConv2d::get_headers_list() const -{ - return std::set<std::string>{"helpers.h", "tile_helpers.h"}; -} - -Window ClTemplateDirectConv2d::get_window() const -{ - ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized"); - - const auto output_shape = _dst->tensor_shape(); - const auto desc = _settings.direct_conv_descriptor(); - - const unsigned int n0 = adjust_vec_size(desc.n0, output_shape[0]); - const unsigned int m0 = adjust_vec_size(desc.m0, output_shape[1] * output_shape[2]); - - // Create and configure kernel window - Window win = calculate_max_window(output_shape, Steps(n0, m0)); - - const size_t dim_y_collapsed = ceil_to_multiple(output_shape[1] * output_shape[2], m0); - win.set(Window::DimY, Window::Dimension(0, dim_y_collapsed, m0)); - win.set(Window::DimZ, Window::Dimension(0, output_shape.total_size_upper(3), 1)); - - return win; -} - -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h deleted file mode 100644 index 03c8cd2f15..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright (c) 2022-2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEDIRECTCONV2D -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEDIRECTCONV2D - -#include "arm_compute/core/experimental/Types.h" -#include "arm_compute/dynamic_fusion/sketch/attributes/Conv2dAttributes.h" - -#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -class ClTemplateDirectConv2d final : public IGpuTemplateComponentWriter -{ -public: - using Attributes = ClComponentDirectConv2d::Attributes; - using Settings = ClComponentDirectConv2d::Settings; - /** Constructor - * - * Similar to @ref ClComponentDirectConv2d::validate() - * - * @param[in] id Component id - * @param[in] tensors Tensor arguments to the components - * @param[in] attributes Component attributes - * @param[in] settings Component settings - */ - ClTemplateDirectConv2d(ComponentId id, - const ArgumentPack<ITensorInfo> &tensors, - const Attributes &attributes, - const Settings &settings); - /** Destructor */ - ~ClTemplateDirectConv2d() override = default; - /** Prevent instances of this class from being copy constructed */ - ClTemplateDirectConv2d(const ClTemplateDirectConv2d &direct_conv2d) = delete; - /** Prevent instances of this class from being copied */ - ClTemplateDirectConv2d &operator=(const ClTemplateDirectConv2d &direct_conv2d) = delete; - /** Allow instances of this class to be move constructed */ - ClTemplateDirectConv2d(ClTemplateDirectConv2d &&direct_conv2d) = default; - /** Allow instances of this class to be moved */ - ClTemplateDirectConv2d &operator=(ClTemplateDirectConv2d &&direct_conv2d) = default; - /** Generate kernel component name */ - std::string get_name() const override; - /** Generate kernel component code template - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return std::string Component code - */ - std::string get_component_code(const ComponentGroup &comp_group) const override; - /** Declare all variables used by the component in the @p vtable - * - * @param[out] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - */ - void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - /** Generate the tag look-up table used to instantiate the component code. - * - * @param[in] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - * - * @return TagLUT Tag lookup table - */ - TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - /** Generate the build options used in the component - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return CLBuildOptions Build options - */ - CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override; - /** Generate the component config id string used for tuning */ - std::string get_config_id() const override; - /** Generate the header list used in the component */ - std::set<std::string> get_headers_list() const override; - /** Generate the execution window for the component */ - Window get_window() const override; - -private: - const ITensorInfo *_src; - const ITensorInfo *_weight; - const ITensorInfo *_bias; - const ITensorInfo *_dst; - Attributes _attributes; - Settings _settings; -}; -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEDIRECTCONV2D */ diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp deleted file mode 100644 index 78bff3c3f3..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp +++ /dev/null @@ -1,274 +0,0 @@ -/* - * Copyright (c) 2022-2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "ClTemplateElementwiseBinary.h" - -#include "arm_compute/core/utils/helpers/AdjustVecSize.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/core/utils/StringUtils.h" - -#include "src/core/helpers/WindowHelpers.h" -#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h" -#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" -#include "support/StringSupport.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -constexpr unsigned int vector_size_byte_opencl = 16; - -ClTemplateElementwiseBinary::ClTemplateElementwiseBinary(ComponentId id, - const ArgumentPack<ITensorInfo> &tensors, - const Attributes &attributes) - : IGpuTemplateComponentWriter{id, tensors}, _lhs{}, _rhs{}, _dst{}, _attributes{attributes} -{ - _lhs = this->tensors().get_const_tensor(TensorType::ACL_SRC_0); - _rhs = this->tensors().get_const_tensor(TensorType::ACL_SRC_1); - _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0); - ARM_COMPUTE_ERROR_ON_NULLPTR(_lhs, _rhs, _dst); -} - -std::string ClTemplateElementwiseBinary::get_name() const -{ - return "elementwise_binary"; -} - -std::string ClTemplateElementwiseBinary::get_component_code(const ComponentGroup &comp_group) const -{ - std::string code; - const bool is_root = (comp_group.get_root_component()->id() == this->id()); - const bool is_lhs_input = comp_group.is_input_tensor(_lhs); - const bool is_rhs_input = comp_group.is_input_tensor(_rhs); - - code = - R"_( - //------------------ START KERNEL {{meta_kernel_id}} {{ELTWISE_OP}} --------------------- -)_"; - - if (is_root) - { - code += - R"_( - TILE(uint, M0, 1, g_dst_indirect_y); -)_"; - } - - if (is_lhs_input) - { - code += - R"_( - TILE({{DATA_TYPE}}, {{lhs_m0}}, N0, {{lhs}}); -)_"; - } - - if (is_rhs_input) - { - code += - R"_( - TILE({{DATA_TYPE}}, {{rhs_m0}}, N0, {{rhs}}); -)_"; - } - - code += - R"_( - { -)_"; - - if (is_lhs_input) - { - code += - R"_( - {{lhs}}_offset_first_element_in_bytes += g_ind_2 * {{lhs}}_stride_w; - T_LOAD({{DATA_TYPE}}, {{lhs_m0}}, {{lhs_n0}}, BUFFER, {{lhs}}, {{lhs_start_ind_0}}, {{lhs_start_ind_1}}, 1, {{lhs}}_stride_y, {{lhs}}); -)_"; - } - - if (is_rhs_input) - { - code += - R"_( - {{rhs}}_offset_first_element_in_bytes += g_ind_2 * {{rhs}}_stride_w; - T_LOAD({{DATA_TYPE}}, {{rhs_m0}}, {{rhs_n0}}, BUFFER, {{rhs}}, {{rhs_start_ind_0}}, {{rhs_start_ind_1}}, 1, {{rhs}}_stride_y, {{rhs}}); -)_"; - } - - code += - R"_( - T_ELTWISE_{{BROADCAST_OP}}{{ELTWISE_OP}}({{DATA_TYPE}}, M0, N0, {{lhs}}, {{rhs}}, {{dst}}); -)_"; - - if (is_root) - { - // Calculate the destination indirect Y - code += - R"_( - LOOP_UNROLLING(int, i, 0, 1, M0, - { - g_dst_indirect_y[i].v = (uint)min(g_ind_1 + i, (int)({{arg_dst}}_w * {{arg_dst}}_h) - 1); - g_dst_indirect_y[i].v += g_ind_2 * (int)({{arg_dst}}_w * {{arg_dst}}_h); - }) -)_"; - } - - code += - R"_( - } - //------------------ END KERNEL {{meta_kernel_id}} {{ELTWISE_OP}} --------------------- -)_"; - - return code; -} - -void ClTemplateElementwiseBinary::declare_variables(GpuKernelVariableTable &vtable, - const ComponentGroup &comp_group) const -{ - vtable.declare_variable(comp_group, _lhs, GpuKernelArgumentInfo(common_tensor_type), "lhs"); - - vtable.declare_variable(comp_group, _rhs, GpuKernelArgumentInfo(common_tensor_type), "rhs"); - - vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(common_tensor_type), "dst"); -} - -TagLUT ClTemplateElementwiseBinary::get_tag_lut(const GpuKernelVariableTable &vtable, - const ComponentGroup &comp_group) const -{ - TagLUT lut{}; - - // Local build options - lut["meta_kernel_id"] = id(); - lut["DATA_TYPE"] = get_cl_type_from_data_type(_lhs->data_type()); - // Arguments and global shared variables - - lut["lhs"] = vtable.get_variable(_lhs); - lut["rhs"] = vtable.get_variable(_rhs); - lut["dst"] = vtable.get_variable(_dst); - lut["arg_dst"] = vtable.get_variable(comp_group.get_any_dst_tensor()); - - switch (_attributes.operation()) - { - case Attributes::ElementwiseOp::Add: - lut["ELTWISE_OP"] = "ADD"; - break; - case Attributes::ElementwiseOp::Sub: - lut["ELTWISE_OP"] = "SUB"; - break; - case Attributes::ElementwiseOp::Mul: - lut["ELTWISE_OP"] = "MUL"; - break; - default: - ARM_COMPUTE_ERROR("Arithmetic Operation not supported"); - } - - ARM_COMPUTE_ERROR_ON(comp_group.is_intermediate_tensor(_lhs) && - detail::have_different_dimensions(_lhs->tensor_shape(), _dst->tensor_shape(), 0)); - ARM_COMPUTE_ERROR_ON(comp_group.is_intermediate_tensor(_rhs) && - detail::have_different_dimensions(_rhs->tensor_shape(), _dst->tensor_shape(), 0)); - - // Set broadcast parameters - // PRE: All tensors are broadcast-compatible - const auto &lhs_dims = _lhs->tensor_shape(); - const auto &rhs_dims = _rhs->tensor_shape(); - const auto &dst_dims = _dst->tensor_shape(); - - const auto lhs_broadcast_x = dst_dims[0] != 1 && lhs_dims[0] == 1; - const auto rhs_broadcast_x = dst_dims[0] != 1 && rhs_dims[0] == 1; - const auto lhs_broadcast_y = dst_dims[1] != 1 && lhs_dims[1] == 1; - const auto rhs_broadcast_y = dst_dims[1] != 1 && rhs_dims[1] == 1; - const auto lhs_broadcast_z = dst_dims[2] != 1 && lhs_dims[2] == 1; - const auto rhs_broadcast_z = dst_dims[2] != 1 && rhs_dims[2] == 1; - - const auto lhs_broadcast_yz = lhs_broadcast_y && lhs_broadcast_z; - const auto rhs_broadcast_yz = rhs_broadcast_y && rhs_broadcast_z; - - lut["lhs_n0"] = (lhs_broadcast_x) ? "1" : "N0"; - lut["lhs_start_ind_0"] = (lhs_broadcast_x) ? "0" : "g_ind_0"; - lut["rhs_n0"] = (rhs_broadcast_x) ? "1" : "N0"; - lut["rhs_start_ind_0"] = (rhs_broadcast_x) ? "0" : "g_ind_0"; - - lut["lhs_m0"] = (lhs_broadcast_yz) ? "1" : "M0"; - lut["lhs_start_ind_1"] = (lhs_broadcast_yz) ? "0" : "g_ind_1"; - lut["rhs_m0"] = (rhs_broadcast_yz) ? "1" : "M0"; - lut["rhs_start_ind_1"] = (rhs_broadcast_yz) ? "0" : "g_ind_1"; - - lut["BROADCAST_OP"] = (lhs_broadcast_yz) ? "BROADCAST_LHS_X_" : (rhs_broadcast_yz) ? "BROADCAST_RHS_X_" : ""; - - return lut; -} - -CLBuildOptions ClTemplateElementwiseBinary::get_build_options(const ComponentGroup &comp_group) const -{ - CLBuildOptions build_opts{}; - /// NOTE: For now tile sizes (n0, m0) are set by the execution window. This may change in the future - const auto root_window = comp_group.get_root_component()->template_writer()->get_window(); - const unsigned int n0 = root_window.x().step(); - const unsigned int m0 = root_window.y().step(); - const unsigned int partial_store_n0 = _dst->dimension(0) % n0; - - build_opts.add_option("-DM0=" + support::cpp11::to_string(m0)); - build_opts.add_option("-DN0=" + support::cpp11::to_string(n0)); - build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(_lhs->data_type())); - build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0)); - - return build_opts; -} - -std::string ClTemplateElementwiseBinary::get_config_id() const -{ - std::string config_id{}; - config_id += lower_string(string_from_data_type(_dst->data_type())); - config_id += "_"; - config_id += support::cpp11::to_string(_dst->dimension(0)); - config_id += "_"; - config_id += support::cpp11::to_string(_dst->dimension(1)); - config_id += "_"; - config_id += lower_string(string_from_data_layout(_dst->data_layout())); - - return config_id; -} - -std::set<std::string> ClTemplateElementwiseBinary::get_headers_list() const -{ - return std::set<std::string>{"helpers.h", "tile_helpers.h"}; -} - -Window ClTemplateElementwiseBinary::get_window() const -{ - ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized"); - - TensorShape output_shape = _dst->tensor_shape(); - // Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) and upper dimensions unchanged - // This is in line with the collapsing convention used by operators like Conv2d - output_shape.collapse(2U, 1U); - const unsigned int num_elems_processed_per_iteration = - adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0)); - Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration)); - - return win; -} - -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.h deleted file mode 100644 index 991c0eca44..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.h +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Copyright (c) 2022-2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEELEMENTWISEBINARY -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEELEMENTWISEBINARY - -#include "arm_compute/core/experimental/Types.h" - -#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -class ClTemplateElementwiseBinary final : public IGpuTemplateComponentWriter -{ -public: - using Attributes = ClComponentElementwiseBinary::Attributes; - - /** Constructor - * - * Similar to @ref ClComponentElementwiseBinary::validate() - * - * @param[in] id Component id - * @param[in] tensors Tensor arguments to the components - * @param[in] attributes Component attributes - */ - ClTemplateElementwiseBinary(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes); - /** Prevent instances of this class from being copy constructed */ - ClTemplateElementwiseBinary(const ClTemplateElementwiseBinary &elementwise) = delete; - /** Prevent instances of this class from being copied */ - ClTemplateElementwiseBinary &operator=(const ClTemplateElementwiseBinary &elementwise) = delete; - /** Allow instances of this class to be move constructed */ - ClTemplateElementwiseBinary(ClTemplateElementwiseBinary &&elementwise) = default; - /** Allow instances of this class to be moved */ - ClTemplateElementwiseBinary &operator=(ClTemplateElementwiseBinary &&elementwise) = default; - - /** Generate kernel component name */ - std::string get_name() const override; - - /** Generate kernel component code template - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return std::string Component code - */ - std::string get_component_code(const ComponentGroup &comp_group) const override; - - /** Declare all variables used by the component in the @p vtable - * - * @param[out] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - */ - void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - - /** Generate the tag look-up table used to instantiate the component code. - * - * @param[in] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - * - * @return TagLUT Tag lookup table - */ - TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - - /** Generate the build options used in the component - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return CLBuildOptions Build options - */ - CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override; - - /** Generate the component config id string used for tuning */ - std::string get_config_id() const override; - - /** Generate the header list used in the component */ - std::set<std::string> get_headers_list() const override; - - /** Generate the execution window for the component */ - Window get_window() const override; - -private: - const ITensorInfo *_lhs; - const ITensorInfo *_rhs; - const ITensorInfo *_dst; - Attributes _attributes; -}; -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEELEMENTWISEBINARY */ diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.cpp deleted file mode 100644 index 522c33a022..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.cpp +++ /dev/null @@ -1,267 +0,0 @@ -/* - * Copyright (c) 2022-2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.h" - -#include "arm_compute/core/utils/helpers/AdjustVecSize.h" -#include "arm_compute/core/utils/StringUtils.h" - -#include "src/core/helpers/WindowHelpers.h" -#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" -#include "support/StringSupport.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -namespace -{ -constexpr unsigned int serial_vector_size = 8; -} // namespace -ClTemplateLogits1DMaxShiftExpSum::ClTemplateLogits1DMaxShiftExpSum(ComponentId id, - const ArgumentPack<ITensorInfo> &tensors, - const Attributes &attributes) - : IGpuTemplateComponentWriter{id, tensors}, _src{}, _sum{}, _dst{}, _attributes{attributes} -{ - _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0); - _sum = this->tensors().get_const_tensor(TensorType::ACL_DST_0); - _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_1); - ARM_COMPUTE_ERROR_ON_NULLPTR(_src); - ARM_COMPUTE_ERROR_ON_NULLPTR(_sum); - ARM_COMPUTE_ERROR_ON_NULLPTR(_dst); -} - -std::string ClTemplateLogits1DMaxShiftExpSum::get_name() const -{ - return "logits_1d_max_shift_exp_sum"; -} - -std::string ClTemplateLogits1DMaxShiftExpSum::get_component_code(const ComponentGroup &comp_group) const -{ - ARM_COMPUTE_UNUSED(comp_group); - - std::string code = R"_( -//------------------ START KERNEL {{meta_kernel_id}} --------------------- -#define VEC_TYPE VEC_DATA_TYPE({{DATA_TYPE}}, N0) -#define SELECT_TYPE SELECT_VEC_DATA_TYPE({{DATA_TYPE}}, N0) -{ - __global uchar *src_addr = {{src}}_ptr + {{src}}_offset_first_element_in_bytes + g_ind_1 * {{src}}_stride_y + g_ind_2 * {{src}}_stride_z; - __global uchar *dst_addr = {{dst}}_ptr + {{dst}}_offset_first_element_in_bytes + g_ind_1 * {{dst}}_stride_y + g_ind_2 * {{dst}}_stride_z; - Image sum = CONVERT_TENSOR3D_TO_IMAGE_STRUCT({{sum}}); - VEC_TYPE max_val_vec = (VEC_TYPE)({{MINVAL}}); -)_"; - - const bool beta_defined = (_attributes.beta() != 1.f); - - if (beta_defined) - { - code += R"_( - VEC_TYPE beta = (VEC_TYPE){{BETA}}; -)_"; - } - - constexpr unsigned int _serial_vector_size = 8; - const unsigned int reduction_dim_size = _src->dimension(0); - const unsigned int vector_size = adjust_vec_size(_serial_vector_size, reduction_dim_size); - const bool non_multiple_of_n0 = ((reduction_dim_size % vector_size) != 0); - - if (non_multiple_of_n0) - { - code += R"_( - VEC_TYPE data = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)src_addr); - SELECT_TYPE widx = (SELECT_TYPE)PARTIAL_N0 > VEC_OFFS(SELECT_DATA_TYPE({{DATA_TYPE}}), N0); - max_val_vec = max(max_val_vec, select((VEC_TYPE)({{MINVAL}}), data, widx)); -)_"; - } - - code += R"_( - for(uint i = PARTIAL_N0; i < {{SRC_WIDTH}}; i += N0) - { - VEC_TYPE data = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(src_addr + i * sizeof({{DATA_TYPE}}))); - max_val_vec = max(data, max_val_vec); - } - - {{DATA_TYPE}} max_val = MAX_REDUCE(max_val_vec, N0); - VEC_TYPE sum1D = 0; -)_"; - - if (non_multiple_of_n0) - { - code += R"_( - data -= max_val; -)_"; - if (beta_defined) - { - code += R"_( - data *= beta; -)_"; - } - - if (_attributes.is_log_softmax()) - { - code += R"_( - VSTORE_PARTIAL(N0, PARTIAL_N0) - (data, 0, (__global {{DATA_TYPE}} *)dst_addr); - data = exp(data); - data = select(0, data, widx); -)_"; - } - else - { - code += R"_( - data = exp(data); - data = select(0, data, widx); - VSTORE_PARTIAL(N0, PARTIAL_N0) - (data, 0, (__global {{DATA_TYPE}} *)dst_addr); -)_"; - } - - code += R"_( - sum1D += data; -)_"; - } - code += R"_( - for(uint i = PARTIAL_N0; i < {{SRC_WIDTH}}; i += N0) - { - VEC_TYPE data = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(src_addr + i * sizeof({{DATA_TYPE}}))); - data -= max_val; -)_"; - - if (beta_defined) - { - code += R"_( - data *= beta; -)_"; - } - - if (_attributes.is_log_softmax()) - { - code += R"_( - VSTORE(N0) - (data, 0, (__global {{DATA_TYPE}} *)(dst_addr + i * sizeof({{DATA_TYPE}}))); - data = exp(data); -)_"; - } - else - { - code += R"_( - data = exp(data); - VSTORE(N0) - (data, 0, (__global {{DATA_TYPE}} *)(dst_addr + i * sizeof({{DATA_TYPE}}))); -)_"; - } - - code += R"_( - sum1D += data; - } -)_"; - - code += R"_( - *((__global {{DATA_TYPE}} *)sum.ptr) = SUM_REDUCE(sum1D, N0); -} -//------------------ END KERNEL {{meta_kernel_id}} --------------------- -)_"; - - return code; -} - -void ClTemplateLogits1DMaxShiftExpSum::declare_variables(GpuKernelVariableTable &vtable, - const ComponentGroup &comp_group) const -{ - vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D), "src"); - - vtable.declare_variable(comp_group, _sum, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D), "sum"); - - vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D), "dst"); -} - -TagLUT ClTemplateLogits1DMaxShiftExpSum::get_tag_lut(const GpuKernelVariableTable &vtable, - const ComponentGroup &comp_group) const -{ - ARM_COMPUTE_UNUSED(comp_group); - - TagLUT lut{}; - - // Arguments and global shared variables - lut["src"] = vtable.get_variable(_src); - lut["sum"] = vtable.get_variable(_sum); - lut["dst"] = vtable.get_variable(_dst); - - // Local build options - lut["meta_kernel_id"] = id(); - - const DataType data_type = _src->data_type(); - - lut["DATA_TYPE"] = get_cl_type_from_data_type(data_type); - lut["BETA"] = float_to_string_with_full_precision(_attributes.beta()); - lut["MINVAL"] = (data_type == DataType::F16) ? std::string("-HALF_MAX") : std::string("-FLT_MAX"); - lut["SRC_WIDTH"] = support::cpp11::to_string(_src->dimension(0)); - - return lut; -} - -CLBuildOptions ClTemplateLogits1DMaxShiftExpSum::get_build_options(const ComponentGroup &comp_group) const -{ - ARM_COMPUTE_UNUSED(comp_group); - CLBuildOptions build_opts{}; - - const unsigned int reduction_dim_size = _src->dimension(0); - const unsigned int vector_size = adjust_vec_size(serial_vector_size, reduction_dim_size); - - build_opts.add_option("-DN0=" + support::cpp11::to_string(vector_size)); - build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string((reduction_dim_size % vector_size))); - - return build_opts; -} - -std::string ClTemplateLogits1DMaxShiftExpSum::get_config_id() const -{ - std::string config_id = get_name(); - - config_id += "_"; - config_id += support::cpp11::to_string(_src->dimension(0)); - config_id += "_"; - config_id += string_from_data_type(_src->data_type()); - - return config_id; -} - -std::set<std::string> ClTemplateLogits1DMaxShiftExpSum::get_headers_list() const -{ - return std::set<std::string>{"helpers.h", "tile_helpers.h"}; -} - -Window ClTemplateLogits1DMaxShiftExpSum::get_window() const -{ - ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized"); - - Window win = calculate_max_window(*_dst, Steps(_src->dimension(0))); - return win.collapse(win, Window::DimZ); -} - -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.h deleted file mode 100644 index ac9ddaa9d4..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.h +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATELOGITS1DMAXSHIFTEXPSUM -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATELOGITS1DMAXSHIFTEXPSUM - -#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -class ClTemplateLogits1DMaxShiftExpSum final : public IGpuTemplateComponentWriter -{ -public: - using Attributes = ClComponentLogits1DMaxShiftExpSum::Attributes; - - /** Constructor - * - * @param[in] id Component id - * @param[in] tensors Tensor arguments to the components - * @param[in] attributes Component attributes - */ - ClTemplateLogits1DMaxShiftExpSum(ComponentId id, - const ArgumentPack<ITensorInfo> &tensors, - const Attributes &attributes); - /** Prevent instances of this class from being copy constructed */ - ClTemplateLogits1DMaxShiftExpSum(const ClTemplateLogits1DMaxShiftExpSum &) = delete; - /** Prevent instances of this class from being copied */ - ClTemplateLogits1DMaxShiftExpSum &operator=(const ClTemplateLogits1DMaxShiftExpSum &) = delete; - /** Allow instances of this class to be move constructed */ - ClTemplateLogits1DMaxShiftExpSum(ClTemplateLogits1DMaxShiftExpSum &&) = default; - /** Allow instances of this class to be moved */ - ClTemplateLogits1DMaxShiftExpSum &operator=(ClTemplateLogits1DMaxShiftExpSum &&) = default; - /** Generate kernel component name */ - std::string get_name() const override; - /** Generate kernel component code template - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return std::string Component code - */ - std::string get_component_code(const ComponentGroup &comp_group) const override; - /** Declare all variables used by the component in the @p vtable - * - * @param[out] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - */ - void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - /** Generate the tag look-up table used to instantiate the component code. - * - * @param[in] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - * - * @return TagLUT Tag lookup table - */ - TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - /** Generate the build options used in the component - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return CLBuildOptions Build options - */ - CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override; - /** Generate the component config id string used for tuning */ - std::string get_config_id() const override; - /** Generate the header list used in the component */ - std::set<std::string> get_headers_list() const override; - /** Generate the execution window for the component */ - Window get_window() const override; - -private: - const ITensorInfo *_src; // input - const ITensorInfo *_sum; // exponentiated and summed input - const ITensorInfo *_dst; // exponentiated input - Attributes _attributes; -}; -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute - -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATELOGITS1DMAXSHIFTEXPSUM */ diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.cpp deleted file mode 100644 index 7d7c3e6673..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.cpp +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.h" - -#include "arm_compute/core/utils/helpers/AdjustVecSize.h" - -#include "src/core/helpers/WindowHelpers.h" -#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" -#include "support/StringSupport.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -ClTemplateLogits1DNorm::ClTemplateLogits1DNorm(ComponentId id, - const ArgumentPack<ITensorInfo> &tensors, - const Attributes &attributes) - : IGpuTemplateComponentWriter{id, tensors}, _src{}, _sum{}, _dst{}, _attributes{attributes} -{ - _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0); - _sum = this->tensors().get_const_tensor(TensorType::ACL_SRC_1); - _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0); - ARM_COMPUTE_ERROR_ON_NULLPTR(_src); - ARM_COMPUTE_ERROR_ON_NULLPTR(_sum); - ARM_COMPUTE_ERROR_ON_NULLPTR(_dst); -} - -std::string ClTemplateLogits1DNorm::get_name() const -{ - return "logits_1d_norm"; -} - -std::string ClTemplateLogits1DNorm::get_component_code(const ComponentGroup &comp_group) const -{ - ARM_COMPUTE_UNUSED(comp_group); - - std::string code = R"_( -//------------------ START KERNEL {{meta_kernel_id}} --------------------- -{ - const int x_offs = g_ind_0 * sizeof({{DATA_TYPE}}); - __global uchar *src_addr = {{src}}_ptr + {{src}}_offset_first_element_in_bytes + x_offs + g_ind_1 * {{src}}_stride_y + g_ind_2 * {{src}}_stride_z; - __global uchar *dst_addr = {{dst}}_ptr + {{dst}}_offset_first_element_in_bytes + x_offs + g_ind_1 * {{dst}}_stride_y + g_ind_2 * {{dst}}_stride_z; - Image sum = CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP({{sum}}); -)_"; - // Load max value of 1D logits vector (row) - code += R"_( - {{DATA_TYPE}} sum_val = *((__global {{DATA_TYPE}} *)offset(&sum, 0, g_ind_1)); - VEC_DATA_TYPE({{DATA_TYPE}}, N0) - data0 = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)src_addr); -)_"; - - if (_attributes.is_log_softmax()) - { - code += R"_( - sum_val = log(sum_val); - data0 -= sum_val; -)_"; - } - else - { - code += R"_( - data0 /= sum_val; -)_"; - } - - code += R"_( - STORE_VECTOR_SELECT(data, {{DATA_TYPE}}, dst_addr, N0, PARTIAL_N0, PARTIAL_N0 != 0 && g_ind_0 == 0); -} -//------------------ END KERNEL {{meta_kernel_id}} --------------------- -)_"; - - return code; -} - -void ClTemplateLogits1DNorm::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const -{ - vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D), "src"); - - vtable.declare_variable(comp_group, _sum, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D), "sum"); - - vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D), "dst"); -} - -TagLUT ClTemplateLogits1DNorm::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const -{ - ARM_COMPUTE_UNUSED(comp_group); - - TagLUT lut{}; - - // Arguments and global shared variables - lut["src"] = vtable.get_variable(_src); - lut["sum"] = vtable.get_variable(_sum); - lut["dst"] = vtable.get_variable(_dst); - - // Local build options - lut["meta_kernel_id"] = id(); - - const DataType data_type = _src->data_type(); - - lut["DATA_TYPE"] = get_cl_type_from_data_type(data_type); - - return lut; -} - -CLBuildOptions ClTemplateLogits1DNorm::get_build_options(const ComponentGroup &comp_group) const -{ - ARM_COMPUTE_UNUSED(comp_group); - CLBuildOptions build_opts{}; - - const auto root_window = comp_group.get_root_component()->template_writer()->get_window(); - const unsigned int n0 = root_window.x().step(); - build_opts.add_option("-DN0=" + support::cpp11::to_string(n0)); - build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string((_src->dimension(0) % n0))); - - return build_opts; -} - -std::string ClTemplateLogits1DNorm::get_config_id() const -{ - std::string config_id = get_name(); - - config_id += "_"; - config_id += support::cpp11::to_string(_src->dimension(0)); - config_id += "_"; - config_id += string_from_data_type(_src->data_type()); - - return config_id; -} - -std::set<std::string> ClTemplateLogits1DNorm::get_headers_list() const -{ - return std::set<std::string>{"helpers.h", "tile_helpers.h"}; -} - -Window ClTemplateLogits1DNorm::get_window() const -{ - ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized"); - constexpr unsigned int serial_vector_size = 16; - const unsigned int vector_size = adjust_vec_size(serial_vector_size, _src->dimension(0)); - - Window win = calculate_max_window(*_src, Steps(vector_size)); - return win.collapse(win, Window::DimZ); -} - -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.h deleted file mode 100644 index 5a74be5842..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.h +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATELOGITS1DNORM -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATELOGITS1DNORM - -#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -class ClTemplateLogits1DNorm final : public IGpuTemplateComponentWriter -{ -public: - using Attributes = ClComponentLogits1DNorm::Attributes; - - /** Constructor - * - * @param[in] id Component id - * @param[in] tensors Tensor arguments to the components - * @param[in] attributes Component attributes - */ - ClTemplateLogits1DNorm(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes); - /** Prevent instances of this class from being copy constructed */ - ClTemplateLogits1DNorm(const ClTemplateLogits1DNorm &) = delete; - /** Prevent instances of this class from being copied */ - ClTemplateLogits1DNorm &operator=(const ClTemplateLogits1DNorm &) = delete; - /** Allow instances of this class to be move constructed */ - ClTemplateLogits1DNorm(ClTemplateLogits1DNorm &&) = default; - /** Allow instances of this class to be moved */ - ClTemplateLogits1DNorm &operator=(ClTemplateLogits1DNorm &&) = default; - /** Generate kernel component name */ - std::string get_name() const override; - /** Generate kernel component code template - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return std::string Component code - */ - std::string get_component_code(const ComponentGroup &comp_group) const override; - /** Declare all variables used by the component in the @p vtable - * - * @param[out] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - */ - void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - /** Generate the tag look-up table used to instantiate the component code. - * - * @param[in] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - * - * @return TagLUT Tag lookup table - */ - TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - /** Generate the build options used in the component - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return CLBuildOptions Build options - */ - CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override; - /** Generate the component config id string used for tuning */ - std::string get_config_id() const override; - /** Generate the header list used in the component */ - std::set<std::string> get_headers_list() const override; - /** Generate the execution window for the component */ - Window get_window() const override; - -private: - const ITensorInfo *_src; // exponentiated input - const ITensorInfo *_sum; // exponentiated and summed input - const ITensorInfo *_dst; // normalization of input with _sum - - Attributes _attributes; -}; -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute - -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATELOGITS1DNORM */ diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp deleted file mode 100644 index 8936db6abe..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp +++ /dev/null @@ -1,470 +0,0 @@ -/* - * Copyright (c) 2023-2024 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "ClTemplatePool2d.h" - -#include "arm_compute/core/utils/helpers/AdjustVecSize.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/core/utils/StringUtils.h" - -#include "src/core/helpers/WindowHelpers.h" -#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h" -#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" -#include "support/StringSupport.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -namespace -{ -// Shape indexes for NHWC Datalayout -constexpr static int32_t height_idx = 2; -constexpr static int32_t width_idx = 1; -constexpr static int32_t channel_idx = 0; -} // namespace -ClTemplatePool2d::ClTemplatePool2d(ComponentId id, - const ArgumentPack<ITensorInfo> &tensors, - const Attributes &attributes, - const Settings &settings) - : IGpuTemplateComponentWriter{id, tensors}, _src{}, _dst{}, _attributes{attributes}, _settings{settings} -{ - _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0); - _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0); - ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst); -} - -std::string ClTemplatePool2d::get_name() const -{ - return "pool2d"; -} - -std::string ClTemplatePool2d::get_component_code(const ComponentGroup &comp_group) const -{ - ARM_COMPUTE_UNUSED(comp_group); - - // Condition to use 2x2 optimized kernel - if (_attributes.pool_size() == Size2D(2, 2)) - { - return get_2x2_kernel_code(); - } - else - { - return get_MxN_kernel_code(); - } -} - -std::string ClTemplatePool2d::get_MxN_kernel_code() const -{ - const auto pool_type = _attributes.pool_type(); - const bool fp_mixed_precision = (_src->data_type() == DataType::F16) && pool_type != PoolingType::MAX; - - // Define pool op macro. - std::string pool_op = (pool_type == PoolingType::AVG) ? R"_(#define POOL_OP(x,y) ((x) + (y)))_" - : R"_(#define POOL_OP(x,y) (fmax((x), (y))) )_"; - - // Kernel start - // Note: If C is not multiple of N0, we shift back of PARTIAL_N0 elements to compute the leftover elements for get_global_id(0) == 0 - // Note: If C is less than N0, N0 should be SHRINKED to the closest smaller N0. This operation is performed on the host side - std::string code = R"_( -//------------------ START KERNEL {{meta_kernel_id}} --------------------- -// IN_0(src) {{src}} -// OUT(dst, accum) {{dst}} - -{ - const int idx_out_c = g_ind_0; - const int idx_out_w = g_ind_1; -)_"; - - // Add macro for POOL_OP - code += "\n" + pool_op + "\n"; - - code += R"_( - const int idx_out_h = g_ind_2 % {{DST_HEIGHT}}; - const int idx_out_n = g_ind_2 / {{DST_HEIGHT}}; -)_"; - - // Define common variables. - code += R"_( - __global unsigned char *in_base_ptr = {{src}}_ptr + {{src}}_offset_first_element_in_bytes + idx_out_c * sizeof({{DATA_TYPE}}) + idx_out_n * {{src}}_stride_w; - - __global unsigned char *out_base_ptr = {{dst}}_ptr + {{dst}}_offset_first_element_in_bytes + idx_out_c * sizeof({{DATA_TYPE}}) + idx_out_w * {{dst}}_stride_y + idx_out_h * {{dst}}_stride_z + idx_out_n * {{dst}}_stride_w; - - VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0) - res0 = {{INITIAL_VALUE}}; - - const int idx_in_w = idx_out_w * {{STRIDE_X}} - {{PAD_X}}; - const int idx_in_h = idx_out_h * {{STRIDE_Y}} - {{PAD_Y}}; - - const int pool_x_s = max((int)0, -idx_in_w); - const int pool_x_e = min((int){{POOL_SIZE_X}}, (int){{SRC_WIDTH}} - idx_in_w); - const int pool_y_s = max((int)0, -idx_in_h); - const int pool_y_e = min((int){{POOL_SIZE_Y}}, (int){{SRC_HEIGHT}} - idx_in_h); -)_"; - - // Determine filter size depending on if padding is excluded or not - if (_attributes.exclude_padding()) - { - code += R"_( - const int filter_size = (pool_y_e - pool_y_s) * (pool_x_e - pool_x_s); -)_"; - } - else - { - code += R"_( - const int filter_size = {{POOL_SIZE_X}} * {{POOL_SIZE_Y}}; -)_"; - } - - // Loop through pool size - // if global pooling - if (_attributes.pool_size().x() == _src->dimension(width_idx) && - _attributes.pool_size().y() == _src->dimension(height_idx)) - { - // Begin loop - code += R"_( - // Global pooling path - for(int y = 0; y < {{POOL_SIZE_Y}}; ++y) - { - #pragma unroll 8 - for(int x = 0; x < {{POOL_SIZE_X}}; ++x) - { - VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0) - data0; -)_"; - } - else // if local pooling size - { - code += R"_( - for(int y = pool_y_s; y < pool_y_e; ++y) - { - #pragma unroll 8 - for(int x = pool_x_s; x < pool_x_e; ++x) - { - VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0) - data0; -)_"; - } // end else - - // if condition inside loop - use 32bit acc if mixed_precision. - // End loop through pooling section. - if (fp_mixed_precision) - { - // In case of FP_MIXED_PRECISION, ACC_DATA_TYPE is != DATA_TYPE - code += R"_( - data0 = CONVERT(VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + (x + idx_in_w) * {{src}}_stride_y + (y + idx_in_h) * {{src}}_stride_z)), VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)); - res0 = POOL_OP(res0, data0); - } - } -)_"; - } - else // load data, compute result and end loop - { - code += R"_( - data0 = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + (x + idx_in_w) * {{src}}_stride_y + (y + idx_in_h) * {{src}}_stride_z)); - res0 = POOL_OP(res0, data0); - } - } -)_"; - } - - // For Pool AVG ONLY, divide pool output by filter size - if (pool_type == PoolingType::AVG) - { - code += R"_( - res0 /= (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0))filter_size; -)_"; - } - - // If mixed precision convert datatype before storing. Then end kernel. - if (fp_mixed_precision) - { - code += R"_( - VEC_DATA_TYPE({{DATA_TYPE}}, N0) - res_converted0 = CONVERT(res0, VEC_DATA_TYPE({{DATA_TYPE}}, N0)); - STORE_VECTOR_SELECT(res_converted, {{DATA_TYPE}}, out_base_ptr, N0, PARTIAL_N0, (PARTIAL_N0 != 0) && g_ind_0 == 0); -)_"; - } - else - { - // Store data - code += R"_( - STORE_VECTOR_SELECT(res, {{DATA_TYPE}}, out_base_ptr, N0, PARTIAL_N0, (PARTIAL_N0 != 0) && g_ind_0 == 0); -)_"; - } - - code += R"_( -//------------------ END KERNEL {{meta_kernel_id}} --------------------- -} -)_"; - - return code; -} - -std::string ClTemplatePool2d::get_2x2_kernel_code() const -{ - const auto pool_type = _attributes.pool_type(); - const bool fp_mixed_precision = (_src->data_type() == DataType::F16) && pool_type != PoolingType::MAX; - std::string pool_op = (pool_type == PoolingType::AVG) ? R"_(#define POOL_OP(x,y) ((x) + (y)))_" - : R"_(#define POOL_OP(x,y) (fmax((x), (y))) )_"; - - std::string code = R"_( -//------------------ START KERNEL {{meta_kernel_id}} --------------------- -// IN_0(src) {{src}} -// OUT(dst, accum) {{dst}} - -#define SELECT_TYPE SELECT_VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0) - -{ - const int idx_out_c = g_ind_0; - const int idx_out_w = g_ind_1; -)_"; - - // Add pool op macro - code += "\n" + pool_op + "\n"; - - // If batch size != 1, the batch size dimension is collapsed over the height dimension - code += R"_( - const int idx_out_h = g_ind_2 % {{DST_HEIGHT}}; - const int idx_out_n = g_ind_2 / {{DST_HEIGHT}}; -)_"; - - code += R"_( - const int idx_in_w = idx_out_w * {{STRIDE_X}} - {{PAD_X}}; - const int idx_in_h = idx_out_h * {{STRIDE_Y}} - {{PAD_Y}}; - - __global unsigned char *in_base_ptr = {{src}}_ptr + {{src}}_offset_first_element_in_bytes + idx_out_c * sizeof({{DATA_TYPE}}) + idx_out_n * {{src}}_stride_w; - __global unsigned char *out_base_ptr = {{dst}}_ptr + {{dst}}_offset_first_element_in_bytes + idx_out_c * sizeof({{DATA_TYPE}}) + idx_out_w * {{dst}}_stride_y + idx_out_h * {{dst}}_stride_z + idx_out_n * - {{dst}}_stride_w; - const int pool_x_s = max((int)0, -idx_in_w); - const int pool_x_e = min((int)2, (int){{SRC_WIDTH}} - idx_in_w); - const int pool_y_s = max((int)0, -idx_in_h); - const int pool_y_e = min((int)2, (int){{SRC_HEIGHT}} - idx_in_h); - - const int filter_size = (pool_x_e - pool_x_s) * (pool_y_e - pool_y_s); - const int x0 = pool_x_s + idx_in_w; - const int y0 = pool_y_s + idx_in_h; - const int x1 = pool_x_e - 1 + idx_in_w; - const int y1 = pool_y_e - 1 + idx_in_h; - - REPEAT_VAR_INIT_TO_CONST(4, VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0), data, 0); -)_"; - - if (fp_mixed_precision) - { - // In case of FP_MIXED_PRECISION, ACC_DATA_TYPE is != DATA_TYPE - code += R"_( - data0 = CONVERT(VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x0 * {{src}}_stride_y + y0 * {{src}}_stride_z)), VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)); - data1 = CONVERT(VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x1 * {{src}}_stride_y + y0 * {{src}}_stride_z)), VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)); - data2 = CONVERT(VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x0 * {{src}}_stride_y + y1 * {{src}}_stride_z)), VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)); - data3 = CONVERT(VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x1 * {{src}}_stride_y + y1 * {{src}}_stride_z)), VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)); -)_"; - } - else - { - code += R"_( - data0 = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x0 * {{src}}_stride_y + y0 * {{src}}_stride_z)); - data1 = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x1 * {{src}}_stride_y + y0 * {{src}}_stride_z)); - data2 = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x0 * {{src}}_stride_y + y1 * {{src}}_stride_z)); - data3 = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x1 * {{src}}_stride_y + y1 * {{src}}_stride_z)); -)_"; - } - - if (pool_type != PoolingType::MAX) - { - // Make invalid the values loaded if the x or y coordinate was clamped (out-of-bound) - code += R"_( - if(filter_size != 4) - { - SELECT_TYPE cond_w_s = (SELECT_TYPE)idx_in_w < (SELECT_TYPE)0; - SELECT_TYPE cond_w_e = (SELECT_TYPE)idx_in_w >= (SELECT_TYPE)({{SRC_WIDTH}} - 1); - SELECT_TYPE cond_h_s = (SELECT_TYPE)idx_in_h < (SELECT_TYPE)0; - SELECT_TYPE cond_h_e = (SELECT_TYPE)idx_in_h >= (SELECT_TYPE)({{SRC_HEIGHT}} - 1); - - data0 = select(data0, (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)){{INITIAL_VALUE}}, (SELECT_TYPE)(cond_w_s | cond_h_s)); - data1 = select(data1, (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)){{INITIAL_VALUE}}, (SELECT_TYPE)(cond_w_e | cond_h_s)); - data2 = select(data2, (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)){{INITIAL_VALUE}}, (SELECT_TYPE)(cond_w_s | cond_h_e)); - data3 = select(data3, (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)){{INITIAL_VALUE}}, (SELECT_TYPE)(cond_w_e | cond_h_e)); - } -)_"; - } - - code += R"_( - VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0) - res0 = data0; - res0 = POOL_OP(res0, data1); - res0 = POOL_OP(res0, data2); - res0 = POOL_OP(res0, data3); -)_"; - - if (pool_type == PoolingType::AVG) - { - // If avg pooling divide result accordingly. - if (_attributes.exclude_padding()) - { - code += R"_( - res0 /= (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0))filter_size; -)_"; - } - else - { - code += R"_( - res0 /= (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0))4; -)_"; - } - } - - // Store result - if (fp_mixed_precision) - { - code += R"_( - VEC_DATA_TYPE({{DATA_TYPE}}, N0) - res_converted0 = CONVERT(res0, VEC_DATA_TYPE({{DATA_TYPE}}, N0)); - STORE_VECTOR_SELECT(res_converted, {{DATA_TYPE}}, out_base_ptr, N0, PARTIAL_N0, (PARTIAL_N0 != 0) && g_ind_0 == 0); -)_"; - } - else - { - code += R"_( - STORE_VECTOR_SELECT(res, {{DATA_TYPE}}, out_base_ptr, N0, PARTIAL_N0, (PARTIAL_N0 != 0) && g_ind_0 == 0); -)_"; - } - - code += R"_( - //------------------ END KERNEL {{meta_kernel_id}} --------------------- -} -#undef SELECT_TYPE -)_"; - - return code; -} - -void ClTemplatePool2d::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const -{ - vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer), - "src"); - - vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer), - "dst"); -} - -TagLUT ClTemplatePool2d::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const -{ - ARM_COMPUTE_UNUSED(comp_group); - - TagLUT lut{}; - // Arguments and global shared variables - lut["src"] = vtable.get_variable(_src); - lut["dst"] = vtable.get_variable(_dst); - - // Local build options - lut["meta_kernel_id"] = id(); - - // Retrieve relevant data - const auto padding = _attributes.pad(); - const auto stride = _attributes.stride(); - const auto pool_size = _attributes.pool_size(); - const auto data_type = _src->data_type(); - const auto use_fp_mixed_precision = - (_src->data_type() == DataType::F16) && _attributes.pool_type() != PoolingType::MAX; - const std::string max_initial_value = - _settings.use_inf_as_limit() ? "(-INFINITY)" - : float_to_string_with_full_precision(std::numeric_limits<float>::lowest()); - - // pool specific - lut["STRIDE_X"] = stride.x(); - lut["STRIDE_Y"] = stride.y(); - lut["PAD_X"] = padding.left; - lut["PAD_Y"] = padding.top; - lut["POOL_SIZE_X"] = pool_size.width; - lut["POOL_SIZE_Y"] = pool_size.height; - - // Datatypes and variables - lut["ACC_DATA_TYPE"] = get_cl_type_from_data_type( - (use_fp_mixed_precision) ? (DataType::F32) : (data_type)); // Type of accumulators to use. - lut["DATA_TYPE"] = get_cl_type_from_data_type(data_type); - lut["SRC_WIDTH"] = _src->dimension(width_idx); - lut["SRC_HEIGHT"] = _src->dimension(height_idx); - lut["INITIAL_VALUE"] = (_attributes.pool_type() == PoolingType::MAX) ? max_initial_value : std::string("0"); - - // Tensor specific data - lut["DST_HEIGHT"] = _dst->dimension(height_idx); - - return lut; -} - -CLBuildOptions ClTemplatePool2d::get_build_options(const ComponentGroup &comp_group) const -{ - const auto root_window = comp_group.get_root_component()->template_writer()->get_window(); - const unsigned int n0 = root_window.x().step(); - const unsigned int partial_store_n0 = _dst->dimension(0) % n0; - - CLBuildOptions build_opts{}; - build_opts.add_option("-DN0=" + support::cpp11::to_string(n0)); - build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0)); - - return build_opts; -} - -std::string ClTemplatePool2d::get_config_id() const -{ - const DataType data_type = _src->data_type(); - const DataLayout data_layout = _src->data_layout(); - - std::string config_id{}; - config_id += "pooling_layer_2d_"; - config_id += lower_string(string_from_data_type(data_type)); - config_id += "_"; - config_id += lower_string(string_from_data_layout(data_layout)); - config_id += "_"; - config_id += support::cpp11::to_string(_dst->dimension(width_idx)); - config_id += "_"; - config_id += support::cpp11::to_string(_dst->dimension(height_idx)); - config_id += "_"; - config_id += support::cpp11::to_string(_dst->dimension(channel_idx)); - - return config_id; -} - -std::set<std::string> ClTemplatePool2d::get_headers_list() const -{ - return std::set<std::string>{"helpers.h", "tile_helpers.h", "repeat.h"}; -} - -Window ClTemplatePool2d::get_window() const -{ - ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized"); - const auto output_shape = _dst->tensor_shape(); - const unsigned int vec_size = adjust_vec_size(((_dst->data_type() == DataType::F32) ? 2 : 4), _dst->dimension(0)); - - // Create and configure kernel window - auto win = calculate_max_window(output_shape, Steps(vec_size)); - win = win.collapse_if_possible(win, Window::DimZ); // collapse window on batch size. - return win; -} - -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.h deleted file mode 100644 index d1d3c01669..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.h +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEPOOL2D -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEPOOL2D - -#include "arm_compute/core/experimental/Types.h" -#include "arm_compute/dynamic_fusion/sketch/attributes/Pool2dAttributes.h" -#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h" - -#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -class ClTemplatePool2d final : public IGpuTemplateComponentWriter -{ -public: - using Attributes = ClComponentPool2d::Attributes; - using Settings = ClComponentPool2d::Settings; - /** Constructor - * - * @param[in] id Component id - * @param[in] tensors Tensor arguments to the components - * @param[in] attributes Component attributes - * @param[in] settings Component settings - */ - ClTemplatePool2d(ComponentId id, - const ArgumentPack<ITensorInfo> &tensors, - const Attributes &attributes, - const Settings &settings); - - /** Prevent instances of this class from being copy constructed */ - ClTemplatePool2d(const ClTemplatePool2d &direct_conv2d) = delete; - - /** Prevent instances of this class from being copied */ - ClTemplatePool2d &operator=(const ClTemplatePool2d &direct_conv2d) = delete; - - /** Allow instances of this class to be move constructed */ - ClTemplatePool2d(ClTemplatePool2d &&direct_conv2d) = default; - - /** Allow instances of this class to be moved */ - ClTemplatePool2d &operator=(ClTemplatePool2d &&direct_conv2d) = default; - - /** Generate kernel component name */ - std::string get_name() const override; - - /** Generate kernel component code template - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return std::string Component code - */ - std::string get_component_code(const ComponentGroup &comp_group) const override; - /** Declare all variables used by the component in the @p vtable - * - * @param[out] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - */ - void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - /** Generate the tag look-up table used to instantiate the component code. - * - * @param[in] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - * - * @return TagLUT Tag lookup table - */ - TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - /** Generate the build options used in the component - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return CLBuildOptions Build options - */ - CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override; - - /** Generate the component config id string used for tuning */ - std::string get_config_id() const override; - - /** Generate the header list used in the component */ - std::set<std::string> get_headers_list() const override; - - /** Generate the execution window for the component */ - Window get_window() const override; - -private: - /** Generate pooling kernel template code optimized for 2x2 pooling - * - * @return std::String Component code - */ - std::string get_2x2_kernel_code() const; - - /** Generate generalised pooling kernel template code for MxN pooling - * - * @return std::String Component code - */ - std::string get_MxN_kernel_code() const; - - const ITensorInfo *_src; - const ITensorInfo *_dst; - Attributes _attributes; - Settings _settings; -}; -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEPOOL2D */ diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.cpp deleted file mode 100644 index c882353fcb..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.cpp +++ /dev/null @@ -1,161 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "ClTemplateReshape.h" - -#include "arm_compute/core/utils/helpers/AdjustVecSize.h" -#include "arm_compute/core/utils/StringUtils.h" - -#include "src/core/helpers/WindowHelpers.h" -#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -constexpr unsigned int vector_size_byte_opencl = 16; - -ClTemplateReshape::ClTemplateReshape(ComponentId id, const ArgumentPack<ITensorInfo> &tensors) - : IGpuTemplateComponentWriter{id, tensors}, _src{}, _dst{} -{ - _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0); - _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0); - ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst); -} - -std::string ClTemplateReshape::get_name() const -{ - return "reshape"; -} - -std::string ClTemplateReshape::get_component_code(const ComponentGroup &comp_group) const -{ - ARM_COMPUTE_UNUSED(comp_group); - std::string code; - - code = R"_( -//------------------ START KERNEL {{meta_kernel_id}} --------------------- - -// IN(src) {{src}} -// OUT(dst, accum) {{dst}} - -TILE(uint, M0, 1, g_dst_indirect_y); -{ - __global uchar * base_src_ptr = {{src}}_ptr + {{src}}_offset_first_element_in_bytes; - const int tile_vertical_idx = g_ind_1 * {{arg_dst}}_c + g_ind_2 * {{arg_dst}}_c * {{arg_dst}}_w; - LOOP_UNROLLING(int, _m0, 0, 1, M0, - { - const int row_idx = _m0 * {{arg_dst}}_c + tile_vertical_idx; - const int tile_horizontal_idx = g_ind_0 + row_idx; - LOOP_UNROLLING(int, _n0, 0, 1, N0, - { - {{src}}_ptr = base_src_ptr; - const int linear_idx = tile_horizontal_idx + _n0; - const int in_id_x = linear_idx % {{src}}_c; - const int in_id_y = (linear_idx / {{src}}_c) % {{src}}_w; - const int in_id_z = linear_idx / ({{src}}_c * {{src}}_w); - {{src}}_ptr += in_id_x * sizeof({{DATA_TYPE}}) + in_id_y * {{src}}_stride_y + in_id_z * {{src}}_stride_z; - {{dst}}[_m0].s[_n0] = *((__global {{DATA_TYPE}} *){{src}}_ptr); - }) - }) - - LOOP_UNROLLING(int, i, 0, 1, M0, - { - g_dst_indirect_y[i].v = (uint)min((int)(g_ind_1 + i), (int)({{arg_dst}}_w) - 1); - g_dst_indirect_y[i].v += (int)(g_ind_2 % {{arg_dst}}_h) * (int)({{arg_dst}}_w); - g_dst_indirect_y[i].v += (int)(g_ind_2 / {{arg_dst}}_h) * (int)({{arg_dst}}_w * {{arg_dst}}_h); - }) -} -//------------------ END KERNEL {{meta_kernel_id}} --------------------- -)_"; - return code; -} - -void ClTemplateReshape::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const -{ - vtable.declare_variable(comp_group, _src, - GpuKernelArgumentInfo(common_tensor_type), // GpuKernelArgumentInfo::Type::Image_3D - "src"); - - vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(common_tensor_type), "dst"); -} - -TagLUT ClTemplateReshape::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const -{ - ARM_COMPUTE_UNUSED(comp_group); - TagLUT lut{}; - - // Arguments and global shared variables - lut["src"] = vtable.get_variable(_src); - lut["dst"] = vtable.get_variable(_dst); - lut["arg_dst"] = vtable.get_variable(comp_group.get_any_dst_tensor()); - lut["meta_kernel_id"] = id(); - lut["DATA_TYPE"] = get_cl_type_from_data_type(_dst->data_type()); - - return lut; -} - -CLBuildOptions ClTemplateReshape::get_build_options(const ComponentGroup &comp_group) const -{ - CLBuildOptions build_opts{}; - const auto root_window = comp_group.get_root_component()->template_writer()->get_window(); - const unsigned int n0 = root_window.x().step(); - const unsigned int m0 = root_window.y().step(); - const unsigned int partial_store_n0 = _dst->dimension(0) % n0; - build_opts.add_option("-DN0=" + support::cpp11::to_string(n0)); - build_opts.add_option("-DM0=" + support::cpp11::to_string(m0)); - build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0)); - - return build_opts; -} - -std::string ClTemplateReshape::get_config_id() const -{ - std::string config_id{}; - config_id += lower_string(string_from_data_type(_dst->data_type())); - config_id += "_"; - config_id += support::cpp11::to_string(_dst->dimension(0)); - config_id += "_"; - config_id += support::cpp11::to_string(_dst->dimension(1)); - - return config_id; -} - -std::set<std::string> ClTemplateReshape::get_headers_list() const -{ - return std::set<std::string>{"helpers.h", "tile_helpers.h"}; -} - -Window ClTemplateReshape::get_window() const -{ - ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized"); - const unsigned int n0 = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0)); - Window win = calculate_max_window(*_dst, Steps(n0)); - return win.collapse(win, Window::DimZ); -} - -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.h deleted file mode 100644 index 838a21db6d..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.h +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATERESHAPE -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATERESHAPE - -#include "arm_compute/core/experimental/Types.h" - -#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -class ClTemplateReshape final : public IGpuTemplateComponentWriter -{ -public: - /** Constructor - * - * @param[in] id Component id - * @param[in] tensors Tensor arguments to the components - */ - ClTemplateReshape(ComponentId id, const ArgumentPack<ITensorInfo> &tensors); - /** Prevent instances of this class from being copy constructed */ - ClTemplateReshape(const ClTemplateReshape &reshape) = delete; - /** Prevent instances of this class from being copied */ - ClTemplateReshape &operator=(const ClTemplateReshape &reshape) = delete; - /** Allow instances of this class to be move constructed */ - ClTemplateReshape(ClTemplateReshape &&reshape) = default; - /** Allow instances of this class to be moved */ - ClTemplateReshape &operator=(ClTemplateReshape &&reshape) = default; - - /** Generate kernel component name */ - std::string get_name() const override; - - /** Generate kernel component code template - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return std::string Component code - */ - std::string get_component_code(const ComponentGroup &comp_group) const override; - - /** Declare all variables used by the component in the @p vtable - * - * @param[out] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - */ - void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - - /** Generate the tag look-up table used to instantiate the component code. - * - * @param[in] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - * - * @return TagLUT Tag lookup table - */ - TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - - /** Generate the build options used in the component - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return CLBuildOptions Build options - */ - CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override; - - /** Generate the component config id string used for tuning */ - std::string get_config_id() const override; - - /** Generate the header list used in the component */ - std::set<std::string> get_headers_list() const override; - - /** Generate the execution window for the component */ - Window get_window() const override; - -private: - const ITensorInfo *_src; - const ITensorInfo *_dst; -}; -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATERESHAPE */ diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.cpp deleted file mode 100644 index 846c712ceb..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.cpp +++ /dev/null @@ -1,279 +0,0 @@ -/* - * Copyright (c) 2022-2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "ClTemplateResize.h" - -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/utils/helpers/AdjustVecSize.h" -#include "arm_compute/core/utils/StringUtils.h" - -#include "src/core/helpers/WindowHelpers.h" -#include "src/core/utils/ScaleUtils.h" -#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -ClTemplateResize::ClTemplateResize(ComponentId id, - const ArgumentPack<ITensorInfo> &tensors, - const ClTemplateResize::Attributes &attributes) - : IGpuTemplateComponentWriter{id, tensors}, _src{}, _dst{}, _attributes{attributes} -{ - _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0); - _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0); - - ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst); -} - -std::string ClTemplateResize::get_name() const -{ - return _attributes.interpolation_policy() == InterpolationPolicy::BILINEAR ? "resize_bilinear" : "resize_nearest"; -} - -std::string ClTemplateResize::get_component_code(const IGpuTemplateComponentWriter::ComponentGroup &comp_group) const -{ - ARM_COMPUTE_UNUSED(comp_group); - - std::string code = R"_( -//------------------ START KERNEL {{meta_kernel_id}} --------------------- -TILE(uint, 1, 1, g_dst_indirect_y); -{ - const int yo = g_ind_2 % {{arg_dst}}_h; - const int bout = g_ind_2 / {{arg_dst}}_h; -)_"; - - if (_attributes.interpolation_policy() == InterpolationPolicy::NEAREST_NEIGHBOR) - { - if (_attributes.sampling_policy() == SamplingPolicy::TOP_LEFT) - { - code += R"_( - float xi_f = (g_ind_1 * {{SCALE_X}}); - float yi_f = (yo * {{SCALE_Y}}); -)_"; - } - else - { - code += R"_( - float xi_f = ((g_ind_1 + 0.5f) * {{SCALE_X}}); - float yi_f = ((yo + 0.5f) * {{SCALE_Y}}); -)_"; - } - - if (_attributes.align_corners()) - { - code += R"_( - xi_f = round(xi_f); - yi_f = round(yi_f); -)_"; - } - - code += R"_( - const int xi0 = clamp((int)xi_f, 0, (int){{src}}_w - 1); - const int yi0 = clamp((int)yi_f, 0, (int){{src}}_h - 1); - - T_LOAD_NHWC_WITH_DILATION({{SRC_DATA_TYPE}}, 1, 1, N0, {{SRC_TENSOR_TYPE}}, {{src}}, bout, yi0, xi0, g_ind_0, {{src}}_w, {{src}}_h, 1, 1, false, {{dst}}); -)_"; - } - else if (_attributes.interpolation_policy() == InterpolationPolicy::BILINEAR) - { - if (_attributes.sampling_policy() == SamplingPolicy::TOP_LEFT) - { - code += R"_( - float xi_f = (g_ind_1 * {{SCALE_X}}); - float yi_f = (yo * {{SCALE_Y}}); -)_"; - } - else - { - code += R"_( - float xi_f = ((g_ind_1 + 0.5f) * {{SCALE_X}} - 0.5f); - float yi_f = ((yo + 0.5f) * {{SCALE_Y}} - 0.5f); -)_"; - } - - code += R"_( - const int xi = (int)floor(xi_f); - const int yi = (int)floor(yi_f); - - TILE({{SRC_DATA_TYPE}}, 1, N0, in00); - TILE({{SRC_DATA_TYPE}}, 1, N0, in01); - TILE({{SRC_DATA_TYPE}}, 1, N0, in10); - TILE({{SRC_DATA_TYPE}}, 1, N0, in11); - - in00[0].v = {{CONSTANT_VALUE}}; - in01[0].v = {{CONSTANT_VALUE}}; - in10[0].v = {{CONSTANT_VALUE}}; - in11[0].v = {{CONSTANT_VALUE}}; - - const int xi0 = clamp(xi, 0, (int){{src}}_w - 1); - const int yi0 = clamp(yi, 0, (int){{src}}_h - 1); - const int xi1 = clamp(xi + 1, 0, (int){{src}}_w - 1); - const int yi1 = clamp(yi + 1, 0, (int){{src}}_h - 1); - - T_LOAD_NHWC_WITH_DILATION({{SRC_DATA_TYPE}}, 1, 1, N0, {{SRC_TENSOR_TYPE}}, {{src}}, bout, yi0, xi0, g_ind_0, {{src}}_w, {{src}}_h, 1, 1, false, in00); - T_LOAD_NHWC_WITH_DILATION({{SRC_DATA_TYPE}}, 1, 1, N0, {{SRC_TENSOR_TYPE}}, {{src}}, bout, yi0, xi1, g_ind_0, {{src}}_w, {{src}}_h, 1, 1, false, in01); - T_LOAD_NHWC_WITH_DILATION({{SRC_DATA_TYPE}}, 1, 1, N0, {{SRC_TENSOR_TYPE}}, {{src}}, bout, yi1, xi0, g_ind_0, {{src}}_w, {{src}}_h, 1, 1, false, in10); - T_LOAD_NHWC_WITH_DILATION({{SRC_DATA_TYPE}}, 1, 1, N0, {{SRC_TENSOR_TYPE}}, {{src}}, bout, yi1, xi1, g_ind_0, {{src}}_w, {{src}}_h, 1, 1, false, in11); -)_"; - - if (is_data_type_float(_src->data_type())) - { - code += R"_( - const {{SRC_DATA_TYPE}} a = ({{SRC_DATA_TYPE}})(xi_f - (float)xi); - const {{SRC_DATA_TYPE}} b = ({{SRC_DATA_TYPE}})(1.f - a); - const {{SRC_DATA_TYPE}} a1 = ({{SRC_DATA_TYPE}})(yi_f - (float)yi); - const {{SRC_DATA_TYPE}} b1 = ({{SRC_DATA_TYPE}})(1.f - a1); - - // Calculate the output - {{dst}}[0].v = ((in00[0].v * b * b1) + (in01[0].v * a * b1) + (in10[0].v * b * a1) + (in11[0].v * a * a1)); -)_"; - } - else - { - code += R"_( - const float a = (xi_f - (float)xi); - const float b = (1.f - a); - const float a1 = (yi_f - (float)yi); - const float b1 = (1.f - a1); - - {{dst}}[0].v = CONVERT_SAT( - (CONVERT(in00[0].v, VEC_DATA_TYPE(float, N0)) * b * b1) + - (CONVERT(in01[0].v, VEC_DATA_TYPE(float, N0)) * a * b1) + - (CONVERT(in10[0].v, VEC_DATA_TYPE(float, N0)) * b * a1) + - (CONVERT(in11[0].v, VEC_DATA_TYPE(float, N0)) * a * a1), VEC_DATA_TYPE({{DST_DATA_TYPE}}, N0)); -)_"; - } - } - else - { - ARM_COMPUTE_ERROR("Unsupported interpolation policy"); - } - - code += R"_( - g_dst_indirect_y[0].v = g_ind_1 + (yo * (int)({{arg_dst}}_w)) + bout * (int)({{arg_dst}}_w * {{arg_dst}}_h); -} -//------------------ END KERNEL {{meta_kernel_id}} --------------------- -)_"; - - return code; -} - -void ClTemplateResize::declare_variables(GpuKernelVariableTable &vtable, - const IGpuTemplateComponentWriter::ComponentGroup &comp_group) const -{ - vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer), - "src"); - - vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer), - "dst"); -} - -TagLUT ClTemplateResize::get_tag_lut(const GpuKernelVariableTable &vtable, - const IGpuTemplateComponentWriter::ComponentGroup &comp_group) const -{ - TagLUT lut{}; - - // Arguments and global shared variables - lut["src"] = vtable.get_variable(_src); - lut["dst"] = vtable.get_variable(_dst); - - const auto dst_argument = vtable.get_variable(comp_group.get_any_dst_tensor()); - lut["arg_dst"] = dst_argument.uniq_name; - - // Local build options - lut["meta_kernel_id"] = id(); - lut["SRC_DATA_TYPE"] = get_cl_type_from_data_type(_src->data_type()); - lut["SRC_TENSOR_TYPE"] = "BUFFER"; - lut["DST_DATA_TYPE"] = get_cl_type_from_data_type(_dst->data_type()); - lut["CONSTANT_VALUE"] = string_from_pixel_value(0, _src->data_type()); - - const float scale_x = - scale_utils::calculate_resize_ratio(_src->dimension(1), _dst->dimension(1), _attributes.align_corners()); - const float scale_y = - scale_utils::calculate_resize_ratio(_src->dimension(2), _dst->dimension(2), _attributes.align_corners()); - - lut["SCALE_X"] = float_to_string_with_full_precision(scale_x); - lut["SCALE_Y"] = float_to_string_with_full_precision(scale_y); - - return lut; -} - -CLBuildOptions ClTemplateResize::get_build_options(const IGpuTemplateComponentWriter::ComponentGroup &comp_group) const -{ - const Window root_window = comp_group.get_root_component()->template_writer()->get_window(); - const unsigned int n0 = root_window.x().step(); - const unsigned int m0 = root_window.y().step(); - const unsigned int partial_n0 = _dst->dimension(0) % n0; - - CLBuildOptions build_opts; - - build_opts.add_option("-DN0=" + support::cpp11::to_string(n0)); - build_opts.add_option("-DM0=" + support::cpp11::to_string(m0)); - build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_n0)); - - return build_opts; -} - -std::string ClTemplateResize::get_config_id() const -{ - std::string config_id{}; - - config_id += "resize_"; - config_id += - (_attributes.interpolation_policy() == InterpolationPolicy::NEAREST_NEIGHBOR ? "NEAREST_NEIGHBOR" : ""); - config_id += (_attributes.interpolation_policy() == InterpolationPolicy::BILINEAR ? "BILINEAR" : ""); - config_id += "_"; - config_id += (_attributes.sampling_policy() == SamplingPolicy::CENTER ? "center" : "topleft"); - config_id += "_"; - config_id += support::cpp11::to_string(_dst->dimension(0)); - config_id += "_"; - config_id += support::cpp11::to_string(_dst->dimension(1)); - config_id += "_"; - config_id += support::cpp11::to_string(_dst->dimension(2)); - config_id += "_"; - config_id += support::cpp11::to_string(_dst->dimension(3)); - - return config_id; -} - -std::set<std::string> ClTemplateResize::get_headers_list() const -{ - return std::set<std::string>{"helpers.h", "tile_helpers.h"}; -} - -Window ClTemplateResize::get_window() const -{ - ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized"); - - const unsigned int n0 = adjust_vec_size(16 / _src->element_size(), _src->dimension(0)); - Window win = calculate_max_window(*_dst, Steps(n0)); - return win.collapse(win, Window::DimZ); -} - -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.h deleted file mode 100644 index 4c69007185..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.h +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATERESIZE -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATERESIZE - -#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -class ClTemplateResize final : public IGpuTemplateComponentWriter -{ -public: - using Attributes = ClComponentResize::Attributes; - - /** Constructor - * - * @param[in] id Component id - * @param[in] tensors Tensor arguments to the components - * @param[in] attributes Component attributes - */ - ClTemplateResize(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes); - - /** Destructor */ - ~ClTemplateResize() override = default; - - /** Prevent instances of this class from being copy constructed */ - ClTemplateResize(const ClTemplateResize &resize) = delete; - - /** Prevent instances of this class from being copied */ - ClTemplateResize &operator=(const ClTemplateResize &resize) = delete; - - /** Allow instances of this class to be move constructed */ - ClTemplateResize(ClTemplateResize &&resize) = default; - - /** Allow instances of this class to be moved */ - ClTemplateResize &operator=(ClTemplateResize &&resize) = default; - - /** Generate kernel component name */ - std::string get_name() const override; - - /** Generate kernel component code template - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return std::string Component code - */ - std::string get_component_code(const ComponentGroup &comp_group) const override; - - /** Declare all variables used by the component in the @p vtable - * - * @param[out] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - */ - void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - - /** Generate the tag look-up table used to instantiate the component code. - * - * @param[in] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - * - * @return TagLUT Tag lookup table - */ - TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - - /** Generate the build options used in the component - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return CLBuildOptions Build options - */ - CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override; - - /** Generate the component config id string used for tuning */ - std::string get_config_id() const override; - - /** Generate the header list used in the component */ - std::set<std::string> get_headers_list() const override; - - /** Generate the execution window for the component */ - Window get_window() const override; - -private: - const ITensorInfo *_src; - const ITensorInfo *_dst; - Attributes _attributes; -}; - -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute - -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATERESIZE */ diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp deleted file mode 100644 index d0ec91e0a9..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "ClTemplateStore.h" - -#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -ClTemplateStore::ClTemplateStore(ComponentId id, const ArgumentPack<ITensorInfo> &tensors) - : IGpuTemplateComponentWriter{id, tensors}, _src{}, _dst{} -{ - _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0); - _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0); -} - -std::string ClTemplateStore::get_name() const -{ - return "store"; -} - -std::string ClTemplateStore::get_component_code(const ComponentGroup &comp_group) const -{ - ARM_COMPUTE_UNUSED(comp_group); - - return R"_( -//------------------ START KERNEL {{meta_kernel_id}} STORE --------------------- -{ - bool x_cond = PARTIAL_N0 != 0 && get_global_id(0) == 0; - - T_STORE_INDIRECT_WIDTH_SELECT({{DST_DATA_TYPE}}, M0, N0, PARTIAL_N0, {{DST_TENSOR_TYPE}}, {{dst}}, g_ind_0, {{dst}}_stride_y, x_cond, {{src}}, g_dst_indirect_y); -//------------------ END KERNEL {{meta_kernel_id}} STORE --------------------- -} - -)_"; -} - -void ClTemplateStore::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const -{ - vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer), - "src"); - vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer), - "dst"); -} - -TagLUT ClTemplateStore::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const -{ - TagLUT lut{}; - - // Arguments and global shared variables - lut["src"] = vtable.get_variable(_src); - lut["dst"] = vtable.get_variable(_dst); - - // Local build options - lut["meta_kernel_id"] = id(); - lut["DST_TENSOR_TYPE"] = "BUFFER"; - lut["DST_DATA_TYPE"] = _dst->data_type(); - - ARM_COMPUTE_UNUSED(comp_group); - return lut; -} - -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h deleted file mode 100644 index b8c82ceadd..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATESTORE -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATESTORE - -#include "arm_compute/core/experimental/Types.h" - -#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -class ClTemplateStore final : public IGpuTemplateComponentWriter -{ -public: - /** Constructor - * - * @param[in] id Component id - * @param[in] tensors Tensor arguments to the components - */ - ClTemplateStore(ComponentId id, const ArgumentPack<ITensorInfo> &tensors); - /** Prevent instances of this class from being copy constructed */ - ClTemplateStore(const ClTemplateStore &store) = delete; - /** Prevent instances of this class from being copied */ - ClTemplateStore &operator=(const ClTemplateStore &store) = delete; - /** Allow instances of this class to be move constructed */ - ClTemplateStore(ClTemplateStore &&store) = default; - /** Allow instances of this class to be moved */ - ClTemplateStore &operator=(ClTemplateStore &&store) = default; - /** Generate kernel component name */ - std::string get_name() const override; - /** Generate kernel component code template - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return std::string Component code - */ - std::string get_component_code(const ComponentGroup &comp_group) const override; - /** Declare all variables used by the component in the @p vtable - * - * @param[out] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - */ - void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - /** Generate the tag look-up table used to instantiate the component code. - * - * @param[in] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - * - * @return TagLUT Tag lookup table - */ - TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - -private: - const ITensorInfo *_src; - const ITensorInfo *_dst; -}; -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATESTORE */ diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp deleted file mode 100644 index d3d7c8db83..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp +++ /dev/null @@ -1,325 +0,0 @@ -/* - * Copyright (c) 2022-2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "ClTemplateWriter.h" - -#include "arm_compute/core/CL/CLKernelLibrary.h" - -#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -/// @note: some tags can be unused since they could be used only for the macros, or only for the component code -std::string ClTemplateWriter::replace_tags(const std::string &code_template, const TagLUT &tags) -{ - std::string replaced_code = ""; - bool scanning_pattern = false; - std::string pattern_found = ""; - for (size_t i = 0; i < code_template.size() - 1; ++i) - { - if (!scanning_pattern) - { - if (code_template[i] == '{' && code_template[i + 1] == '{') - { - i += 1; - scanning_pattern = true; - pattern_found = ""; - } - else - { - replaced_code += code_template[i]; - } - } - else - { - if (code_template[i] == '}' && code_template[i + 1] == '}') - { - i += 1; - scanning_pattern = false; - std::string err = "Pattern " + pattern_found + " not found in tags"; - ARM_COMPUTE_ERROR_ON_MSG(tags.find(pattern_found) == tags.end(), err.c_str()); - replaced_code += tags.find(pattern_found)->second.value; - } - else - { - pattern_found += code_template[i]; - } - } - } - - return replaced_code; -} -ClTemplateWriter::~ClTemplateWriter() -{ -} -ClTemplateWriter::ClTemplateWriter(const GpuKernelComponentGroup &components) : _components{components} -{ -} -std::string ClTemplateWriter::get_name() -{ - return write_kernel_name(); -} -std::string ClTemplateWriter::get_code() -{ - return write_code(); -} -std::string ClTemplateWriter::get_config_id() -{ - std::string config_id = get_name(); - for (const auto &comp : _components) - { - config_id += "--" + comp->template_writer()->get_config_id() + "--"; - } - - return config_id; -} - -CLBuildOptions ClTemplateWriter::get_build_options() -{ - CLBuildOptions build_opts{}; - - for (const auto &comp : _components) - { - build_opts.add_options(comp->template_writer()->get_build_options(_components).options()); - } - - return build_opts; -} - -Window ClTemplateWriter::get_window() const -{ - const auto root_comp = _components.get_root_component(); - ARM_COMPUTE_ERROR_ON_MSG(root_comp == nullptr, "No root component found"); - return root_comp->template_writer()->get_window(); -} - -std::map<ITensorInfo::Id, GpuKernelArgument> ClTemplateWriter::get_tensors() -{ - // Assemble GpuKernelArguments - std::map<ITensorInfo::Id, GpuKernelArgument> tensors; - for (const auto t : _components.get_argument_tensors()) - { - tensors.emplace(t->id(), GpuKernelArgument{*t, _vtable.get_variable(t).kernel_argument_info}); - } - return tensors; -} - -std::string ClTemplateWriter::write_code() -{ - ARM_COMPUTE_ERROR_ON_MSG(_components.empty(), "No components found"); - - // These data structures will hold the data from all the components in the blueprint - std::set<std::string> headers_list{}; - std::set<std::string> additional_macros{}; - std::vector<std::string> component_codes{}; // vector because order matters - - // Pass 1: Declare all kernel variables - for (auto &component : _components) - { - component->template_writer()->declare_variables(_vtable, _components); - } - // Pass 2: Generate component codes - for (auto &component : _components) - { - const auto component_writer = component->template_writer(); - auto curr_headers_list = component_writer->get_headers_list(); - auto curr_additional_macros = component_writer->get_additional_macros(); - auto curr_component_code = component_writer->get_component_code(_components); - const auto var_lut = component_writer->get_tag_lut( - _vtable, - _components); // Ideally can be merged with get_component_code once we have finer-grained code generation technique - component_codes.push_back(replace_tags(curr_component_code, var_lut)); - - headers_list.insert(curr_headers_list.begin(), curr_headers_list.end()); - if (!additional_macros.empty()) // Some components might not have any - { - additional_macros.insert(replace_tags(curr_additional_macros, var_lut)); - } - } - - // Step 3: Assemble the data gathered by traversing the graph into the string "code" - std::string code = ""; - - for (auto &header : headers_list) - { -#if defined(EMBEDDED_KERNELS) - code += CLKernelLibrary::get().get_program(header).first; -#else // defined(EMBEDDED_KERNELS) - code += "#include \"" + header + "\"\n"; -#endif // defined(EMBEDDED_KERNELS) - } - - for (auto ¯os : additional_macros) - { - code += macros; - } - - auto arguments = _components.get_argument_tensors(); - std::sort(arguments.begin(), arguments.end(), - [](const ITensorInfo *l, const ITensorInfo *r) { return l->id() < r->id(); }); - code += write_kernel_signature(_vtable.get_variable_list(arguments)); - - code += "\n{\n\n"; - - code += " //------------------ START KERNEL_BUILDER_COORDINATE ---------------------\n\n"; - code += write_global_section(); - code += " //------------------ END KERNEL_BUILDER_COORDINATE ---------------------\n"; - - { - const auto tiles = _components.get_tiles(); - std::stringstream tiles_ss; - - tiles_ss << " //------------------ START TILE DECLARATION ---------------------\n"; - - for (auto tile : tiles) - { - const auto var = _vtable.get_variable(tile); - const auto data_type = get_cl_type_from_data_type(tile->data_type()); - const auto var_name = var.uniq_name; - - tiles_ss << " TILE(" << data_type << ", M0, N0, " << var_name << ");\n"; - } - - tiles_ss << " //------------------ END TILE DECLARATION ---------------------\n"; - - code += tiles_ss.str(); - } - - for (const auto &component_code : component_codes) - { - code += component_code; - code += "\n"; - } - - code += "}\n"; - - return code; -} -std::string ClTemplateWriter::write_global_section() const -{ - const auto dst_info = _components.get_any_dst_tensor(); - const auto dst_w = dst_info->dimension(0); - const auto tile_w = std::max(1, get_window().x().step()); - const auto tile_h = std::max(1, get_window().y().step()); - auto leftover_w = dst_w % tile_w; - - std::string code = ""; - code += std::string(" int g_ind_0 = GET_SPATIAL_IDX(0, ") + std::to_string(tile_w) + ", " + - std::to_string(leftover_w) + ");\n"; - code += std::string(" int g_ind_1 = GET_SPATIAL_IDX(1, ") + std::to_string(tile_h) + ", " + "0);\n"; - code += std::string(" int g_ind_2 = GET_SPATIAL_IDX(2, 1, 0);\n\n"); - - code += " const bool g_cond_x = (g_ind_0 == 0);\n"; - code += " const bool g_cond_y = (g_ind_1 == 0);\n"; - - return code; -} -std::string ClTemplateWriter::write_argument_declaration(const GpuKernelVariableTable::TensorVariable &var) const -{ - std::string code; - switch (var.kernel_argument_info.type) - { - case GpuKernelArgumentInfo::Type::Vector: - { - code += "\n VECTOR_DECLARATION(" + var.uniq_name + ")"; - break; - } - case GpuKernelArgumentInfo::Type::Image: - { - code += "\n IMAGE_DECLARATION(" + var.uniq_name + ")"; - break; - } - case GpuKernelArgumentInfo::Type::Image_3D: - { - code += "\n IMAGE_DECLARATION(" + var.uniq_name + "),"; - code += "\n unsigned int " + var.uniq_name + "_stride_z"; - break; - } - case GpuKernelArgumentInfo::Type::Image_3D_Export_To_ClImage2D: - { - code += "\n __read_only image2d_t " + var.uniq_name + "_img,"; - code += "\n unsigned int " + var.uniq_name + "_stride_z"; - break; - } - case GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer: - { - code += "\n TENSOR4D_T(" + var.uniq_name + ", BUFFER)"; - break; - } - case GpuKernelArgumentInfo::Type::Tensor_4D_t_Image: - { - code += "\n TENSOR4D_T(" + var.uniq_name + ", IMAGE)"; - break; - } - case GpuKernelArgumentInfo::Type::Tensor_3D: - { - code += "\n TENSOR3D_DECLARATION(" + var.uniq_name + ")"; - break; - } - default: - { - ARM_COMPUTE_ERROR("Unsupported declaration generation for GpuKernelArgumentInfo::Type"); - } - } - return code; -} -std::string ClTemplateWriter::write_kernel_signature(const GpuKernelVariableTable::VariableList &argument_list) const -{ - std::string code = "\n__kernel void " + write_kernel_name() + "("; - - for (int i = 0; i < static_cast<int>(argument_list.size()) - 1; ++i) - { - code += write_argument_declaration(argument_list[i]) + ","; - } - if (static_cast<int>(argument_list.size()) - 1 >= 0) - { - code += write_argument_declaration(argument_list[argument_list.size() - 1]); - } - - code += ')'; - - return code; -} -std::string ClTemplateWriter::write_kernel_name() const -{ - if (_components.empty()) - { - return "empty_kernel"; - } - std::string name = _components.empty() ? "" : _components[0]->template_writer()->get_name(); - for (size_t i = 1; i < _components.size(); ++i) - { - name += "___"; - name += _components[i]->template_writer()->get_name(); - } - - return name; -} -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.h deleted file mode 100644 index 83f617b6c6..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.h +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEWRITER -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEWRITER - -#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h" -#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" -#include "src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h" - -#include <map> - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -/** Use a templated-string-based method to write kernel code - * It stitches the component code templates together based on the valid fusion configuration. - * It then instantiates the actual kernel code from the template and the generated tag lookup table. - */ -class ClTemplateWriter : public IGpuKernelWriter -{ -public: - /** Instantiates a kernel code string from the kernel code template - * @note: some tags can be unused since they could be used only for the macros, or only for the component code - * - * @param[in] code_template Kernel code template - * @param[in] tags Tag lookup table - * - * @return std::string Instantiated kernel string - */ - static std::string replace_tags(const std::string &code_template, const TagLUT &tags); - /** Default constructor */ - ClTemplateWriter() = default; - /** Constructor - * - * @param[in] components Kernel component group from which the kernel will be generated - */ - ClTemplateWriter(const GpuKernelComponentGroup &components); - /** Destructor */ - ~ClTemplateWriter() override; - /** Generate kernel name */ - std::string get_name() override; - /** Generate kernel code */ - std::string get_code() override; - /** Generate build options */ - CLBuildOptions get_build_options() override; - /** Generate config id string of the entire kernel. This is used for tuning */ - std::string get_config_id() override; - /** Generate execution window */ - Window get_window() const override; - /** Get the kernel argument lists of the kernel*/ - std::map<ITensorInfo::Id, GpuKernelArgument> get_tensors() override; - -private: - std::string write_kernel_name() const; - std::string write_code(); - std::string write_global_section() const; - std::string write_argument_declaration(const GpuKernelVariableTable::TensorVariable &var) const; - std::string write_kernel_signature(const GpuKernelVariableTable::VariableList &argument_list) const; - -private: - GpuKernelComponentGroup _components{}; - GpuKernelVariableTable _vtable{}; -}; -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEWRITER */ |