diff options
Diffstat (limited to 'src/dynamic_fusion/sketch/gpu')
64 files changed, 104 insertions, 5865 deletions
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.cpp b/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.cpp deleted file mode 100644 index 9cecfc2ffd..0000000000 --- a/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.cpp +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h" -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -bool operator==(const GpuKernelArgumentInfo &info0, const GpuKernelArgumentInfo &info1) -{ - return info0.type == info1.type; -} -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h b/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h index 03817173f4..c923bf9c16 100644 --- a/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h +++ b/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELARGUMENT -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELARGUMENT +#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELARGUMENT_H +#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELARGUMENT_H #include "arm_compute/core/TensorInfo.h" @@ -32,96 +32,6 @@ namespace experimental { namespace dynamic_fusion { -/** Contain information required to set up a kernel argument at run time - * @deprecated To be removed along with ClTemplateWriter - */ -struct GpuKernelArgumentInfo -{ - /** Enumerate all the tensor arguments variants used by all kernel implementations. */ - enum class Type : int - { - Scalar, - - Vector, - - Image, - Image_Reinterpret_As_3D, - Image_Export_To_ClImage2D, - - Image_3D, // 3D Tensor represented as a 2D Image + stride_z - Image_3D_Export_To_ClImage2D, - - Tensor_3D, - Tensor_4D, - Tensor_4D_t_Buffer, - Tensor_4D_t_Image, - - Tensor_Special_0, - }; - /** Default constructor */ - GpuKernelArgumentInfo() = default; - /** Constructor */ - GpuKernelArgumentInfo(Type type) : type{type} - { - } - Type type{Type::Tensor_4D_t_Buffer}; -}; -bool operator==(const GpuKernelArgumentInfo &info0, const GpuKernelArgumentInfo &info1); -/** Kernel argument information linked with its corresponding @ref ITensorInfo - * @deprecated To be removed along with ClTemplateWriter - */ -class GpuKernelArgument -{ -public: - /** Constructor - * - * @param[in] tensor_info Associated @ref ITensorInfo - * @param[in] kernel_arg_info Associated @ref GpuKernelArgumentInfo - */ - GpuKernelArgument(const ITensorInfo &tensor_info, const GpuKernelArgumentInfo &kernel_arg_info) - : _tensor_info{tensor_info}, _kernel_arg_info{kernel_arg_info} - { - } - /** Get workload tensor id */ - ITensorInfo::Id id() const - { - return _tensor_info.id(); - } - /** Get associated @ref ITensorInfo */ - ITensorInfo *tensor_info() - { - return &_tensor_info; - } - /** Get associated @ref ITensorInfo */ - const ITensorInfo *tensor_info() const - { - return &_tensor_info; - } - /** Get associated @ref GpuKernelArgumentInfo */ - GpuKernelArgumentInfo *kernel_argument_info() - { - return &_kernel_arg_info; - } - /** Get associated @ref GpuKernelArgumentInfo */ - const GpuKernelArgumentInfo *kernel_argument_info() const - { - return &_kernel_arg_info; - } - /** Check if the associated workload tensor has valid id - * - * @return true if has valid id - * @return false otherwise - */ - bool has_valid_id() const - { - return _tensor_info.has_valid_id(); - } - -private: - TensorInfo _tensor_info{}; - GpuKernelArgumentInfo _kernel_arg_info{}; -}; -#ifdef ACL_INTERNAL_TEST_CKW_IN_DF /** Describe how the tensor runtime memory can be accessed * * Please see documentation under @ref GpuKernelArgumentBinding @@ -243,9 +153,8 @@ private: }; Value _value; }; -#endif // ACL_INTERNAL_TEST_CKW_IN_DF } // namespace dynamic_fusion } // namespace experimental } // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELARGUMENT */ +#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELARGUMENT_H diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h b/src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h index 24812cd8a7..11d916eec9 100644 --- a/src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h +++ b/src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,19 +21,15 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELSOURCECODE -#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELSOURCECODE +#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELSOURCECODE_H +#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELSOURCECODE_H #include "arm_compute/core/CL/CLCompileContext.h" #include "arm_compute/core/Window.h" #include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h" -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF -#include <map> -#else // ACL_INTERNAL_TEST_CKW_IN_DF #include <deque> -#endif // ACL_INTERNAL_TEST_CKW_IN_DF #include <string> namespace arm_compute @@ -43,11 +39,7 @@ namespace experimental namespace dynamic_fusion { /** The argument list of a @ref GpuKernelSourceCode */ -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF -using GpuKernelArgumentList = std::map<ITensorInfo::Id, GpuKernelArgument>; -#else // ACL_INTERNAL_TEST_CKW_IN_DF using GpuKernelArgumentList = std::deque<GpuKernelArgumentBinding>; -#endif // ACL_INTERNAL_TEST_CKW_IN_DF /** Container of kernel code to be compiled and run in a @ref GpuUnitWorkload */ @@ -132,4 +124,4 @@ private: } // namespace dynamic_fusion } // namespace experimental } // namespace arm_compute -#endif /* ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELSOURCECODE */ +#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELSOURCECODE_H diff --git a/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp b/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp index 502ceab807..725a46e91c 100644 --- a/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp +++ b/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -26,14 +26,10 @@ #include "arm_compute/core/experimental/Types.h" #include "src/dynamic_fusion/sketch/ArgumentPack.h" +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h" #include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h" #include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h" #include "src/dynamic_fusion/sketch/gpu/GpuComponentServices.h" -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF -#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.h" -#else // ACL_INTERNAL_TEST_CKW_IN_DF -#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h" -#endif // ACL_INTERNAL_TEST_CKW_IN_DF namespace arm_compute { @@ -41,8 +37,8 @@ namespace experimental { namespace dynamic_fusion { -GpuLogicalKernel::GpuLogicalKernel(GpuComponentServices *services, const GpuKernelComponentGroup &components) - : _comp_group{components}, _store_components{} +GpuLogicalKernel::GpuLogicalKernel(GpuComponentServices *services, GpuKernelComponentGroup components) // NOLINT + : _comp_group{std::move(components)}, _store_components{} { ARM_COMPUTE_UNUSED(services); } @@ -50,19 +46,11 @@ GpuLogicalKernel::GpuLogicalKernel(GpuComponentServices *services, const GpuKern GpuKernelSourceCode GpuLogicalKernel::write_kernel_code() { GpuKernelSourceCode code; -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - ClTemplateWriter writer{_comp_group}; -#else // ACL_INTERNAL_TEST_CKW_IN_DF - GpuCkwDriver writer{_comp_group}; -#endif // ACL_INTERNAL_TEST_CKW_IN_DF + GpuCkwDriver writer{_comp_group}; code.name(writer.get_name()); code.code(writer.get_code()); -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - code.arguments(writer.get_tensors()); -#else // ACL_INTERNAL_TEST_CKW_IN_DF code.arguments(writer.get_kernel_arguments()); -#endif // ACL_INTERNAL_TEST_CKW_IN_DF code.build_options(writer.get_build_options()); code.config_id(writer.get_config_id()); code.window(writer.get_window()); diff --git a/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.h b/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.h index 1fd40f0acd..e2bc83b286 100644 --- a/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.h +++ b/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_GPULOGICALKERNEL -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPULOGICALKERNEL +#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPULOGICALKERNEL_H +#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPULOGICALKERNEL_H #include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" #include "src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h" @@ -52,7 +52,7 @@ public: * @param[in] services @ref GpuComponentServices to be used * @param[in] components Component group from which this logical kernel is initialized */ - explicit GpuLogicalKernel(GpuComponentServices *services, const GpuKernelComponentGroup &components); + explicit GpuLogicalKernel(GpuComponentServices *services, GpuKernelComponentGroup components); // NOLINT /** Allow instances of this class to be copy constructed */ GpuLogicalKernel(const GpuLogicalKernel &) = default; /** Allow instances of this class to be copied */ @@ -71,4 +71,4 @@ private: } // namespace dynamic_fusion } // namespace experimental } // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_GPULOGICALKERNEL */ +#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPULOGICALKERNEL_H diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h index 43bcc47fa0..5d75bcaaa0 100644 --- a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h +++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSOURCECODE -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSOURCECODE +#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSOURCECODE_H +#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSOURCECODE_H #include "arm_compute/core/experimental/Types.h" #include "arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h" @@ -36,7 +36,6 @@ namespace experimental { namespace dynamic_fusion { -#ifdef ACL_INTERNAL_TEST_CKW_IN_DF namespace { /** Extract kernel arguments of one tensor from a flat list of kernel arguments. @@ -70,7 +69,6 @@ GpuKernelArgumentList extract_kernel_args_for_one_tensor(GpuKernelArgumentList & return tensor_kargs; } } // namespace -#endif // ACL_INTERNAL_TEST_CKW_IN_DF /** Uniquely identifies a @ref GpuUnitWorkload within a @ref GpuWorkloadSourceCode */ using UnitWorkloadId = int32_t; @@ -83,25 +81,11 @@ class GpuWorkloadArgument public: /** Default constructor */ GpuWorkloadArgument() = default; -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF /** Constructor * - * @param[in] tensor_info @ref ITensorInfo of the workload argument - * @param[in] mem_desc @ref MemoryDescriptor of the workload argument - * @param[in] kernel_arg_info @ref GpuKernelArgumentInfo of the workload argument - */ - GpuWorkloadArgument(const ITensorInfo &tensor_info, - const MemoryDescriptor &mem_desc, - const GpuKernelArgumentInfo &kernel_arg_info) - : _tensor_info{tensor_info}, _mem_desc{mem_desc}, _kernel_arg_info{kernel_arg_info} - { - } -#else // ACL_INTERNAL_TEST_CKW_IN_DF - /** Constructor - * - * @param[in] tensor_info @ref ITensorInfo of the workload argument - * @param[in] mem_desc @ref MemoryDescriptor of the workload argument - * @param[in] kernel_arg_list @ref GpuKernelArgumentList of the workload argument + * @param[in] tensor_info @ref ITensorInfo of the workload argument + * @param[in] mem_desc @ref MemoryDescriptor of the workload argument + * @param[in] kernel_args @ref GpuKernelArgumentList of the workload argument */ GpuWorkloadArgument(const ITensorInfo &tensor_info, const MemoryDescriptor &mem_desc, @@ -109,7 +93,6 @@ public: : _tensor_info{tensor_info}, _mem_desc{mem_desc}, _kernel_args{kernel_args} { } -#endif // ACL_INTERNAL_TEST_CKW_IN_DF /** Get tensor id within workload */ ITensorInfo::Id id() const { @@ -135,18 +118,6 @@ public: { return &_mem_desc; } -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - /** Get @ref GpuKernelArgumentInfo of the argument */ - GpuKernelArgumentInfo *kernel_argument_info() - { - return &_kernel_arg_info; - } - /** Get @ref GpuKernelArgumentInfo of the argument */ - const GpuKernelArgumentInfo *kernel_argument_info() const - { - return &_kernel_arg_info; - } -#else // ACL_INTERNAL_TEST_CKW_IN_DF /** Get @ref GpuKernelArgumentList of the workload tensor */ GpuKernelArgumentList *kernel_argument_list() { @@ -157,7 +128,6 @@ public: { return &_kernel_args; } -#endif // ACL_INTERNAL_TEST_CKW_IN_DF /** Check if the workload argument has valid id * * @return true If has valid id @@ -169,13 +139,9 @@ public: } private: - TensorInfo _tensor_info{}; - MemoryDescriptor _mem_desc{}; -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - GpuKernelArgumentInfo _kernel_arg_info{}; -#else // ACL_INTERNAL_TEST_CKW_IN_DF + TensorInfo _tensor_info{}; + MemoryDescriptor _mem_desc{}; GpuKernelArgumentList _kernel_args{}; -#endif // ACL_INTERNAL_TEST_CKW_IN_DF }; /** Describes when a unit workload is run. @@ -259,22 +225,7 @@ public: const auto uwk_id = static_cast<UnitWorkloadId>(_unit_workloads.size()); const auto unit_work = GpuUnitWorkload(uwk_id, kernel_code, stage); _unit_workloads.push_back(unit_work); -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - ARM_COMPUTE_UNUSED(context); - // Assemble kernel argument with memory descriptor to form workload argument - for (const auto &id_arg : kernel_code.arguments()) - { - const auto arg_id = id_arg.first; - const auto arg = id_arg.second; - _workload_arguments[arg_id] = - GpuWorkloadArgument{*arg.tensor_info(), mem_map.at(arg_id), *arg.kernel_argument_info()}; - if (_tensor_uwork_map.find(arg_id) == _tensor_uwork_map.end()) - { - _tensor_uwork_map[arg_id] = std::set<UnitWorkloadId>(); - } - _tensor_uwork_map[arg_id].insert(uwk_id); - } -#else // ACL_INTERNAL_TEST_CKW_IN_DF + GpuKernelArgumentList flat_kernel_args = kernel_code.arguments(); GpuKernelArgumentList tensor_kargs{}; while (true) @@ -296,7 +247,7 @@ public: _tensor_uwork_map[tensor_id].insert(uwk_id); } } -#endif // ACL_INTERNAL_TEST_CKW_IN_DF + return uwk_id; } /** Get a unit workload from its id */ @@ -346,4 +297,4 @@ private: } // namespace dynamic_fusion } // namespace experimental } // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSOURCECODE */ +#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSOURCECODE_H diff --git a/src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h b/src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h index ad474674f9..84972501de 100644 --- a/src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h +++ b/src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_IGPUKERNELWRITER -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_IGPUKERNELWRITER +#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_IGPUKERNELWRITER_H +#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_IGPUKERNELWRITER_H #include "arm_compute/core/CL/CLCompileContext.h" #include "arm_compute/core/Window.h" @@ -62,23 +62,14 @@ public: virtual std::string get_config_id() = 0; /** Generate execution window */ virtual Window get_window() const = 0; - /** Get the kernel argument lists of the kernel - * @deprecated To be removed along with ClTemplateWriter - */ - virtual std::map<ITensorInfo::Id, GpuKernelArgument> get_tensors() - { - return {}; - } -#ifdef ACL_INTERNAL_TEST_CKW_IN_DF /** Get the flat list of arguments of the kernel*/ virtual GpuKernelArgumentList get_kernel_arguments() { return {}; } -#endif // ACL_INTERNAL_TEST_CKW_IN_DF }; } // namespace dynamic_fusion } // namespace experimental } // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_IGPUKERNELWRITER */ +#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_IGPUKERNELWRITER_H diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h index b80ce0d816..f8770920b7 100644 --- a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h +++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h @@ -24,15 +24,12 @@ #ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_GPUCKWDRIVER_H #define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_GPUCKWDRIVER_H -#include "ckw/Kernel.h" - #include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h" #include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" #include "src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h" #include "compute_kernel_writer/include/ckw/Kernel.h" #include "compute_kernel_writer/include/ckw/KernelArgument.h" -#include <map> #include <string> namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.h index f1f0e6747b..c9ce7eb269 100644 --- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.h +++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Arm Limited. + * Copyright (c) 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWSTORE -#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWSTORE +#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWSTORE_H +#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWSTORE_H #include "src/core/common/Macros.h" #include "src/dynamic_fusion/sketch/gpu/ckw_driver/IGpuCkwComponentDriver.h" @@ -33,8 +33,6 @@ namespace experimental { namespace dynamic_fusion { -/** An interface used by @ref ClTemplateWriter to write source code for a kernel component - */ class GpuCkwStore : public IGpuCkwComponentDriver { public: @@ -61,4 +59,4 @@ private: } // namespace experimental } // namespace arm_compute -#endif /* ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWSTORE */ +#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWSTORE_H diff --git a/src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h b/src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h index 4b8eea2f57..6678c929e9 100644 --- a/src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h +++ b/src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_IGPUKERNELCOMPONENT -#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_IGPUKERNELCOMPONENT +#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_IGPUKERNELCOMPONENT_H +#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_IGPUKERNELCOMPONENT_H #include "src/dynamic_fusion/sketch/ArgumentPack.h" #include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h" @@ -100,10 +100,6 @@ public: return _properties; } /** Get writer for the component */ - virtual const IGpuTemplateComponentWriter *template_writer() const - { - return nullptr; - } virtual const IGpuCkwComponentDriver *ckw_component_driver() const { return nullptr; @@ -119,4 +115,4 @@ private: } // namespace dynamic_fusion } // namespace experimental } // namespace arm_compute -#endif /* ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_IGPUKERNELCOMPONENT */ +#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_IGPUKERNELCOMPONENT_H diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp index fdf528a65d..e316bdf46d 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,11 +24,7 @@ #include "ClComponentActivation.h" #include "src/core/CL/CLValidate.h" -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF -#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h" -#else //ACL_INTERNAL_TEST_CKW_IN_DF #include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.h" -#endif //ACL_INTERNAL_TEST_CKW_IN_DF namespace arm_compute { @@ -69,11 +65,7 @@ ClComponentActivation::ClComponentActivation(ComponentId const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes) : IGpuKernelComponent{id, properties, tensors}, -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - _component_writer{std::make_unique<ClTemplateActivation>(id, tensors, attributes)} -#else //ACL_INTERNAL_TEST_CKW_IN_DF _component_writer{std::make_unique<GpuCkwActivation>(id, tensors, attributes)} -#endif //ACL_INTERNAL_TEST_CKW_IN_DF { } @@ -81,11 +73,7 @@ ClComponentActivation::~ClComponentActivation() { } -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF -const IGpuTemplateComponentWriter *ClComponentActivation::template_writer() const -#else //ACL_INTERNAL_TEST_CKW_IN_DF const IGpuCkwComponentDriver *ClComponentActivation::ckw_component_driver() const -#endif //ACL_INTERNAL_TEST_CKW_IN_DF { return _component_writer.get(); } diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h index 02c854356a..b8185158f3 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTACTIVATION -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTACTIVATION +#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTACTIVATION_H +#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTACTIVATION_H #include "arm_compute/function_info/ActivationLayerInfo.h" @@ -41,11 +41,7 @@ template <typename T> class ArgumentPack; /** Forward declaration */ -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF -class ClTemplateActivation; -#else //ACL_INTERNAL_TEST_CKW_IN_DF class GpuCkwActivation; -#endif //ACL_INTERNAL_TEST_CKW_IN_DF class ClComponentActivation final : public IGpuKernelComponent { @@ -106,11 +102,7 @@ public: ClComponentActivation &operator=(ClComponentActivation &&component) = default; /** Get writer for the component */ -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - const IGpuTemplateComponentWriter *template_writer() const override; -#else //ACL_INTERNAL_TEST_CKW_IN_DF - const IGpuCkwComponentDriver *ckw_component_driver() const override; -#endif //ACL_INTERNAL_TEST_CKW_IN_DF + const IGpuCkwComponentDriver *ckw_component_driver() const override; /** Get component type */ GpuComponentType type() const override @@ -119,13 +111,9 @@ public: } private: -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - std::unique_ptr<ClTemplateActivation> _component_writer; -#else //ACL_INTERNAL_TEST_CKW_IN_DF std::unique_ptr<GpuCkwActivation> _component_writer; -#endif //ACL_INTERNAL_TEST_CKW_IN_DF }; } // namespace dynamic_fusion } // namespace experimental } // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTACTIVATION */ +#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTACTIVATION_H diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp index b1636795a3..e1850d78c4 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -27,11 +27,7 @@ #include "src/core/CL/CLValidate.h" #include "src/dynamic_fusion/sketch/ArgumentPack.h" -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF -#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.h" -#else //ACL_INTERNAL_TEST_CKW_IN_DF #include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.h" -#endif //ACL_INTERNAL_TEST_CKW_IN_DF namespace arm_compute { @@ -72,22 +68,16 @@ ClComponentCast::ClComponentCast(ComponentId id, const Attributes &attributes, const Settings &settings) : IGpuKernelComponent{id, properties, tensors}, -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - _component_writer{std::make_unique<ClTemplateCast>(id, tensors, attributes)} -#else //ACL_INTERNAL_TEST_CKW_IN_DF _component_writer{std::make_unique<GpuCkwCast>(id, tensors, attributes)} -#endif //ACL_INTERNAL_TEST_CKW_IN_DF { ARM_COMPUTE_UNUSED(attributes, settings); } + ClComponentCast::~ClComponentCast() { } -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF -const IGpuTemplateComponentWriter *ClComponentCast::template_writer() const -#else //ACL_INTERNAL_TEST_CKW_IN_DF + const IGpuCkwComponentDriver *ClComponentCast::ckw_component_driver() const -#endif //ACL_INTERNAL_TEST_CKW_IN_DF { return _component_writer.get(); } diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h index ed77b1203b..201dacc288 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTCAST -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTCAST +#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTCAST_H +#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTCAST_H #include "arm_compute/dynamic_fusion/sketch/attributes/CastAttributes.h" @@ -49,11 +49,7 @@ private: }; /** Forward declaration */ -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF -class ClTemplateCast; -#else //ACL_INTERNAL_TEST_CKW_IN_DF class GpuCkwCast; -#endif //ACL_INTERNAL_TEST_CKW_IN_DF class ClComponentCast final : public IGpuKernelComponent { @@ -120,11 +116,7 @@ public: /** Allow instances of this class to be moved */ ClComponentCast &operator=(ClComponentCast &&component) = default; /** Get writer for the component */ -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - const IGpuTemplateComponentWriter *template_writer() const override; -#else //ACL_INTERNAL_TEST_CKW_IN_DF const IGpuCkwComponentDriver *ckw_component_driver() const override; -#endif //ACL_INTERNAL_TEST_CKW_IN_DF /** Get component type */ GpuComponentType type() const override { @@ -132,14 +124,10 @@ public: } private: -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - std::unique_ptr<ClTemplateCast> _component_writer; -#else //ACL_INTERNAL_TEST_CKW_IN_DF - std::unique_ptr<GpuCkwCast> _component_writer; -#endif //ACL_INTERNAL_TEST_CKW_IN_DF + std::unique_ptr<GpuCkwCast> _component_writer; }; } // namespace dynamic_fusion } // namespace experimental } // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTCAST */ +#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTCAST_H diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.cpp index ca8037c393..7cd23d6115 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.cpp +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -28,11 +28,7 @@ #include "arm_compute/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.h" #include "src/core/CL/CLValidate.h" -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF -#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.h" -#else //ACL_INTERNAL_TEST_CKW_IN_DF #include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwDepthwiseConv2d.h" -#endif //ACL_INTERNAL_TEST_CKW_IN_DF namespace arm_compute { @@ -212,22 +208,14 @@ ClComponentDepthwiseConv2d::ClComponentDepthwiseConv2d(ComponentId const Attributes &attributes, const Settings &settings) : IGpuKernelComponent{id, properties, tensors}, -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - _component_writer{std::make_unique<ClTemplateDepthwiseConv2d>(id, tensors, attributes, settings)} -#else //ACL_INTERNAL_TEST_CKW_IN_DF _component_writer{std::make_unique<GpuCkwDepthwiseConv2d>(id, tensors, attributes, settings)} -#endif //ACL_INTERNAL_TEST_CKW_IN_DF { ARM_COMPUTE_UNUSED(attributes, settings); } ClComponentDepthwiseConv2d::~ClComponentDepthwiseConv2d() { } -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF -const IGpuTemplateComponentWriter *ClComponentDepthwiseConv2d::template_writer() const -#else //ACL_INTERNAL_TEST_CKW_IN_DF const IGpuCkwComponentDriver *ClComponentDepthwiseConv2d::ckw_component_driver() const -#endif //ACL_INTERNAL_TEST_CKW_IN_DF { return _component_writer.get(); } diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.h index 01168e9ded..7526361f1c 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.h +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -44,11 +44,7 @@ class ArgumentPack; class DepthwiseConv2dAttributes; /** Forward declaration */ -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF -class ClTemplateDepthwiseConv2d; -#else //ACL_INTERNAL_TEST_CKW_IN_DF class GpuCkwDepthwiseConv2d; -#endif //ACL_INTERNAL_TEST_CKW_IN_DF /** Component specific settings */ @@ -161,13 +157,8 @@ public: ClComponentDepthwiseConv2d(ClComponentDepthwiseConv2d &&component) = default; /** Allow instances of this class to be moved */ ClComponentDepthwiseConv2d &operator=(ClComponentDepthwiseConv2d &&component) = default; - /** Get template writer for the component */ -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - const IGpuTemplateComponentWriter *template_writer() const override; -#else //ACL_INTERNAL_TEST_CKW_IN_DF - const IGpuCkwComponentDriver *ckw_component_driver() const override; -#endif //ACL_INTERNAL_TEST_CKW_IN_DF - + /** Get writer for the component */ + const IGpuCkwComponentDriver *ckw_component_driver() const override; /** Get component type */ GpuComponentType type() const override { @@ -175,11 +166,7 @@ public: } private: -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - std::unique_ptr<ClTemplateDepthwiseConv2d> _component_writer; -#else //ACL_INTERNAL_TEST_CKW_IN_DF std::unique_ptr<GpuCkwDepthwiseConv2d> _component_writer; -#endif //ACL_INTERNAL_TEST_CKW_IN_DF }; } // namespace dynamic_fusion } // namespace experimental diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp index 98f3d6a882..783a17df30 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -28,12 +28,7 @@ #include "arm_compute/dynamic_fusion/sketch/attributes/Conv2dAttributes.h" #include "src/core/CL/CLValidate.h" - -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF -#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h" -#else // ACL_INTERNAL_TEST_CKW_IN_DF #include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwDirectConv2d.h" -#endif // ACL_INTERNAL_TEST_CKW_IN_DF namespace arm_compute { @@ -153,11 +148,7 @@ ClComponentDirectConv2d::ClComponentDirectConv2d(ComponentId const Attributes &attributes, const Settings &settings) : IGpuKernelComponent{id, properties, tensors}, -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - _component_writer{std::make_unique<ClTemplateDirectConv2d>(id, tensors, attributes, settings)} -#else // ACL_INTERNAL_TEST_CKW_IN_DF _component_writer{std::make_unique<GpuCkwDirectConv2d>(id, tensors, attributes, settings)} -#endif // ACL_INTERNAL_TEST_CKW_IN_DF { } @@ -165,11 +156,7 @@ ClComponentDirectConv2d::~ClComponentDirectConv2d() { } -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF -const IGpuTemplateComponentWriter *ClComponentDirectConv2d::template_writer() const -#else // ACL_INTERNAL_TEST_CKW_IN_DF const IGpuCkwComponentDriver *ClComponentDirectConv2d::ckw_component_driver() const -#endif // ACL_INTERNAL_TEST_CKW_IN_DF { return _component_writer.get(); } diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h index d6d9705d3c..c50b0fa0ce 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTDIRECTCONV2D -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTDIRECTCONV2D +#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTDIRECTCONV2D_H +#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTDIRECTCONV2D_H #include "arm_compute/core/Error.h" #include "arm_compute/core/KernelDescriptors.h" @@ -68,11 +68,7 @@ private: }; /** Forward declaration */ -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF -class ClTemplateDirectConv2d; -#else // ACL_INTERNAL_TEST_CKW_IN_DF class GpuCkwDirectConv2d; -#endif // ACL_INTERNAL_TEST_CKW_IN_DF class ClComponentDirectConv2d final : public IGpuKernelComponent { @@ -139,11 +135,7 @@ public: /** Allow instances of this class to be moved */ ClComponentDirectConv2d &operator=(ClComponentDirectConv2d &&component) = default; /** Get writer for the component */ -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - const IGpuTemplateComponentWriter *template_writer() const override; -#else // ACL_INTERNAL_TEST_CKW_IN_DF - const IGpuCkwComponentDriver *ckw_component_driver() const override; -#endif // ACL_INTERNAL_TEST_CKW_IN_DF + const IGpuCkwComponentDriver *ckw_component_driver() const override; /** Get component type */ GpuComponentType type() const override { @@ -151,13 +143,9 @@ public: } private: -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - std::unique_ptr<ClTemplateDirectConv2d> _component_writer; -#else // ACL_INTERNAL_TEST_CKW_IN_DF std::unique_ptr<GpuCkwDirectConv2d> _component_writer; -#endif // ACL_INTERNAL_TEST_CKW_IN_DF }; } // namespace dynamic_fusion } // namespace experimental } // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTDIRECTCONV2D */ +#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTDIRECTCONV2D_H diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp index 5b136427e4..209c73dbee 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -26,11 +26,7 @@ #include "arm_compute/core/Validate.h" #include "src/core/CL/CLValidate.h" -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF -#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.h" -#else //ACL_INTERNAL_TEST_CKW_IN_DF #include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.h" -#endif //ACL_INTERNAL_TEST_CKW_IN_DF namespace arm_compute { @@ -117,19 +113,11 @@ ClComponentElementwiseBinary::ClComponentElementwiseBinary(ComponentId const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes) : IGpuKernelComponent{id, properties, tensors}, -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - _component_writer{std::make_unique<ClTemplateElementwiseBinary>(id, tensors, attributes)} -#else //ACL_INTERNAL_TEST_CKW_IN_DF _component_writer{std::make_unique<GpuCkwElementwiseBinary>(id, tensors, attributes)} -#endif //ACL_INTERNAL_TEST_CKW_IN_DF { } -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF -const IGpuTemplateComponentWriter *ClComponentElementwiseBinary::template_writer() const -#else //ACL_INTERNAL_TEST_CKW_IN_DF const IGpuCkwComponentDriver *ClComponentElementwiseBinary::ckw_component_driver() const -#endif //ACL_INTERNAL_TEST_CKW_IN_DF { return _component_writer.get(); } diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h index 7589b9732c..a4395a6219 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTELEMENTWISEBINARY -#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTELEMENTWISEBINARY +#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTELEMENTWISEBINARY_H +#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTELEMENTWISEBINARY_H #include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h" #include "src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h" @@ -40,11 +40,7 @@ template <typename T> class ArgumentPack; /** Forward declaration */ -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF -class ClTemplateElementwiseBinary; -#else //ACL_INTERNAL_TEST_CKW_IN_DF class GpuCkwElementwiseBinary; -#endif //ACL_INTERNAL_TEST_CKW_IN_DF class ClComponentElementwiseBinary final : public IGpuKernelComponent { @@ -105,12 +101,7 @@ public: /** Allow instances of this class to be moved */ ClComponentElementwiseBinary &operator=(ClComponentElementwiseBinary &&component) = default; /** Get writer for the component */ -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - const IGpuTemplateComponentWriter *template_writer() const override; -#else //ACL_INTERNAL_TEST_CKW_IN_DF - const IGpuCkwComponentDriver *ckw_component_driver() const override; -#endif //ACL_INTERNAL_TEST_CKW_IN_DF - + const IGpuCkwComponentDriver *ckw_component_driver() const override; /** Get component type */ GpuComponentType type() const override { @@ -118,13 +109,9 @@ public: } private: -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - std::unique_ptr<ClTemplateElementwiseBinary> _component_writer; -#else //ACL_INTERNAL_TEST_CKW_IN_DF std::unique_ptr<GpuCkwElementwiseBinary> _component_writer; -#endif //ACL_INTERNAL_TEST_CKW_IN_DF }; } // namespace dynamic_fusion } // namespace experimental } // namespace arm_compute -#endif /* ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTELEMENTWISEBINARY */ +#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTELEMENTWISEBINARY_H diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.cpp deleted file mode 100644 index 27c13bd654..0000000000 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.cpp +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright (c) 2022-2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/dynamic_fusion/sketch/attributes/SoftmaxAttributes.h" - -#include "src/core/CL/CLValidate.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -Status ClComponentLogits1DMaxShiftExpSum::validate(const Properties &properties, - const ArgumentPack<ITensorInfo> &tensors, - const Attributes &attributes) -{ - ARM_COMPUTE_UNUSED(properties, attributes); - - const ITensorInfo *src = tensors.get_const_tensor(TensorType::ACL_SRC_0); - const ITensorInfo *sum = tensors.get_const_tensor(TensorType::ACL_DST_0); - const ITensorInfo *dst = tensors.get_const_tensor(TensorType::ACL_DST_1); - - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src); - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(sum); - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(dst); - - // 1. Check validity - // All tensor infos are initialized - ARM_COMPUTE_RETURN_ERROR_ON(src->tensor_shape().total_size() == 0); - ARM_COMPUTE_RETURN_ERROR_ON(sum->tensor_shape().total_size() == 0); - ARM_COMPUTE_RETURN_ERROR_ON(dst->tensor_shape().total_size() == 0); - - // Check for mismatches in shapes and data types - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst, sum); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(src, dst); - - // Device requirements are met - ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src); - - // 2. Check support level - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F16, DataType::F32); - - return Status{}; -} - -ClComponentLogits1DMaxShiftExpSum::ClComponentLogits1DMaxShiftExpSum(ComponentId id, - const Properties &properties, - const ArgumentPack<ITensorInfo> &tensors, - const Attributes &attributes) - : IGpuKernelComponent{id, properties, tensors}, - _component_writer{std::make_unique<ClTemplateLogits1DMaxShiftExpSum>(id, tensors, attributes)} -{ -} - -ClComponentLogits1DMaxShiftExpSum::~ClComponentLogits1DMaxShiftExpSum() -{ -} - -const IGpuTemplateComponentWriter *ClComponentLogits1DMaxShiftExpSum::template_writer() const -{ - return _component_writer.get(); -} -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.h deleted file mode 100644 index 91ab5de3b5..0000000000 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.h +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Copyright (c) 2022-2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTLOGITS1DMAXSHIFTEXPSUM -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTLOGITS1DMAXSHIFTEXPSUM - -#include "arm_compute/dynamic_fusion/sketch/attributes/SoftmaxAttributes.h" - -#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h" - -namespace arm_compute -{ -/** Forward declaration */ -class ITensorInfo; -namespace experimental -{ -namespace dynamic_fusion -{ -/** Forward declaration */ -template <typename T> -class ArgumentPack; - -/** Forward declaration */ -class ClTemplateLogits1DMaxShiftExpSum; - -/** Component to calculate max-shifted exponentials and their sum - * - * 1D example: - * input: [x1, x2, ... , xn], shape: (1 x d) - * - * Let max(x1...xn) = m - * - * (output) sum: [exp(x1-m) + ... + exp(xn-m)], shape: (1 x 1) - * (output) dst: [exp(x1-m) ... exp(xn-m)], shape: (1 x d) - * - * This component is used by the softmax operator. The subsequent - * operation normalizes dst with sum, therefore the max-shifting - * since exp(m) will be cancelled in numerator and denominator. -*/ -class ClComponentLogits1DMaxShiftExpSum final : public IGpuKernelComponent -{ -public: - /** Attributes are a set of backend-agnostic parameters that define what a component does */ - using Attributes = SoftmaxAttributes; - - /** Validate the component - * - * @param[in] properties Component properties @ref Properties - * @param[in] tensors Tensor arguments to the component - * @param[in] attributes Component attributes @ref Attributes - * - * @return Status Validation results - * - * Tensor argument names: - * - ACL_SRC_0: Input - * - ACL_DST_0: Output - * - ACL_DST_1: Output - * - * Tensor argument constness: - * - ACL_SRC_0: Const - * - ACL_DST_0: Const - * - ACL_DST_1: Const - * - * Valid data layouts: - * - All - * - ** Valid data type configurations: - * |ACL_SRC_0 |ACL_DST_0 |ACL_DST_1 | - * |:----------|:----------|:----------| - * |F16 | F16 | F16 | - * |F32 | F32 | F32 | - */ - static Status - validate(const Properties &properties, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes); - - /** Constructor - * - * Similar to @ref ClComponentLogits1DMaxShiftExpSum::validate() - */ - ClComponentLogits1DMaxShiftExpSum(ComponentId id, - const Properties &properties, - const ArgumentPack<ITensorInfo> &tensors, - const Attributes &attributes); - - /** Destructor */ - ~ClComponentLogits1DMaxShiftExpSum() override; - /** Prevent instances of this class from being copy constructed */ - ClComponentLogits1DMaxShiftExpSum(const ClComponentLogits1DMaxShiftExpSum &component) = delete; - /** Prevent instances of this class from being copied */ - ClComponentLogits1DMaxShiftExpSum &operator=(const ClComponentLogits1DMaxShiftExpSum &component) = delete; - /** Allow instances of this class to be move constructed */ - ClComponentLogits1DMaxShiftExpSum(ClComponentLogits1DMaxShiftExpSum &&component) = default; - /** Allow instances of this class to be moved */ - ClComponentLogits1DMaxShiftExpSum &operator=(ClComponentLogits1DMaxShiftExpSum &&component) = default; - /** Get template writer for the component */ - const IGpuTemplateComponentWriter *template_writer() const override; - /** Get component type */ - GpuComponentType type() const override - { - return GpuComponentType::Unfusable; - } - -private: - std::unique_ptr<ClTemplateLogits1DMaxShiftExpSum> _component_writer; -}; -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute - -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTLOGITS1DMAXSHIFTEXPSUM */ diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.cpp deleted file mode 100644 index fb2544385c..0000000000 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.cpp +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/dynamic_fusion/sketch/attributes/SoftmaxAttributes.h" - -#include "src/core/CL/CLValidate.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -Status ClComponentLogits1DNorm::validate(const Properties &properties, - const ArgumentPack<ITensorInfo> &tensors, - const Attributes &attributes) -{ - ARM_COMPUTE_UNUSED(properties, attributes); - - const ITensorInfo *src = tensors.get_const_tensor(TensorType::ACL_SRC_0); - const ITensorInfo *sum = tensors.get_const_tensor(TensorType::ACL_SRC_1); - const ITensorInfo *dst = tensors.get_const_tensor(TensorType::ACL_DST_0); - - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src); - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(sum); - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(dst); - - // 1. Check validity - // All tensor infos are initialized - ARM_COMPUTE_RETURN_ERROR_ON(src->tensor_shape().total_size() == 0); - ARM_COMPUTE_RETURN_ERROR_ON(sum->tensor_shape().total_size() == 0); - ARM_COMPUTE_RETURN_ERROR_ON(dst->tensor_shape().total_size() == 0); - - // Check for mismatches in shapes and data types - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst, sum); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(src, dst); - - ARM_COMPUTE_RETURN_ERROR_ON(attributes.is_log_softmax() && !is_data_type_float(src->data_type())); - - // Device requirements are met - ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src); - - // 2. Check support level - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F16, DataType::F32); - - return Status{}; -} - -ClComponentLogits1DNorm::ClComponentLogits1DNorm(ComponentId id, - const Properties &properties, - const ArgumentPack<ITensorInfo> &tensors, - const Attributes &attributes) - : IGpuKernelComponent{id, properties, tensors}, - _component_writer{std::make_unique<ClTemplateLogits1DNorm>(id, tensors, attributes)} -{ -} - -ClComponentLogits1DNorm::~ClComponentLogits1DNorm() -{ -} - -const IGpuTemplateComponentWriter *ClComponentLogits1DNorm::template_writer() const -{ - return _component_writer.get(); -} -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.h deleted file mode 100644 index 74c0273604..0000000000 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.h +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTLOGITS1DNORM -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTLOGITS1DNORM - -#include "arm_compute/dynamic_fusion/sketch/attributes/SoftmaxAttributes.h" - -#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h" - -namespace arm_compute -{ -/** Forward declaration */ -class ITensorInfo; -namespace experimental -{ -namespace dynamic_fusion -{ -/** Forward declaration */ -template <typename T> -class ArgumentPack; - -/** Forward declaration */ -class ClTemplateLogits1DNorm; - -/** Component to calculate the final step of the Softmax Layer - * where each logit value is multiplied by the inverse of the sum of the logits. - * - * 1D example: - * - * (input) src: [x1 x2 ... xn], shape: (1 x d) - * (input) sum: [x1 + x2 + ... + xn], shape: (1 x 1) - * (output) dst: [x1/sum x2/sum ... xn/sum], shape: (1 x d) - * - * This component is used by the softmax operator to get the final result. -*/ -class ClComponentLogits1DNorm final : public IGpuKernelComponent -{ -public: - /** Attributes are a set of backend-agnostic parameters that define what a component does */ - using Attributes = SoftmaxAttributes; - - /** Validate the component - * - * @param[in] properties Component properties @ref Properties - * @param[in] tensors Tensor arguments to the component - * @param[in] attributes Component attributes @ref Attributes - * - * @return Status Validation results - * - * Tensor argument names: - * - ACL_SRC_0: Input - * - ACL_SRC_1: Input - * - ACL_DST_0: Output - * - * Tensor argument constness: - * - ACL_SRC_0: Const - * - ACL_SRC_1: Const - * - ACL_DST_0: Const - * - * Valid data layouts: - * - All - * - ** Valid data type configurations: - * |ACL_SRC_0 |ACL_SRC_1 |ACL_DST_0 | - * |:----------|:----------|:----------| - * |F16 | F16 | F16 | - * |F32 | F32 | F32 | - */ - static Status - validate(const Properties &properties, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes); - - /** Constructor - * - * Similar to @ref ClComponentLogits1DNorm::validate() - */ - ClComponentLogits1DNorm(ComponentId id, - const Properties &properties, - const ArgumentPack<ITensorInfo> &tensors, - const Attributes &attributes); - - /** Destructor */ - ~ClComponentLogits1DNorm() override; - /** Prevent instances of this class from being copy constructed */ - ClComponentLogits1DNorm(const ClComponentLogits1DNorm &component) = delete; - /** Prevent instances of this class from being copied */ - ClComponentLogits1DNorm &operator=(const ClComponentLogits1DNorm &component) = delete; - /** Allow instances of this class to be move constructed */ - ClComponentLogits1DNorm(ClComponentLogits1DNorm &&component) = default; - /** Allow instances of this class to be moved */ - ClComponentLogits1DNorm &operator=(ClComponentLogits1DNorm &&component) = default; - /** Get template writer for the component */ - const IGpuTemplateComponentWriter *template_writer() const override; - /** Get component type */ - GpuComponentType type() const override - { - return GpuComponentType::Unfusable; - } - -private: - std::unique_ptr<ClTemplateLogits1DNorm> _component_writer; -}; -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute - -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTLOGITS1DNORM */ diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.cpp index f238d42d98..53ac8da41f 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.cpp +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Arm Limited. + * Copyright (c) 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,7 +21,6 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifdef ACL_INTERNAL_TEST_CKW_IN_DF #include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.h" @@ -147,5 +146,3 @@ const IGpuCkwComponentDriver *ClComponentMatMul::ckw_component_driver() const } // namespace dynamic_fusion } // namespace experimental } // namespace arm_compute - -#endif // ACL_INTERNAL_TEST_CKW_IN_DF diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.cpp index 5544963b3f..6e7243dc04 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.cpp +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.cpp @@ -30,7 +30,6 @@ #include "src/core/CL/CLValidate.h" #include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.h" #include "src/dynamic_fusion/utils/Utils.h" #include <memory> @@ -93,27 +92,16 @@ ClComponentPool2d::ClComponentPool2d(ComponentId id, const Attributes &attributes, const Settings &settings) : IGpuKernelComponent{id, properties, tensors}, -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - _component_writer{std::make_unique<ClTemplatePool2d>(id, tensors, attributes, settings)} -#else //ACL_INTERNAL_TEST_CKW_IN_DF _component_writer{std::make_unique<GpuCkwPool2d>(id, tensors, attributes, settings)} -#endif //ACL_INTERNAL_TEST_CKW_IN_DF { } ClComponentPool2d::~ClComponentPool2d() { } -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF -const IGpuTemplateComponentWriter *ClComponentPool2d::template_writer() const -{ - return _component_writer.get(); -} -#else //ACL_INTERNAL_TEST_CKW_IN_DF const IGpuCkwComponentDriver *ClComponentPool2d::ckw_component_driver() const { return _component_writer.get(); } -#endif //ACL_INTERNAL_TEST_CKW_IN_DF } // namespace dynamic_fusion } // namespace experimental } // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.h index 98fed65004..d33e601f18 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.h +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Arm Limited. + * Copyright (c) 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -42,11 +42,7 @@ class ArgumentPack; class Pool2dAttributes; /** Forward declaration */ -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF -class ClTemplatePool2d; -#else // ACL_INTERNAL_TEST_CKW_IN_DF class GpuCkwPool2d; -#endif // ACL_INTERNAL_TEST_CKW_IN_DF class ClComponentPool2d final : public IGpuKernelComponent { @@ -116,13 +112,9 @@ public: /** Allow instances of this class to be moved */ ClComponentPool2d &operator=(ClComponentPool2d &&component) = default; -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - /** Get template writer for the component */ - const IGpuTemplateComponentWriter *template_writer() const override; -#else // ACL_INTERNAL_TEST_CKW_IN_DF + /** Get GPU kernel writer for the component */ const IGpuCkwComponentDriver *ckw_component_driver() const override; -#endif // ACL_INTERNAL_TEST_CKW_IN_DF /** Get component type */ GpuComponentType type() const override @@ -131,11 +123,7 @@ public: } private: -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - std::unique_ptr<ClTemplatePool2d> _component_writer; -#else // ACL_INTERNAL_TEST_CKW_IN_DF std::unique_ptr<GpuCkwPool2d> _component_writer; -#endif // ACL_INTERNAL_TEST_CKW_IN_DF }; } // namespace dynamic_fusion } // namespace experimental diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.cpp index 0ece9de970..dce85c424e 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.cpp +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Arm Limited. + * Copyright (c) 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -27,7 +27,6 @@ #include "arm_compute/core/Validate.h" #include "src/core/CL/CLValidate.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.h" namespace arm_compute { @@ -54,15 +53,16 @@ Status ClComponentReshape::validate(const ArgumentPack<ITensorInfo> &tensors) ClComponentReshape::ClComponentReshape(ComponentId id, const Properties &properties, const ArgumentPack<ITensorInfo> &tensors) - : IGpuKernelComponent{id, properties, tensors}, _component_writer{std::make_unique<ClTemplateReshape>(id, tensors)} + : IGpuKernelComponent{id, properties, tensors} { } ClComponentReshape::~ClComponentReshape() { } -const IGpuTemplateComponentWriter *ClComponentReshape::template_writer() const +const IGpuCkwComponentDriver *ClComponentReshape::ckw_component_driver() const { - return _component_writer.get(); + /* NOT IMPLEMENTED */ + return nullptr; } } // namespace dynamic_fusion } // namespace experimental diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.h index 78163d6603..fd0f966da1 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.h +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Arm Limited. + * Copyright (c) 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTRESHAPE -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTRESHAPE +#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTRESHAPE_H +#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTRESHAPE_H #include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h" @@ -85,8 +85,8 @@ public: ClComponentReshape(ClComponentReshape &&component) = default; /** Allow instances of this class to be moved */ ClComponentReshape &operator=(ClComponentReshape &&component) = default; - /** Get template writer for the component */ - const IGpuTemplateComponentWriter *template_writer() const override; + /** Get writer for the component */ + const IGpuCkwComponentDriver *ckw_component_driver() const override; /** Get component type */ GpuComponentType type() const override { @@ -94,10 +94,9 @@ public: } private: - std::unique_ptr<ClTemplateReshape> _component_writer; }; } // namespace dynamic_fusion } // namespace experimental } // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTRESHAPE */ +#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTRESHAPE_H diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.cpp index b05eb04698..411eeca802 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.cpp +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -29,12 +29,7 @@ #include "src/core/CL/CLValidate.h" #include "src/core/utils/ScaleUtils.h" #include "src/dynamic_fusion/sketch/ArgumentPack.h" - -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF -#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.h" -#else // ACL_INTERNAL_TEST_CKW_IN_DF #include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwResize.h" -#endif // ACL_INTERNAL_TEST_CKW_IN_DF namespace arm_compute { @@ -43,11 +38,7 @@ namespace experimental namespace dynamic_fusion { /** Forward declaration */ -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF -class ClTemplateResize; -#else // ACL_INTERNAL_TEST_CKW_IN_DF class GpuCkwResize; -#endif // ACL_INTERNAL_TEST_CKW_IN_DF Status ClComponentResize::validate(const IGpuKernelComponent::Properties &properties, const ArgumentPack<ITensorInfo> &tensors, @@ -82,11 +73,7 @@ ClComponentResize::ClComponentResize(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const ClComponentResize::Attributes &attributes) : IGpuKernelComponent{id, properties, tensors}, -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - _component_writer{std::make_unique<ClTemplateResize>(id, tensors, attributes)} -#else // ACL_INTERNAL_TEST_CKW_IN_DF _component_writer{std::make_unique<GpuCkwResize>(id, tensors, attributes)} -#endif // ACL_INTERNAL_TEST_CKW_IN_DF { } @@ -94,11 +81,7 @@ ClComponentResize::~ClComponentResize() { } -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF -const IGpuTemplateComponentWriter *ClComponentResize::template_writer() const -#else // ACL_INTERNAL_TEST_CKW_IN_DF const IGpuCkwComponentDriver *ClComponentResize::ckw_component_driver() const -#endif // ACL_INTERNAL_TEST_CKW_IN_DF { return _component_writer.get(); } diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.h index 29276c3257..9a1169c45f 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.h +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,8 +22,8 @@ * SOFTWARE. */ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTRESIZE -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTRESIZE +#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTRESIZE_H +#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTRESIZE_H #include "arm_compute/dynamic_fusion/sketch/attributes/ResizeAttributes.h" @@ -42,11 +42,7 @@ template <typename T> class ArgumentPack; /** Forward declaration */ -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF -class ClTemplateResize; -#else // ACL_INTERNAL_TEST_CKW_IN_DF class GpuCkwResize; -#endif // ACL_INTERNAL_TEST_CKW_IN_DF class ClComponentResize final : public IGpuKernelComponent { @@ -111,11 +107,7 @@ public: ClComponentResize &operator=(ClComponentResize &&component) = default; /** Get writer for the component */ -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - const IGpuTemplateComponentWriter *template_writer() const override; -#else // ACL_INTERNAL_TEST_CKW_IN_DF const IGpuCkwComponentDriver *ckw_component_driver() const override; -#endif // ACL_INTERNAL_TEST_CKW_IN_DF /** Get component type */ GpuComponentType type() const override @@ -124,15 +116,11 @@ public: } private: -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - std::unique_ptr<ClTemplateResize> _component_writer; -#else // ACL_INTERNAL_TEST_CKW_IN_DF std::unique_ptr<GpuCkwResize> _component_writer; -#endif // ACL_INTERNAL_TEST_CKW_IN_DF }; } // namespace dynamic_fusion } // namespace experimental } // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTRESIZE */ +#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTRESIZE_H diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp index dcbecaff35..3db6c5cd2d 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,11 +24,7 @@ #include "ClComponentStore.h" #include "src/dynamic_fusion/sketch/ArgumentPack.h" -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF -#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h" -#else //ACL_INTERNAL_TEST_CKW_IN_DF #include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.h" -#endif //ACL_INTERNAL_TEST_CKW_IN_DF #include <memory> @@ -46,22 +42,13 @@ Status ClComponentStore::validate(const Properties &properties, const ArgumentPa ClComponentStore::ClComponentStore(ComponentId id, const Properties &properties, const ArgumentPack<ITensorInfo> &tensors) - : IGpuKernelComponent{id, properties, tensors}, -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - _component_writer{std::make_unique<ClTemplateStore>(id, tensors)} -#else //ACL_INTERNAL_TEST_CKW_IN_DF - _component_writer{std::make_unique<GpuCkwStore>(id, tensors)} -#endif //ACL_INTERNAL_TEST_CKW_IN_DF + : IGpuKernelComponent{id, properties, tensors}, _component_writer{std::make_unique<GpuCkwStore>(id, tensors)} { } ClComponentStore::~ClComponentStore() { } -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF -const IGpuTemplateComponentWriter *ClComponentStore::template_writer() const -#else //ACL_INTERNAL_TEST_CKW_IN_DF const IGpuCkwComponentDriver *ClComponentStore::ckw_component_driver() const -#endif //ACL_INTERNAL_TEST_CKW_IN_DF { return _component_writer.get(); } diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h index 948785c480..2c1dd0f6fc 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTSTORE -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTSTORE +#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTSTORE_H +#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTSTORE_H #include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h" @@ -39,11 +39,7 @@ namespace dynamic_fusion /** Forward declaration */ template <typename T> class ArgumentPack; -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF -class ClTemplateStore; -#else //ACL_INTERNAL_TEST_CKW_IN_DF class GpuCkwStore; -#endif //ACL_INTERNAL_TEST_CKW_IN_DF class ClComponentStore final : public IGpuKernelComponent { @@ -88,11 +84,7 @@ public: /** Allow instances of this class to be moved */ ClComponentStore &operator=(ClComponentStore &&component) = default; /** Get writer for the component */ -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - const IGpuTemplateComponentWriter *template_writer() const override; -#else //ACL_INTERNAL_TEST_CKW_IN_DF const IGpuCkwComponentDriver *ckw_component_driver() const override; -#endif //ACL_INTERNAL_TEST_CKW_IN_DF /** Get component type */ GpuComponentType type() const override { @@ -100,13 +92,9 @@ public: } private: -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - std::unique_ptr<ClTemplateStore> _component_writer; -#else //ACL_INTERNAL_TEST_CKW_IN_DF - std::unique_ptr<GpuCkwStore> _component_writer; -#endif //ACL_INTERNAL_TEST_CKW_IN_DF + std::unique_ptr<GpuCkwStore> _component_writer; }; } // namespace dynamic_fusion } // namespace experimental } // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTSTORE */ +#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTSTORE_H diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp index 697b7d4e1f..4d6e7f81bb 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp +++ b/src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -30,7 +30,6 @@ #include "src/dynamic_fusion/sketch/ArgumentPack.h" #include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h" #include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h" namespace arm_compute { diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuMatMul.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuMatMul.cpp index e24629a036..2997b28ec1 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/GpuMatMul.cpp +++ b/src/dynamic_fusion/sketch/gpu/operators/GpuMatMul.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Arm Limited. + * Copyright (c) 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,7 +21,6 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifdef ACL_INTERNAL_TEST_CKW_IN_DF #include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuMatMul.h" @@ -244,4 +243,3 @@ ITensorInfo *GpuMatMul::create_op(GpuWorkloadSketch &sketch, } // namespace dynamic_fusion } // namespace experimental } // namespace arm_compute -#endif // ACL_INTERNAL_TEST_CKW_IN_DF diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp index 431c9110fc..d385752201 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp +++ b/src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp @@ -28,8 +28,6 @@ #include "src/common/utils/Log.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/dynamic_fusion/sketch/ArgumentPack.h" -#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.h" -#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.h" #include "src/dynamic_fusion/sketch/gpu/GpuOperatorProperties.h" #include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h" @@ -88,9 +86,8 @@ Status GpuSoftmax::is_supported_op(const GpuWorkloadContext &context, arguments_norm.add_const_tensor(ACL_SRC_1, &sum); arguments_norm.add_const_tensor(ACL_DST_0, &dst_info_to_validate); - ARM_COMPUTE_RETURN_ON_ERROR( - ClComponentLogits1DMaxShiftExpSum::validate(properties, arguments_exp_sum, attributes)); - ARM_COMPUTE_RETURN_ON_ERROR(ClComponentLogits1DNorm::validate(properties, arguments_norm, attributes)); + ARM_COMPUTE_UNUSED(properties, attributes); + return Status(ErrorCode::RUNTIME_ERROR, "GpuSoftmax is not implemented"); } else { @@ -177,8 +174,8 @@ void GpuSoftmax::create_op(GpuWorkloadSketch &sketch, ITensorInfo *src, ITensorI arguments_norm.add_const_tensor(ACL_SRC_1, sum); arguments_norm.add_const_tensor(ACL_DST_0, dst); - comp_graph.add_new_component<ClComponentLogits1DMaxShiftExpSum>(properties, arguments_exp_sum, attributes); - comp_graph.add_new_component<ClComponentLogits1DNorm>(properties, arguments_norm, attributes); + // Add to component graph -- NOT IMPLEMENTED + ARM_COMPUTE_UNUSED(comp_graph, attributes); } } else diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuTanh.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuTanh.cpp index bf0f274c5c..b9d01966b3 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/GpuTanh.cpp +++ b/src/dynamic_fusion/sketch/gpu/operators/GpuTanh.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Arm Limited. + * Copyright (c) 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -31,7 +31,6 @@ #include "src/dynamic_fusion/sketch/ArgumentPack.h" #include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h" #include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h" namespace arm_compute { diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.cpp deleted file mode 100644 index 775b0a0c8c..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.cpp +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright (c) 2022-2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "GpuKernelVariableTable.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/ITensorInfo.h" - -#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -void GpuKernelVariableTable::declare_variable(const GpuKernelComponentGroup &comp_group, - const ITensorInfo *tensor, - GpuKernelArgumentInfo argument_info, - const std::string &alias) -{ - ARM_COMPUTE_ERROR_ON_MSG(!tensor->has_valid_id(), "Tensor info with valid id expected"); - - // Do not re-declare if the variable associated with the tensor has already been declared - auto it = _vars.find(tensor->id()); - - if (it != _vars.end()) - { - ARM_COMPUTE_ERROR_ON(!(it->second.kernel_argument_info == argument_info)); - return; - } - - const auto target = comp_group.get_tile_for_tensor(tensor); - - if (target != tensor) - { - // If the tensor uses a shared tile, don't declare another variable. - it = _vars.find(target->id()); - - ARM_COMPUTE_ERROR_ON_MSG(it == _vars.end(), "The variable used for this tensor must have been declared."); - - _vars[tensor->id()] = it->second; - } - else - { - // Declare variable associated with the tensor - std::stringstream ss; - ss << alias << "_t" << abs(tensor->id()); - const auto uniq_name = ss.str(); - TensorVariable var{tensor->id(), uniq_name, argument_info}; - - _vars.emplace(tensor->id(), var); - } -} - -GpuKernelVariableTable::TensorVariable GpuKernelVariableTable::get_variable(const ITensorInfo *tensor) const -{ - const auto var = _vars.at(tensor->id()); - return var; -} - -GpuKernelVariableTable::VariableList -GpuKernelVariableTable::get_variable_list(const std::vector<const ITensorInfo *> &tensors) const -{ - VariableList vars{}; - for (const auto &tensor : tensors) - { - if (!tensor->has_valid_id()) - { - continue; - } - vars.push_back(get_variable(tensor)); - } - return vars; -} - -TagVal::TagVal(const GpuKernelVariableTable::TensorVariable &var) : value{var.uniq_name} -{ -} - -TagVal::TagVal(const std::string &val) : value{val} -{ -} - -TagVal::TagVal(const char *val) : value{std::string(val)} -{ -} - -TagVal::TagVal(const DataType &data_type) : value{get_cl_type_from_data_type(data_type)} -{ -} -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h b/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h deleted file mode 100644 index c17f131ada..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Copyright (c) 2022-2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_GPUKERNELVARIABLETABLE -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_GPUKERNELVARIABLETABLE - -#include "arm_compute/core/ITensorInfo.h" - -#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h" -#include "support/AclRequires.h" -#include "support/StringSupport.h" - -#include <set> -#include <string> -#include <type_traits> -#include <unordered_map> - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -class GpuKernelComponentGroup; - -/** A table of all the variables used in the kernel. - * Each kernel has exactly one variable table. - */ -class GpuKernelVariableTable -{ -public: - /** A tensor variable whose main purposes are: - * - Hold the newly assigned @ref GpuKernelArgumentInfo for the associated tensor info - * - Hold the generated variable name for the associated tensor info - */ - struct TensorVariable - { - public: - TensorVariable() = default; - TensorVariable(const TensorVariable &) = default; - TensorVariable &operator=(const TensorVariable &) = default; - ITensorInfo::Id id{ITensorInfo::invalid_tensor_id}; - std::string uniq_name{"empty"}; // Unique name, also the final variable name used in the built code - GpuKernelArgumentInfo kernel_argument_info{}; - bool has_valid_id() const - { - return id != ITensorInfo::invalid_tensor_id; - } - }; - using VariableList = std::vector<TensorVariable>; - -public: - /** Declare a @ref TensorVariable for a corresponding tensor info. - * - * @param[in] comp_group Component group the tensor belongs to - * @param[in] tensor Tensor info with which the new variable is associated - * @param[in] argument_info Kernel argument information - * @param[in] alias Alias for the variable. Will be used as part of the variable name - */ - void declare_variable(const GpuKernelComponentGroup &comp_group, - const ITensorInfo *tensor, - GpuKernelArgumentInfo argument_info, - const std::string &alias = "unnamed"); - /** Get the @ref TensorVariable associated with @p tensor - * - * @param[in] tensor Tensor info to be queried - * - * @return TensorVariable - */ - TensorVariable get_variable(const ITensorInfo *tensor) const; - /** Get the @ref TensorVariable list associated with @p tensors - * @note Empty tensors are skipped - * - * @param[in] tensors List of tensor infos to be queried - * - * @return VariableList - */ - VariableList get_variable_list(const std::vector<const ITensorInfo *> &tensors) const; - -private: - std::map<ITensorInfo::Id, TensorVariable> _vars{}; -}; - -/** A tag value will substitute a tag in a string template during its instantiation */ -struct TagVal -{ - /** Default constructor */ - TagVal() = default; - /** Construct a @ref TagVal from a @ref GpuKernelVariableTable::TensorVariable */ - TagVal(const GpuKernelVariableTable::TensorVariable &var); - /** Construct a @ref TagVal from an integral type */ - template <typename T, ARM_COMPUTE_REQUIRES_TA(std::is_integral<T>::value)> - TagVal(T val) : value{support::cpp11::to_string(val)} - { - } - /** Construct a @ref TagVal from a string */ - TagVal(const std::string &val); - /** Construct a @ref TagVal from a c-style string */ - TagVal(const char *val); - /** Construct a @ref TagVal from a @ref DataType */ - TagVal(const DataType &data_type); - /** Get the value of the TagVal as a converted string */ - std::string value{}; -}; - -/** A tag used in a string template is a placeholder string to be substituted by real values during template instantiation */ -using Tag = std::string; - -/** Tag lookup table. It is used to instantiate a string template */ -using TagLUT = std::unordered_map<Tag, TagVal>; - -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_GPUKERNELVARIABLETABLE */ diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h b/src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h deleted file mode 100644 index 9d0b4f592a..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_IGPUTEMPLATECOMPONENTWRITER -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_IGPUTEMPLATECOMPONENTWRITER - -#include "arm_compute/core/CL/CLCompileContext.h" -#include "arm_compute/core/ITensorInfo.h" -#include "arm_compute/core/Window.h" - -#include "src/dynamic_fusion/sketch/ArgumentPack.h" -#include "src/dynamic_fusion/sketch/gpu/components/Types.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -/** Forward declaration */ -class GpuKernelComponentGroup; -class GpuKernelVariableTable; - -/** An interface used by @ref ClTemplateWriter to write source code for a kernel component - */ -class IGpuTemplateComponentWriter -{ -public: - using ComponentGroup = GpuKernelComponentGroup; - - /**For now all kernel intermeditate/destination tensors are expected to be of type Tensor_4D_t_Buffer*/ - static constexpr GpuKernelArgumentInfo::Type common_tensor_type = GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer; - -public: - /** Constructor - * - * @param[in] id Component id - * @param[in] tensors Tensor arguments to the components - */ - IGpuTemplateComponentWriter(ComponentId id, const ArgumentPack<ITensorInfo> &tensors) : _id{id}, _tensors{tensors} - { - } - /** Destructor */ - virtual ~IGpuTemplateComponentWriter() - { - } - /** Generate kernel component name */ - virtual std::string get_name() const = 0; - /** Generate kernel component code template - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return std::string Component code - */ - virtual std::string get_component_code(const ComponentGroup &comp_group) const = 0; - /** Declare all variables used by the component in the @p vtable - * - * @param[out] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - */ - virtual void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const = 0; - /** Generate the tag look-up table used to instantiate the component code. - * - * @param[in] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - * - * @return TagLUT Tag lookup table - */ - virtual TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const = 0; - /** Generate additional macros used in the component */ - virtual std::string get_additional_macros() const - { - return ""; - } - /** Generate the build options used in the component - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return CLBuildOptions Build options - */ - virtual CLBuildOptions get_build_options(const ComponentGroup &comp_group) const - { - ARM_COMPUTE_UNUSED(comp_group); - return CLBuildOptions{}; - } - /** Generate the component config id string used for tuning */ - virtual std::string get_config_id() const - { - return ""; - } - /** Generate the header list used in the component */ - virtual std::set<std::string> get_headers_list() const - { - return std::set<std::string>{}; - } - /** Generate the execution window for the component */ - virtual Window get_window() const - { - return Window{}; - } - /** Get tensor arguments */ - ArgumentPack<ITensorInfo> tensors() const - { - return _tensors; - } - /** Get component id */ - ComponentId id() const - { - return _id; - } - -private: - ComponentId _id{-1}; - ArgumentPack<ITensorInfo> _tensors{}; -}; -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_IGPUTEMPLATECOMPONENTWRITER */ diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp deleted file mode 100644 index c165fb5f33..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp +++ /dev/null @@ -1,181 +0,0 @@ -/* - * Copyright (c) 2022-2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "ClTemplateActivation.h" - -#include "arm_compute/core/utils/ActivationFunctionUtils.h" -#include "arm_compute/core/utils/helpers/AdjustVecSize.h" -#include "arm_compute/core/utils/StringUtils.h" - -#include "src/core/helpers/WindowHelpers.h" -#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" -#include "support/StringSupport.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -ClTemplateActivation::ClTemplateActivation(ComponentId id, - const ArgumentPack<ITensorInfo> &tensors, - const Attributes &attributes) - : IGpuTemplateComponentWriter{id, tensors}, _src{}, _dst{}, _attributes{attributes} -{ - _src = this->tensors().get_const_tensor(TensorType::ACL_SRC); - _dst = this->tensors().get_const_tensor(TensorType::ACL_DST); - ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst); -} - -std::string ClTemplateActivation::get_name() const -{ - return "activation"; -} - -std::string ClTemplateActivation::get_component_code(const ComponentGroup &comp_group) const -{ - std::string code; - const bool is_root = (comp_group.get_root_component()->id() == this->id()); - - code = R"_( -//------------------ START KERNEL {{meta_kernel_id}} --------------------- -)_"; - if (is_root) - { - code += R"_( -// IN(src) {{src}} -// OUT(dst, accum) {{dst}} - -TILE({{DATA_TYPE}}, M0, N0, {{src}}); -TILE(uint, M0, 1, g_dst_indirect_y); -{ - {{src}}_offset_first_element_in_bytes += g_ind_2 * {{src}}_stride_z; - - T_LOAD({{DATA_TYPE}}, M0, N0, {{TENSOR_TYPE}}, {{src}}, g_ind_0, g_ind_1, 1, {{src}}_stride_y, {{src}}); - - T_ACTIVATION({{DATA_TYPE}}, M0, N0, {{ACT}}, {{A_VAL}}, {{B_VAL}}, {{src}}, {{dst}}); -} - -LOOP_UNROLLING(int, i, 0, 1, M0, -{ - g_dst_indirect_y[i].v = (uint)min((int)(g_ind_1 + i), (int)({{arg_dst}}_w) - 1); - g_dst_indirect_y[i].v += (int)(g_ind_2 % {{arg_dst}}_h) * (int)({{arg_dst}}_w); - g_dst_indirect_y[i].v += (int)(g_ind_2 / {{arg_dst}}_h) * (int)({{arg_dst}}_w * {{arg_dst}}_h); -}) -)_"; - } - else - { - code += R"_( -// IN/OUT(src, accum) {{src}} - -{ - T_ACTIVATION({{DATA_TYPE}}, M0, N0, {{ACT}}, {{A_VAL}}, {{B_VAL}}, {{src}}, {{dst}}); -} -)_"; - } - code += R"_( -//------------------ END KERNEL {{meta_kernel_id}} --------------------- -)_"; - return code; -} - -void ClTemplateActivation::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const -{ - vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer), - "src"); - - vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer), - "dst"); -} - -TagLUT ClTemplateActivation::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const -{ - ARM_COMPUTE_UNUSED(comp_group); - - TagLUT lut{}; - // Arguments and global shared variables - lut["src"] = vtable.get_variable(_src); - lut["dst"] = vtable.get_variable(_dst); - - const auto dst_argument = vtable.get_variable(comp_group.get_any_dst_tensor()); - lut["arg_dst"] = dst_argument.uniq_name; - - // Local build options - lut["meta_kernel_id"] = id(); - lut["DATA_TYPE"] = get_cl_type_from_data_type(_src->data_type()); - lut["TENSOR_TYPE"] = "BUFFER"; - - const auto f_act = lower_string(string_from_activation_func(_attributes.activation())); - - lut["ACT"] = f_act; - lut["A_VAL"] = float_to_string_with_full_precision(_attributes.a()); - lut["B_VAL"] = float_to_string_with_full_precision(_attributes.b()); - - return lut; -} - -CLBuildOptions ClTemplateActivation::get_build_options(const ComponentGroup &comp_group) const -{ - /// NOTE: For now tile sizes (n0, m0) are set by the execution window. This may change in the future - const auto root_window = comp_group.get_root_component()->template_writer()->get_window(); - const unsigned int n0 = root_window.x().step(); - const unsigned int m0 = root_window.y().step(); - const unsigned int partial_store_n0 = _dst->dimension(0) % n0; - - CLBuildOptions build_opts; - build_opts.add_option("-DN0=" + support::cpp11::to_string(n0)); - build_opts.add_option("-DM0=" + support::cpp11::to_string(m0)); - build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0)); - - return build_opts; -} - -std::string ClTemplateActivation::get_config_id() const -{ - std::string config_id{}; - config_id += "activation_"; - config_id += lower_string(string_from_data_type(_src->data_type())); - config_id += "_"; - config_id += support::cpp11::to_string(_src->dimension(0)); - config_id += "_"; - config_id += support::cpp11::to_string(_src->dimension(1)); - return config_id; -} - -std::set<std::string> ClTemplateActivation::get_headers_list() const -{ - return std::set<std::string>{"helpers.h", "tile_helpers.h", "activation_float_helpers.h"}; -} - -Window ClTemplateActivation::get_window() const -{ - ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized"); - const unsigned int n0 = adjust_vec_size(16 / _dst->element_size(), _dst->dimension(0)); - Window win = calculate_max_window(*_dst, Steps(n0)); - return win.collapse(win, Window::DimZ); -} - -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h deleted file mode 100644 index 88ee370342..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Copyright (c) 2022-2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEACTIVATION -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEACTIVATION - -#include "arm_compute/core/experimental/Types.h" -#include "arm_compute/function_info/ActivationLayerInfo.h" - -#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -class ClTemplateActivation final : public IGpuTemplateComponentWriter -{ -public: - using Attributes = ClComponentActivation::Attributes; - - /** Constructor - * - * @param[in] id Component id - * @param[in] tensors Tensor arguments to the components - * @param[in] attributes Component attributes - */ - ClTemplateActivation(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes); - - /** Destructor */ - ~ClTemplateActivation() override = default; - - /** Prevent instances of this class from being copy constructed */ - ClTemplateActivation(const ClTemplateActivation &activation) = delete; - - /** Prevent instances of this class from being copied */ - ClTemplateActivation &operator=(const ClTemplateActivation &activation) = delete; - - /** Allow instances of this class to be move constructed */ - ClTemplateActivation(ClTemplateActivation &&activation) = default; - - /** Allow instances of this class to be moved */ - ClTemplateActivation &operator=(ClTemplateActivation &&activation) = default; - - /** Generate kernel component name */ - std::string get_name() const override; - - /** Generate kernel component code template - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return std::string Component code - */ - std::string get_component_code(const ComponentGroup &comp_group) const override; - - /** Declare all variables used by the component in the @p vtable - * - * @param[out] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - */ - void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - - /** Generate the tag look-up table used to instantiate the component code. - * - * @param[in] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - * - * @return TagLUT Tag lookup table - */ - TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - - /** Generate the build options used in the component - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return CLBuildOptions Build options - */ - CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override; - - /** Generate the component config id string used for tuning */ - std::string get_config_id() const override; - - /** Generate the header list used in the component */ - std::set<std::string> get_headers_list() const override; - - /** Generate the execution window for the component */ - Window get_window() const override; - -private: - const ITensorInfo *_src; - const ITensorInfo *_dst; - Attributes _attributes; -}; -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEACTIVATION */ diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.cpp deleted file mode 100644 index 0da3a73801..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.cpp +++ /dev/null @@ -1,212 +0,0 @@ -/* - * Copyright (c) 2022-2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "ClTemplateCast.h" - -#include "arm_compute/core/utils/helpers/AdjustVecSize.h" -#include "arm_compute/core/utils/StringUtils.h" - -#include "src/core/helpers/WindowHelpers.h" -#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -ClTemplateCast::ClTemplateCast(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes) - : IGpuTemplateComponentWriter{id, tensors}, _src{}, _dst{}, _attributes{attributes} -{ - _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0); - _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0); - - ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst); -} - -std::string ClTemplateCast::get_name() const -{ - const size_t src_size = data_size_from_type(_src->data_type()); - const size_t dst_size = data_size_from_type(_dst->data_type()); - - return (src_size >= dst_size) ? "cast_down" : "cast_up"; -} - -std::string ClTemplateCast::get_component_code(const ComponentGroup &comp_group) const -{ - ARM_COMPUTE_UNUSED(comp_group); - - const std::string kernel_name = get_name(); - const auto is_root = (comp_group.get_root_component()->id() == this->id()); - - std::string code = R"_( -//------------------ START KERNEL {{meta_kernel_id}} CAST --------------------- -)_"; - - if (is_root) - { - code += R"_( -// IN_0(src) {{src}} -// OUT(dst, accum) {{dst}} - -TILE(uint, M0, 1, g_dst_indirect_y); -{ - {{src}}_offset_first_element_in_bytes += get_global_id(2) * {{src}}_stride_z; - - TILE({{DATA_TYPE_IN}}, M0, N0, {{tmp}}); - T_LOAD({{DATA_TYPE_IN}}, M0, N0, BUFFER, {{src}}, g_ind_0, g_ind_1, 1, {{src}}_stride_y, {{tmp}}); -)_"; - } - - code += R"_( - LOOP_UNROLLING(int, m0, 0, 1, M0, - { -)_"; - - if (kernel_name == "cast_down" && is_data_type_quantized(_src->data_type())) - { - code += R"_( - {{tmp}}[m0].v ^= (VEC_DATA_TYPE({{DATA_TYPE_IN}}, N0))0x80; -)_"; - } - - if (kernel_name == "cast_down" && - (is_data_type_float(_src->data_type()) || _attributes.convert_policy() == ConvertPolicy::SATURATE)) - { - code += R"_( - {{dst}}[m0].v = CONVERT_SAT({{tmp}}[m0].v, VEC_DATA_TYPE({{DATA_TYPE_OUT}}, N0)); -)_"; - } - else - { - code += R"_( - {{dst}}[m0].v = CONVERT({{tmp}}[m0].v, VEC_DATA_TYPE({{DATA_TYPE_OUT}}, N0)); -)_"; - } - - code += R"_( - }) -)_"; - - if (is_root) - { - code += R"_( - LOOP_UNROLLING(int, i, 0, 1, M0, - { - g_dst_indirect_y[i].v = (uint)min((int)(g_ind_1 + i), (int)({{arg_dst}}_w) - 1); - g_dst_indirect_y[i].v += (int)(g_ind_2 % {{arg_dst}}_h) * (int)({{arg_dst}}_w); - g_dst_indirect_y[i].v += (int)(g_ind_2 / {{arg_dst}}_h) * (int)({{arg_dst}}_w * {{arg_dst}}_h); - }) -} -)_"; - } - - code += R"_( -//------------------ END KERNEL {{meta_kernel_id}} CAST --------------------- -)_"; - - return code; -} - -void ClTemplateCast::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const -{ - vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer), - "src"); - - vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer), - "dst"); -} - -TagLUT ClTemplateCast::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const -{ - const auto is_root = (comp_group.get_root_component()->id() == this->id()); - - TagLUT lut{}; - - // Arguments and global shared variables - lut["src"] = vtable.get_variable(_src); - lut["dst"] = vtable.get_variable(_dst); - lut["tmp"] = (is_root) ? lut["src"].value + "_in_data" : lut["src"]; - - const auto dst_argument = vtable.get_variable(comp_group.get_any_dst_tensor()); - lut["arg_dst"] = dst_argument.uniq_name; - - // Local build options - lut["meta_kernel_id"] = id(); - - lut["DATA_TYPE_IN"] = get_cl_type_from_data_type(_src->data_type()); - lut["DATA_TYPE_OUT"] = get_cl_type_from_data_type(_dst->data_type()); - - return lut; -} - -CLBuildOptions ClTemplateCast::get_build_options(const ComponentGroup &comp_group) const -{ - ARM_COMPUTE_UNUSED(comp_group); - - const auto root_window = comp_group.get_root_component()->template_writer()->get_window(); - const unsigned int n0 = root_window.x().step(); - const unsigned int m0 = root_window.y().step(); - - // Set build options - CLBuildOptions build_opts{}; - build_opts.add_option("-DN0=" + support::cpp11::to_string(n0)); - build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(_src->dimension(0) % n0)); - build_opts.add_option("-DM0=" + support::cpp11::to_string(m0)); - - return build_opts; -} - -std::string ClTemplateCast::get_config_id() const -{ - std::string config_id{}; - - config_id += "_"; - config_id += lower_string(string_from_data_type(_src->data_type())); - config_id += "_"; - config_id += lower_string(string_from_data_type(_dst->data_type())); - config_id += "_"; - config_id += support::cpp11::to_string(_src->dimension(0)); - config_id += "_"; - config_id += support::cpp11::to_string(_src->dimension(1)); - - return config_id; -} - -std::set<std::string> ClTemplateCast::get_headers_list() const -{ - return std::set<std::string>{"helpers.h", "tile_helpers.h"}; -} - -Window ClTemplateCast::get_window() const -{ - ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized"); - - const unsigned int n0 = adjust_vec_size(16 / _dst->element_size(), _dst->dimension(0)); - Window win = calculate_max_window(*_dst, Steps(n0)); - return win.collapse(win, Window::DimZ); -} - -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.h deleted file mode 100644 index 3adca4edc9..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.h +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATECAST -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATECAST - -#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -class ClTemplateCast final : public IGpuTemplateComponentWriter -{ -public: - using Attributes = ClComponentCast::Attributes; - - /** Constructor - * - * @param[in] id Component id - * @param[in] tensors Tensor arguments to the components - * @param[in] attributes Component attributes - */ - ClTemplateCast(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes); - /** Prevent instances of this class from being copy constructed */ - ClTemplateCast(const ClTemplateCast &cast) = delete; - /** Prevent instances of this class from being copied */ - ClTemplateCast &operator=(const ClTemplateCast &cast) = delete; - /** Allow instances of this class to be move constructed */ - ClTemplateCast(ClTemplateCast &&cast) = default; - /** Allow instances of this class to be moved */ - ClTemplateCast &operator=(ClTemplateCast &&cast) = default; - /** Generate kernel component name */ - std::string get_name() const override; - /** Generate kernel component code template - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return std::string Component code - */ - std::string get_component_code(const ComponentGroup &comp_group) const override; - /** Declare all variables used by the component in the @p vtable - * - * @param[out] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - */ - void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - /** Generate the tag look-up table used to instantiate the component code. - * - * @param[in] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - * - * @return TagLUT Tag lookup table - */ - TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - /** Generate the build options used in the component - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return CLBuildOptions Build options - */ - CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override; - /** Generate the component config id string used for tuning */ - std::string get_config_id() const override; - /** Generate the header list used in the component */ - std::set<std::string> get_headers_list() const override; - /** Generate the execution window for the component */ - Window get_window() const override; - -private: - const ITensorInfo *_src; - const ITensorInfo *_dst; - Attributes _attributes; -}; -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute - -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATECAST */ diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.cpp deleted file mode 100644 index 8380620ab2..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.cpp +++ /dev/null @@ -1,364 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "ClTemplateDepthwiseConv2d.h" - -#include "src/core/helpers/WindowHelpers.h" -#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -ClTemplateDepthwiseConv2d::ClTemplateDepthwiseConv2d(ComponentId id, - const ArgumentPack<ITensorInfo> &tensors, - const Attributes &attributes, - const Settings &settings) - : IGpuTemplateComponentWriter{id, tensors}, - _src{}, - _weight{}, - _bias{}, - _dst{}, - _attributes{attributes}, - _settings{settings} -{ - _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0); - _weight = this->tensors().get_const_tensor(TensorType::ACL_SRC_1); - if (this->tensors().get_const_tensor(TensorType::ACL_SRC_2)) - { - _bias = this->tensors().get_const_tensor(TensorType::ACL_SRC_2); - } - _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0); - ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _weight, _dst); -} - -std::string ClTemplateDepthwiseConv2d::get_name() const -{ - return "depthwise_conv2d"; -} - -std::string ClTemplateDepthwiseConv2d::get_component_code(const ComponentGroup &comp_group) const -{ - ARM_COMPUTE_UNUSED(comp_group); - - constexpr int height_idx = 2; // Data Layout is NHWC - - std::string code = R"_( -//------------------ START KERNEL {{meta_kernel_id}} --------------------- -// IN_0(src) {{src}} -// IN_1(wei) {{weight}} -)_"; - - if (_bias != nullptr && _bias->has_valid_id()) - { - code += R"_( -// IN_1(bia) {{bias}} -)_"; - } - - code += R"_( -// OUT(dst, accum) {{dst}} - -TILE(uint, M0, 1, g_dst_indirect_y); - -{ -#define _IWEI_WIDTH {{WEI_WIDTH}} -#define _IWEI_HEIGHT {{WEI_HEIGHT}} -#define _IDST_WIDTH {{arg_dst}}_w -#define _IDST_HEIGHT {{arg_dst}}_h -#define _IM0_A M0_A -#define _IN0_A N0_A -#define _IM0_B _IWEI_WIDTH -#define _IN0_B N0 -#define _IBOUNDARY_CHECK (!((_IWEI_WIDTH == 1 && _IWEI_HEIGHT == 1 && {{PAD_LEFT}} == 0 && {{PAD_TOP}} == 0 && M0 == 1))) -)_"; - - code += R"_( - const int yo = g_ind_2 % {{arg_dst}}_h; - const int bout = g_ind_2 / {{arg_dst}}_h; -)_"; - - code += R"_( - - int xi = g_ind_1 * {{STRIDE_X}}; - int yi = yo * {{STRIDE_Y}}; - xi -= {{PAD_LEFT}}; - yi -= {{PAD_TOP}}; - - LOOP_UNROLLING(int, i, 0, 1, M0, - { - {{dst}}[i].v = 0; - }) -)_"; - - if (_weight->dimension(height_idx) < 5) - { - code += R"_( - LOOP_UNROLLING(int, yk, 0, 1, _IWEI_HEIGHT, -)_"; - } - else - { - code += R"_( - for(int yk = 0; yk < _IWEI_HEIGHT; ++yk) -)_"; - } - - code += R"_( - { - TILE({{SRC_DATA_TYPE}}, _IM0_A, _IN0_A, a); - - LOOP_UNROLLING(int, i, 0, 1, _IM0_A, - { - a[i].v = 0; - }) - - T_LOAD_NHWC_WITH_DILATION({{SRC_DATA_TYPE}}, 1, _IM0_A, _IN0_A, {{SRC_TENSOR_TYPE}}, {{src}}, bout, yi + yk * {{DILATION_Y}}, xi, (g_ind_0 / {{DEPTH_MULTIPLIER}}), {{src}}_w, {{src}}_h, {{DILATION_X}}, 1, _IBOUNDARY_CHECK, a); - - TILE({{WEI_DATA_TYPE}}, _IM0_B, _IN0_B, b); - - T_LOAD({{WEI_DATA_TYPE}}, _IM0_B, _IN0_B, {{WEI_TENSOR_TYPE}}, {{weight}}, g_ind_0, yk * _IM0_B, 1, {{weight}}_stride_y, b); - - LOOP_UNROLLING(int, m0, 0, 1, M0, - { - LOOP_UNROLLING(int, xk, 0, 1, _IWEI_WIDTH, - { -)_"; - - if (!_settings.is_fma_available()) - { - code += R"_( - {{dst}}[m0].v += a[xk + m0].v * b[xk].v; -)_"; - } - else - { - code += R"_( - {{dst}}[m0].v = fma(a[xk + m0].v, b[xk].v, {{dst}}[m0].v); -)_"; - } - - code += R"_( - }) - }) - } -)_"; - - if (_weight->dimension(height_idx) < 5) - { - code += R"_( - ) -)_"; - } - - if (_bias && _bias->has_valid_id()) - { - code += R"_( - TILE({{BIA_DATA_TYPE}}, 1, N0, {{bias}}); - - T_LOAD({{BIA_DATA_TYPE}}, 1, N0, BUFFER, {{bias}}, g_ind_0, 0, 0, 0, {{bias}}); - - T_ELTWISE_BROADCAST_ADD_X({{ACC_DATA_TYPE}}, M0, N0, {{dst}}, {{bias}}, {{dst}}); -)_"; - } - - code += R"_( - LOOP_UNROLLING(int, i, 0, 1, M0, - { - g_dst_indirect_y[i].v = (uint)min((int)(g_ind_1 + i), (int)({{arg_dst}}_w) - 1); - g_dst_indirect_y[i].v += (int)(g_ind_2 % {{arg_dst}}_h) * (int)({{arg_dst}}_w); - g_dst_indirect_y[i].v += (int)(g_ind_2 / {{arg_dst}}_h) * (int)({{arg_dst}}_w * {{arg_dst}}_h); - }) -} -//------------------ END KERNEL {{meta_kernel_id}} --------------------- -)_"; - - return code; -} - -void ClTemplateDepthwiseConv2d::declare_variables(GpuKernelVariableTable &vtable, - const ComponentGroup &comp_group) const -{ - const GpuKernelArgumentInfo::Type input_type = _settings.export_input_to_cl_image() - ? GpuKernelArgumentInfo::Type::Tensor_4D_t_Image - : GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer; - - vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(input_type), "src"); - - const GpuKernelArgumentInfo::Type weight_type = _settings.export_weights_to_cl_image() - ? GpuKernelArgumentInfo::Type::Tensor_4D_t_Image - : GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer; - - vtable.declare_variable(comp_group, _weight, GpuKernelArgumentInfo(weight_type), "weight"); - - if (_bias != nullptr && _bias->has_valid_id()) // optional bias - { - vtable.declare_variable(comp_group, _bias, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Vector), "bias"); - } - vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer), - "dst"); -} - -TagLUT ClTemplateDepthwiseConv2d::get_tag_lut(const GpuKernelVariableTable &vtable, - const ComponentGroup &comp_group) const -{ - TagLUT lut{}; - - // Arguments and global shared variables - lut["src"] = vtable.get_variable(_src); - lut["weight"] = vtable.get_variable(_weight); - - if (_bias != nullptr && _bias->has_valid_id()) // optional bias - { - lut["bias"] = vtable.get_variable(_bias); - lut["BIA_DATA_TYPE"] = get_cl_type_from_data_type(_bias->data_type()); - } - lut["dst"] = vtable.get_variable(_dst); - - const auto dst_argument = vtable.get_variable(comp_group.get_any_dst_tensor()); - lut["arg_dst"] = dst_argument.uniq_name; - - // Local build options - lut["meta_kernel_id"] = id(); - lut["ACC_DATA_TYPE"] = _src->data_type(); - lut["SRC_DATA_TYPE"] = _src->data_type(); - lut["WEI_DATA_TYPE"] = _weight->data_type(); - - switch (vtable.get_variable(_src).kernel_argument_info.type) - { - case GpuKernelArgumentInfo::Type::Image_Export_To_ClImage2D: - case GpuKernelArgumentInfo::Type::Image_3D_Export_To_ClImage2D: - case GpuKernelArgumentInfo::Type::Tensor_4D_t_Image: - lut["SRC_TENSOR_TYPE"] = "IMAGE"; - break; - default: - lut["SRC_TENSOR_TYPE"] = "BUFFER"; - break; - } - - switch (vtable.get_variable(_weight).kernel_argument_info.type) - { - case GpuKernelArgumentInfo::Type::Image_Export_To_ClImage2D: - case GpuKernelArgumentInfo::Type::Image_3D_Export_To_ClImage2D: - case GpuKernelArgumentInfo::Type::Tensor_4D_t_Image: - lut["WEI_TENSOR_TYPE"] = "IMAGE"; - break; - default: - lut["WEI_TENSOR_TYPE"] = "BUFFER"; - break; - } - - // Data Layout is NHWC - constexpr int width_idx = 1; - constexpr int height_idx = 2; - - lut["WEI_WIDTH"] = _weight->dimension(width_idx); - lut["WEI_HEIGHT"] = _weight->dimension(height_idx); - - lut["STRIDE_X"] = _attributes.stride().x(); - lut["STRIDE_Y"] = _attributes.stride().y(); - - lut["PAD_LEFT"] = _attributes.pad().left; - lut["PAD_TOP"] = _attributes.pad().top; - - lut["DILATION_X"] = _attributes.dilation().x(); - lut["DILATION_Y"] = _attributes.dilation().y(); - - lut["DEPTH_MULTIPLIER"] = _attributes.depth_multiplier(); - - return lut; -} - -CLBuildOptions ClTemplateDepthwiseConv2d::get_build_options(const ComponentGroup &comp_group) const -{ - ARM_COMPUTE_UNUSED(comp_group); - - constexpr unsigned int width_idx = 1; // Data Layout is NHWC - - const unsigned int n0 = _settings.n0(); - const unsigned int m0 = _settings.m0(); - const unsigned int m0_a = _weight->dimension(width_idx) + m0 - 1; - const unsigned int n0_a = _attributes.depth_multiplier() > 1 ? 1 : n0; - const unsigned int partial_store_n0 = _dst->dimension(0) % n0; - - CLBuildOptions build_opts{}; - - if (_settings.fast_relaxed_math()) - { - build_opts.add_option("-cl-fast-relaxed-math"); - } - else - { - // -cl-fast-relaxed-math also sets -cl-finite-math-only and -cl-unsafe-math-optimizations - // to disable -cl-finite-math-only, we only include -cl-unsafe-math-optimizations - build_opts.add_option("-cl-unsafe-math-optimizations"); - } - - build_opts.add_option("-DN0=" + support::cpp11::to_string(n0)); - build_opts.add_option("-DM0=" + support::cpp11::to_string(m0)); - build_opts.add_option("-DN0_A=" + support::cpp11::to_string(n0_a)); - build_opts.add_option("-DM0_A=" + support::cpp11::to_string(m0_a)); - build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0)); - - return build_opts; -} - -std::string ClTemplateDepthwiseConv2d::get_config_id() const -{ - std::string config_id{}; - - config_id += support::cpp11::to_string(_src->dimension(0)); - config_id += "_"; - config_id += support::cpp11::to_string(_src->dimension(1)); - config_id += "_"; - config_id += support::cpp11::to_string(_src->dimension(2)); - config_id += "_"; - config_id += support::cpp11::to_string(_dst->dimension(0)); - config_id += "_"; - config_id += support::cpp11::to_string(_dst->dimension(1)); - config_id += "_"; - config_id += support::cpp11::to_string(_dst->dimension(2)); - config_id += "_"; - config_id += string_from_data_type(_src->data_type()); - - return config_id; -} - -std::set<std::string> ClTemplateDepthwiseConv2d::get_headers_list() const -{ - return std::set<std::string>{"helpers.h", "tile_helpers.h"}; -} - -Window ClTemplateDepthwiseConv2d::get_window() const -{ - ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized"); - - Window win = calculate_max_window(*_dst, Steps(_settings.n0(), _settings.m0())); - return win.collapse(win, Window::DimZ); -} - -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.h deleted file mode 100644 index 5d04c687c3..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.h +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEDEPTHWISECONV2D -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEDEPTHWISECONV2D - -#include "arm_compute/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.h" - -#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -class ClTemplateDepthwiseConv2d final : public IGpuTemplateComponentWriter -{ -public: - using Attributes = ClComponentDepthwiseConv2d::Attributes; - using Settings = ClComponentDepthwiseConv2d::Settings; - /** Constructor - * - * Similar to @ref ClComponentDepthwiseConv2d::validate() - * - * @param[in] id Component id - * @param[in] tensors Tensor arguments to the components - * @param[in] attributes Component attributes - * @param[in] settings Component settings - */ - ClTemplateDepthwiseConv2d(ComponentId id, - const ArgumentPack<ITensorInfo> &tensors, - const Attributes &attributes, - const Settings &settings); - /** Prevent instances of this class from being copy constructed */ - ClTemplateDepthwiseConv2d(const ClTemplateDepthwiseConv2d &depthwise_conv2d) = delete; - /** Prevent instances of this class from being copied */ - ClTemplateDepthwiseConv2d &operator=(const ClTemplateDepthwiseConv2d &depthwise_conv2d) = delete; - /** Allow instances of this class to be move constructed */ - ClTemplateDepthwiseConv2d(ClTemplateDepthwiseConv2d &&depthwise_conv2d) = default; - /** Allow instances of this class to be moved */ - ClTemplateDepthwiseConv2d &operator=(ClTemplateDepthwiseConv2d &&depthwise_conv2d) = default; - /** Generate kernel component name */ - std::string get_name() const override; - /** Generate kernel component code template - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return std::string Component code - */ - std::string get_component_code(const ComponentGroup &comp_group) const override; - /** Declare all variables used by the component in the @p vtable - * - * @param[out] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - */ - void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - /** Generate the tag look-up table used to instantiate the component code. - * - * @param[in] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - * - * @return TagLUT Tag lookup table - */ - TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - /** Generate the build options used in the component - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return CLBuildOptions Build options - */ - CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override; - /** Generate the component config id string used for tuning */ - std::string get_config_id() const override; - /** Generate the header list used in the component */ - std::set<std::string> get_headers_list() const override; - /** Generate the execution window for the component */ - Window get_window() const override; - -private: - const ITensorInfo *_src; - const ITensorInfo *_weight; - const ITensorInfo *_bias; - const ITensorInfo *_dst; - Attributes _attributes; - Settings _settings; -}; -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEDEPTHWISECONV2D */ diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp deleted file mode 100644 index f6a7a58d1d..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp +++ /dev/null @@ -1,393 +0,0 @@ -/* - * Copyright (c) 2022-2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "ClTemplateDirectConv2d.h" - -#include "arm_compute/core/utils/helpers/AdjustVecSize.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/core/utils/StringUtils.h" - -#include "src/core/helpers/WindowHelpers.h" -#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h" -#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" -#include "support/StringSupport.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -ClTemplateDirectConv2d::ClTemplateDirectConv2d(ComponentId id, - const ArgumentPack<ITensorInfo> &tensors, - const Attributes &attributes, - const Settings &settings) - : IGpuTemplateComponentWriter{id, tensors}, - _src{}, - _weight{}, - _bias{}, - _dst{}, - _attributes{attributes}, - _settings{settings} -{ - _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0); - _weight = this->tensors().get_const_tensor(TensorType::ACL_SRC_1); - if (this->tensors().get_const_tensor(TensorType::ACL_SRC_2)) - { - _bias = this->tensors().get_const_tensor(TensorType::ACL_SRC_2); - } - _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0); - ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _weight, _dst); -} - -std::string ClTemplateDirectConv2d::get_name() const -{ - return "direct_conv2d"; -} - -std::string ClTemplateDirectConv2d::get_component_code(const ComponentGroup &comp_group) const -{ - ARM_COMPUTE_UNUSED(comp_group); - - const auto channel_idx = get_data_layout_dimension_index(_src->data_layout(), DataLayoutDimension::CHANNEL); - const auto k0 = adjust_vec_size(_settings.direct_conv_descriptor().k0, _src->dimension(channel_idx)); - const bool leftover_loop = (_src->dimension(channel_idx) % k0) != 0; - - std::string code = R"_( -//------------------ START KERNEL {{meta_kernel_id}} --------------------- -// IN_0(src) {{src}} -// IN_1(wei) {{weight}} -)_"; - if (_bias && _bias->has_valid_id()) - { - code += R"_( -// IN_1(bia) {{bias}} -)_"; - } - code += R"_( -// OUT(dst, accum) {{dst}} - -TILE(uint, M0, 1, g_dst_indirect_y); - -{ -#define _IWEI_WIDTH {{WEI_WIDTH}} -#define _IWEI_HEIGHT {{WEI_HEIGHT}} -#define _ISRC_WIDTH {{SRC_WIDTH}} -#define _ISRC_HEIGHT {{SRC_HEIGHT}} -#define _ISRC_CHANNELS {{SRC_CHANNELS}} -#define _IDST_WIDTH {{DST_WIDTH}} -#define _IDST_HEIGHT {{DST_HEIGHT}} -#define _IDST_CHANNELS {{DST_CHANNELS}} -#define _IY_MULTIPLIER (_IWEI_WIDTH * _IWEI_HEIGHT) - - TILE(int, M0, 1, xi); - TILE(int, M0, 1, yi); - - // Convert the linear index to coordinate - LOOP_UNROLLING(int, i, 0, 1, M0, - { - xi[0].s[i] = ((g_ind_1 + i) % _IDST_WIDTH) * {{STRIDE_X}}; - yi[0].s[i] = ((g_ind_1 + i) / _IDST_WIDTH) * {{STRIDE_Y}}; - xi[0].s[i] -= {{PAD_LEFT}}; - yi[0].s[i] -= {{PAD_TOP}}; - }) - - LOOP_UNROLLING(int, i, 0, 1, M0, - { - {{dst}}[i].v = 0; - }) - - for(int i = 0; i < (_IWEI_WIDTH * _IWEI_HEIGHT); ++i) - { - int xk = i % _IWEI_WIDTH; - int yk = i / _IWEI_WIDTH; - - TILE(int, 1, M0, my); - - LOOP_UNROLLING(int, i, 0, 1, M0, - { - int x_s = xi[0].s[i] + xk; - int y_s = yi[0].s[i] + yk; - my[0].s[i] = x_s + y_s *_ISRC_WIDTH; - my[0].s[i] = my[0].s[i] + g_ind_2 * (int)(_ISRC_WIDTH * _ISRC_HEIGHT); - my[0].s[i] = select(-1, my[0].s[i], x_s >= 0); - my[0].s[i] = select(-1, my[0].s[i], x_s < _ISRC_WIDTH); - my[0].s[i] = select(-1, my[0].s[i], y_s >= 0); - my[0].s[i] = select(-1, my[0].s[i], y_s < _ISRC_HEIGHT); - }) - - int ck = 0; - for(; ck <= (_ISRC_CHANNELS - K0); ck += K0) - { - TILE({{SRC_DATA_TYPE}}, M0, K0, a); - TILE({{WEI_DATA_TYPE}}, N0, K0, b); - - LOOP_UNROLLING(int, i, 0, 1, M0, - { - a[i].v = {{ZERO_VALUE}}; - }) - - LOOP_UNROLLING(int, i, 0, 1, N0, - { - b[i].v = {{ZERO_VALUE}}; - }) - - T_LOAD2D_INDIRECT({{SRC_DATA_TYPE}}, M0, K0, {{SRC_TENSOR_TYPE}}, {{src}}, ck, {{src}}_stride_y, my, a); - - T_LOAD({{WEI_DATA_TYPE}}, N0, K0, {{WEI_TENSOR_TYPE}}, {{weight}}, ck, g_ind_0 * _IY_MULTIPLIER + i, _IY_MULTIPLIER, {{weight}}_stride_y, b); - - T_MMUL({{SRC_DATA_TYPE}}, {{WEI_DATA_TYPE}}, {{ACC_DATA_TYPE}}, M0, N0, K0, NT, T, a, b, {{dst}}); - } -)_"; - - if (leftover_loop) - { - code += R"_( - for(; ck < _ISRC_CHANNELS; ++ck) - { - TILE({{SRC_DATA_TYPE}}, M0, 1, a); - TILE({{WEI_DATA_TYPE}}, N0, 1, b); - - LOOP_UNROLLING(int, i, 0, 1, M0, - { - a[i].v = {{ZERO_VALUE}}; - }) - - LOOP_UNROLLING(int, i, 0, 1, N0, - { - b[i].v = {{ZERO_VALUE}}; - }) - - T_LOAD2D_INDIRECT({{SRC_DATA_TYPE}}, M0, 1, {{SRC_TENSOR_TYPE}}, {{src}}, ck, {{src}}_stride_y, my, a); - - T_LOAD({{WEI_DATA_TYPE}}, N0, 1, BUFFER, {{weight}}, ck, g_ind_0 * _IY_MULTIPLIER + i, _IY_MULTIPLIER, {{weight}}_stride_y, b); - - T_MMUL({{SRC_DATA_TYPE}}, {{WEI_DATA_TYPE}}, {{ACC_DATA_TYPE}}, M0, N0, 1, NT, T, a, b, {{dst}}); - } - )_"; - } - - code += R"_( -#undef _I_WEI_WIDTH -#undef _I_WEI_HEIGHT -#undef _ISRC_WIDTH -#undef _ISRC_HEIGHT -#undef _ISRC_CHANNELS -#undef _IDST_WIDTH -#undef _IDST_HEIGHT -#undef _IDST_CHANNELS -#undef _IY_MULTIPLIER - - } -)_"; - - if (_bias && _bias->has_valid_id()) - { - code += R"_( - TILE({{BIA_DATA_TYPE}}, 1, N0, bias0); - - T_LOAD({{BIA_DATA_TYPE}}, 1, N0, BUFFER, {{bias}}, g_ind_0, 0, 1, 0, bias0); - - T_ELTWISE_BROADCAST_ADD_X({{ACC_DATA_TYPE}}, M0, N0, {{dst}}, bias0, {{dst}}); - )_"; - } - - code += R"_( - LOOP_UNROLLING(int, i, 0, 1, M0, - { - g_dst_indirect_y[i].v = (uint)min(g_ind_1 + i, (int)({{DST_WIDTH}} * {{DST_HEIGHT}}) - 1); - g_dst_indirect_y[i].v += g_ind_2 * (int)({{DST_WIDTH}} * {{DST_HEIGHT}}); - }) -} -//------------------ END KERNEL {{meta_kernel_id}} --------------------- -)_"; - return code; -} - -void ClTemplateDirectConv2d::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const -{ - vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer), - "src"); - - const GpuKernelArgumentInfo::Type weight_type = _settings.export_to_cl_image() - ? GpuKernelArgumentInfo::Type::Tensor_4D_t_Image - : GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer; - vtable.declare_variable(comp_group, _weight, GpuKernelArgumentInfo(weight_type), "weight"); - - if (_bias && _bias->has_valid_id()) // optional bias - { - vtable.declare_variable(comp_group, _bias, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Vector), "bias"); - } - vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(common_tensor_type), "dst"); -} - -TagLUT ClTemplateDirectConv2d::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const -{ - TagLUT lut{}; - // Arguments and global shared variables - lut["src"] = vtable.get_variable(_src); - lut["weight"] = vtable.get_variable(_weight); - - if (_bias && _bias->has_valid_id()) // optional bias - { - lut["bias"] = vtable.get_variable(_bias); - lut["BIA_DATA_TYPE"] = get_cl_type_from_data_type(_bias->data_type()); - } - lut["dst"] = vtable.get_variable(_dst); - - const auto dst_argument = vtable.get_variable(comp_group.get_any_dst_tensor()); - lut["arg_dst"] = dst_argument.uniq_name; - - // Local build options - lut["meta_kernel_id"] = id(); - lut["ACC_DATA_TYPE"] = _src->data_type(); - lut["SRC_DATA_TYPE"] = _src->data_type(); - lut["WEI_DATA_TYPE"] = _weight->data_type(); - - lut["SRC_TENSOR_TYPE"] = "BUFFER"; - switch (vtable.get_variable(_weight).kernel_argument_info.type) - { - case GpuKernelArgumentInfo::Type::Image_Export_To_ClImage2D: - case GpuKernelArgumentInfo::Type::Image_3D_Export_To_ClImage2D: - case GpuKernelArgumentInfo::Type::Tensor_4D_t_Image: - { - lut["WEI_TENSOR_TYPE"] = "IMAGE"; - break; - } - default: - { - lut["WEI_TENSOR_TYPE"] = "BUFFER"; - break; - } - } - const auto width_idx = 1; - const auto height_idx = 2; - const auto channel_idx = 0; - - lut["SRC_WIDTH"] = _src->dimension(width_idx); - lut["SRC_HEIGHT"] = _src->dimension(height_idx); - lut["SRC_CHANNELS"] = _src->dimension(channel_idx); - - lut["WEI_WIDTH"] = _weight->dimension(width_idx); - lut["WEI_HEIGHT"] = _weight->dimension(height_idx); - - lut["DST_WIDTH"] = _dst->dimension(width_idx); - lut["DST_HEIGHT"] = _dst->dimension(height_idx); - lut["DST_CHANNELS"] = _dst->dimension(channel_idx); - - lut["STRIDE_X"] = _attributes.stride().x(); - lut["STRIDE_Y"] = _attributes.stride().y(); - - lut["PAD_LEFT"] = _attributes.pad().left; - lut["PAD_TOP"] = _attributes.pad().top; - - lut["ZERO_VALUE"] = 0; - - return lut; -} - -CLBuildOptions ClTemplateDirectConv2d::get_build_options(const ComponentGroup &comp_group) const -{ - const unsigned int channel_idx = get_data_layout_dimension_index(_src->data_layout(), DataLayoutDimension::CHANNEL); - - const auto root_window = comp_group.get_root_component()->template_writer()->get_window(); - const unsigned int n0 = root_window.x().step(); - const unsigned int m0 = root_window.y().step(); - const unsigned int k0 = adjust_vec_size(_settings.direct_conv_descriptor().k0, _src->dimension(channel_idx)); - const unsigned int partial_store_n0 = _dst->dimension(0) % n0; - - CLBuildOptions build_opts{}; - if (_settings.fast_relaxed_math()) - { - build_opts.add_option("-cl-fast-relaxed-math"); - } - else - { - // -cl-fast-relaxed-math also sets -cl-finite-math-only and -cl-unsafe-math-optimizations - // to disable -cl-finite-math-only, we only include -cl-unsafe-math-optimizations - build_opts.add_option("-cl-unsafe-math-optimizations"); - } - - build_opts.add_option("-DN0=" + support::cpp11::to_string(n0)); - build_opts.add_option("-DM0=" + support::cpp11::to_string(m0)); - build_opts.add_option("-DK0=" + support::cpp11::to_string(k0)); - build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0)); - - return build_opts; -} - -std::string ClTemplateDirectConv2d::get_config_id() const -{ - const DataType data_type = _src->data_type(); - const DataLayout data_layout = _src->data_layout(); - - const unsigned int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); - const unsigned int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); - - const unsigned int kernel_size = _weight->dimension(width_idx); - - std::string config_id{}; - config_id += lower_string(string_from_data_type(data_type)); - config_id += "_"; - config_id += support::cpp11::to_string(kernel_size); - config_id += "_"; - config_id += support::cpp11::to_string(_attributes.stride().x()); - config_id += "_"; - config_id += support::cpp11::to_string(_attributes.stride().y()); - config_id += "_"; - config_id += support::cpp11::to_string(_dst->dimension(width_idx)); - config_id += "_"; - config_id += support::cpp11::to_string(_dst->dimension(height_idx)); - config_id += "_"; - config_id += lower_string(string_from_data_layout(data_layout)); - return config_id; -} - -std::set<std::string> ClTemplateDirectConv2d::get_headers_list() const -{ - return std::set<std::string>{"helpers.h", "tile_helpers.h"}; -} - -Window ClTemplateDirectConv2d::get_window() const -{ - ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized"); - - const auto output_shape = _dst->tensor_shape(); - const auto desc = _settings.direct_conv_descriptor(); - - const unsigned int n0 = adjust_vec_size(desc.n0, output_shape[0]); - const unsigned int m0 = adjust_vec_size(desc.m0, output_shape[1] * output_shape[2]); - - // Create and configure kernel window - Window win = calculate_max_window(output_shape, Steps(n0, m0)); - - const size_t dim_y_collapsed = ceil_to_multiple(output_shape[1] * output_shape[2], m0); - win.set(Window::DimY, Window::Dimension(0, dim_y_collapsed, m0)); - win.set(Window::DimZ, Window::Dimension(0, output_shape.total_size_upper(3), 1)); - - return win; -} - -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h deleted file mode 100644 index 03c8cd2f15..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright (c) 2022-2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEDIRECTCONV2D -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEDIRECTCONV2D - -#include "arm_compute/core/experimental/Types.h" -#include "arm_compute/dynamic_fusion/sketch/attributes/Conv2dAttributes.h" - -#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -class ClTemplateDirectConv2d final : public IGpuTemplateComponentWriter -{ -public: - using Attributes = ClComponentDirectConv2d::Attributes; - using Settings = ClComponentDirectConv2d::Settings; - /** Constructor - * - * Similar to @ref ClComponentDirectConv2d::validate() - * - * @param[in] id Component id - * @param[in] tensors Tensor arguments to the components - * @param[in] attributes Component attributes - * @param[in] settings Component settings - */ - ClTemplateDirectConv2d(ComponentId id, - const ArgumentPack<ITensorInfo> &tensors, - const Attributes &attributes, - const Settings &settings); - /** Destructor */ - ~ClTemplateDirectConv2d() override = default; - /** Prevent instances of this class from being copy constructed */ - ClTemplateDirectConv2d(const ClTemplateDirectConv2d &direct_conv2d) = delete; - /** Prevent instances of this class from being copied */ - ClTemplateDirectConv2d &operator=(const ClTemplateDirectConv2d &direct_conv2d) = delete; - /** Allow instances of this class to be move constructed */ - ClTemplateDirectConv2d(ClTemplateDirectConv2d &&direct_conv2d) = default; - /** Allow instances of this class to be moved */ - ClTemplateDirectConv2d &operator=(ClTemplateDirectConv2d &&direct_conv2d) = default; - /** Generate kernel component name */ - std::string get_name() const override; - /** Generate kernel component code template - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return std::string Component code - */ - std::string get_component_code(const ComponentGroup &comp_group) const override; - /** Declare all variables used by the component in the @p vtable - * - * @param[out] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - */ - void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - /** Generate the tag look-up table used to instantiate the component code. - * - * @param[in] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - * - * @return TagLUT Tag lookup table - */ - TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - /** Generate the build options used in the component - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return CLBuildOptions Build options - */ - CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override; - /** Generate the component config id string used for tuning */ - std::string get_config_id() const override; - /** Generate the header list used in the component */ - std::set<std::string> get_headers_list() const override; - /** Generate the execution window for the component */ - Window get_window() const override; - -private: - const ITensorInfo *_src; - const ITensorInfo *_weight; - const ITensorInfo *_bias; - const ITensorInfo *_dst; - Attributes _attributes; - Settings _settings; -}; -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEDIRECTCONV2D */ diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp deleted file mode 100644 index 78bff3c3f3..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp +++ /dev/null @@ -1,274 +0,0 @@ -/* - * Copyright (c) 2022-2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "ClTemplateElementwiseBinary.h" - -#include "arm_compute/core/utils/helpers/AdjustVecSize.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/core/utils/StringUtils.h" - -#include "src/core/helpers/WindowHelpers.h" -#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h" -#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" -#include "support/StringSupport.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -constexpr unsigned int vector_size_byte_opencl = 16; - -ClTemplateElementwiseBinary::ClTemplateElementwiseBinary(ComponentId id, - const ArgumentPack<ITensorInfo> &tensors, - const Attributes &attributes) - : IGpuTemplateComponentWriter{id, tensors}, _lhs{}, _rhs{}, _dst{}, _attributes{attributes} -{ - _lhs = this->tensors().get_const_tensor(TensorType::ACL_SRC_0); - _rhs = this->tensors().get_const_tensor(TensorType::ACL_SRC_1); - _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0); - ARM_COMPUTE_ERROR_ON_NULLPTR(_lhs, _rhs, _dst); -} - -std::string ClTemplateElementwiseBinary::get_name() const -{ - return "elementwise_binary"; -} - -std::string ClTemplateElementwiseBinary::get_component_code(const ComponentGroup &comp_group) const -{ - std::string code; - const bool is_root = (comp_group.get_root_component()->id() == this->id()); - const bool is_lhs_input = comp_group.is_input_tensor(_lhs); - const bool is_rhs_input = comp_group.is_input_tensor(_rhs); - - code = - R"_( - //------------------ START KERNEL {{meta_kernel_id}} {{ELTWISE_OP}} --------------------- -)_"; - - if (is_root) - { - code += - R"_( - TILE(uint, M0, 1, g_dst_indirect_y); -)_"; - } - - if (is_lhs_input) - { - code += - R"_( - TILE({{DATA_TYPE}}, {{lhs_m0}}, N0, {{lhs}}); -)_"; - } - - if (is_rhs_input) - { - code += - R"_( - TILE({{DATA_TYPE}}, {{rhs_m0}}, N0, {{rhs}}); -)_"; - } - - code += - R"_( - { -)_"; - - if (is_lhs_input) - { - code += - R"_( - {{lhs}}_offset_first_element_in_bytes += g_ind_2 * {{lhs}}_stride_w; - T_LOAD({{DATA_TYPE}}, {{lhs_m0}}, {{lhs_n0}}, BUFFER, {{lhs}}, {{lhs_start_ind_0}}, {{lhs_start_ind_1}}, 1, {{lhs}}_stride_y, {{lhs}}); -)_"; - } - - if (is_rhs_input) - { - code += - R"_( - {{rhs}}_offset_first_element_in_bytes += g_ind_2 * {{rhs}}_stride_w; - T_LOAD({{DATA_TYPE}}, {{rhs_m0}}, {{rhs_n0}}, BUFFER, {{rhs}}, {{rhs_start_ind_0}}, {{rhs_start_ind_1}}, 1, {{rhs}}_stride_y, {{rhs}}); -)_"; - } - - code += - R"_( - T_ELTWISE_{{BROADCAST_OP}}{{ELTWISE_OP}}({{DATA_TYPE}}, M0, N0, {{lhs}}, {{rhs}}, {{dst}}); -)_"; - - if (is_root) - { - // Calculate the destination indirect Y - code += - R"_( - LOOP_UNROLLING(int, i, 0, 1, M0, - { - g_dst_indirect_y[i].v = (uint)min(g_ind_1 + i, (int)({{arg_dst}}_w * {{arg_dst}}_h) - 1); - g_dst_indirect_y[i].v += g_ind_2 * (int)({{arg_dst}}_w * {{arg_dst}}_h); - }) -)_"; - } - - code += - R"_( - } - //------------------ END KERNEL {{meta_kernel_id}} {{ELTWISE_OP}} --------------------- -)_"; - - return code; -} - -void ClTemplateElementwiseBinary::declare_variables(GpuKernelVariableTable &vtable, - const ComponentGroup &comp_group) const -{ - vtable.declare_variable(comp_group, _lhs, GpuKernelArgumentInfo(common_tensor_type), "lhs"); - - vtable.declare_variable(comp_group, _rhs, GpuKernelArgumentInfo(common_tensor_type), "rhs"); - - vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(common_tensor_type), "dst"); -} - -TagLUT ClTemplateElementwiseBinary::get_tag_lut(const GpuKernelVariableTable &vtable, - const ComponentGroup &comp_group) const -{ - TagLUT lut{}; - - // Local build options - lut["meta_kernel_id"] = id(); - lut["DATA_TYPE"] = get_cl_type_from_data_type(_lhs->data_type()); - // Arguments and global shared variables - - lut["lhs"] = vtable.get_variable(_lhs); - lut["rhs"] = vtable.get_variable(_rhs); - lut["dst"] = vtable.get_variable(_dst); - lut["arg_dst"] = vtable.get_variable(comp_group.get_any_dst_tensor()); - - switch (_attributes.operation()) - { - case Attributes::ElementwiseOp::Add: - lut["ELTWISE_OP"] = "ADD"; - break; - case Attributes::ElementwiseOp::Sub: - lut["ELTWISE_OP"] = "SUB"; - break; - case Attributes::ElementwiseOp::Mul: - lut["ELTWISE_OP"] = "MUL"; - break; - default: - ARM_COMPUTE_ERROR("Arithmetic Operation not supported"); - } - - ARM_COMPUTE_ERROR_ON(comp_group.is_intermediate_tensor(_lhs) && - detail::have_different_dimensions(_lhs->tensor_shape(), _dst->tensor_shape(), 0)); - ARM_COMPUTE_ERROR_ON(comp_group.is_intermediate_tensor(_rhs) && - detail::have_different_dimensions(_rhs->tensor_shape(), _dst->tensor_shape(), 0)); - - // Set broadcast parameters - // PRE: All tensors are broadcast-compatible - const auto &lhs_dims = _lhs->tensor_shape(); - const auto &rhs_dims = _rhs->tensor_shape(); - const auto &dst_dims = _dst->tensor_shape(); - - const auto lhs_broadcast_x = dst_dims[0] != 1 && lhs_dims[0] == 1; - const auto rhs_broadcast_x = dst_dims[0] != 1 && rhs_dims[0] == 1; - const auto lhs_broadcast_y = dst_dims[1] != 1 && lhs_dims[1] == 1; - const auto rhs_broadcast_y = dst_dims[1] != 1 && rhs_dims[1] == 1; - const auto lhs_broadcast_z = dst_dims[2] != 1 && lhs_dims[2] == 1; - const auto rhs_broadcast_z = dst_dims[2] != 1 && rhs_dims[2] == 1; - - const auto lhs_broadcast_yz = lhs_broadcast_y && lhs_broadcast_z; - const auto rhs_broadcast_yz = rhs_broadcast_y && rhs_broadcast_z; - - lut["lhs_n0"] = (lhs_broadcast_x) ? "1" : "N0"; - lut["lhs_start_ind_0"] = (lhs_broadcast_x) ? "0" : "g_ind_0"; - lut["rhs_n0"] = (rhs_broadcast_x) ? "1" : "N0"; - lut["rhs_start_ind_0"] = (rhs_broadcast_x) ? "0" : "g_ind_0"; - - lut["lhs_m0"] = (lhs_broadcast_yz) ? "1" : "M0"; - lut["lhs_start_ind_1"] = (lhs_broadcast_yz) ? "0" : "g_ind_1"; - lut["rhs_m0"] = (rhs_broadcast_yz) ? "1" : "M0"; - lut["rhs_start_ind_1"] = (rhs_broadcast_yz) ? "0" : "g_ind_1"; - - lut["BROADCAST_OP"] = (lhs_broadcast_yz) ? "BROADCAST_LHS_X_" : (rhs_broadcast_yz) ? "BROADCAST_RHS_X_" : ""; - - return lut; -} - -CLBuildOptions ClTemplateElementwiseBinary::get_build_options(const ComponentGroup &comp_group) const -{ - CLBuildOptions build_opts{}; - /// NOTE: For now tile sizes (n0, m0) are set by the execution window. This may change in the future - const auto root_window = comp_group.get_root_component()->template_writer()->get_window(); - const unsigned int n0 = root_window.x().step(); - const unsigned int m0 = root_window.y().step(); - const unsigned int partial_store_n0 = _dst->dimension(0) % n0; - - build_opts.add_option("-DM0=" + support::cpp11::to_string(m0)); - build_opts.add_option("-DN0=" + support::cpp11::to_string(n0)); - build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(_lhs->data_type())); - build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0)); - - return build_opts; -} - -std::string ClTemplateElementwiseBinary::get_config_id() const -{ - std::string config_id{}; - config_id += lower_string(string_from_data_type(_dst->data_type())); - config_id += "_"; - config_id += support::cpp11::to_string(_dst->dimension(0)); - config_id += "_"; - config_id += support::cpp11::to_string(_dst->dimension(1)); - config_id += "_"; - config_id += lower_string(string_from_data_layout(_dst->data_layout())); - - return config_id; -} - -std::set<std::string> ClTemplateElementwiseBinary::get_headers_list() const -{ - return std::set<std::string>{"helpers.h", "tile_helpers.h"}; -} - -Window ClTemplateElementwiseBinary::get_window() const -{ - ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized"); - - TensorShape output_shape = _dst->tensor_shape(); - // Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) and upper dimensions unchanged - // This is in line with the collapsing convention used by operators like Conv2d - output_shape.collapse(2U, 1U); - const unsigned int num_elems_processed_per_iteration = - adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0)); - Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration)); - - return win; -} - -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.h deleted file mode 100644 index 991c0eca44..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.h +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Copyright (c) 2022-2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEELEMENTWISEBINARY -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEELEMENTWISEBINARY - -#include "arm_compute/core/experimental/Types.h" - -#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -class ClTemplateElementwiseBinary final : public IGpuTemplateComponentWriter -{ -public: - using Attributes = ClComponentElementwiseBinary::Attributes; - - /** Constructor - * - * Similar to @ref ClComponentElementwiseBinary::validate() - * - * @param[in] id Component id - * @param[in] tensors Tensor arguments to the components - * @param[in] attributes Component attributes - */ - ClTemplateElementwiseBinary(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes); - /** Prevent instances of this class from being copy constructed */ - ClTemplateElementwiseBinary(const ClTemplateElementwiseBinary &elementwise) = delete; - /** Prevent instances of this class from being copied */ - ClTemplateElementwiseBinary &operator=(const ClTemplateElementwiseBinary &elementwise) = delete; - /** Allow instances of this class to be move constructed */ - ClTemplateElementwiseBinary(ClTemplateElementwiseBinary &&elementwise) = default; - /** Allow instances of this class to be moved */ - ClTemplateElementwiseBinary &operator=(ClTemplateElementwiseBinary &&elementwise) = default; - - /** Generate kernel component name */ - std::string get_name() const override; - - /** Generate kernel component code template - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return std::string Component code - */ - std::string get_component_code(const ComponentGroup &comp_group) const override; - - /** Declare all variables used by the component in the @p vtable - * - * @param[out] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - */ - void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - - /** Generate the tag look-up table used to instantiate the component code. - * - * @param[in] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - * - * @return TagLUT Tag lookup table - */ - TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - - /** Generate the build options used in the component - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return CLBuildOptions Build options - */ - CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override; - - /** Generate the component config id string used for tuning */ - std::string get_config_id() const override; - - /** Generate the header list used in the component */ - std::set<std::string> get_headers_list() const override; - - /** Generate the execution window for the component */ - Window get_window() const override; - -private: - const ITensorInfo *_lhs; - const ITensorInfo *_rhs; - const ITensorInfo *_dst; - Attributes _attributes; -}; -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEELEMENTWISEBINARY */ diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.cpp deleted file mode 100644 index 522c33a022..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.cpp +++ /dev/null @@ -1,267 +0,0 @@ -/* - * Copyright (c) 2022-2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.h" - -#include "arm_compute/core/utils/helpers/AdjustVecSize.h" -#include "arm_compute/core/utils/StringUtils.h" - -#include "src/core/helpers/WindowHelpers.h" -#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" -#include "support/StringSupport.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -namespace -{ -constexpr unsigned int serial_vector_size = 8; -} // namespace -ClTemplateLogits1DMaxShiftExpSum::ClTemplateLogits1DMaxShiftExpSum(ComponentId id, - const ArgumentPack<ITensorInfo> &tensors, - const Attributes &attributes) - : IGpuTemplateComponentWriter{id, tensors}, _src{}, _sum{}, _dst{}, _attributes{attributes} -{ - _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0); - _sum = this->tensors().get_const_tensor(TensorType::ACL_DST_0); - _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_1); - ARM_COMPUTE_ERROR_ON_NULLPTR(_src); - ARM_COMPUTE_ERROR_ON_NULLPTR(_sum); - ARM_COMPUTE_ERROR_ON_NULLPTR(_dst); -} - -std::string ClTemplateLogits1DMaxShiftExpSum::get_name() const -{ - return "logits_1d_max_shift_exp_sum"; -} - -std::string ClTemplateLogits1DMaxShiftExpSum::get_component_code(const ComponentGroup &comp_group) const -{ - ARM_COMPUTE_UNUSED(comp_group); - - std::string code = R"_( -//------------------ START KERNEL {{meta_kernel_id}} --------------------- -#define VEC_TYPE VEC_DATA_TYPE({{DATA_TYPE}}, N0) -#define SELECT_TYPE SELECT_VEC_DATA_TYPE({{DATA_TYPE}}, N0) -{ - __global uchar *src_addr = {{src}}_ptr + {{src}}_offset_first_element_in_bytes + g_ind_1 * {{src}}_stride_y + g_ind_2 * {{src}}_stride_z; - __global uchar *dst_addr = {{dst}}_ptr + {{dst}}_offset_first_element_in_bytes + g_ind_1 * {{dst}}_stride_y + g_ind_2 * {{dst}}_stride_z; - Image sum = CONVERT_TENSOR3D_TO_IMAGE_STRUCT({{sum}}); - VEC_TYPE max_val_vec = (VEC_TYPE)({{MINVAL}}); -)_"; - - const bool beta_defined = (_attributes.beta() != 1.f); - - if (beta_defined) - { - code += R"_( - VEC_TYPE beta = (VEC_TYPE){{BETA}}; -)_"; - } - - constexpr unsigned int _serial_vector_size = 8; - const unsigned int reduction_dim_size = _src->dimension(0); - const unsigned int vector_size = adjust_vec_size(_serial_vector_size, reduction_dim_size); - const bool non_multiple_of_n0 = ((reduction_dim_size % vector_size) != 0); - - if (non_multiple_of_n0) - { - code += R"_( - VEC_TYPE data = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)src_addr); - SELECT_TYPE widx = (SELECT_TYPE)PARTIAL_N0 > VEC_OFFS(SELECT_DATA_TYPE({{DATA_TYPE}}), N0); - max_val_vec = max(max_val_vec, select((VEC_TYPE)({{MINVAL}}), data, widx)); -)_"; - } - - code += R"_( - for(uint i = PARTIAL_N0; i < {{SRC_WIDTH}}; i += N0) - { - VEC_TYPE data = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(src_addr + i * sizeof({{DATA_TYPE}}))); - max_val_vec = max(data, max_val_vec); - } - - {{DATA_TYPE}} max_val = MAX_REDUCE(max_val_vec, N0); - VEC_TYPE sum1D = 0; -)_"; - - if (non_multiple_of_n0) - { - code += R"_( - data -= max_val; -)_"; - if (beta_defined) - { - code += R"_( - data *= beta; -)_"; - } - - if (_attributes.is_log_softmax()) - { - code += R"_( - VSTORE_PARTIAL(N0, PARTIAL_N0) - (data, 0, (__global {{DATA_TYPE}} *)dst_addr); - data = exp(data); - data = select(0, data, widx); -)_"; - } - else - { - code += R"_( - data = exp(data); - data = select(0, data, widx); - VSTORE_PARTIAL(N0, PARTIAL_N0) - (data, 0, (__global {{DATA_TYPE}} *)dst_addr); -)_"; - } - - code += R"_( - sum1D += data; -)_"; - } - code += R"_( - for(uint i = PARTIAL_N0; i < {{SRC_WIDTH}}; i += N0) - { - VEC_TYPE data = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(src_addr + i * sizeof({{DATA_TYPE}}))); - data -= max_val; -)_"; - - if (beta_defined) - { - code += R"_( - data *= beta; -)_"; - } - - if (_attributes.is_log_softmax()) - { - code += R"_( - VSTORE(N0) - (data, 0, (__global {{DATA_TYPE}} *)(dst_addr + i * sizeof({{DATA_TYPE}}))); - data = exp(data); -)_"; - } - else - { - code += R"_( - data = exp(data); - VSTORE(N0) - (data, 0, (__global {{DATA_TYPE}} *)(dst_addr + i * sizeof({{DATA_TYPE}}))); -)_"; - } - - code += R"_( - sum1D += data; - } -)_"; - - code += R"_( - *((__global {{DATA_TYPE}} *)sum.ptr) = SUM_REDUCE(sum1D, N0); -} -//------------------ END KERNEL {{meta_kernel_id}} --------------------- -)_"; - - return code; -} - -void ClTemplateLogits1DMaxShiftExpSum::declare_variables(GpuKernelVariableTable &vtable, - const ComponentGroup &comp_group) const -{ - vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D), "src"); - - vtable.declare_variable(comp_group, _sum, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D), "sum"); - - vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D), "dst"); -} - -TagLUT ClTemplateLogits1DMaxShiftExpSum::get_tag_lut(const GpuKernelVariableTable &vtable, - const ComponentGroup &comp_group) const -{ - ARM_COMPUTE_UNUSED(comp_group); - - TagLUT lut{}; - - // Arguments and global shared variables - lut["src"] = vtable.get_variable(_src); - lut["sum"] = vtable.get_variable(_sum); - lut["dst"] = vtable.get_variable(_dst); - - // Local build options - lut["meta_kernel_id"] = id(); - - const DataType data_type = _src->data_type(); - - lut["DATA_TYPE"] = get_cl_type_from_data_type(data_type); - lut["BETA"] = float_to_string_with_full_precision(_attributes.beta()); - lut["MINVAL"] = (data_type == DataType::F16) ? std::string("-HALF_MAX") : std::string("-FLT_MAX"); - lut["SRC_WIDTH"] = support::cpp11::to_string(_src->dimension(0)); - - return lut; -} - -CLBuildOptions ClTemplateLogits1DMaxShiftExpSum::get_build_options(const ComponentGroup &comp_group) const -{ - ARM_COMPUTE_UNUSED(comp_group); - CLBuildOptions build_opts{}; - - const unsigned int reduction_dim_size = _src->dimension(0); - const unsigned int vector_size = adjust_vec_size(serial_vector_size, reduction_dim_size); - - build_opts.add_option("-DN0=" + support::cpp11::to_string(vector_size)); - build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string((reduction_dim_size % vector_size))); - - return build_opts; -} - -std::string ClTemplateLogits1DMaxShiftExpSum::get_config_id() const -{ - std::string config_id = get_name(); - - config_id += "_"; - config_id += support::cpp11::to_string(_src->dimension(0)); - config_id += "_"; - config_id += string_from_data_type(_src->data_type()); - - return config_id; -} - -std::set<std::string> ClTemplateLogits1DMaxShiftExpSum::get_headers_list() const -{ - return std::set<std::string>{"helpers.h", "tile_helpers.h"}; -} - -Window ClTemplateLogits1DMaxShiftExpSum::get_window() const -{ - ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized"); - - Window win = calculate_max_window(*_dst, Steps(_src->dimension(0))); - return win.collapse(win, Window::DimZ); -} - -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.h deleted file mode 100644 index ac9ddaa9d4..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.h +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATELOGITS1DMAXSHIFTEXPSUM -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATELOGITS1DMAXSHIFTEXPSUM - -#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -class ClTemplateLogits1DMaxShiftExpSum final : public IGpuTemplateComponentWriter -{ -public: - using Attributes = ClComponentLogits1DMaxShiftExpSum::Attributes; - - /** Constructor - * - * @param[in] id Component id - * @param[in] tensors Tensor arguments to the components - * @param[in] attributes Component attributes - */ - ClTemplateLogits1DMaxShiftExpSum(ComponentId id, - const ArgumentPack<ITensorInfo> &tensors, - const Attributes &attributes); - /** Prevent instances of this class from being copy constructed */ - ClTemplateLogits1DMaxShiftExpSum(const ClTemplateLogits1DMaxShiftExpSum &) = delete; - /** Prevent instances of this class from being copied */ - ClTemplateLogits1DMaxShiftExpSum &operator=(const ClTemplateLogits1DMaxShiftExpSum &) = delete; - /** Allow instances of this class to be move constructed */ - ClTemplateLogits1DMaxShiftExpSum(ClTemplateLogits1DMaxShiftExpSum &&) = default; - /** Allow instances of this class to be moved */ - ClTemplateLogits1DMaxShiftExpSum &operator=(ClTemplateLogits1DMaxShiftExpSum &&) = default; - /** Generate kernel component name */ - std::string get_name() const override; - /** Generate kernel component code template - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return std::string Component code - */ - std::string get_component_code(const ComponentGroup &comp_group) const override; - /** Declare all variables used by the component in the @p vtable - * - * @param[out] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - */ - void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - /** Generate the tag look-up table used to instantiate the component code. - * - * @param[in] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - * - * @return TagLUT Tag lookup table - */ - TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - /** Generate the build options used in the component - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return CLBuildOptions Build options - */ - CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override; - /** Generate the component config id string used for tuning */ - std::string get_config_id() const override; - /** Generate the header list used in the component */ - std::set<std::string> get_headers_list() const override; - /** Generate the execution window for the component */ - Window get_window() const override; - -private: - const ITensorInfo *_src; // input - const ITensorInfo *_sum; // exponentiated and summed input - const ITensorInfo *_dst; // exponentiated input - Attributes _attributes; -}; -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute - -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATELOGITS1DMAXSHIFTEXPSUM */ diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.cpp deleted file mode 100644 index 7d7c3e6673..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.cpp +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.h" - -#include "arm_compute/core/utils/helpers/AdjustVecSize.h" - -#include "src/core/helpers/WindowHelpers.h" -#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" -#include "support/StringSupport.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -ClTemplateLogits1DNorm::ClTemplateLogits1DNorm(ComponentId id, - const ArgumentPack<ITensorInfo> &tensors, - const Attributes &attributes) - : IGpuTemplateComponentWriter{id, tensors}, _src{}, _sum{}, _dst{}, _attributes{attributes} -{ - _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0); - _sum = this->tensors().get_const_tensor(TensorType::ACL_SRC_1); - _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0); - ARM_COMPUTE_ERROR_ON_NULLPTR(_src); - ARM_COMPUTE_ERROR_ON_NULLPTR(_sum); - ARM_COMPUTE_ERROR_ON_NULLPTR(_dst); -} - -std::string ClTemplateLogits1DNorm::get_name() const -{ - return "logits_1d_norm"; -} - -std::string ClTemplateLogits1DNorm::get_component_code(const ComponentGroup &comp_group) const -{ - ARM_COMPUTE_UNUSED(comp_group); - - std::string code = R"_( -//------------------ START KERNEL {{meta_kernel_id}} --------------------- -{ - const int x_offs = g_ind_0 * sizeof({{DATA_TYPE}}); - __global uchar *src_addr = {{src}}_ptr + {{src}}_offset_first_element_in_bytes + x_offs + g_ind_1 * {{src}}_stride_y + g_ind_2 * {{src}}_stride_z; - __global uchar *dst_addr = {{dst}}_ptr + {{dst}}_offset_first_element_in_bytes + x_offs + g_ind_1 * {{dst}}_stride_y + g_ind_2 * {{dst}}_stride_z; - Image sum = CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP({{sum}}); -)_"; - // Load max value of 1D logits vector (row) - code += R"_( - {{DATA_TYPE}} sum_val = *((__global {{DATA_TYPE}} *)offset(&sum, 0, g_ind_1)); - VEC_DATA_TYPE({{DATA_TYPE}}, N0) - data0 = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)src_addr); -)_"; - - if (_attributes.is_log_softmax()) - { - code += R"_( - sum_val = log(sum_val); - data0 -= sum_val; -)_"; - } - else - { - code += R"_( - data0 /= sum_val; -)_"; - } - - code += R"_( - STORE_VECTOR_SELECT(data, {{DATA_TYPE}}, dst_addr, N0, PARTIAL_N0, PARTIAL_N0 != 0 && g_ind_0 == 0); -} -//------------------ END KERNEL {{meta_kernel_id}} --------------------- -)_"; - - return code; -} - -void ClTemplateLogits1DNorm::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const -{ - vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D), "src"); - - vtable.declare_variable(comp_group, _sum, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D), "sum"); - - vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D), "dst"); -} - -TagLUT ClTemplateLogits1DNorm::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const -{ - ARM_COMPUTE_UNUSED(comp_group); - - TagLUT lut{}; - - // Arguments and global shared variables - lut["src"] = vtable.get_variable(_src); - lut["sum"] = vtable.get_variable(_sum); - lut["dst"] = vtable.get_variable(_dst); - - // Local build options - lut["meta_kernel_id"] = id(); - - const DataType data_type = _src->data_type(); - - lut["DATA_TYPE"] = get_cl_type_from_data_type(data_type); - - return lut; -} - -CLBuildOptions ClTemplateLogits1DNorm::get_build_options(const ComponentGroup &comp_group) const -{ - ARM_COMPUTE_UNUSED(comp_group); - CLBuildOptions build_opts{}; - - const auto root_window = comp_group.get_root_component()->template_writer()->get_window(); - const unsigned int n0 = root_window.x().step(); - build_opts.add_option("-DN0=" + support::cpp11::to_string(n0)); - build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string((_src->dimension(0) % n0))); - - return build_opts; -} - -std::string ClTemplateLogits1DNorm::get_config_id() const -{ - std::string config_id = get_name(); - - config_id += "_"; - config_id += support::cpp11::to_string(_src->dimension(0)); - config_id += "_"; - config_id += string_from_data_type(_src->data_type()); - - return config_id; -} - -std::set<std::string> ClTemplateLogits1DNorm::get_headers_list() const -{ - return std::set<std::string>{"helpers.h", "tile_helpers.h"}; -} - -Window ClTemplateLogits1DNorm::get_window() const -{ - ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized"); - constexpr unsigned int serial_vector_size = 16; - const unsigned int vector_size = adjust_vec_size(serial_vector_size, _src->dimension(0)); - - Window win = calculate_max_window(*_src, Steps(vector_size)); - return win.collapse(win, Window::DimZ); -} - -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.h deleted file mode 100644 index 5a74be5842..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.h +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATELOGITS1DNORM -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATELOGITS1DNORM - -#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -class ClTemplateLogits1DNorm final : public IGpuTemplateComponentWriter -{ -public: - using Attributes = ClComponentLogits1DNorm::Attributes; - - /** Constructor - * - * @param[in] id Component id - * @param[in] tensors Tensor arguments to the components - * @param[in] attributes Component attributes - */ - ClTemplateLogits1DNorm(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes); - /** Prevent instances of this class from being copy constructed */ - ClTemplateLogits1DNorm(const ClTemplateLogits1DNorm &) = delete; - /** Prevent instances of this class from being copied */ - ClTemplateLogits1DNorm &operator=(const ClTemplateLogits1DNorm &) = delete; - /** Allow instances of this class to be move constructed */ - ClTemplateLogits1DNorm(ClTemplateLogits1DNorm &&) = default; - /** Allow instances of this class to be moved */ - ClTemplateLogits1DNorm &operator=(ClTemplateLogits1DNorm &&) = default; - /** Generate kernel component name */ - std::string get_name() const override; - /** Generate kernel component code template - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return std::string Component code - */ - std::string get_component_code(const ComponentGroup &comp_group) const override; - /** Declare all variables used by the component in the @p vtable - * - * @param[out] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - */ - void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - /** Generate the tag look-up table used to instantiate the component code. - * - * @param[in] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - * - * @return TagLUT Tag lookup table - */ - TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - /** Generate the build options used in the component - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return CLBuildOptions Build options - */ - CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override; - /** Generate the component config id string used for tuning */ - std::string get_config_id() const override; - /** Generate the header list used in the component */ - std::set<std::string> get_headers_list() const override; - /** Generate the execution window for the component */ - Window get_window() const override; - -private: - const ITensorInfo *_src; // exponentiated input - const ITensorInfo *_sum; // exponentiated and summed input - const ITensorInfo *_dst; // normalization of input with _sum - - Attributes _attributes; -}; -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute - -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATELOGITS1DNORM */ diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp deleted file mode 100644 index 8936db6abe..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp +++ /dev/null @@ -1,470 +0,0 @@ -/* - * Copyright (c) 2023-2024 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "ClTemplatePool2d.h" - -#include "arm_compute/core/utils/helpers/AdjustVecSize.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/core/utils/StringUtils.h" - -#include "src/core/helpers/WindowHelpers.h" -#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h" -#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" -#include "support/StringSupport.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -namespace -{ -// Shape indexes for NHWC Datalayout -constexpr static int32_t height_idx = 2; -constexpr static int32_t width_idx = 1; -constexpr static int32_t channel_idx = 0; -} // namespace -ClTemplatePool2d::ClTemplatePool2d(ComponentId id, - const ArgumentPack<ITensorInfo> &tensors, - const Attributes &attributes, - const Settings &settings) - : IGpuTemplateComponentWriter{id, tensors}, _src{}, _dst{}, _attributes{attributes}, _settings{settings} -{ - _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0); - _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0); - ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst); -} - -std::string ClTemplatePool2d::get_name() const -{ - return "pool2d"; -} - -std::string ClTemplatePool2d::get_component_code(const ComponentGroup &comp_group) const -{ - ARM_COMPUTE_UNUSED(comp_group); - - // Condition to use 2x2 optimized kernel - if (_attributes.pool_size() == Size2D(2, 2)) - { - return get_2x2_kernel_code(); - } - else - { - return get_MxN_kernel_code(); - } -} - -std::string ClTemplatePool2d::get_MxN_kernel_code() const -{ - const auto pool_type = _attributes.pool_type(); - const bool fp_mixed_precision = (_src->data_type() == DataType::F16) && pool_type != PoolingType::MAX; - - // Define pool op macro. - std::string pool_op = (pool_type == PoolingType::AVG) ? R"_(#define POOL_OP(x,y) ((x) + (y)))_" - : R"_(#define POOL_OP(x,y) (fmax((x), (y))) )_"; - - // Kernel start - // Note: If C is not multiple of N0, we shift back of PARTIAL_N0 elements to compute the leftover elements for get_global_id(0) == 0 - // Note: If C is less than N0, N0 should be SHRINKED to the closest smaller N0. This operation is performed on the host side - std::string code = R"_( -//------------------ START KERNEL {{meta_kernel_id}} --------------------- -// IN_0(src) {{src}} -// OUT(dst, accum) {{dst}} - -{ - const int idx_out_c = g_ind_0; - const int idx_out_w = g_ind_1; -)_"; - - // Add macro for POOL_OP - code += "\n" + pool_op + "\n"; - - code += R"_( - const int idx_out_h = g_ind_2 % {{DST_HEIGHT}}; - const int idx_out_n = g_ind_2 / {{DST_HEIGHT}}; -)_"; - - // Define common variables. - code += R"_( - __global unsigned char *in_base_ptr = {{src}}_ptr + {{src}}_offset_first_element_in_bytes + idx_out_c * sizeof({{DATA_TYPE}}) + idx_out_n * {{src}}_stride_w; - - __global unsigned char *out_base_ptr = {{dst}}_ptr + {{dst}}_offset_first_element_in_bytes + idx_out_c * sizeof({{DATA_TYPE}}) + idx_out_w * {{dst}}_stride_y + idx_out_h * {{dst}}_stride_z + idx_out_n * {{dst}}_stride_w; - - VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0) - res0 = {{INITIAL_VALUE}}; - - const int idx_in_w = idx_out_w * {{STRIDE_X}} - {{PAD_X}}; - const int idx_in_h = idx_out_h * {{STRIDE_Y}} - {{PAD_Y}}; - - const int pool_x_s = max((int)0, -idx_in_w); - const int pool_x_e = min((int){{POOL_SIZE_X}}, (int){{SRC_WIDTH}} - idx_in_w); - const int pool_y_s = max((int)0, -idx_in_h); - const int pool_y_e = min((int){{POOL_SIZE_Y}}, (int){{SRC_HEIGHT}} - idx_in_h); -)_"; - - // Determine filter size depending on if padding is excluded or not - if (_attributes.exclude_padding()) - { - code += R"_( - const int filter_size = (pool_y_e - pool_y_s) * (pool_x_e - pool_x_s); -)_"; - } - else - { - code += R"_( - const int filter_size = {{POOL_SIZE_X}} * {{POOL_SIZE_Y}}; -)_"; - } - - // Loop through pool size - // if global pooling - if (_attributes.pool_size().x() == _src->dimension(width_idx) && - _attributes.pool_size().y() == _src->dimension(height_idx)) - { - // Begin loop - code += R"_( - // Global pooling path - for(int y = 0; y < {{POOL_SIZE_Y}}; ++y) - { - #pragma unroll 8 - for(int x = 0; x < {{POOL_SIZE_X}}; ++x) - { - VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0) - data0; -)_"; - } - else // if local pooling size - { - code += R"_( - for(int y = pool_y_s; y < pool_y_e; ++y) - { - #pragma unroll 8 - for(int x = pool_x_s; x < pool_x_e; ++x) - { - VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0) - data0; -)_"; - } // end else - - // if condition inside loop - use 32bit acc if mixed_precision. - // End loop through pooling section. - if (fp_mixed_precision) - { - // In case of FP_MIXED_PRECISION, ACC_DATA_TYPE is != DATA_TYPE - code += R"_( - data0 = CONVERT(VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + (x + idx_in_w) * {{src}}_stride_y + (y + idx_in_h) * {{src}}_stride_z)), VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)); - res0 = POOL_OP(res0, data0); - } - } -)_"; - } - else // load data, compute result and end loop - { - code += R"_( - data0 = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + (x + idx_in_w) * {{src}}_stride_y + (y + idx_in_h) * {{src}}_stride_z)); - res0 = POOL_OP(res0, data0); - } - } -)_"; - } - - // For Pool AVG ONLY, divide pool output by filter size - if (pool_type == PoolingType::AVG) - { - code += R"_( - res0 /= (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0))filter_size; -)_"; - } - - // If mixed precision convert datatype before storing. Then end kernel. - if (fp_mixed_precision) - { - code += R"_( - VEC_DATA_TYPE({{DATA_TYPE}}, N0) - res_converted0 = CONVERT(res0, VEC_DATA_TYPE({{DATA_TYPE}}, N0)); - STORE_VECTOR_SELECT(res_converted, {{DATA_TYPE}}, out_base_ptr, N0, PARTIAL_N0, (PARTIAL_N0 != 0) && g_ind_0 == 0); -)_"; - } - else - { - // Store data - code += R"_( - STORE_VECTOR_SELECT(res, {{DATA_TYPE}}, out_base_ptr, N0, PARTIAL_N0, (PARTIAL_N0 != 0) && g_ind_0 == 0); -)_"; - } - - code += R"_( -//------------------ END KERNEL {{meta_kernel_id}} --------------------- -} -)_"; - - return code; -} - -std::string ClTemplatePool2d::get_2x2_kernel_code() const -{ - const auto pool_type = _attributes.pool_type(); - const bool fp_mixed_precision = (_src->data_type() == DataType::F16) && pool_type != PoolingType::MAX; - std::string pool_op = (pool_type == PoolingType::AVG) ? R"_(#define POOL_OP(x,y) ((x) + (y)))_" - : R"_(#define POOL_OP(x,y) (fmax((x), (y))) )_"; - - std::string code = R"_( -//------------------ START KERNEL {{meta_kernel_id}} --------------------- -// IN_0(src) {{src}} -// OUT(dst, accum) {{dst}} - -#define SELECT_TYPE SELECT_VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0) - -{ - const int idx_out_c = g_ind_0; - const int idx_out_w = g_ind_1; -)_"; - - // Add pool op macro - code += "\n" + pool_op + "\n"; - - // If batch size != 1, the batch size dimension is collapsed over the height dimension - code += R"_( - const int idx_out_h = g_ind_2 % {{DST_HEIGHT}}; - const int idx_out_n = g_ind_2 / {{DST_HEIGHT}}; -)_"; - - code += R"_( - const int idx_in_w = idx_out_w * {{STRIDE_X}} - {{PAD_X}}; - const int idx_in_h = idx_out_h * {{STRIDE_Y}} - {{PAD_Y}}; - - __global unsigned char *in_base_ptr = {{src}}_ptr + {{src}}_offset_first_element_in_bytes + idx_out_c * sizeof({{DATA_TYPE}}) + idx_out_n * {{src}}_stride_w; - __global unsigned char *out_base_ptr = {{dst}}_ptr + {{dst}}_offset_first_element_in_bytes + idx_out_c * sizeof({{DATA_TYPE}}) + idx_out_w * {{dst}}_stride_y + idx_out_h * {{dst}}_stride_z + idx_out_n * - {{dst}}_stride_w; - const int pool_x_s = max((int)0, -idx_in_w); - const int pool_x_e = min((int)2, (int){{SRC_WIDTH}} - idx_in_w); - const int pool_y_s = max((int)0, -idx_in_h); - const int pool_y_e = min((int)2, (int){{SRC_HEIGHT}} - idx_in_h); - - const int filter_size = (pool_x_e - pool_x_s) * (pool_y_e - pool_y_s); - const int x0 = pool_x_s + idx_in_w; - const int y0 = pool_y_s + idx_in_h; - const int x1 = pool_x_e - 1 + idx_in_w; - const int y1 = pool_y_e - 1 + idx_in_h; - - REPEAT_VAR_INIT_TO_CONST(4, VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0), data, 0); -)_"; - - if (fp_mixed_precision) - { - // In case of FP_MIXED_PRECISION, ACC_DATA_TYPE is != DATA_TYPE - code += R"_( - data0 = CONVERT(VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x0 * {{src}}_stride_y + y0 * {{src}}_stride_z)), VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)); - data1 = CONVERT(VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x1 * {{src}}_stride_y + y0 * {{src}}_stride_z)), VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)); - data2 = CONVERT(VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x0 * {{src}}_stride_y + y1 * {{src}}_stride_z)), VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)); - data3 = CONVERT(VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x1 * {{src}}_stride_y + y1 * {{src}}_stride_z)), VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)); -)_"; - } - else - { - code += R"_( - data0 = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x0 * {{src}}_stride_y + y0 * {{src}}_stride_z)); - data1 = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x1 * {{src}}_stride_y + y0 * {{src}}_stride_z)); - data2 = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x0 * {{src}}_stride_y + y1 * {{src}}_stride_z)); - data3 = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x1 * {{src}}_stride_y + y1 * {{src}}_stride_z)); -)_"; - } - - if (pool_type != PoolingType::MAX) - { - // Make invalid the values loaded if the x or y coordinate was clamped (out-of-bound) - code += R"_( - if(filter_size != 4) - { - SELECT_TYPE cond_w_s = (SELECT_TYPE)idx_in_w < (SELECT_TYPE)0; - SELECT_TYPE cond_w_e = (SELECT_TYPE)idx_in_w >= (SELECT_TYPE)({{SRC_WIDTH}} - 1); - SELECT_TYPE cond_h_s = (SELECT_TYPE)idx_in_h < (SELECT_TYPE)0; - SELECT_TYPE cond_h_e = (SELECT_TYPE)idx_in_h >= (SELECT_TYPE)({{SRC_HEIGHT}} - 1); - - data0 = select(data0, (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)){{INITIAL_VALUE}}, (SELECT_TYPE)(cond_w_s | cond_h_s)); - data1 = select(data1, (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)){{INITIAL_VALUE}}, (SELECT_TYPE)(cond_w_e | cond_h_s)); - data2 = select(data2, (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)){{INITIAL_VALUE}}, (SELECT_TYPE)(cond_w_s | cond_h_e)); - data3 = select(data3, (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)){{INITIAL_VALUE}}, (SELECT_TYPE)(cond_w_e | cond_h_e)); - } -)_"; - } - - code += R"_( - VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0) - res0 = data0; - res0 = POOL_OP(res0, data1); - res0 = POOL_OP(res0, data2); - res0 = POOL_OP(res0, data3); -)_"; - - if (pool_type == PoolingType::AVG) - { - // If avg pooling divide result accordingly. - if (_attributes.exclude_padding()) - { - code += R"_( - res0 /= (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0))filter_size; -)_"; - } - else - { - code += R"_( - res0 /= (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0))4; -)_"; - } - } - - // Store result - if (fp_mixed_precision) - { - code += R"_( - VEC_DATA_TYPE({{DATA_TYPE}}, N0) - res_converted0 = CONVERT(res0, VEC_DATA_TYPE({{DATA_TYPE}}, N0)); - STORE_VECTOR_SELECT(res_converted, {{DATA_TYPE}}, out_base_ptr, N0, PARTIAL_N0, (PARTIAL_N0 != 0) && g_ind_0 == 0); -)_"; - } - else - { - code += R"_( - STORE_VECTOR_SELECT(res, {{DATA_TYPE}}, out_base_ptr, N0, PARTIAL_N0, (PARTIAL_N0 != 0) && g_ind_0 == 0); -)_"; - } - - code += R"_( - //------------------ END KERNEL {{meta_kernel_id}} --------------------- -} -#undef SELECT_TYPE -)_"; - - return code; -} - -void ClTemplatePool2d::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const -{ - vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer), - "src"); - - vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer), - "dst"); -} - -TagLUT ClTemplatePool2d::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const -{ - ARM_COMPUTE_UNUSED(comp_group); - - TagLUT lut{}; - // Arguments and global shared variables - lut["src"] = vtable.get_variable(_src); - lut["dst"] = vtable.get_variable(_dst); - - // Local build options - lut["meta_kernel_id"] = id(); - - // Retrieve relevant data - const auto padding = _attributes.pad(); - const auto stride = _attributes.stride(); - const auto pool_size = _attributes.pool_size(); - const auto data_type = _src->data_type(); - const auto use_fp_mixed_precision = - (_src->data_type() == DataType::F16) && _attributes.pool_type() != PoolingType::MAX; - const std::string max_initial_value = - _settings.use_inf_as_limit() ? "(-INFINITY)" - : float_to_string_with_full_precision(std::numeric_limits<float>::lowest()); - - // pool specific - lut["STRIDE_X"] = stride.x(); - lut["STRIDE_Y"] = stride.y(); - lut["PAD_X"] = padding.left; - lut["PAD_Y"] = padding.top; - lut["POOL_SIZE_X"] = pool_size.width; - lut["POOL_SIZE_Y"] = pool_size.height; - - // Datatypes and variables - lut["ACC_DATA_TYPE"] = get_cl_type_from_data_type( - (use_fp_mixed_precision) ? (DataType::F32) : (data_type)); // Type of accumulators to use. - lut["DATA_TYPE"] = get_cl_type_from_data_type(data_type); - lut["SRC_WIDTH"] = _src->dimension(width_idx); - lut["SRC_HEIGHT"] = _src->dimension(height_idx); - lut["INITIAL_VALUE"] = (_attributes.pool_type() == PoolingType::MAX) ? max_initial_value : std::string("0"); - - // Tensor specific data - lut["DST_HEIGHT"] = _dst->dimension(height_idx); - - return lut; -} - -CLBuildOptions ClTemplatePool2d::get_build_options(const ComponentGroup &comp_group) const -{ - const auto root_window = comp_group.get_root_component()->template_writer()->get_window(); - const unsigned int n0 = root_window.x().step(); - const unsigned int partial_store_n0 = _dst->dimension(0) % n0; - - CLBuildOptions build_opts{}; - build_opts.add_option("-DN0=" + support::cpp11::to_string(n0)); - build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0)); - - return build_opts; -} - -std::string ClTemplatePool2d::get_config_id() const -{ - const DataType data_type = _src->data_type(); - const DataLayout data_layout = _src->data_layout(); - - std::string config_id{}; - config_id += "pooling_layer_2d_"; - config_id += lower_string(string_from_data_type(data_type)); - config_id += "_"; - config_id += lower_string(string_from_data_layout(data_layout)); - config_id += "_"; - config_id += support::cpp11::to_string(_dst->dimension(width_idx)); - config_id += "_"; - config_id += support::cpp11::to_string(_dst->dimension(height_idx)); - config_id += "_"; - config_id += support::cpp11::to_string(_dst->dimension(channel_idx)); - - return config_id; -} - -std::set<std::string> ClTemplatePool2d::get_headers_list() const -{ - return std::set<std::string>{"helpers.h", "tile_helpers.h", "repeat.h"}; -} - -Window ClTemplatePool2d::get_window() const -{ - ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized"); - const auto output_shape = _dst->tensor_shape(); - const unsigned int vec_size = adjust_vec_size(((_dst->data_type() == DataType::F32) ? 2 : 4), _dst->dimension(0)); - - // Create and configure kernel window - auto win = calculate_max_window(output_shape, Steps(vec_size)); - win = win.collapse_if_possible(win, Window::DimZ); // collapse window on batch size. - return win; -} - -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.h deleted file mode 100644 index d1d3c01669..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.h +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEPOOL2D -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEPOOL2D - -#include "arm_compute/core/experimental/Types.h" -#include "arm_compute/dynamic_fusion/sketch/attributes/Pool2dAttributes.h" -#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h" - -#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -class ClTemplatePool2d final : public IGpuTemplateComponentWriter -{ -public: - using Attributes = ClComponentPool2d::Attributes; - using Settings = ClComponentPool2d::Settings; - /** Constructor - * - * @param[in] id Component id - * @param[in] tensors Tensor arguments to the components - * @param[in] attributes Component attributes - * @param[in] settings Component settings - */ - ClTemplatePool2d(ComponentId id, - const ArgumentPack<ITensorInfo> &tensors, - const Attributes &attributes, - const Settings &settings); - - /** Prevent instances of this class from being copy constructed */ - ClTemplatePool2d(const ClTemplatePool2d &direct_conv2d) = delete; - - /** Prevent instances of this class from being copied */ - ClTemplatePool2d &operator=(const ClTemplatePool2d &direct_conv2d) = delete; - - /** Allow instances of this class to be move constructed */ - ClTemplatePool2d(ClTemplatePool2d &&direct_conv2d) = default; - - /** Allow instances of this class to be moved */ - ClTemplatePool2d &operator=(ClTemplatePool2d &&direct_conv2d) = default; - - /** Generate kernel component name */ - std::string get_name() const override; - - /** Generate kernel component code template - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return std::string Component code - */ - std::string get_component_code(const ComponentGroup &comp_group) const override; - /** Declare all variables used by the component in the @p vtable - * - * @param[out] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - */ - void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - /** Generate the tag look-up table used to instantiate the component code. - * - * @param[in] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - * - * @return TagLUT Tag lookup table - */ - TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - /** Generate the build options used in the component - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return CLBuildOptions Build options - */ - CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override; - - /** Generate the component config id string used for tuning */ - std::string get_config_id() const override; - - /** Generate the header list used in the component */ - std::set<std::string> get_headers_list() const override; - - /** Generate the execution window for the component */ - Window get_window() const override; - -private: - /** Generate pooling kernel template code optimized for 2x2 pooling - * - * @return std::String Component code - */ - std::string get_2x2_kernel_code() const; - - /** Generate generalised pooling kernel template code for MxN pooling - * - * @return std::String Component code - */ - std::string get_MxN_kernel_code() const; - - const ITensorInfo *_src; - const ITensorInfo *_dst; - Attributes _attributes; - Settings _settings; -}; -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEPOOL2D */ diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.cpp deleted file mode 100644 index c882353fcb..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.cpp +++ /dev/null @@ -1,161 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "ClTemplateReshape.h" - -#include "arm_compute/core/utils/helpers/AdjustVecSize.h" -#include "arm_compute/core/utils/StringUtils.h" - -#include "src/core/helpers/WindowHelpers.h" -#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -constexpr unsigned int vector_size_byte_opencl = 16; - -ClTemplateReshape::ClTemplateReshape(ComponentId id, const ArgumentPack<ITensorInfo> &tensors) - : IGpuTemplateComponentWriter{id, tensors}, _src{}, _dst{} -{ - _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0); - _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0); - ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst); -} - -std::string ClTemplateReshape::get_name() const -{ - return "reshape"; -} - -std::string ClTemplateReshape::get_component_code(const ComponentGroup &comp_group) const -{ - ARM_COMPUTE_UNUSED(comp_group); - std::string code; - - code = R"_( -//------------------ START KERNEL {{meta_kernel_id}} --------------------- - -// IN(src) {{src}} -// OUT(dst, accum) {{dst}} - -TILE(uint, M0, 1, g_dst_indirect_y); -{ - __global uchar * base_src_ptr = {{src}}_ptr + {{src}}_offset_first_element_in_bytes; - const int tile_vertical_idx = g_ind_1 * {{arg_dst}}_c + g_ind_2 * {{arg_dst}}_c * {{arg_dst}}_w; - LOOP_UNROLLING(int, _m0, 0, 1, M0, - { - const int row_idx = _m0 * {{arg_dst}}_c + tile_vertical_idx; - const int tile_horizontal_idx = g_ind_0 + row_idx; - LOOP_UNROLLING(int, _n0, 0, 1, N0, - { - {{src}}_ptr = base_src_ptr; - const int linear_idx = tile_horizontal_idx + _n0; - const int in_id_x = linear_idx % {{src}}_c; - const int in_id_y = (linear_idx / {{src}}_c) % {{src}}_w; - const int in_id_z = linear_idx / ({{src}}_c * {{src}}_w); - {{src}}_ptr += in_id_x * sizeof({{DATA_TYPE}}) + in_id_y * {{src}}_stride_y + in_id_z * {{src}}_stride_z; - {{dst}}[_m0].s[_n0] = *((__global {{DATA_TYPE}} *){{src}}_ptr); - }) - }) - - LOOP_UNROLLING(int, i, 0, 1, M0, - { - g_dst_indirect_y[i].v = (uint)min((int)(g_ind_1 + i), (int)({{arg_dst}}_w) - 1); - g_dst_indirect_y[i].v += (int)(g_ind_2 % {{arg_dst}}_h) * (int)({{arg_dst}}_w); - g_dst_indirect_y[i].v += (int)(g_ind_2 / {{arg_dst}}_h) * (int)({{arg_dst}}_w * {{arg_dst}}_h); - }) -} -//------------------ END KERNEL {{meta_kernel_id}} --------------------- -)_"; - return code; -} - -void ClTemplateReshape::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const -{ - vtable.declare_variable(comp_group, _src, - GpuKernelArgumentInfo(common_tensor_type), // GpuKernelArgumentInfo::Type::Image_3D - "src"); - - vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(common_tensor_type), "dst"); -} - -TagLUT ClTemplateReshape::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const -{ - ARM_COMPUTE_UNUSED(comp_group); - TagLUT lut{}; - - // Arguments and global shared variables - lut["src"] = vtable.get_variable(_src); - lut["dst"] = vtable.get_variable(_dst); - lut["arg_dst"] = vtable.get_variable(comp_group.get_any_dst_tensor()); - lut["meta_kernel_id"] = id(); - lut["DATA_TYPE"] = get_cl_type_from_data_type(_dst->data_type()); - - return lut; -} - -CLBuildOptions ClTemplateReshape::get_build_options(const ComponentGroup &comp_group) const -{ - CLBuildOptions build_opts{}; - const auto root_window = comp_group.get_root_component()->template_writer()->get_window(); - const unsigned int n0 = root_window.x().step(); - const unsigned int m0 = root_window.y().step(); - const unsigned int partial_store_n0 = _dst->dimension(0) % n0; - build_opts.add_option("-DN0=" + support::cpp11::to_string(n0)); - build_opts.add_option("-DM0=" + support::cpp11::to_string(m0)); - build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0)); - - return build_opts; -} - -std::string ClTemplateReshape::get_config_id() const -{ - std::string config_id{}; - config_id += lower_string(string_from_data_type(_dst->data_type())); - config_id += "_"; - config_id += support::cpp11::to_string(_dst->dimension(0)); - config_id += "_"; - config_id += support::cpp11::to_string(_dst->dimension(1)); - - return config_id; -} - -std::set<std::string> ClTemplateReshape::get_headers_list() const -{ - return std::set<std::string>{"helpers.h", "tile_helpers.h"}; -} - -Window ClTemplateReshape::get_window() const -{ - ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized"); - const unsigned int n0 = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0)); - Window win = calculate_max_window(*_dst, Steps(n0)); - return win.collapse(win, Window::DimZ); -} - -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.h deleted file mode 100644 index 838a21db6d..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.h +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATERESHAPE -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATERESHAPE - -#include "arm_compute/core/experimental/Types.h" - -#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -class ClTemplateReshape final : public IGpuTemplateComponentWriter -{ -public: - /** Constructor - * - * @param[in] id Component id - * @param[in] tensors Tensor arguments to the components - */ - ClTemplateReshape(ComponentId id, const ArgumentPack<ITensorInfo> &tensors); - /** Prevent instances of this class from being copy constructed */ - ClTemplateReshape(const ClTemplateReshape &reshape) = delete; - /** Prevent instances of this class from being copied */ - ClTemplateReshape &operator=(const ClTemplateReshape &reshape) = delete; - /** Allow instances of this class to be move constructed */ - ClTemplateReshape(ClTemplateReshape &&reshape) = default; - /** Allow instances of this class to be moved */ - ClTemplateReshape &operator=(ClTemplateReshape &&reshape) = default; - - /** Generate kernel component name */ - std::string get_name() const override; - - /** Generate kernel component code template - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return std::string Component code - */ - std::string get_component_code(const ComponentGroup &comp_group) const override; - - /** Declare all variables used by the component in the @p vtable - * - * @param[out] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - */ - void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - - /** Generate the tag look-up table used to instantiate the component code. - * - * @param[in] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - * - * @return TagLUT Tag lookup table - */ - TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - - /** Generate the build options used in the component - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return CLBuildOptions Build options - */ - CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override; - - /** Generate the component config id string used for tuning */ - std::string get_config_id() const override; - - /** Generate the header list used in the component */ - std::set<std::string> get_headers_list() const override; - - /** Generate the execution window for the component */ - Window get_window() const override; - -private: - const ITensorInfo *_src; - const ITensorInfo *_dst; -}; -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATERESHAPE */ diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.cpp deleted file mode 100644 index 846c712ceb..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.cpp +++ /dev/null @@ -1,279 +0,0 @@ -/* - * Copyright (c) 2022-2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "ClTemplateResize.h" - -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/utils/helpers/AdjustVecSize.h" -#include "arm_compute/core/utils/StringUtils.h" - -#include "src/core/helpers/WindowHelpers.h" -#include "src/core/utils/ScaleUtils.h" -#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -ClTemplateResize::ClTemplateResize(ComponentId id, - const ArgumentPack<ITensorInfo> &tensors, - const ClTemplateResize::Attributes &attributes) - : IGpuTemplateComponentWriter{id, tensors}, _src{}, _dst{}, _attributes{attributes} -{ - _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0); - _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0); - - ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst); -} - -std::string ClTemplateResize::get_name() const -{ - return _attributes.interpolation_policy() == InterpolationPolicy::BILINEAR ? "resize_bilinear" : "resize_nearest"; -} - -std::string ClTemplateResize::get_component_code(const IGpuTemplateComponentWriter::ComponentGroup &comp_group) const -{ - ARM_COMPUTE_UNUSED(comp_group); - - std::string code = R"_( -//------------------ START KERNEL {{meta_kernel_id}} --------------------- -TILE(uint, 1, 1, g_dst_indirect_y); -{ - const int yo = g_ind_2 % {{arg_dst}}_h; - const int bout = g_ind_2 / {{arg_dst}}_h; -)_"; - - if (_attributes.interpolation_policy() == InterpolationPolicy::NEAREST_NEIGHBOR) - { - if (_attributes.sampling_policy() == SamplingPolicy::TOP_LEFT) - { - code += R"_( - float xi_f = (g_ind_1 * {{SCALE_X}}); - float yi_f = (yo * {{SCALE_Y}}); -)_"; - } - else - { - code += R"_( - float xi_f = ((g_ind_1 + 0.5f) * {{SCALE_X}}); - float yi_f = ((yo + 0.5f) * {{SCALE_Y}}); -)_"; - } - - if (_attributes.align_corners()) - { - code += R"_( - xi_f = round(xi_f); - yi_f = round(yi_f); -)_"; - } - - code += R"_( - const int xi0 = clamp((int)xi_f, 0, (int){{src}}_w - 1); - const int yi0 = clamp((int)yi_f, 0, (int){{src}}_h - 1); - - T_LOAD_NHWC_WITH_DILATION({{SRC_DATA_TYPE}}, 1, 1, N0, {{SRC_TENSOR_TYPE}}, {{src}}, bout, yi0, xi0, g_ind_0, {{src}}_w, {{src}}_h, 1, 1, false, {{dst}}); -)_"; - } - else if (_attributes.interpolation_policy() == InterpolationPolicy::BILINEAR) - { - if (_attributes.sampling_policy() == SamplingPolicy::TOP_LEFT) - { - code += R"_( - float xi_f = (g_ind_1 * {{SCALE_X}}); - float yi_f = (yo * {{SCALE_Y}}); -)_"; - } - else - { - code += R"_( - float xi_f = ((g_ind_1 + 0.5f) * {{SCALE_X}} - 0.5f); - float yi_f = ((yo + 0.5f) * {{SCALE_Y}} - 0.5f); -)_"; - } - - code += R"_( - const int xi = (int)floor(xi_f); - const int yi = (int)floor(yi_f); - - TILE({{SRC_DATA_TYPE}}, 1, N0, in00); - TILE({{SRC_DATA_TYPE}}, 1, N0, in01); - TILE({{SRC_DATA_TYPE}}, 1, N0, in10); - TILE({{SRC_DATA_TYPE}}, 1, N0, in11); - - in00[0].v = {{CONSTANT_VALUE}}; - in01[0].v = {{CONSTANT_VALUE}}; - in10[0].v = {{CONSTANT_VALUE}}; - in11[0].v = {{CONSTANT_VALUE}}; - - const int xi0 = clamp(xi, 0, (int){{src}}_w - 1); - const int yi0 = clamp(yi, 0, (int){{src}}_h - 1); - const int xi1 = clamp(xi + 1, 0, (int){{src}}_w - 1); - const int yi1 = clamp(yi + 1, 0, (int){{src}}_h - 1); - - T_LOAD_NHWC_WITH_DILATION({{SRC_DATA_TYPE}}, 1, 1, N0, {{SRC_TENSOR_TYPE}}, {{src}}, bout, yi0, xi0, g_ind_0, {{src}}_w, {{src}}_h, 1, 1, false, in00); - T_LOAD_NHWC_WITH_DILATION({{SRC_DATA_TYPE}}, 1, 1, N0, {{SRC_TENSOR_TYPE}}, {{src}}, bout, yi0, xi1, g_ind_0, {{src}}_w, {{src}}_h, 1, 1, false, in01); - T_LOAD_NHWC_WITH_DILATION({{SRC_DATA_TYPE}}, 1, 1, N0, {{SRC_TENSOR_TYPE}}, {{src}}, bout, yi1, xi0, g_ind_0, {{src}}_w, {{src}}_h, 1, 1, false, in10); - T_LOAD_NHWC_WITH_DILATION({{SRC_DATA_TYPE}}, 1, 1, N0, {{SRC_TENSOR_TYPE}}, {{src}}, bout, yi1, xi1, g_ind_0, {{src}}_w, {{src}}_h, 1, 1, false, in11); -)_"; - - if (is_data_type_float(_src->data_type())) - { - code += R"_( - const {{SRC_DATA_TYPE}} a = ({{SRC_DATA_TYPE}})(xi_f - (float)xi); - const {{SRC_DATA_TYPE}} b = ({{SRC_DATA_TYPE}})(1.f - a); - const {{SRC_DATA_TYPE}} a1 = ({{SRC_DATA_TYPE}})(yi_f - (float)yi); - const {{SRC_DATA_TYPE}} b1 = ({{SRC_DATA_TYPE}})(1.f - a1); - - // Calculate the output - {{dst}}[0].v = ((in00[0].v * b * b1) + (in01[0].v * a * b1) + (in10[0].v * b * a1) + (in11[0].v * a * a1)); -)_"; - } - else - { - code += R"_( - const float a = (xi_f - (float)xi); - const float b = (1.f - a); - const float a1 = (yi_f - (float)yi); - const float b1 = (1.f - a1); - - {{dst}}[0].v = CONVERT_SAT( - (CONVERT(in00[0].v, VEC_DATA_TYPE(float, N0)) * b * b1) + - (CONVERT(in01[0].v, VEC_DATA_TYPE(float, N0)) * a * b1) + - (CONVERT(in10[0].v, VEC_DATA_TYPE(float, N0)) * b * a1) + - (CONVERT(in11[0].v, VEC_DATA_TYPE(float, N0)) * a * a1), VEC_DATA_TYPE({{DST_DATA_TYPE}}, N0)); -)_"; - } - } - else - { - ARM_COMPUTE_ERROR("Unsupported interpolation policy"); - } - - code += R"_( - g_dst_indirect_y[0].v = g_ind_1 + (yo * (int)({{arg_dst}}_w)) + bout * (int)({{arg_dst}}_w * {{arg_dst}}_h); -} -//------------------ END KERNEL {{meta_kernel_id}} --------------------- -)_"; - - return code; -} - -void ClTemplateResize::declare_variables(GpuKernelVariableTable &vtable, - const IGpuTemplateComponentWriter::ComponentGroup &comp_group) const -{ - vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer), - "src"); - - vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer), - "dst"); -} - -TagLUT ClTemplateResize::get_tag_lut(const GpuKernelVariableTable &vtable, - const IGpuTemplateComponentWriter::ComponentGroup &comp_group) const -{ - TagLUT lut{}; - - // Arguments and global shared variables - lut["src"] = vtable.get_variable(_src); - lut["dst"] = vtable.get_variable(_dst); - - const auto dst_argument = vtable.get_variable(comp_group.get_any_dst_tensor()); - lut["arg_dst"] = dst_argument.uniq_name; - - // Local build options - lut["meta_kernel_id"] = id(); - lut["SRC_DATA_TYPE"] = get_cl_type_from_data_type(_src->data_type()); - lut["SRC_TENSOR_TYPE"] = "BUFFER"; - lut["DST_DATA_TYPE"] = get_cl_type_from_data_type(_dst->data_type()); - lut["CONSTANT_VALUE"] = string_from_pixel_value(0, _src->data_type()); - - const float scale_x = - scale_utils::calculate_resize_ratio(_src->dimension(1), _dst->dimension(1), _attributes.align_corners()); - const float scale_y = - scale_utils::calculate_resize_ratio(_src->dimension(2), _dst->dimension(2), _attributes.align_corners()); - - lut["SCALE_X"] = float_to_string_with_full_precision(scale_x); - lut["SCALE_Y"] = float_to_string_with_full_precision(scale_y); - - return lut; -} - -CLBuildOptions ClTemplateResize::get_build_options(const IGpuTemplateComponentWriter::ComponentGroup &comp_group) const -{ - const Window root_window = comp_group.get_root_component()->template_writer()->get_window(); - const unsigned int n0 = root_window.x().step(); - const unsigned int m0 = root_window.y().step(); - const unsigned int partial_n0 = _dst->dimension(0) % n0; - - CLBuildOptions build_opts; - - build_opts.add_option("-DN0=" + support::cpp11::to_string(n0)); - build_opts.add_option("-DM0=" + support::cpp11::to_string(m0)); - build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_n0)); - - return build_opts; -} - -std::string ClTemplateResize::get_config_id() const -{ - std::string config_id{}; - - config_id += "resize_"; - config_id += - (_attributes.interpolation_policy() == InterpolationPolicy::NEAREST_NEIGHBOR ? "NEAREST_NEIGHBOR" : ""); - config_id += (_attributes.interpolation_policy() == InterpolationPolicy::BILINEAR ? "BILINEAR" : ""); - config_id += "_"; - config_id += (_attributes.sampling_policy() == SamplingPolicy::CENTER ? "center" : "topleft"); - config_id += "_"; - config_id += support::cpp11::to_string(_dst->dimension(0)); - config_id += "_"; - config_id += support::cpp11::to_string(_dst->dimension(1)); - config_id += "_"; - config_id += support::cpp11::to_string(_dst->dimension(2)); - config_id += "_"; - config_id += support::cpp11::to_string(_dst->dimension(3)); - - return config_id; -} - -std::set<std::string> ClTemplateResize::get_headers_list() const -{ - return std::set<std::string>{"helpers.h", "tile_helpers.h"}; -} - -Window ClTemplateResize::get_window() const -{ - ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized"); - - const unsigned int n0 = adjust_vec_size(16 / _src->element_size(), _src->dimension(0)); - Window win = calculate_max_window(*_dst, Steps(n0)); - return win.collapse(win, Window::DimZ); -} - -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.h deleted file mode 100644 index 4c69007185..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.h +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATERESIZE -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATERESIZE - -#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -class ClTemplateResize final : public IGpuTemplateComponentWriter -{ -public: - using Attributes = ClComponentResize::Attributes; - - /** Constructor - * - * @param[in] id Component id - * @param[in] tensors Tensor arguments to the components - * @param[in] attributes Component attributes - */ - ClTemplateResize(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes); - - /** Destructor */ - ~ClTemplateResize() override = default; - - /** Prevent instances of this class from being copy constructed */ - ClTemplateResize(const ClTemplateResize &resize) = delete; - - /** Prevent instances of this class from being copied */ - ClTemplateResize &operator=(const ClTemplateResize &resize) = delete; - - /** Allow instances of this class to be move constructed */ - ClTemplateResize(ClTemplateResize &&resize) = default; - - /** Allow instances of this class to be moved */ - ClTemplateResize &operator=(ClTemplateResize &&resize) = default; - - /** Generate kernel component name */ - std::string get_name() const override; - - /** Generate kernel component code template - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return std::string Component code - */ - std::string get_component_code(const ComponentGroup &comp_group) const override; - - /** Declare all variables used by the component in the @p vtable - * - * @param[out] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - */ - void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - - /** Generate the tag look-up table used to instantiate the component code. - * - * @param[in] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - * - * @return TagLUT Tag lookup table - */ - TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - - /** Generate the build options used in the component - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return CLBuildOptions Build options - */ - CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override; - - /** Generate the component config id string used for tuning */ - std::string get_config_id() const override; - - /** Generate the header list used in the component */ - std::set<std::string> get_headers_list() const override; - - /** Generate the execution window for the component */ - Window get_window() const override; - -private: - const ITensorInfo *_src; - const ITensorInfo *_dst; - Attributes _attributes; -}; - -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute - -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATERESIZE */ diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp deleted file mode 100644 index d0ec91e0a9..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "ClTemplateStore.h" - -#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -ClTemplateStore::ClTemplateStore(ComponentId id, const ArgumentPack<ITensorInfo> &tensors) - : IGpuTemplateComponentWriter{id, tensors}, _src{}, _dst{} -{ - _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0); - _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0); -} - -std::string ClTemplateStore::get_name() const -{ - return "store"; -} - -std::string ClTemplateStore::get_component_code(const ComponentGroup &comp_group) const -{ - ARM_COMPUTE_UNUSED(comp_group); - - return R"_( -//------------------ START KERNEL {{meta_kernel_id}} STORE --------------------- -{ - bool x_cond = PARTIAL_N0 != 0 && get_global_id(0) == 0; - - T_STORE_INDIRECT_WIDTH_SELECT({{DST_DATA_TYPE}}, M0, N0, PARTIAL_N0, {{DST_TENSOR_TYPE}}, {{dst}}, g_ind_0, {{dst}}_stride_y, x_cond, {{src}}, g_dst_indirect_y); -//------------------ END KERNEL {{meta_kernel_id}} STORE --------------------- -} - -)_"; -} - -void ClTemplateStore::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const -{ - vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer), - "src"); - vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer), - "dst"); -} - -TagLUT ClTemplateStore::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const -{ - TagLUT lut{}; - - // Arguments and global shared variables - lut["src"] = vtable.get_variable(_src); - lut["dst"] = vtable.get_variable(_dst); - - // Local build options - lut["meta_kernel_id"] = id(); - lut["DST_TENSOR_TYPE"] = "BUFFER"; - lut["DST_DATA_TYPE"] = _dst->data_type(); - - ARM_COMPUTE_UNUSED(comp_group); - return lut; -} - -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h deleted file mode 100644 index b8c82ceadd..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATESTORE -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATESTORE - -#include "arm_compute/core/experimental/Types.h" - -#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -class ClTemplateStore final : public IGpuTemplateComponentWriter -{ -public: - /** Constructor - * - * @param[in] id Component id - * @param[in] tensors Tensor arguments to the components - */ - ClTemplateStore(ComponentId id, const ArgumentPack<ITensorInfo> &tensors); - /** Prevent instances of this class from being copy constructed */ - ClTemplateStore(const ClTemplateStore &store) = delete; - /** Prevent instances of this class from being copied */ - ClTemplateStore &operator=(const ClTemplateStore &store) = delete; - /** Allow instances of this class to be move constructed */ - ClTemplateStore(ClTemplateStore &&store) = default; - /** Allow instances of this class to be moved */ - ClTemplateStore &operator=(ClTemplateStore &&store) = default; - /** Generate kernel component name */ - std::string get_name() const override; - /** Generate kernel component code template - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return std::string Component code - */ - std::string get_component_code(const ComponentGroup &comp_group) const override; - /** Declare all variables used by the component in the @p vtable - * - * @param[out] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - */ - void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - /** Generate the tag look-up table used to instantiate the component code. - * - * @param[in] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - * - * @return TagLUT Tag lookup table - */ - TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - -private: - const ITensorInfo *_src; - const ITensorInfo *_dst; -}; -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATESTORE */ diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp deleted file mode 100644 index d3d7c8db83..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp +++ /dev/null @@ -1,325 +0,0 @@ -/* - * Copyright (c) 2022-2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "ClTemplateWriter.h" - -#include "arm_compute/core/CL/CLKernelLibrary.h" - -#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -/// @note: some tags can be unused since they could be used only for the macros, or only for the component code -std::string ClTemplateWriter::replace_tags(const std::string &code_template, const TagLUT &tags) -{ - std::string replaced_code = ""; - bool scanning_pattern = false; - std::string pattern_found = ""; - for (size_t i = 0; i < code_template.size() - 1; ++i) - { - if (!scanning_pattern) - { - if (code_template[i] == '{' && code_template[i + 1] == '{') - { - i += 1; - scanning_pattern = true; - pattern_found = ""; - } - else - { - replaced_code += code_template[i]; - } - } - else - { - if (code_template[i] == '}' && code_template[i + 1] == '}') - { - i += 1; - scanning_pattern = false; - std::string err = "Pattern " + pattern_found + " not found in tags"; - ARM_COMPUTE_ERROR_ON_MSG(tags.find(pattern_found) == tags.end(), err.c_str()); - replaced_code += tags.find(pattern_found)->second.value; - } - else - { - pattern_found += code_template[i]; - } - } - } - - return replaced_code; -} -ClTemplateWriter::~ClTemplateWriter() -{ -} -ClTemplateWriter::ClTemplateWriter(const GpuKernelComponentGroup &components) : _components{components} -{ -} -std::string ClTemplateWriter::get_name() -{ - return write_kernel_name(); -} -std::string ClTemplateWriter::get_code() -{ - return write_code(); -} -std::string ClTemplateWriter::get_config_id() -{ - std::string config_id = get_name(); - for (const auto &comp : _components) - { - config_id += "--" + comp->template_writer()->get_config_id() + "--"; - } - - return config_id; -} - -CLBuildOptions ClTemplateWriter::get_build_options() -{ - CLBuildOptions build_opts{}; - - for (const auto &comp : _components) - { - build_opts.add_options(comp->template_writer()->get_build_options(_components).options()); - } - - return build_opts; -} - -Window ClTemplateWriter::get_window() const -{ - const auto root_comp = _components.get_root_component(); - ARM_COMPUTE_ERROR_ON_MSG(root_comp == nullptr, "No root component found"); - return root_comp->template_writer()->get_window(); -} - -std::map<ITensorInfo::Id, GpuKernelArgument> ClTemplateWriter::get_tensors() -{ - // Assemble GpuKernelArguments - std::map<ITensorInfo::Id, GpuKernelArgument> tensors; - for (const auto t : _components.get_argument_tensors()) - { - tensors.emplace(t->id(), GpuKernelArgument{*t, _vtable.get_variable(t).kernel_argument_info}); - } - return tensors; -} - -std::string ClTemplateWriter::write_code() -{ - ARM_COMPUTE_ERROR_ON_MSG(_components.empty(), "No components found"); - - // These data structures will hold the data from all the components in the blueprint - std::set<std::string> headers_list{}; - std::set<std::string> additional_macros{}; - std::vector<std::string> component_codes{}; // vector because order matters - - // Pass 1: Declare all kernel variables - for (auto &component : _components) - { - component->template_writer()->declare_variables(_vtable, _components); - } - // Pass 2: Generate component codes - for (auto &component : _components) - { - const auto component_writer = component->template_writer(); - auto curr_headers_list = component_writer->get_headers_list(); - auto curr_additional_macros = component_writer->get_additional_macros(); - auto curr_component_code = component_writer->get_component_code(_components); - const auto var_lut = component_writer->get_tag_lut( - _vtable, - _components); // Ideally can be merged with get_component_code once we have finer-grained code generation technique - component_codes.push_back(replace_tags(curr_component_code, var_lut)); - - headers_list.insert(curr_headers_list.begin(), curr_headers_list.end()); - if (!additional_macros.empty()) // Some components might not have any - { - additional_macros.insert(replace_tags(curr_additional_macros, var_lut)); - } - } - - // Step 3: Assemble the data gathered by traversing the graph into the string "code" - std::string code = ""; - - for (auto &header : headers_list) - { -#if defined(EMBEDDED_KERNELS) - code += CLKernelLibrary::get().get_program(header).first; -#else // defined(EMBEDDED_KERNELS) - code += "#include \"" + header + "\"\n"; -#endif // defined(EMBEDDED_KERNELS) - } - - for (auto ¯os : additional_macros) - { - code += macros; - } - - auto arguments = _components.get_argument_tensors(); - std::sort(arguments.begin(), arguments.end(), - [](const ITensorInfo *l, const ITensorInfo *r) { return l->id() < r->id(); }); - code += write_kernel_signature(_vtable.get_variable_list(arguments)); - - code += "\n{\n\n"; - - code += " //------------------ START KERNEL_BUILDER_COORDINATE ---------------------\n\n"; - code += write_global_section(); - code += " //------------------ END KERNEL_BUILDER_COORDINATE ---------------------\n"; - - { - const auto tiles = _components.get_tiles(); - std::stringstream tiles_ss; - - tiles_ss << " //------------------ START TILE DECLARATION ---------------------\n"; - - for (auto tile : tiles) - { - const auto var = _vtable.get_variable(tile); - const auto data_type = get_cl_type_from_data_type(tile->data_type()); - const auto var_name = var.uniq_name; - - tiles_ss << " TILE(" << data_type << ", M0, N0, " << var_name << ");\n"; - } - - tiles_ss << " //------------------ END TILE DECLARATION ---------------------\n"; - - code += tiles_ss.str(); - } - - for (const auto &component_code : component_codes) - { - code += component_code; - code += "\n"; - } - - code += "}\n"; - - return code; -} -std::string ClTemplateWriter::write_global_section() const -{ - const auto dst_info = _components.get_any_dst_tensor(); - const auto dst_w = dst_info->dimension(0); - const auto tile_w = std::max(1, get_window().x().step()); - const auto tile_h = std::max(1, get_window().y().step()); - auto leftover_w = dst_w % tile_w; - - std::string code = ""; - code += std::string(" int g_ind_0 = GET_SPATIAL_IDX(0, ") + std::to_string(tile_w) + ", " + - std::to_string(leftover_w) + ");\n"; - code += std::string(" int g_ind_1 = GET_SPATIAL_IDX(1, ") + std::to_string(tile_h) + ", " + "0);\n"; - code += std::string(" int g_ind_2 = GET_SPATIAL_IDX(2, 1, 0);\n\n"); - - code += " const bool g_cond_x = (g_ind_0 == 0);\n"; - code += " const bool g_cond_y = (g_ind_1 == 0);\n"; - - return code; -} -std::string ClTemplateWriter::write_argument_declaration(const GpuKernelVariableTable::TensorVariable &var) const -{ - std::string code; - switch (var.kernel_argument_info.type) - { - case GpuKernelArgumentInfo::Type::Vector: - { - code += "\n VECTOR_DECLARATION(" + var.uniq_name + ")"; - break; - } - case GpuKernelArgumentInfo::Type::Image: - { - code += "\n IMAGE_DECLARATION(" + var.uniq_name + ")"; - break; - } - case GpuKernelArgumentInfo::Type::Image_3D: - { - code += "\n IMAGE_DECLARATION(" + var.uniq_name + "),"; - code += "\n unsigned int " + var.uniq_name + "_stride_z"; - break; - } - case GpuKernelArgumentInfo::Type::Image_3D_Export_To_ClImage2D: - { - code += "\n __read_only image2d_t " + var.uniq_name + "_img,"; - code += "\n unsigned int " + var.uniq_name + "_stride_z"; - break; - } - case GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer: - { - code += "\n TENSOR4D_T(" + var.uniq_name + ", BUFFER)"; - break; - } - case GpuKernelArgumentInfo::Type::Tensor_4D_t_Image: - { - code += "\n TENSOR4D_T(" + var.uniq_name + ", IMAGE)"; - break; - } - case GpuKernelArgumentInfo::Type::Tensor_3D: - { - code += "\n TENSOR3D_DECLARATION(" + var.uniq_name + ")"; - break; - } - default: - { - ARM_COMPUTE_ERROR("Unsupported declaration generation for GpuKernelArgumentInfo::Type"); - } - } - return code; -} -std::string ClTemplateWriter::write_kernel_signature(const GpuKernelVariableTable::VariableList &argument_list) const -{ - std::string code = "\n__kernel void " + write_kernel_name() + "("; - - for (int i = 0; i < static_cast<int>(argument_list.size()) - 1; ++i) - { - code += write_argument_declaration(argument_list[i]) + ","; - } - if (static_cast<int>(argument_list.size()) - 1 >= 0) - { - code += write_argument_declaration(argument_list[argument_list.size() - 1]); - } - - code += ')'; - - return code; -} -std::string ClTemplateWriter::write_kernel_name() const -{ - if (_components.empty()) - { - return "empty_kernel"; - } - std::string name = _components.empty() ? "" : _components[0]->template_writer()->get_name(); - for (size_t i = 1; i < _components.size(); ++i) - { - name += "___"; - name += _components[i]->template_writer()->get_name(); - } - - return name; -} -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.h deleted file mode 100644 index 83f617b6c6..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.h +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEWRITER -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEWRITER - -#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h" -#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" -#include "src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h" - -#include <map> - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -/** Use a templated-string-based method to write kernel code - * It stitches the component code templates together based on the valid fusion configuration. - * It then instantiates the actual kernel code from the template and the generated tag lookup table. - */ -class ClTemplateWriter : public IGpuKernelWriter -{ -public: - /** Instantiates a kernel code string from the kernel code template - * @note: some tags can be unused since they could be used only for the macros, or only for the component code - * - * @param[in] code_template Kernel code template - * @param[in] tags Tag lookup table - * - * @return std::string Instantiated kernel string - */ - static std::string replace_tags(const std::string &code_template, const TagLUT &tags); - /** Default constructor */ - ClTemplateWriter() = default; - /** Constructor - * - * @param[in] components Kernel component group from which the kernel will be generated - */ - ClTemplateWriter(const GpuKernelComponentGroup &components); - /** Destructor */ - ~ClTemplateWriter() override; - /** Generate kernel name */ - std::string get_name() override; - /** Generate kernel code */ - std::string get_code() override; - /** Generate build options */ - CLBuildOptions get_build_options() override; - /** Generate config id string of the entire kernel. This is used for tuning */ - std::string get_config_id() override; - /** Generate execution window */ - Window get_window() const override; - /** Get the kernel argument lists of the kernel*/ - std::map<ITensorInfo::Id, GpuKernelArgument> get_tensors() override; - -private: - std::string write_kernel_name() const; - std::string write_code(); - std::string write_global_section() const; - std::string write_argument_declaration(const GpuKernelVariableTable::TensorVariable &var) const; - std::string write_kernel_signature(const GpuKernelVariableTable::VariableList &argument_list) const; - -private: - GpuKernelComponentGroup _components{}; - GpuKernelVariableTable _vtable{}; -}; -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEWRITER */ |