aboutsummaryrefslogtreecommitdiff
path: root/src/dynamic_fusion/sketch/gpu
diff options
context:
space:
mode:
Diffstat (limited to 'src/dynamic_fusion/sketch/gpu')
-rw-r--r--src/dynamic_fusion/sketch/gpu/GpuKernelArgument.cpp37
-rw-r--r--src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h99
-rw-r--r--src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h16
-rw-r--r--src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp22
-rw-r--r--src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.h10
-rw-r--r--src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h71
-rw-r--r--src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h17
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h3
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.h10
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h12
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp14
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h22
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp16
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h22
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.cpp14
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.h19
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp15
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h22
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp14
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h23
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.cpp93
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.h130
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.cpp95
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.h127
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.cpp5
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.cpp12
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.h16
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.cpp10
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.h13
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.cpp19
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.h20
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp17
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h22
-rw-r--r--src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp3
-rw-r--r--src/dynamic_fusion/sketch/gpu/operators/GpuMatMul.cpp4
-rw-r--r--src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp11
-rw-r--r--src/dynamic_fusion/sketch/gpu/operators/GpuTanh.cpp3
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.cpp114
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h135
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h140
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp181
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h120
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.cpp212
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.h103
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.cpp364
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.h112
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp393
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h116
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp274
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.h115
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.cpp267
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.h107
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.cpp171
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.h106
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp470
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.h132
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.cpp161
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.h107
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.cpp279
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.h120
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp89
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h86
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp325
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.h92
64 files changed, 104 insertions, 5865 deletions
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.cpp b/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.cpp
deleted file mode 100644
index 9cecfc2ffd..0000000000
--- a/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.cpp
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-bool operator==(const GpuKernelArgumentInfo &info0, const GpuKernelArgumentInfo &info1)
-{
- return info0.type == info1.type;
-}
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h b/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h
index 03817173f4..c923bf9c16 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELARGUMENT
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELARGUMENT
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELARGUMENT_H
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELARGUMENT_H
#include "arm_compute/core/TensorInfo.h"
@@ -32,96 +32,6 @@ namespace experimental
{
namespace dynamic_fusion
{
-/** Contain information required to set up a kernel argument at run time
- * @deprecated To be removed along with ClTemplateWriter
- */
-struct GpuKernelArgumentInfo
-{
- /** Enumerate all the tensor arguments variants used by all kernel implementations. */
- enum class Type : int
- {
- Scalar,
-
- Vector,
-
- Image,
- Image_Reinterpret_As_3D,
- Image_Export_To_ClImage2D,
-
- Image_3D, // 3D Tensor represented as a 2D Image + stride_z
- Image_3D_Export_To_ClImage2D,
-
- Tensor_3D,
- Tensor_4D,
- Tensor_4D_t_Buffer,
- Tensor_4D_t_Image,
-
- Tensor_Special_0,
- };
- /** Default constructor */
- GpuKernelArgumentInfo() = default;
- /** Constructor */
- GpuKernelArgumentInfo(Type type) : type{type}
- {
- }
- Type type{Type::Tensor_4D_t_Buffer};
-};
-bool operator==(const GpuKernelArgumentInfo &info0, const GpuKernelArgumentInfo &info1);
-/** Kernel argument information linked with its corresponding @ref ITensorInfo
- * @deprecated To be removed along with ClTemplateWriter
- */
-class GpuKernelArgument
-{
-public:
- /** Constructor
- *
- * @param[in] tensor_info Associated @ref ITensorInfo
- * @param[in] kernel_arg_info Associated @ref GpuKernelArgumentInfo
- */
- GpuKernelArgument(const ITensorInfo &tensor_info, const GpuKernelArgumentInfo &kernel_arg_info)
- : _tensor_info{tensor_info}, _kernel_arg_info{kernel_arg_info}
- {
- }
- /** Get workload tensor id */
- ITensorInfo::Id id() const
- {
- return _tensor_info.id();
- }
- /** Get associated @ref ITensorInfo */
- ITensorInfo *tensor_info()
- {
- return &_tensor_info;
- }
- /** Get associated @ref ITensorInfo */
- const ITensorInfo *tensor_info() const
- {
- return &_tensor_info;
- }
- /** Get associated @ref GpuKernelArgumentInfo */
- GpuKernelArgumentInfo *kernel_argument_info()
- {
- return &_kernel_arg_info;
- }
- /** Get associated @ref GpuKernelArgumentInfo */
- const GpuKernelArgumentInfo *kernel_argument_info() const
- {
- return &_kernel_arg_info;
- }
- /** Check if the associated workload tensor has valid id
- *
- * @return true if has valid id
- * @return false otherwise
- */
- bool has_valid_id() const
- {
- return _tensor_info.has_valid_id();
- }
-
-private:
- TensorInfo _tensor_info{};
- GpuKernelArgumentInfo _kernel_arg_info{};
-};
-#ifdef ACL_INTERNAL_TEST_CKW_IN_DF
/** Describe how the tensor runtime memory can be accessed
*
* Please see documentation under @ref GpuKernelArgumentBinding
@@ -243,9 +153,8 @@ private:
};
Value _value;
};
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELARGUMENT */
+#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELARGUMENT_H
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h b/src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h
index 24812cd8a7..11d916eec9 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,19 +21,15 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELSOURCECODE
-#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELSOURCECODE
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELSOURCECODE_H
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELSOURCECODE_H
#include "arm_compute/core/CL/CLCompileContext.h"
#include "arm_compute/core/Window.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-#include <map>
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
#include <deque>
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
#include <string>
namespace arm_compute
@@ -43,11 +39,7 @@ namespace experimental
namespace dynamic_fusion
{
/** The argument list of a @ref GpuKernelSourceCode */
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-using GpuKernelArgumentList = std::map<ITensorInfo::Id, GpuKernelArgument>;
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
using GpuKernelArgumentList = std::deque<GpuKernelArgumentBinding>;
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
/** Container of kernel code to be compiled and run in a @ref GpuUnitWorkload
*/
@@ -132,4 +124,4 @@ private:
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute
-#endif /* ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELSOURCECODE */
+#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELSOURCECODE_H
diff --git a/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp b/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp
index 502ceab807..725a46e91c 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp
+++ b/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,14 +26,10 @@
#include "arm_compute/core/experimental/Types.h"
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h"
#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h"
#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
#include "src/dynamic_fusion/sketch/gpu/GpuComponentServices.h"
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.h"
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h"
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
namespace arm_compute
{
@@ -41,8 +37,8 @@ namespace experimental
{
namespace dynamic_fusion
{
-GpuLogicalKernel::GpuLogicalKernel(GpuComponentServices *services, const GpuKernelComponentGroup &components)
- : _comp_group{components}, _store_components{}
+GpuLogicalKernel::GpuLogicalKernel(GpuComponentServices *services, GpuKernelComponentGroup components) // NOLINT
+ : _comp_group{std::move(components)}, _store_components{}
{
ARM_COMPUTE_UNUSED(services);
}
@@ -50,19 +46,11 @@ GpuLogicalKernel::GpuLogicalKernel(GpuComponentServices *services, const GpuKern
GpuKernelSourceCode GpuLogicalKernel::write_kernel_code()
{
GpuKernelSourceCode code;
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- ClTemplateWriter writer{_comp_group};
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
- GpuCkwDriver writer{_comp_group};
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
+ GpuCkwDriver writer{_comp_group};
code.name(writer.get_name());
code.code(writer.get_code());
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- code.arguments(writer.get_tensors());
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
code.arguments(writer.get_kernel_arguments());
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
code.build_options(writer.get_build_options());
code.config_id(writer.get_config_id());
code.window(writer.get_window());
diff --git a/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.h b/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.h
index 1fd40f0acd..e2bc83b286 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.h
+++ b/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022, 2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_GPULOGICALKERNEL
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPULOGICALKERNEL
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPULOGICALKERNEL_H
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPULOGICALKERNEL_H
#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h"
@@ -52,7 +52,7 @@ public:
* @param[in] services @ref GpuComponentServices to be used
* @param[in] components Component group from which this logical kernel is initialized
*/
- explicit GpuLogicalKernel(GpuComponentServices *services, const GpuKernelComponentGroup &components);
+ explicit GpuLogicalKernel(GpuComponentServices *services, GpuKernelComponentGroup components); // NOLINT
/** Allow instances of this class to be copy constructed */
GpuLogicalKernel(const GpuLogicalKernel &) = default;
/** Allow instances of this class to be copied */
@@ -71,4 +71,4 @@ private:
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_GPULOGICALKERNEL */
+#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPULOGICALKERNEL_H
diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h
index 43bcc47fa0..5d75bcaaa0 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h
+++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSOURCECODE
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSOURCECODE
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSOURCECODE_H
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSOURCECODE_H
#include "arm_compute/core/experimental/Types.h"
#include "arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h"
@@ -36,7 +36,6 @@ namespace experimental
{
namespace dynamic_fusion
{
-#ifdef ACL_INTERNAL_TEST_CKW_IN_DF
namespace
{
/** Extract kernel arguments of one tensor from a flat list of kernel arguments.
@@ -70,7 +69,6 @@ GpuKernelArgumentList extract_kernel_args_for_one_tensor(GpuKernelArgumentList &
return tensor_kargs;
}
} // namespace
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
/** Uniquely identifies a @ref GpuUnitWorkload within a @ref GpuWorkloadSourceCode */
using UnitWorkloadId = int32_t;
@@ -83,25 +81,11 @@ class GpuWorkloadArgument
public:
/** Default constructor */
GpuWorkloadArgument() = default;
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
/** Constructor
*
- * @param[in] tensor_info @ref ITensorInfo of the workload argument
- * @param[in] mem_desc @ref MemoryDescriptor of the workload argument
- * @param[in] kernel_arg_info @ref GpuKernelArgumentInfo of the workload argument
- */
- GpuWorkloadArgument(const ITensorInfo &tensor_info,
- const MemoryDescriptor &mem_desc,
- const GpuKernelArgumentInfo &kernel_arg_info)
- : _tensor_info{tensor_info}, _mem_desc{mem_desc}, _kernel_arg_info{kernel_arg_info}
- {
- }
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
- /** Constructor
- *
- * @param[in] tensor_info @ref ITensorInfo of the workload argument
- * @param[in] mem_desc @ref MemoryDescriptor of the workload argument
- * @param[in] kernel_arg_list @ref GpuKernelArgumentList of the workload argument
+ * @param[in] tensor_info @ref ITensorInfo of the workload argument
+ * @param[in] mem_desc @ref MemoryDescriptor of the workload argument
+ * @param[in] kernel_args @ref GpuKernelArgumentList of the workload argument
*/
GpuWorkloadArgument(const ITensorInfo &tensor_info,
const MemoryDescriptor &mem_desc,
@@ -109,7 +93,6 @@ public:
: _tensor_info{tensor_info}, _mem_desc{mem_desc}, _kernel_args{kernel_args}
{
}
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
/** Get tensor id within workload */
ITensorInfo::Id id() const
{
@@ -135,18 +118,6 @@ public:
{
return &_mem_desc;
}
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- /** Get @ref GpuKernelArgumentInfo of the argument */
- GpuKernelArgumentInfo *kernel_argument_info()
- {
- return &_kernel_arg_info;
- }
- /** Get @ref GpuKernelArgumentInfo of the argument */
- const GpuKernelArgumentInfo *kernel_argument_info() const
- {
- return &_kernel_arg_info;
- }
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
/** Get @ref GpuKernelArgumentList of the workload tensor */
GpuKernelArgumentList *kernel_argument_list()
{
@@ -157,7 +128,6 @@ public:
{
return &_kernel_args;
}
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
/** Check if the workload argument has valid id
*
* @return true If has valid id
@@ -169,13 +139,9 @@ public:
}
private:
- TensorInfo _tensor_info{};
- MemoryDescriptor _mem_desc{};
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- GpuKernelArgumentInfo _kernel_arg_info{};
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
+ TensorInfo _tensor_info{};
+ MemoryDescriptor _mem_desc{};
GpuKernelArgumentList _kernel_args{};
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
};
/** Describes when a unit workload is run.
@@ -259,22 +225,7 @@ public:
const auto uwk_id = static_cast<UnitWorkloadId>(_unit_workloads.size());
const auto unit_work = GpuUnitWorkload(uwk_id, kernel_code, stage);
_unit_workloads.push_back(unit_work);
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- ARM_COMPUTE_UNUSED(context);
- // Assemble kernel argument with memory descriptor to form workload argument
- for (const auto &id_arg : kernel_code.arguments())
- {
- const auto arg_id = id_arg.first;
- const auto arg = id_arg.second;
- _workload_arguments[arg_id] =
- GpuWorkloadArgument{*arg.tensor_info(), mem_map.at(arg_id), *arg.kernel_argument_info()};
- if (_tensor_uwork_map.find(arg_id) == _tensor_uwork_map.end())
- {
- _tensor_uwork_map[arg_id] = std::set<UnitWorkloadId>();
- }
- _tensor_uwork_map[arg_id].insert(uwk_id);
- }
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
+
GpuKernelArgumentList flat_kernel_args = kernel_code.arguments();
GpuKernelArgumentList tensor_kargs{};
while (true)
@@ -296,7 +247,7 @@ public:
_tensor_uwork_map[tensor_id].insert(uwk_id);
}
}
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
+
return uwk_id;
}
/** Get a unit workload from its id */
@@ -346,4 +297,4 @@ private:
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSOURCECODE */
+#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSOURCECODE_H
diff --git a/src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h b/src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h
index ad474674f9..84972501de 100644
--- a/src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h
+++ b/src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_IGPUKERNELWRITER
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_IGPUKERNELWRITER
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_IGPUKERNELWRITER_H
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_IGPUKERNELWRITER_H
#include "arm_compute/core/CL/CLCompileContext.h"
#include "arm_compute/core/Window.h"
@@ -62,23 +62,14 @@ public:
virtual std::string get_config_id() = 0;
/** Generate execution window */
virtual Window get_window() const = 0;
- /** Get the kernel argument lists of the kernel
- * @deprecated To be removed along with ClTemplateWriter
- */
- virtual std::map<ITensorInfo::Id, GpuKernelArgument> get_tensors()
- {
- return {};
- }
-#ifdef ACL_INTERNAL_TEST_CKW_IN_DF
/** Get the flat list of arguments of the kernel*/
virtual GpuKernelArgumentList get_kernel_arguments()
{
return {};
}
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
};
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_IGPUKERNELWRITER */
+#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_IGPUKERNELWRITER_H
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h
index b80ce0d816..f8770920b7 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h
@@ -24,15 +24,12 @@
#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_GPUCKWDRIVER_H
#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_GPUCKWDRIVER_H
-#include "ckw/Kernel.h"
-
#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
#include "src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h"
#include "compute_kernel_writer/include/ckw/Kernel.h"
#include "compute_kernel_writer/include/ckw/KernelArgument.h"
-#include <map>
#include <string>
namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.h
index f1f0e6747b..c9ce7eb269 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.h
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2023 Arm Limited.
+ * Copyright (c) 2023-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWSTORE
-#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWSTORE
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWSTORE_H
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWSTORE_H
#include "src/core/common/Macros.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/IGpuCkwComponentDriver.h"
@@ -33,8 +33,6 @@ namespace experimental
{
namespace dynamic_fusion
{
-/** An interface used by @ref ClTemplateWriter to write source code for a kernel component
- */
class GpuCkwStore : public IGpuCkwComponentDriver
{
public:
@@ -61,4 +59,4 @@ private:
} // namespace experimental
} // namespace arm_compute
-#endif /* ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWSTORE */
+#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWSTORE_H
diff --git a/src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h b/src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h
index 4b8eea2f57..6678c929e9 100644
--- a/src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h
+++ b/src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_IGPUKERNELCOMPONENT
-#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_IGPUKERNELCOMPONENT
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_IGPUKERNELCOMPONENT_H
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_IGPUKERNELCOMPONENT_H
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h"
@@ -100,10 +100,6 @@ public:
return _properties;
}
/** Get writer for the component */
- virtual const IGpuTemplateComponentWriter *template_writer() const
- {
- return nullptr;
- }
virtual const IGpuCkwComponentDriver *ckw_component_driver() const
{
return nullptr;
@@ -119,4 +115,4 @@ private:
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute
-#endif /* ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_IGPUKERNELCOMPONENT */
+#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_IGPUKERNELCOMPONENT_H
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp
index fdf528a65d..e316bdf46d 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,11 +24,7 @@
#include "ClComponentActivation.h"
#include "src/core/CL/CLValidate.h"
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h"
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.h"
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
namespace arm_compute
{
@@ -69,11 +65,7 @@ ClComponentActivation::ClComponentActivation(ComponentId
const ArgumentPack<ITensorInfo> &tensors,
const Attributes &attributes)
: IGpuKernelComponent{id, properties, tensors},
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- _component_writer{std::make_unique<ClTemplateActivation>(id, tensors, attributes)}
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
_component_writer{std::make_unique<GpuCkwActivation>(id, tensors, attributes)}
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
{
}
@@ -81,11 +73,7 @@ ClComponentActivation::~ClComponentActivation()
{
}
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-const IGpuTemplateComponentWriter *ClComponentActivation::template_writer() const
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
const IGpuCkwComponentDriver *ClComponentActivation::ckw_component_driver() const
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
{
return _component_writer.get();
}
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h
index 02c854356a..b8185158f3 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTACTIVATION
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTACTIVATION
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTACTIVATION_H
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTACTIVATION_H
#include "arm_compute/function_info/ActivationLayerInfo.h"
@@ -41,11 +41,7 @@ template <typename T>
class ArgumentPack;
/** Forward declaration */
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-class ClTemplateActivation;
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
class GpuCkwActivation;
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
class ClComponentActivation final : public IGpuKernelComponent
{
@@ -106,11 +102,7 @@ public:
ClComponentActivation &operator=(ClComponentActivation &&component) = default;
/** Get writer for the component */
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- const IGpuTemplateComponentWriter *template_writer() const override;
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
- const IGpuCkwComponentDriver *ckw_component_driver() const override;
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
+ const IGpuCkwComponentDriver *ckw_component_driver() const override;
/** Get component type */
GpuComponentType type() const override
@@ -119,13 +111,9 @@ public:
}
private:
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- std::unique_ptr<ClTemplateActivation> _component_writer;
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
std::unique_ptr<GpuCkwActivation> _component_writer;
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
};
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTACTIVATION */
+#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTACTIVATION_H
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp
index b1636795a3..e1850d78c4 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -27,11 +27,7 @@
#include "src/core/CL/CLValidate.h"
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.h"
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.h"
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
namespace arm_compute
{
@@ -72,22 +68,16 @@ ClComponentCast::ClComponentCast(ComponentId id,
const Attributes &attributes,
const Settings &settings)
: IGpuKernelComponent{id, properties, tensors},
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- _component_writer{std::make_unique<ClTemplateCast>(id, tensors, attributes)}
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
_component_writer{std::make_unique<GpuCkwCast>(id, tensors, attributes)}
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
{
ARM_COMPUTE_UNUSED(attributes, settings);
}
+
ClComponentCast::~ClComponentCast()
{
}
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-const IGpuTemplateComponentWriter *ClComponentCast::template_writer() const
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
+
const IGpuCkwComponentDriver *ClComponentCast::ckw_component_driver() const
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
{
return _component_writer.get();
}
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h
index ed77b1203b..201dacc288 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTCAST
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTCAST
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTCAST_H
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTCAST_H
#include "arm_compute/dynamic_fusion/sketch/attributes/CastAttributes.h"
@@ -49,11 +49,7 @@ private:
};
/** Forward declaration */
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-class ClTemplateCast;
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
class GpuCkwCast;
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
class ClComponentCast final : public IGpuKernelComponent
{
@@ -120,11 +116,7 @@ public:
/** Allow instances of this class to be moved */
ClComponentCast &operator=(ClComponentCast &&component) = default;
/** Get writer for the component */
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- const IGpuTemplateComponentWriter *template_writer() const override;
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
const IGpuCkwComponentDriver *ckw_component_driver() const override;
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
/** Get component type */
GpuComponentType type() const override
{
@@ -132,14 +124,10 @@ public:
}
private:
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- std::unique_ptr<ClTemplateCast> _component_writer;
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
- std::unique_ptr<GpuCkwCast> _component_writer;
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
+ std::unique_ptr<GpuCkwCast> _component_writer;
};
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTCAST */
+#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTCAST_H
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.cpp
index ca8037c393..7cd23d6115 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -28,11 +28,7 @@
#include "arm_compute/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.h"
#include "src/core/CL/CLValidate.h"
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.h"
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwDepthwiseConv2d.h"
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
namespace arm_compute
{
@@ -212,22 +208,14 @@ ClComponentDepthwiseConv2d::ClComponentDepthwiseConv2d(ComponentId
const Attributes &attributes,
const Settings &settings)
: IGpuKernelComponent{id, properties, tensors},
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- _component_writer{std::make_unique<ClTemplateDepthwiseConv2d>(id, tensors, attributes, settings)}
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
_component_writer{std::make_unique<GpuCkwDepthwiseConv2d>(id, tensors, attributes, settings)}
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
{
ARM_COMPUTE_UNUSED(attributes, settings);
}
ClComponentDepthwiseConv2d::~ClComponentDepthwiseConv2d()
{
}
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-const IGpuTemplateComponentWriter *ClComponentDepthwiseConv2d::template_writer() const
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
const IGpuCkwComponentDriver *ClComponentDepthwiseConv2d::ckw_component_driver() const
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
{
return _component_writer.get();
}
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.h
index 01168e9ded..7526361f1c 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.h
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -44,11 +44,7 @@ class ArgumentPack;
class DepthwiseConv2dAttributes;
/** Forward declaration */
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-class ClTemplateDepthwiseConv2d;
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
class GpuCkwDepthwiseConv2d;
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
/** Component specific settings
*/
@@ -161,13 +157,8 @@ public:
ClComponentDepthwiseConv2d(ClComponentDepthwiseConv2d &&component) = default;
/** Allow instances of this class to be moved */
ClComponentDepthwiseConv2d &operator=(ClComponentDepthwiseConv2d &&component) = default;
- /** Get template writer for the component */
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- const IGpuTemplateComponentWriter *template_writer() const override;
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
- const IGpuCkwComponentDriver *ckw_component_driver() const override;
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
-
+ /** Get writer for the component */
+ const IGpuCkwComponentDriver *ckw_component_driver() const override;
/** Get component type */
GpuComponentType type() const override
{
@@ -175,11 +166,7 @@ public:
}
private:
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- std::unique_ptr<ClTemplateDepthwiseConv2d> _component_writer;
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
std::unique_ptr<GpuCkwDepthwiseConv2d> _component_writer;
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
};
} // namespace dynamic_fusion
} // namespace experimental
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp
index 98f3d6a882..783a17df30 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -28,12 +28,7 @@
#include "arm_compute/dynamic_fusion/sketch/attributes/Conv2dAttributes.h"
#include "src/core/CL/CLValidate.h"
-
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h"
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwDirectConv2d.h"
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
namespace arm_compute
{
@@ -153,11 +148,7 @@ ClComponentDirectConv2d::ClComponentDirectConv2d(ComponentId
const Attributes &attributes,
const Settings &settings)
: IGpuKernelComponent{id, properties, tensors},
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- _component_writer{std::make_unique<ClTemplateDirectConv2d>(id, tensors, attributes, settings)}
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
_component_writer{std::make_unique<GpuCkwDirectConv2d>(id, tensors, attributes, settings)}
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
{
}
@@ -165,11 +156,7 @@ ClComponentDirectConv2d::~ClComponentDirectConv2d()
{
}
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-const IGpuTemplateComponentWriter *ClComponentDirectConv2d::template_writer() const
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
const IGpuCkwComponentDriver *ClComponentDirectConv2d::ckw_component_driver() const
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
{
return _component_writer.get();
}
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h
index d6d9705d3c..c50b0fa0ce 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTDIRECTCONV2D
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTDIRECTCONV2D
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTDIRECTCONV2D_H
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTDIRECTCONV2D_H
#include "arm_compute/core/Error.h"
#include "arm_compute/core/KernelDescriptors.h"
@@ -68,11 +68,7 @@ private:
};
/** Forward declaration */
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-class ClTemplateDirectConv2d;
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
class GpuCkwDirectConv2d;
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
class ClComponentDirectConv2d final : public IGpuKernelComponent
{
@@ -139,11 +135,7 @@ public:
/** Allow instances of this class to be moved */
ClComponentDirectConv2d &operator=(ClComponentDirectConv2d &&component) = default;
/** Get writer for the component */
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- const IGpuTemplateComponentWriter *template_writer() const override;
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
- const IGpuCkwComponentDriver *ckw_component_driver() const override;
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
+ const IGpuCkwComponentDriver *ckw_component_driver() const override;
/** Get component type */
GpuComponentType type() const override
{
@@ -151,13 +143,9 @@ public:
}
private:
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- std::unique_ptr<ClTemplateDirectConv2d> _component_writer;
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
std::unique_ptr<GpuCkwDirectConv2d> _component_writer;
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
};
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTDIRECTCONV2D */
+#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTDIRECTCONV2D_H
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp
index 5b136427e4..209c73dbee 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,11 +26,7 @@
#include "arm_compute/core/Validate.h"
#include "src/core/CL/CLValidate.h"
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.h"
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.h"
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
namespace arm_compute
{
@@ -117,19 +113,11 @@ ClComponentElementwiseBinary::ClComponentElementwiseBinary(ComponentId
const ArgumentPack<ITensorInfo> &tensors,
const Attributes &attributes)
: IGpuKernelComponent{id, properties, tensors},
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- _component_writer{std::make_unique<ClTemplateElementwiseBinary>(id, tensors, attributes)}
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
_component_writer{std::make_unique<GpuCkwElementwiseBinary>(id, tensors, attributes)}
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
{
}
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-const IGpuTemplateComponentWriter *ClComponentElementwiseBinary::template_writer() const
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
const IGpuCkwComponentDriver *ClComponentElementwiseBinary::ckw_component_driver() const
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
{
return _component_writer.get();
}
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h
index 7589b9732c..a4395a6219 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTELEMENTWISEBINARY
-#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTELEMENTWISEBINARY
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTELEMENTWISEBINARY_H
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTELEMENTWISEBINARY_H
#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
#include "src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h"
@@ -40,11 +40,7 @@ template <typename T>
class ArgumentPack;
/** Forward declaration */
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-class ClTemplateElementwiseBinary;
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
class GpuCkwElementwiseBinary;
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
class ClComponentElementwiseBinary final : public IGpuKernelComponent
{
@@ -105,12 +101,7 @@ public:
/** Allow instances of this class to be moved */
ClComponentElementwiseBinary &operator=(ClComponentElementwiseBinary &&component) = default;
/** Get writer for the component */
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- const IGpuTemplateComponentWriter *template_writer() const override;
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
- const IGpuCkwComponentDriver *ckw_component_driver() const override;
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
-
+ const IGpuCkwComponentDriver *ckw_component_driver() const override;
/** Get component type */
GpuComponentType type() const override
{
@@ -118,13 +109,9 @@ public:
}
private:
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- std::unique_ptr<ClTemplateElementwiseBinary> _component_writer;
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
std::unique_ptr<GpuCkwElementwiseBinary> _component_writer;
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
};
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute
-#endif /* ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTELEMENTWISEBINARY */
+#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTELEMENTWISEBINARY_H
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.cpp
deleted file mode 100644
index 27c13bd654..0000000000
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.cpp
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * Copyright (c) 2022-2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/dynamic_fusion/sketch/attributes/SoftmaxAttributes.h"
-
-#include "src/core/CL/CLValidate.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-Status ClComponentLogits1DMaxShiftExpSum::validate(const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes)
-{
- ARM_COMPUTE_UNUSED(properties, attributes);
-
- const ITensorInfo *src = tensors.get_const_tensor(TensorType::ACL_SRC_0);
- const ITensorInfo *sum = tensors.get_const_tensor(TensorType::ACL_DST_0);
- const ITensorInfo *dst = tensors.get_const_tensor(TensorType::ACL_DST_1);
-
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src);
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(sum);
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(dst);
-
- // 1. Check validity
- // All tensor infos are initialized
- ARM_COMPUTE_RETURN_ERROR_ON(src->tensor_shape().total_size() == 0);
- ARM_COMPUTE_RETURN_ERROR_ON(sum->tensor_shape().total_size() == 0);
- ARM_COMPUTE_RETURN_ERROR_ON(dst->tensor_shape().total_size() == 0);
-
- // Check for mismatches in shapes and data types
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst, sum);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(src, dst);
-
- // Device requirements are met
- ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src);
-
- // 2. Check support level
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F16, DataType::F32);
-
- return Status{};
-}
-
-ClComponentLogits1DMaxShiftExpSum::ClComponentLogits1DMaxShiftExpSum(ComponentId id,
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes)
- : IGpuKernelComponent{id, properties, tensors},
- _component_writer{std::make_unique<ClTemplateLogits1DMaxShiftExpSum>(id, tensors, attributes)}
-{
-}
-
-ClComponentLogits1DMaxShiftExpSum::~ClComponentLogits1DMaxShiftExpSum()
-{
-}
-
-const IGpuTemplateComponentWriter *ClComponentLogits1DMaxShiftExpSum::template_writer() const
-{
- return _component_writer.get();
-}
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.h
deleted file mode 100644
index 91ab5de3b5..0000000000
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.h
+++ /dev/null
@@ -1,130 +0,0 @@
-/*
- * Copyright (c) 2022-2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTLOGITS1DMAXSHIFTEXPSUM
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTLOGITS1DMAXSHIFTEXPSUM
-
-#include "arm_compute/dynamic_fusion/sketch/attributes/SoftmaxAttributes.h"
-
-#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
-
-namespace arm_compute
-{
-/** Forward declaration */
-class ITensorInfo;
-namespace experimental
-{
-namespace dynamic_fusion
-{
-/** Forward declaration */
-template <typename T>
-class ArgumentPack;
-
-/** Forward declaration */
-class ClTemplateLogits1DMaxShiftExpSum;
-
-/** Component to calculate max-shifted exponentials and their sum
- *
- * 1D example:
- * input: [x1, x2, ... , xn], shape: (1 x d)
- *
- * Let max(x1...xn) = m
- *
- * (output) sum: [exp(x1-m) + ... + exp(xn-m)], shape: (1 x 1)
- * (output) dst: [exp(x1-m) ... exp(xn-m)], shape: (1 x d)
- *
- * This component is used by the softmax operator. The subsequent
- * operation normalizes dst with sum, therefore the max-shifting
- * since exp(m) will be cancelled in numerator and denominator.
-*/
-class ClComponentLogits1DMaxShiftExpSum final : public IGpuKernelComponent
-{
-public:
- /** Attributes are a set of backend-agnostic parameters that define what a component does */
- using Attributes = SoftmaxAttributes;
-
- /** Validate the component
- *
- * @param[in] properties Component properties @ref Properties
- * @param[in] tensors Tensor arguments to the component
- * @param[in] attributes Component attributes @ref Attributes
- *
- * @return Status Validation results
- *
- * Tensor argument names:
- * - ACL_SRC_0: Input
- * - ACL_DST_0: Output
- * - ACL_DST_1: Output
- *
- * Tensor argument constness:
- * - ACL_SRC_0: Const
- * - ACL_DST_0: Const
- * - ACL_DST_1: Const
- *
- * Valid data layouts:
- * - All
- *
- ** Valid data type configurations:
- * |ACL_SRC_0 |ACL_DST_0 |ACL_DST_1 |
- * |:----------|:----------|:----------|
- * |F16 | F16 | F16 |
- * |F32 | F32 | F32 |
- */
- static Status
- validate(const Properties &properties, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes);
-
- /** Constructor
- *
- * Similar to @ref ClComponentLogits1DMaxShiftExpSum::validate()
- */
- ClComponentLogits1DMaxShiftExpSum(ComponentId id,
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes);
-
- /** Destructor */
- ~ClComponentLogits1DMaxShiftExpSum() override;
- /** Prevent instances of this class from being copy constructed */
- ClComponentLogits1DMaxShiftExpSum(const ClComponentLogits1DMaxShiftExpSum &component) = delete;
- /** Prevent instances of this class from being copied */
- ClComponentLogits1DMaxShiftExpSum &operator=(const ClComponentLogits1DMaxShiftExpSum &component) = delete;
- /** Allow instances of this class to be move constructed */
- ClComponentLogits1DMaxShiftExpSum(ClComponentLogits1DMaxShiftExpSum &&component) = default;
- /** Allow instances of this class to be moved */
- ClComponentLogits1DMaxShiftExpSum &operator=(ClComponentLogits1DMaxShiftExpSum &&component) = default;
- /** Get template writer for the component */
- const IGpuTemplateComponentWriter *template_writer() const override;
- /** Get component type */
- GpuComponentType type() const override
- {
- return GpuComponentType::Unfusable;
- }
-
-private:
- std::unique_ptr<ClTemplateLogits1DMaxShiftExpSum> _component_writer;
-};
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTLOGITS1DMAXSHIFTEXPSUM */
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.cpp
deleted file mode 100644
index fb2544385c..0000000000
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.cpp
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Copyright (c) 2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/dynamic_fusion/sketch/attributes/SoftmaxAttributes.h"
-
-#include "src/core/CL/CLValidate.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-Status ClComponentLogits1DNorm::validate(const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes)
-{
- ARM_COMPUTE_UNUSED(properties, attributes);
-
- const ITensorInfo *src = tensors.get_const_tensor(TensorType::ACL_SRC_0);
- const ITensorInfo *sum = tensors.get_const_tensor(TensorType::ACL_SRC_1);
- const ITensorInfo *dst = tensors.get_const_tensor(TensorType::ACL_DST_0);
-
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src);
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(sum);
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(dst);
-
- // 1. Check validity
- // All tensor infos are initialized
- ARM_COMPUTE_RETURN_ERROR_ON(src->tensor_shape().total_size() == 0);
- ARM_COMPUTE_RETURN_ERROR_ON(sum->tensor_shape().total_size() == 0);
- ARM_COMPUTE_RETURN_ERROR_ON(dst->tensor_shape().total_size() == 0);
-
- // Check for mismatches in shapes and data types
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst, sum);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(src, dst);
-
- ARM_COMPUTE_RETURN_ERROR_ON(attributes.is_log_softmax() && !is_data_type_float(src->data_type()));
-
- // Device requirements are met
- ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src);
-
- // 2. Check support level
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F16, DataType::F32);
-
- return Status{};
-}
-
-ClComponentLogits1DNorm::ClComponentLogits1DNorm(ComponentId id,
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes)
- : IGpuKernelComponent{id, properties, tensors},
- _component_writer{std::make_unique<ClTemplateLogits1DNorm>(id, tensors, attributes)}
-{
-}
-
-ClComponentLogits1DNorm::~ClComponentLogits1DNorm()
-{
-}
-
-const IGpuTemplateComponentWriter *ClComponentLogits1DNorm::template_writer() const
-{
- return _component_writer.get();
-}
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.h
deleted file mode 100644
index 74c0273604..0000000000
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.h
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * Copyright (c) 2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTLOGITS1DNORM
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTLOGITS1DNORM
-
-#include "arm_compute/dynamic_fusion/sketch/attributes/SoftmaxAttributes.h"
-
-#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
-
-namespace arm_compute
-{
-/** Forward declaration */
-class ITensorInfo;
-namespace experimental
-{
-namespace dynamic_fusion
-{
-/** Forward declaration */
-template <typename T>
-class ArgumentPack;
-
-/** Forward declaration */
-class ClTemplateLogits1DNorm;
-
-/** Component to calculate the final step of the Softmax Layer
- * where each logit value is multiplied by the inverse of the sum of the logits.
- *
- * 1D example:
- *
- * (input) src: [x1 x2 ... xn], shape: (1 x d)
- * (input) sum: [x1 + x2 + ... + xn], shape: (1 x 1)
- * (output) dst: [x1/sum x2/sum ... xn/sum], shape: (1 x d)
- *
- * This component is used by the softmax operator to get the final result.
-*/
-class ClComponentLogits1DNorm final : public IGpuKernelComponent
-{
-public:
- /** Attributes are a set of backend-agnostic parameters that define what a component does */
- using Attributes = SoftmaxAttributes;
-
- /** Validate the component
- *
- * @param[in] properties Component properties @ref Properties
- * @param[in] tensors Tensor arguments to the component
- * @param[in] attributes Component attributes @ref Attributes
- *
- * @return Status Validation results
- *
- * Tensor argument names:
- * - ACL_SRC_0: Input
- * - ACL_SRC_1: Input
- * - ACL_DST_0: Output
- *
- * Tensor argument constness:
- * - ACL_SRC_0: Const
- * - ACL_SRC_1: Const
- * - ACL_DST_0: Const
- *
- * Valid data layouts:
- * - All
- *
- ** Valid data type configurations:
- * |ACL_SRC_0 |ACL_SRC_1 |ACL_DST_0 |
- * |:----------|:----------|:----------|
- * |F16 | F16 | F16 |
- * |F32 | F32 | F32 |
- */
- static Status
- validate(const Properties &properties, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes);
-
- /** Constructor
- *
- * Similar to @ref ClComponentLogits1DNorm::validate()
- */
- ClComponentLogits1DNorm(ComponentId id,
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes);
-
- /** Destructor */
- ~ClComponentLogits1DNorm() override;
- /** Prevent instances of this class from being copy constructed */
- ClComponentLogits1DNorm(const ClComponentLogits1DNorm &component) = delete;
- /** Prevent instances of this class from being copied */
- ClComponentLogits1DNorm &operator=(const ClComponentLogits1DNorm &component) = delete;
- /** Allow instances of this class to be move constructed */
- ClComponentLogits1DNorm(ClComponentLogits1DNorm &&component) = default;
- /** Allow instances of this class to be moved */
- ClComponentLogits1DNorm &operator=(ClComponentLogits1DNorm &&component) = default;
- /** Get template writer for the component */
- const IGpuTemplateComponentWriter *template_writer() const override;
- /** Get component type */
- GpuComponentType type() const override
- {
- return GpuComponentType::Unfusable;
- }
-
-private:
- std::unique_ptr<ClTemplateLogits1DNorm> _component_writer;
-};
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTLOGITS1DNORM */
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.cpp
index f238d42d98..53ac8da41f 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2023 Arm Limited.
+ * Copyright (c) 2023-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,7 +21,6 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifdef ACL_INTERNAL_TEST_CKW_IN_DF
#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.h"
@@ -147,5 +146,3 @@ const IGpuCkwComponentDriver *ClComponentMatMul::ckw_component_driver() const
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute
-
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.cpp
index 5544963b3f..6e7243dc04 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.cpp
@@ -30,7 +30,6 @@
#include "src/core/CL/CLValidate.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.h"
#include "src/dynamic_fusion/utils/Utils.h"
#include <memory>
@@ -93,27 +92,16 @@ ClComponentPool2d::ClComponentPool2d(ComponentId id,
const Attributes &attributes,
const Settings &settings)
: IGpuKernelComponent{id, properties, tensors},
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- _component_writer{std::make_unique<ClTemplatePool2d>(id, tensors, attributes, settings)}
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
_component_writer{std::make_unique<GpuCkwPool2d>(id, tensors, attributes, settings)}
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
{
}
ClComponentPool2d::~ClComponentPool2d()
{
}
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-const IGpuTemplateComponentWriter *ClComponentPool2d::template_writer() const
-{
- return _component_writer.get();
-}
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
const IGpuCkwComponentDriver *ClComponentPool2d::ckw_component_driver() const
{
return _component_writer.get();
}
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.h
index 98fed65004..d33e601f18 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.h
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2023 Arm Limited.
+ * Copyright (c) 2023-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -42,11 +42,7 @@ class ArgumentPack;
class Pool2dAttributes;
/** Forward declaration */
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-class ClTemplatePool2d;
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
class GpuCkwPool2d;
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
class ClComponentPool2d final : public IGpuKernelComponent
{
@@ -116,13 +112,9 @@ public:
/** Allow instances of this class to be moved */
ClComponentPool2d &operator=(ClComponentPool2d &&component) = default;
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- /** Get template writer for the component */
- const IGpuTemplateComponentWriter *template_writer() const override;
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
+
/** Get GPU kernel writer for the component */
const IGpuCkwComponentDriver *ckw_component_driver() const override;
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
/** Get component type */
GpuComponentType type() const override
@@ -131,11 +123,7 @@ public:
}
private:
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- std::unique_ptr<ClTemplatePool2d> _component_writer;
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
std::unique_ptr<GpuCkwPool2d> _component_writer;
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
};
} // namespace dynamic_fusion
} // namespace experimental
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.cpp
index 0ece9de970..dce85c424e 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2023 Arm Limited.
+ * Copyright (c) 2023-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -27,7 +27,6 @@
#include "arm_compute/core/Validate.h"
#include "src/core/CL/CLValidate.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.h"
namespace arm_compute
{
@@ -54,15 +53,16 @@ Status ClComponentReshape::validate(const ArgumentPack<ITensorInfo> &tensors)
ClComponentReshape::ClComponentReshape(ComponentId id,
const Properties &properties,
const ArgumentPack<ITensorInfo> &tensors)
- : IGpuKernelComponent{id, properties, tensors}, _component_writer{std::make_unique<ClTemplateReshape>(id, tensors)}
+ : IGpuKernelComponent{id, properties, tensors}
{
}
ClComponentReshape::~ClComponentReshape()
{
}
-const IGpuTemplateComponentWriter *ClComponentReshape::template_writer() const
+const IGpuCkwComponentDriver *ClComponentReshape::ckw_component_driver() const
{
- return _component_writer.get();
+ /* NOT IMPLEMENTED */
+ return nullptr;
}
} // namespace dynamic_fusion
} // namespace experimental
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.h
index 78163d6603..fd0f966da1 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.h
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2023 Arm Limited.
+ * Copyright (c) 2023-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTRESHAPE
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTRESHAPE
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTRESHAPE_H
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTRESHAPE_H
#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
@@ -85,8 +85,8 @@ public:
ClComponentReshape(ClComponentReshape &&component) = default;
/** Allow instances of this class to be moved */
ClComponentReshape &operator=(ClComponentReshape &&component) = default;
- /** Get template writer for the component */
- const IGpuTemplateComponentWriter *template_writer() const override;
+ /** Get writer for the component */
+ const IGpuCkwComponentDriver *ckw_component_driver() const override;
/** Get component type */
GpuComponentType type() const override
{
@@ -94,10 +94,9 @@ public:
}
private:
- std::unique_ptr<ClTemplateReshape> _component_writer;
};
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTRESHAPE */
+#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTRESHAPE_H
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.cpp
index b05eb04698..411eeca802 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -29,12 +29,7 @@
#include "src/core/CL/CLValidate.h"
#include "src/core/utils/ScaleUtils.h"
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
-
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.h"
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwResize.h"
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
namespace arm_compute
{
@@ -43,11 +38,7 @@ namespace experimental
namespace dynamic_fusion
{
/** Forward declaration */
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-class ClTemplateResize;
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
class GpuCkwResize;
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
Status ClComponentResize::validate(const IGpuKernelComponent::Properties &properties,
const ArgumentPack<ITensorInfo> &tensors,
@@ -82,11 +73,7 @@ ClComponentResize::ClComponentResize(ComponentId id,
const ArgumentPack<ITensorInfo> &tensors,
const ClComponentResize::Attributes &attributes)
: IGpuKernelComponent{id, properties, tensors},
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- _component_writer{std::make_unique<ClTemplateResize>(id, tensors, attributes)}
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
_component_writer{std::make_unique<GpuCkwResize>(id, tensors, attributes)}
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
{
}
@@ -94,11 +81,7 @@ ClComponentResize::~ClComponentResize()
{
}
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-const IGpuTemplateComponentWriter *ClComponentResize::template_writer() const
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
const IGpuCkwComponentDriver *ClComponentResize::ckw_component_driver() const
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
{
return _component_writer.get();
}
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.h
index 29276c3257..9a1169c45f 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.h
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -22,8 +22,8 @@
* SOFTWARE.
*/
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTRESIZE
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTRESIZE
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTRESIZE_H
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTRESIZE_H
#include "arm_compute/dynamic_fusion/sketch/attributes/ResizeAttributes.h"
@@ -42,11 +42,7 @@ template <typename T>
class ArgumentPack;
/** Forward declaration */
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-class ClTemplateResize;
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
class GpuCkwResize;
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
class ClComponentResize final : public IGpuKernelComponent
{
@@ -111,11 +107,7 @@ public:
ClComponentResize &operator=(ClComponentResize &&component) = default;
/** Get writer for the component */
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- const IGpuTemplateComponentWriter *template_writer() const override;
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
const IGpuCkwComponentDriver *ckw_component_driver() const override;
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
/** Get component type */
GpuComponentType type() const override
@@ -124,15 +116,11 @@ public:
}
private:
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- std::unique_ptr<ClTemplateResize> _component_writer;
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
std::unique_ptr<GpuCkwResize> _component_writer;
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
};
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTRESIZE */
+#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTRESIZE_H
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp
index dcbecaff35..3db6c5cd2d 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,11 +24,7 @@
#include "ClComponentStore.h"
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h"
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.h"
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
#include <memory>
@@ -46,22 +42,13 @@ Status ClComponentStore::validate(const Properties &properties, const ArgumentPa
ClComponentStore::ClComponentStore(ComponentId id,
const Properties &properties,
const ArgumentPack<ITensorInfo> &tensors)
- : IGpuKernelComponent{id, properties, tensors},
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- _component_writer{std::make_unique<ClTemplateStore>(id, tensors)}
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
- _component_writer{std::make_unique<GpuCkwStore>(id, tensors)}
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
+ : IGpuKernelComponent{id, properties, tensors}, _component_writer{std::make_unique<GpuCkwStore>(id, tensors)}
{
}
ClComponentStore::~ClComponentStore()
{
}
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-const IGpuTemplateComponentWriter *ClComponentStore::template_writer() const
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
const IGpuCkwComponentDriver *ClComponentStore::ckw_component_driver() const
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
{
return _component_writer.get();
}
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h
index 948785c480..2c1dd0f6fc 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTSTORE
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTSTORE
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTSTORE_H
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTSTORE_H
#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
@@ -39,11 +39,7 @@ namespace dynamic_fusion
/** Forward declaration */
template <typename T>
class ArgumentPack;
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-class ClTemplateStore;
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
class GpuCkwStore;
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
class ClComponentStore final : public IGpuKernelComponent
{
@@ -88,11 +84,7 @@ public:
/** Allow instances of this class to be moved */
ClComponentStore &operator=(ClComponentStore &&component) = default;
/** Get writer for the component */
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- const IGpuTemplateComponentWriter *template_writer() const override;
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
const IGpuCkwComponentDriver *ckw_component_driver() const override;
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
/** Get component type */
GpuComponentType type() const override
{
@@ -100,13 +92,9 @@ public:
}
private:
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- std::unique_ptr<ClTemplateStore> _component_writer;
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
- std::unique_ptr<GpuCkwStore> _component_writer;
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
+ std::unique_ptr<GpuCkwStore> _component_writer;
};
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTSTORE */
+#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTSTORE_H
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp
index 697b7d4e1f..4d6e7f81bb 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -30,7 +30,6 @@
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h"
#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h"
namespace arm_compute
{
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuMatMul.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuMatMul.cpp
index e24629a036..2997b28ec1 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuMatMul.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuMatMul.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2023 Arm Limited.
+ * Copyright (c) 2023-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,7 +21,6 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifdef ACL_INTERNAL_TEST_CKW_IN_DF
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuMatMul.h"
@@ -244,4 +243,3 @@ ITensorInfo *GpuMatMul::create_op(GpuWorkloadSketch &sketch,
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp
index 431c9110fc..d385752201 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp
@@ -28,8 +28,6 @@
#include "src/common/utils/Log.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
-#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.h"
-#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.h"
#include "src/dynamic_fusion/sketch/gpu/GpuOperatorProperties.h"
#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
@@ -88,9 +86,8 @@ Status GpuSoftmax::is_supported_op(const GpuWorkloadContext &context,
arguments_norm.add_const_tensor(ACL_SRC_1, &sum);
arguments_norm.add_const_tensor(ACL_DST_0, &dst_info_to_validate);
- ARM_COMPUTE_RETURN_ON_ERROR(
- ClComponentLogits1DMaxShiftExpSum::validate(properties, arguments_exp_sum, attributes));
- ARM_COMPUTE_RETURN_ON_ERROR(ClComponentLogits1DNorm::validate(properties, arguments_norm, attributes));
+ ARM_COMPUTE_UNUSED(properties, attributes);
+ return Status(ErrorCode::RUNTIME_ERROR, "GpuSoftmax is not implemented");
}
else
{
@@ -177,8 +174,8 @@ void GpuSoftmax::create_op(GpuWorkloadSketch &sketch, ITensorInfo *src, ITensorI
arguments_norm.add_const_tensor(ACL_SRC_1, sum);
arguments_norm.add_const_tensor(ACL_DST_0, dst);
- comp_graph.add_new_component<ClComponentLogits1DMaxShiftExpSum>(properties, arguments_exp_sum, attributes);
- comp_graph.add_new_component<ClComponentLogits1DNorm>(properties, arguments_norm, attributes);
+ // Add to component graph -- NOT IMPLEMENTED
+ ARM_COMPUTE_UNUSED(comp_graph, attributes);
}
}
else
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuTanh.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuTanh.cpp
index bf0f274c5c..b9d01966b3 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuTanh.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuTanh.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2023 Arm Limited.
+ * Copyright (c) 2023-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -31,7 +31,6 @@
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h"
#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h"
namespace arm_compute
{
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.cpp
deleted file mode 100644
index 775b0a0c8c..0000000000
--- a/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.cpp
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * Copyright (c) 2022-2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "GpuKernelVariableTable.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/ITensorInfo.h"
-
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-void GpuKernelVariableTable::declare_variable(const GpuKernelComponentGroup &comp_group,
- const ITensorInfo *tensor,
- GpuKernelArgumentInfo argument_info,
- const std::string &alias)
-{
- ARM_COMPUTE_ERROR_ON_MSG(!tensor->has_valid_id(), "Tensor info with valid id expected");
-
- // Do not re-declare if the variable associated with the tensor has already been declared
- auto it = _vars.find(tensor->id());
-
- if (it != _vars.end())
- {
- ARM_COMPUTE_ERROR_ON(!(it->second.kernel_argument_info == argument_info));
- return;
- }
-
- const auto target = comp_group.get_tile_for_tensor(tensor);
-
- if (target != tensor)
- {
- // If the tensor uses a shared tile, don't declare another variable.
- it = _vars.find(target->id());
-
- ARM_COMPUTE_ERROR_ON_MSG(it == _vars.end(), "The variable used for this tensor must have been declared.");
-
- _vars[tensor->id()] = it->second;
- }
- else
- {
- // Declare variable associated with the tensor
- std::stringstream ss;
- ss << alias << "_t" << abs(tensor->id());
- const auto uniq_name = ss.str();
- TensorVariable var{tensor->id(), uniq_name, argument_info};
-
- _vars.emplace(tensor->id(), var);
- }
-}
-
-GpuKernelVariableTable::TensorVariable GpuKernelVariableTable::get_variable(const ITensorInfo *tensor) const
-{
- const auto var = _vars.at(tensor->id());
- return var;
-}
-
-GpuKernelVariableTable::VariableList
-GpuKernelVariableTable::get_variable_list(const std::vector<const ITensorInfo *> &tensors) const
-{
- VariableList vars{};
- for (const auto &tensor : tensors)
- {
- if (!tensor->has_valid_id())
- {
- continue;
- }
- vars.push_back(get_variable(tensor));
- }
- return vars;
-}
-
-TagVal::TagVal(const GpuKernelVariableTable::TensorVariable &var) : value{var.uniq_name}
-{
-}
-
-TagVal::TagVal(const std::string &val) : value{val}
-{
-}
-
-TagVal::TagVal(const char *val) : value{std::string(val)}
-{
-}
-
-TagVal::TagVal(const DataType &data_type) : value{get_cl_type_from_data_type(data_type)}
-{
-}
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h b/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h
deleted file mode 100644
index c17f131ada..0000000000
--- a/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * Copyright (c) 2022-2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_GPUKERNELVARIABLETABLE
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_GPUKERNELVARIABLETABLE
-
-#include "arm_compute/core/ITensorInfo.h"
-
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
-#include "support/AclRequires.h"
-#include "support/StringSupport.h"
-
-#include <set>
-#include <string>
-#include <type_traits>
-#include <unordered_map>
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-class GpuKernelComponentGroup;
-
-/** A table of all the variables used in the kernel.
- * Each kernel has exactly one variable table.
- */
-class GpuKernelVariableTable
-{
-public:
- /** A tensor variable whose main purposes are:
- * - Hold the newly assigned @ref GpuKernelArgumentInfo for the associated tensor info
- * - Hold the generated variable name for the associated tensor info
- */
- struct TensorVariable
- {
- public:
- TensorVariable() = default;
- TensorVariable(const TensorVariable &) = default;
- TensorVariable &operator=(const TensorVariable &) = default;
- ITensorInfo::Id id{ITensorInfo::invalid_tensor_id};
- std::string uniq_name{"empty"}; // Unique name, also the final variable name used in the built code
- GpuKernelArgumentInfo kernel_argument_info{};
- bool has_valid_id() const
- {
- return id != ITensorInfo::invalid_tensor_id;
- }
- };
- using VariableList = std::vector<TensorVariable>;
-
-public:
- /** Declare a @ref TensorVariable for a corresponding tensor info.
- *
- * @param[in] comp_group Component group the tensor belongs to
- * @param[in] tensor Tensor info with which the new variable is associated
- * @param[in] argument_info Kernel argument information
- * @param[in] alias Alias for the variable. Will be used as part of the variable name
- */
- void declare_variable(const GpuKernelComponentGroup &comp_group,
- const ITensorInfo *tensor,
- GpuKernelArgumentInfo argument_info,
- const std::string &alias = "unnamed");
- /** Get the @ref TensorVariable associated with @p tensor
- *
- * @param[in] tensor Tensor info to be queried
- *
- * @return TensorVariable
- */
- TensorVariable get_variable(const ITensorInfo *tensor) const;
- /** Get the @ref TensorVariable list associated with @p tensors
- * @note Empty tensors are skipped
- *
- * @param[in] tensors List of tensor infos to be queried
- *
- * @return VariableList
- */
- VariableList get_variable_list(const std::vector<const ITensorInfo *> &tensors) const;
-
-private:
- std::map<ITensorInfo::Id, TensorVariable> _vars{};
-};
-
-/** A tag value will substitute a tag in a string template during its instantiation */
-struct TagVal
-{
- /** Default constructor */
- TagVal() = default;
- /** Construct a @ref TagVal from a @ref GpuKernelVariableTable::TensorVariable */
- TagVal(const GpuKernelVariableTable::TensorVariable &var);
- /** Construct a @ref TagVal from an integral type */
- template <typename T, ARM_COMPUTE_REQUIRES_TA(std::is_integral<T>::value)>
- TagVal(T val) : value{support::cpp11::to_string(val)}
- {
- }
- /** Construct a @ref TagVal from a string */
- TagVal(const std::string &val);
- /** Construct a @ref TagVal from a c-style string */
- TagVal(const char *val);
- /** Construct a @ref TagVal from a @ref DataType */
- TagVal(const DataType &data_type);
- /** Get the value of the TagVal as a converted string */
- std::string value{};
-};
-
-/** A tag used in a string template is a placeholder string to be substituted by real values during template instantiation */
-using Tag = std::string;
-
-/** Tag lookup table. It is used to instantiate a string template */
-using TagLUT = std::unordered_map<Tag, TagVal>;
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_GPUKERNELVARIABLETABLE */
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h b/src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h
deleted file mode 100644
index 9d0b4f592a..0000000000
--- a/src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_IGPUTEMPLATECOMPONENTWRITER
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_IGPUTEMPLATECOMPONENTWRITER
-
-#include "arm_compute/core/CL/CLCompileContext.h"
-#include "arm_compute/core/ITensorInfo.h"
-#include "arm_compute/core/Window.h"
-
-#include "src/dynamic_fusion/sketch/ArgumentPack.h"
-#include "src/dynamic_fusion/sketch/gpu/components/Types.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-/** Forward declaration */
-class GpuKernelComponentGroup;
-class GpuKernelVariableTable;
-
-/** An interface used by @ref ClTemplateWriter to write source code for a kernel component
- */
-class IGpuTemplateComponentWriter
-{
-public:
- using ComponentGroup = GpuKernelComponentGroup;
-
- /**For now all kernel intermeditate/destination tensors are expected to be of type Tensor_4D_t_Buffer*/
- static constexpr GpuKernelArgumentInfo::Type common_tensor_type = GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer;
-
-public:
- /** Constructor
- *
- * @param[in] id Component id
- * @param[in] tensors Tensor arguments to the components
- */
- IGpuTemplateComponentWriter(ComponentId id, const ArgumentPack<ITensorInfo> &tensors) : _id{id}, _tensors{tensors}
- {
- }
- /** Destructor */
- virtual ~IGpuTemplateComponentWriter()
- {
- }
- /** Generate kernel component name */
- virtual std::string get_name() const = 0;
- /** Generate kernel component code template
- *
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return std::string Component code
- */
- virtual std::string get_component_code(const ComponentGroup &comp_group) const = 0;
- /** Declare all variables used by the component in the @p vtable
- *
- * @param[out] vtable Variable table
- * @param[in] comp_group Component group of which the component is a part of
- */
- virtual void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const = 0;
- /** Generate the tag look-up table used to instantiate the component code.
- *
- * @param[in] vtable Variable table
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return TagLUT Tag lookup table
- */
- virtual TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const = 0;
- /** Generate additional macros used in the component */
- virtual std::string get_additional_macros() const
- {
- return "";
- }
- /** Generate the build options used in the component
- *
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return CLBuildOptions Build options
- */
- virtual CLBuildOptions get_build_options(const ComponentGroup &comp_group) const
- {
- ARM_COMPUTE_UNUSED(comp_group);
- return CLBuildOptions{};
- }
- /** Generate the component config id string used for tuning */
- virtual std::string get_config_id() const
- {
- return "";
- }
- /** Generate the header list used in the component */
- virtual std::set<std::string> get_headers_list() const
- {
- return std::set<std::string>{};
- }
- /** Generate the execution window for the component */
- virtual Window get_window() const
- {
- return Window{};
- }
- /** Get tensor arguments */
- ArgumentPack<ITensorInfo> tensors() const
- {
- return _tensors;
- }
- /** Get component id */
- ComponentId id() const
- {
- return _id;
- }
-
-private:
- ComponentId _id{-1};
- ArgumentPack<ITensorInfo> _tensors{};
-};
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_IGPUTEMPLATECOMPONENTWRITER */
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp
deleted file mode 100644
index c165fb5f33..0000000000
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp
+++ /dev/null
@@ -1,181 +0,0 @@
-/*
- * Copyright (c) 2022-2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "ClTemplateActivation.h"
-
-#include "arm_compute/core/utils/ActivationFunctionUtils.h"
-#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
-#include "arm_compute/core/utils/StringUtils.h"
-
-#include "src/core/helpers/WindowHelpers.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
-#include "support/StringSupport.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-ClTemplateActivation::ClTemplateActivation(ComponentId id,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes)
- : IGpuTemplateComponentWriter{id, tensors}, _src{}, _dst{}, _attributes{attributes}
-{
- _src = this->tensors().get_const_tensor(TensorType::ACL_SRC);
- _dst = this->tensors().get_const_tensor(TensorType::ACL_DST);
- ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst);
-}
-
-std::string ClTemplateActivation::get_name() const
-{
- return "activation";
-}
-
-std::string ClTemplateActivation::get_component_code(const ComponentGroup &comp_group) const
-{
- std::string code;
- const bool is_root = (comp_group.get_root_component()->id() == this->id());
-
- code = R"_(
-//------------------ START KERNEL {{meta_kernel_id}} ---------------------
-)_";
- if (is_root)
- {
- code += R"_(
-// IN(src) {{src}}
-// OUT(dst, accum) {{dst}}
-
-TILE({{DATA_TYPE}}, M0, N0, {{src}});
-TILE(uint, M0, 1, g_dst_indirect_y);
-{
- {{src}}_offset_first_element_in_bytes += g_ind_2 * {{src}}_stride_z;
-
- T_LOAD({{DATA_TYPE}}, M0, N0, {{TENSOR_TYPE}}, {{src}}, g_ind_0, g_ind_1, 1, {{src}}_stride_y, {{src}});
-
- T_ACTIVATION({{DATA_TYPE}}, M0, N0, {{ACT}}, {{A_VAL}}, {{B_VAL}}, {{src}}, {{dst}});
-}
-
-LOOP_UNROLLING(int, i, 0, 1, M0,
-{
- g_dst_indirect_y[i].v = (uint)min((int)(g_ind_1 + i), (int)({{arg_dst}}_w) - 1);
- g_dst_indirect_y[i].v += (int)(g_ind_2 % {{arg_dst}}_h) * (int)({{arg_dst}}_w);
- g_dst_indirect_y[i].v += (int)(g_ind_2 / {{arg_dst}}_h) * (int)({{arg_dst}}_w * {{arg_dst}}_h);
-})
-)_";
- }
- else
- {
- code += R"_(
-// IN/OUT(src, accum) {{src}}
-
-{
- T_ACTIVATION({{DATA_TYPE}}, M0, N0, {{ACT}}, {{A_VAL}}, {{B_VAL}}, {{src}}, {{dst}});
-}
-)_";
- }
- code += R"_(
-//------------------ END KERNEL {{meta_kernel_id}} ---------------------
-)_";
- return code;
-}
-
-void ClTemplateActivation::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
-{
- vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "src");
-
- vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "dst");
-}
-
-TagLUT ClTemplateActivation::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
-{
- ARM_COMPUTE_UNUSED(comp_group);
-
- TagLUT lut{};
- // Arguments and global shared variables
- lut["src"] = vtable.get_variable(_src);
- lut["dst"] = vtable.get_variable(_dst);
-
- const auto dst_argument = vtable.get_variable(comp_group.get_any_dst_tensor());
- lut["arg_dst"] = dst_argument.uniq_name;
-
- // Local build options
- lut["meta_kernel_id"] = id();
- lut["DATA_TYPE"] = get_cl_type_from_data_type(_src->data_type());
- lut["TENSOR_TYPE"] = "BUFFER";
-
- const auto f_act = lower_string(string_from_activation_func(_attributes.activation()));
-
- lut["ACT"] = f_act;
- lut["A_VAL"] = float_to_string_with_full_precision(_attributes.a());
- lut["B_VAL"] = float_to_string_with_full_precision(_attributes.b());
-
- return lut;
-}
-
-CLBuildOptions ClTemplateActivation::get_build_options(const ComponentGroup &comp_group) const
-{
- /// NOTE: For now tile sizes (n0, m0) are set by the execution window. This may change in the future
- const auto root_window = comp_group.get_root_component()->template_writer()->get_window();
- const unsigned int n0 = root_window.x().step();
- const unsigned int m0 = root_window.y().step();
- const unsigned int partial_store_n0 = _dst->dimension(0) % n0;
-
- CLBuildOptions build_opts;
- build_opts.add_option("-DN0=" + support::cpp11::to_string(n0));
- build_opts.add_option("-DM0=" + support::cpp11::to_string(m0));
- build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0));
-
- return build_opts;
-}
-
-std::string ClTemplateActivation::get_config_id() const
-{
- std::string config_id{};
- config_id += "activation_";
- config_id += lower_string(string_from_data_type(_src->data_type()));
- config_id += "_";
- config_id += support::cpp11::to_string(_src->dimension(0));
- config_id += "_";
- config_id += support::cpp11::to_string(_src->dimension(1));
- return config_id;
-}
-
-std::set<std::string> ClTemplateActivation::get_headers_list() const
-{
- return std::set<std::string>{"helpers.h", "tile_helpers.h", "activation_float_helpers.h"};
-}
-
-Window ClTemplateActivation::get_window() const
-{
- ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized");
- const unsigned int n0 = adjust_vec_size(16 / _dst->element_size(), _dst->dimension(0));
- Window win = calculate_max_window(*_dst, Steps(n0));
- return win.collapse(win, Window::DimZ);
-}
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h
deleted file mode 100644
index 88ee370342..0000000000
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * Copyright (c) 2022-2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEACTIVATION
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEACTIVATION
-
-#include "arm_compute/core/experimental/Types.h"
-#include "arm_compute/function_info/ActivationLayerInfo.h"
-
-#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-class ClTemplateActivation final : public IGpuTemplateComponentWriter
-{
-public:
- using Attributes = ClComponentActivation::Attributes;
-
- /** Constructor
- *
- * @param[in] id Component id
- * @param[in] tensors Tensor arguments to the components
- * @param[in] attributes Component attributes
- */
- ClTemplateActivation(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes);
-
- /** Destructor */
- ~ClTemplateActivation() override = default;
-
- /** Prevent instances of this class from being copy constructed */
- ClTemplateActivation(const ClTemplateActivation &activation) = delete;
-
- /** Prevent instances of this class from being copied */
- ClTemplateActivation &operator=(const ClTemplateActivation &activation) = delete;
-
- /** Allow instances of this class to be move constructed */
- ClTemplateActivation(ClTemplateActivation &&activation) = default;
-
- /** Allow instances of this class to be moved */
- ClTemplateActivation &operator=(ClTemplateActivation &&activation) = default;
-
- /** Generate kernel component name */
- std::string get_name() const override;
-
- /** Generate kernel component code template
- *
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return std::string Component code
- */
- std::string get_component_code(const ComponentGroup &comp_group) const override;
-
- /** Declare all variables used by the component in the @p vtable
- *
- * @param[out] vtable Variable table
- * @param[in] comp_group Component group of which the component is a part of
- */
- void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
-
- /** Generate the tag look-up table used to instantiate the component code.
- *
- * @param[in] vtable Variable table
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return TagLUT Tag lookup table
- */
- TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
-
- /** Generate the build options used in the component
- *
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return CLBuildOptions Build options
- */
- CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override;
-
- /** Generate the component config id string used for tuning */
- std::string get_config_id() const override;
-
- /** Generate the header list used in the component */
- std::set<std::string> get_headers_list() const override;
-
- /** Generate the execution window for the component */
- Window get_window() const override;
-
-private:
- const ITensorInfo *_src;
- const ITensorInfo *_dst;
- Attributes _attributes;
-};
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEACTIVATION */
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.cpp
deleted file mode 100644
index 0da3a73801..0000000000
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.cpp
+++ /dev/null
@@ -1,212 +0,0 @@
-/*
- * Copyright (c) 2022-2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "ClTemplateCast.h"
-
-#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
-#include "arm_compute/core/utils/StringUtils.h"
-
-#include "src/core/helpers/WindowHelpers.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-ClTemplateCast::ClTemplateCast(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes)
- : IGpuTemplateComponentWriter{id, tensors}, _src{}, _dst{}, _attributes{attributes}
-{
- _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
- _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
-
- ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst);
-}
-
-std::string ClTemplateCast::get_name() const
-{
- const size_t src_size = data_size_from_type(_src->data_type());
- const size_t dst_size = data_size_from_type(_dst->data_type());
-
- return (src_size >= dst_size) ? "cast_down" : "cast_up";
-}
-
-std::string ClTemplateCast::get_component_code(const ComponentGroup &comp_group) const
-{
- ARM_COMPUTE_UNUSED(comp_group);
-
- const std::string kernel_name = get_name();
- const auto is_root = (comp_group.get_root_component()->id() == this->id());
-
- std::string code = R"_(
-//------------------ START KERNEL {{meta_kernel_id}} CAST ---------------------
-)_";
-
- if (is_root)
- {
- code += R"_(
-// IN_0(src) {{src}}
-// OUT(dst, accum) {{dst}}
-
-TILE(uint, M0, 1, g_dst_indirect_y);
-{
- {{src}}_offset_first_element_in_bytes += get_global_id(2) * {{src}}_stride_z;
-
- TILE({{DATA_TYPE_IN}}, M0, N0, {{tmp}});
- T_LOAD({{DATA_TYPE_IN}}, M0, N0, BUFFER, {{src}}, g_ind_0, g_ind_1, 1, {{src}}_stride_y, {{tmp}});
-)_";
- }
-
- code += R"_(
- LOOP_UNROLLING(int, m0, 0, 1, M0,
- {
-)_";
-
- if (kernel_name == "cast_down" && is_data_type_quantized(_src->data_type()))
- {
- code += R"_(
- {{tmp}}[m0].v ^= (VEC_DATA_TYPE({{DATA_TYPE_IN}}, N0))0x80;
-)_";
- }
-
- if (kernel_name == "cast_down" &&
- (is_data_type_float(_src->data_type()) || _attributes.convert_policy() == ConvertPolicy::SATURATE))
- {
- code += R"_(
- {{dst}}[m0].v = CONVERT_SAT({{tmp}}[m0].v, VEC_DATA_TYPE({{DATA_TYPE_OUT}}, N0));
-)_";
- }
- else
- {
- code += R"_(
- {{dst}}[m0].v = CONVERT({{tmp}}[m0].v, VEC_DATA_TYPE({{DATA_TYPE_OUT}}, N0));
-)_";
- }
-
- code += R"_(
- })
-)_";
-
- if (is_root)
- {
- code += R"_(
- LOOP_UNROLLING(int, i, 0, 1, M0,
- {
- g_dst_indirect_y[i].v = (uint)min((int)(g_ind_1 + i), (int)({{arg_dst}}_w) - 1);
- g_dst_indirect_y[i].v += (int)(g_ind_2 % {{arg_dst}}_h) * (int)({{arg_dst}}_w);
- g_dst_indirect_y[i].v += (int)(g_ind_2 / {{arg_dst}}_h) * (int)({{arg_dst}}_w * {{arg_dst}}_h);
- })
-}
-)_";
- }
-
- code += R"_(
-//------------------ END KERNEL {{meta_kernel_id}} CAST ---------------------
-)_";
-
- return code;
-}
-
-void ClTemplateCast::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
-{
- vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "src");
-
- vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "dst");
-}
-
-TagLUT ClTemplateCast::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
-{
- const auto is_root = (comp_group.get_root_component()->id() == this->id());
-
- TagLUT lut{};
-
- // Arguments and global shared variables
- lut["src"] = vtable.get_variable(_src);
- lut["dst"] = vtable.get_variable(_dst);
- lut["tmp"] = (is_root) ? lut["src"].value + "_in_data" : lut["src"];
-
- const auto dst_argument = vtable.get_variable(comp_group.get_any_dst_tensor());
- lut["arg_dst"] = dst_argument.uniq_name;
-
- // Local build options
- lut["meta_kernel_id"] = id();
-
- lut["DATA_TYPE_IN"] = get_cl_type_from_data_type(_src->data_type());
- lut["DATA_TYPE_OUT"] = get_cl_type_from_data_type(_dst->data_type());
-
- return lut;
-}
-
-CLBuildOptions ClTemplateCast::get_build_options(const ComponentGroup &comp_group) const
-{
- ARM_COMPUTE_UNUSED(comp_group);
-
- const auto root_window = comp_group.get_root_component()->template_writer()->get_window();
- const unsigned int n0 = root_window.x().step();
- const unsigned int m0 = root_window.y().step();
-
- // Set build options
- CLBuildOptions build_opts{};
- build_opts.add_option("-DN0=" + support::cpp11::to_string(n0));
- build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(_src->dimension(0) % n0));
- build_opts.add_option("-DM0=" + support::cpp11::to_string(m0));
-
- return build_opts;
-}
-
-std::string ClTemplateCast::get_config_id() const
-{
- std::string config_id{};
-
- config_id += "_";
- config_id += lower_string(string_from_data_type(_src->data_type()));
- config_id += "_";
- config_id += lower_string(string_from_data_type(_dst->data_type()));
- config_id += "_";
- config_id += support::cpp11::to_string(_src->dimension(0));
- config_id += "_";
- config_id += support::cpp11::to_string(_src->dimension(1));
-
- return config_id;
-}
-
-std::set<std::string> ClTemplateCast::get_headers_list() const
-{
- return std::set<std::string>{"helpers.h", "tile_helpers.h"};
-}
-
-Window ClTemplateCast::get_window() const
-{
- ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized");
-
- const unsigned int n0 = adjust_vec_size(16 / _dst->element_size(), _dst->dimension(0));
- Window win = calculate_max_window(*_dst, Steps(n0));
- return win.collapse(win, Window::DimZ);
-}
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.h
deleted file mode 100644
index 3adca4edc9..0000000000
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.h
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATECAST
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATECAST
-
-#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-class ClTemplateCast final : public IGpuTemplateComponentWriter
-{
-public:
- using Attributes = ClComponentCast::Attributes;
-
- /** Constructor
- *
- * @param[in] id Component id
- * @param[in] tensors Tensor arguments to the components
- * @param[in] attributes Component attributes
- */
- ClTemplateCast(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes);
- /** Prevent instances of this class from being copy constructed */
- ClTemplateCast(const ClTemplateCast &cast) = delete;
- /** Prevent instances of this class from being copied */
- ClTemplateCast &operator=(const ClTemplateCast &cast) = delete;
- /** Allow instances of this class to be move constructed */
- ClTemplateCast(ClTemplateCast &&cast) = default;
- /** Allow instances of this class to be moved */
- ClTemplateCast &operator=(ClTemplateCast &&cast) = default;
- /** Generate kernel component name */
- std::string get_name() const override;
- /** Generate kernel component code template
- *
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return std::string Component code
- */
- std::string get_component_code(const ComponentGroup &comp_group) const override;
- /** Declare all variables used by the component in the @p vtable
- *
- * @param[out] vtable Variable table
- * @param[in] comp_group Component group of which the component is a part of
- */
- void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
- /** Generate the tag look-up table used to instantiate the component code.
- *
- * @param[in] vtable Variable table
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return TagLUT Tag lookup table
- */
- TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
- /** Generate the build options used in the component
- *
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return CLBuildOptions Build options
- */
- CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override;
- /** Generate the component config id string used for tuning */
- std::string get_config_id() const override;
- /** Generate the header list used in the component */
- std::set<std::string> get_headers_list() const override;
- /** Generate the execution window for the component */
- Window get_window() const override;
-
-private:
- const ITensorInfo *_src;
- const ITensorInfo *_dst;
- Attributes _attributes;
-};
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATECAST */
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.cpp
deleted file mode 100644
index 8380620ab2..0000000000
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.cpp
+++ /dev/null
@@ -1,364 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "ClTemplateDepthwiseConv2d.h"
-
-#include "src/core/helpers/WindowHelpers.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-ClTemplateDepthwiseConv2d::ClTemplateDepthwiseConv2d(ComponentId id,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes,
- const Settings &settings)
- : IGpuTemplateComponentWriter{id, tensors},
- _src{},
- _weight{},
- _bias{},
- _dst{},
- _attributes{attributes},
- _settings{settings}
-{
- _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
- _weight = this->tensors().get_const_tensor(TensorType::ACL_SRC_1);
- if (this->tensors().get_const_tensor(TensorType::ACL_SRC_2))
- {
- _bias = this->tensors().get_const_tensor(TensorType::ACL_SRC_2);
- }
- _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
- ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _weight, _dst);
-}
-
-std::string ClTemplateDepthwiseConv2d::get_name() const
-{
- return "depthwise_conv2d";
-}
-
-std::string ClTemplateDepthwiseConv2d::get_component_code(const ComponentGroup &comp_group) const
-{
- ARM_COMPUTE_UNUSED(comp_group);
-
- constexpr int height_idx = 2; // Data Layout is NHWC
-
- std::string code = R"_(
-//------------------ START KERNEL {{meta_kernel_id}} ---------------------
-// IN_0(src) {{src}}
-// IN_1(wei) {{weight}}
-)_";
-
- if (_bias != nullptr && _bias->has_valid_id())
- {
- code += R"_(
-// IN_1(bia) {{bias}}
-)_";
- }
-
- code += R"_(
-// OUT(dst, accum) {{dst}}
-
-TILE(uint, M0, 1, g_dst_indirect_y);
-
-{
-#define _IWEI_WIDTH {{WEI_WIDTH}}
-#define _IWEI_HEIGHT {{WEI_HEIGHT}}
-#define _IDST_WIDTH {{arg_dst}}_w
-#define _IDST_HEIGHT {{arg_dst}}_h
-#define _IM0_A M0_A
-#define _IN0_A N0_A
-#define _IM0_B _IWEI_WIDTH
-#define _IN0_B N0
-#define _IBOUNDARY_CHECK (!((_IWEI_WIDTH == 1 && _IWEI_HEIGHT == 1 && {{PAD_LEFT}} == 0 && {{PAD_TOP}} == 0 && M0 == 1)))
-)_";
-
- code += R"_(
- const int yo = g_ind_2 % {{arg_dst}}_h;
- const int bout = g_ind_2 / {{arg_dst}}_h;
-)_";
-
- code += R"_(
-
- int xi = g_ind_1 * {{STRIDE_X}};
- int yi = yo * {{STRIDE_Y}};
- xi -= {{PAD_LEFT}};
- yi -= {{PAD_TOP}};
-
- LOOP_UNROLLING(int, i, 0, 1, M0,
- {
- {{dst}}[i].v = 0;
- })
-)_";
-
- if (_weight->dimension(height_idx) < 5)
- {
- code += R"_(
- LOOP_UNROLLING(int, yk, 0, 1, _IWEI_HEIGHT,
-)_";
- }
- else
- {
- code += R"_(
- for(int yk = 0; yk < _IWEI_HEIGHT; ++yk)
-)_";
- }
-
- code += R"_(
- {
- TILE({{SRC_DATA_TYPE}}, _IM0_A, _IN0_A, a);
-
- LOOP_UNROLLING(int, i, 0, 1, _IM0_A,
- {
- a[i].v = 0;
- })
-
- T_LOAD_NHWC_WITH_DILATION({{SRC_DATA_TYPE}}, 1, _IM0_A, _IN0_A, {{SRC_TENSOR_TYPE}}, {{src}}, bout, yi + yk * {{DILATION_Y}}, xi, (g_ind_0 / {{DEPTH_MULTIPLIER}}), {{src}}_w, {{src}}_h, {{DILATION_X}}, 1, _IBOUNDARY_CHECK, a);
-
- TILE({{WEI_DATA_TYPE}}, _IM0_B, _IN0_B, b);
-
- T_LOAD({{WEI_DATA_TYPE}}, _IM0_B, _IN0_B, {{WEI_TENSOR_TYPE}}, {{weight}}, g_ind_0, yk * _IM0_B, 1, {{weight}}_stride_y, b);
-
- LOOP_UNROLLING(int, m0, 0, 1, M0,
- {
- LOOP_UNROLLING(int, xk, 0, 1, _IWEI_WIDTH,
- {
-)_";
-
- if (!_settings.is_fma_available())
- {
- code += R"_(
- {{dst}}[m0].v += a[xk + m0].v * b[xk].v;
-)_";
- }
- else
- {
- code += R"_(
- {{dst}}[m0].v = fma(a[xk + m0].v, b[xk].v, {{dst}}[m0].v);
-)_";
- }
-
- code += R"_(
- })
- })
- }
-)_";
-
- if (_weight->dimension(height_idx) < 5)
- {
- code += R"_(
- )
-)_";
- }
-
- if (_bias && _bias->has_valid_id())
- {
- code += R"_(
- TILE({{BIA_DATA_TYPE}}, 1, N0, {{bias}});
-
- T_LOAD({{BIA_DATA_TYPE}}, 1, N0, BUFFER, {{bias}}, g_ind_0, 0, 0, 0, {{bias}});
-
- T_ELTWISE_BROADCAST_ADD_X({{ACC_DATA_TYPE}}, M0, N0, {{dst}}, {{bias}}, {{dst}});
-)_";
- }
-
- code += R"_(
- LOOP_UNROLLING(int, i, 0, 1, M0,
- {
- g_dst_indirect_y[i].v = (uint)min((int)(g_ind_1 + i), (int)({{arg_dst}}_w) - 1);
- g_dst_indirect_y[i].v += (int)(g_ind_2 % {{arg_dst}}_h) * (int)({{arg_dst}}_w);
- g_dst_indirect_y[i].v += (int)(g_ind_2 / {{arg_dst}}_h) * (int)({{arg_dst}}_w * {{arg_dst}}_h);
- })
-}
-//------------------ END KERNEL {{meta_kernel_id}} ---------------------
-)_";
-
- return code;
-}
-
-void ClTemplateDepthwiseConv2d::declare_variables(GpuKernelVariableTable &vtable,
- const ComponentGroup &comp_group) const
-{
- const GpuKernelArgumentInfo::Type input_type = _settings.export_input_to_cl_image()
- ? GpuKernelArgumentInfo::Type::Tensor_4D_t_Image
- : GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer;
-
- vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(input_type), "src");
-
- const GpuKernelArgumentInfo::Type weight_type = _settings.export_weights_to_cl_image()
- ? GpuKernelArgumentInfo::Type::Tensor_4D_t_Image
- : GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer;
-
- vtable.declare_variable(comp_group, _weight, GpuKernelArgumentInfo(weight_type), "weight");
-
- if (_bias != nullptr && _bias->has_valid_id()) // optional bias
- {
- vtable.declare_variable(comp_group, _bias, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Vector), "bias");
- }
- vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "dst");
-}
-
-TagLUT ClTemplateDepthwiseConv2d::get_tag_lut(const GpuKernelVariableTable &vtable,
- const ComponentGroup &comp_group) const
-{
- TagLUT lut{};
-
- // Arguments and global shared variables
- lut["src"] = vtable.get_variable(_src);
- lut["weight"] = vtable.get_variable(_weight);
-
- if (_bias != nullptr && _bias->has_valid_id()) // optional bias
- {
- lut["bias"] = vtable.get_variable(_bias);
- lut["BIA_DATA_TYPE"] = get_cl_type_from_data_type(_bias->data_type());
- }
- lut["dst"] = vtable.get_variable(_dst);
-
- const auto dst_argument = vtable.get_variable(comp_group.get_any_dst_tensor());
- lut["arg_dst"] = dst_argument.uniq_name;
-
- // Local build options
- lut["meta_kernel_id"] = id();
- lut["ACC_DATA_TYPE"] = _src->data_type();
- lut["SRC_DATA_TYPE"] = _src->data_type();
- lut["WEI_DATA_TYPE"] = _weight->data_type();
-
- switch (vtable.get_variable(_src).kernel_argument_info.type)
- {
- case GpuKernelArgumentInfo::Type::Image_Export_To_ClImage2D:
- case GpuKernelArgumentInfo::Type::Image_3D_Export_To_ClImage2D:
- case GpuKernelArgumentInfo::Type::Tensor_4D_t_Image:
- lut["SRC_TENSOR_TYPE"] = "IMAGE";
- break;
- default:
- lut["SRC_TENSOR_TYPE"] = "BUFFER";
- break;
- }
-
- switch (vtable.get_variable(_weight).kernel_argument_info.type)
- {
- case GpuKernelArgumentInfo::Type::Image_Export_To_ClImage2D:
- case GpuKernelArgumentInfo::Type::Image_3D_Export_To_ClImage2D:
- case GpuKernelArgumentInfo::Type::Tensor_4D_t_Image:
- lut["WEI_TENSOR_TYPE"] = "IMAGE";
- break;
- default:
- lut["WEI_TENSOR_TYPE"] = "BUFFER";
- break;
- }
-
- // Data Layout is NHWC
- constexpr int width_idx = 1;
- constexpr int height_idx = 2;
-
- lut["WEI_WIDTH"] = _weight->dimension(width_idx);
- lut["WEI_HEIGHT"] = _weight->dimension(height_idx);
-
- lut["STRIDE_X"] = _attributes.stride().x();
- lut["STRIDE_Y"] = _attributes.stride().y();
-
- lut["PAD_LEFT"] = _attributes.pad().left;
- lut["PAD_TOP"] = _attributes.pad().top;
-
- lut["DILATION_X"] = _attributes.dilation().x();
- lut["DILATION_Y"] = _attributes.dilation().y();
-
- lut["DEPTH_MULTIPLIER"] = _attributes.depth_multiplier();
-
- return lut;
-}
-
-CLBuildOptions ClTemplateDepthwiseConv2d::get_build_options(const ComponentGroup &comp_group) const
-{
- ARM_COMPUTE_UNUSED(comp_group);
-
- constexpr unsigned int width_idx = 1; // Data Layout is NHWC
-
- const unsigned int n0 = _settings.n0();
- const unsigned int m0 = _settings.m0();
- const unsigned int m0_a = _weight->dimension(width_idx) + m0 - 1;
- const unsigned int n0_a = _attributes.depth_multiplier() > 1 ? 1 : n0;
- const unsigned int partial_store_n0 = _dst->dimension(0) % n0;
-
- CLBuildOptions build_opts{};
-
- if (_settings.fast_relaxed_math())
- {
- build_opts.add_option("-cl-fast-relaxed-math");
- }
- else
- {
- // -cl-fast-relaxed-math also sets -cl-finite-math-only and -cl-unsafe-math-optimizations
- // to disable -cl-finite-math-only, we only include -cl-unsafe-math-optimizations
- build_opts.add_option("-cl-unsafe-math-optimizations");
- }
-
- build_opts.add_option("-DN0=" + support::cpp11::to_string(n0));
- build_opts.add_option("-DM0=" + support::cpp11::to_string(m0));
- build_opts.add_option("-DN0_A=" + support::cpp11::to_string(n0_a));
- build_opts.add_option("-DM0_A=" + support::cpp11::to_string(m0_a));
- build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0));
-
- return build_opts;
-}
-
-std::string ClTemplateDepthwiseConv2d::get_config_id() const
-{
- std::string config_id{};
-
- config_id += support::cpp11::to_string(_src->dimension(0));
- config_id += "_";
- config_id += support::cpp11::to_string(_src->dimension(1));
- config_id += "_";
- config_id += support::cpp11::to_string(_src->dimension(2));
- config_id += "_";
- config_id += support::cpp11::to_string(_dst->dimension(0));
- config_id += "_";
- config_id += support::cpp11::to_string(_dst->dimension(1));
- config_id += "_";
- config_id += support::cpp11::to_string(_dst->dimension(2));
- config_id += "_";
- config_id += string_from_data_type(_src->data_type());
-
- return config_id;
-}
-
-std::set<std::string> ClTemplateDepthwiseConv2d::get_headers_list() const
-{
- return std::set<std::string>{"helpers.h", "tile_helpers.h"};
-}
-
-Window ClTemplateDepthwiseConv2d::get_window() const
-{
- ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized");
-
- Window win = calculate_max_window(*_dst, Steps(_settings.n0(), _settings.m0()));
- return win.collapse(win, Window::DimZ);
-}
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.h
deleted file mode 100644
index 5d04c687c3..0000000000
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.h
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEDEPTHWISECONV2D
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEDEPTHWISECONV2D
-
-#include "arm_compute/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.h"
-
-#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-class ClTemplateDepthwiseConv2d final : public IGpuTemplateComponentWriter
-{
-public:
- using Attributes = ClComponentDepthwiseConv2d::Attributes;
- using Settings = ClComponentDepthwiseConv2d::Settings;
- /** Constructor
- *
- * Similar to @ref ClComponentDepthwiseConv2d::validate()
- *
- * @param[in] id Component id
- * @param[in] tensors Tensor arguments to the components
- * @param[in] attributes Component attributes
- * @param[in] settings Component settings
- */
- ClTemplateDepthwiseConv2d(ComponentId id,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes,
- const Settings &settings);
- /** Prevent instances of this class from being copy constructed */
- ClTemplateDepthwiseConv2d(const ClTemplateDepthwiseConv2d &depthwise_conv2d) = delete;
- /** Prevent instances of this class from being copied */
- ClTemplateDepthwiseConv2d &operator=(const ClTemplateDepthwiseConv2d &depthwise_conv2d) = delete;
- /** Allow instances of this class to be move constructed */
- ClTemplateDepthwiseConv2d(ClTemplateDepthwiseConv2d &&depthwise_conv2d) = default;
- /** Allow instances of this class to be moved */
- ClTemplateDepthwiseConv2d &operator=(ClTemplateDepthwiseConv2d &&depthwise_conv2d) = default;
- /** Generate kernel component name */
- std::string get_name() const override;
- /** Generate kernel component code template
- *
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return std::string Component code
- */
- std::string get_component_code(const ComponentGroup &comp_group) const override;
- /** Declare all variables used by the component in the @p vtable
- *
- * @param[out] vtable Variable table
- * @param[in] comp_group Component group of which the component is a part of
- */
- void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
- /** Generate the tag look-up table used to instantiate the component code.
- *
- * @param[in] vtable Variable table
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return TagLUT Tag lookup table
- */
- TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
- /** Generate the build options used in the component
- *
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return CLBuildOptions Build options
- */
- CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override;
- /** Generate the component config id string used for tuning */
- std::string get_config_id() const override;
- /** Generate the header list used in the component */
- std::set<std::string> get_headers_list() const override;
- /** Generate the execution window for the component */
- Window get_window() const override;
-
-private:
- const ITensorInfo *_src;
- const ITensorInfo *_weight;
- const ITensorInfo *_bias;
- const ITensorInfo *_dst;
- Attributes _attributes;
- Settings _settings;
-};
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEDEPTHWISECONV2D */
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp
deleted file mode 100644
index f6a7a58d1d..0000000000
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp
+++ /dev/null
@@ -1,393 +0,0 @@
-/*
- * Copyright (c) 2022-2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "ClTemplateDirectConv2d.h"
-
-#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "arm_compute/core/utils/StringUtils.h"
-
-#include "src/core/helpers/WindowHelpers.h"
-#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
-#include "support/StringSupport.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-ClTemplateDirectConv2d::ClTemplateDirectConv2d(ComponentId id,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes,
- const Settings &settings)
- : IGpuTemplateComponentWriter{id, tensors},
- _src{},
- _weight{},
- _bias{},
- _dst{},
- _attributes{attributes},
- _settings{settings}
-{
- _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
- _weight = this->tensors().get_const_tensor(TensorType::ACL_SRC_1);
- if (this->tensors().get_const_tensor(TensorType::ACL_SRC_2))
- {
- _bias = this->tensors().get_const_tensor(TensorType::ACL_SRC_2);
- }
- _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
- ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _weight, _dst);
-}
-
-std::string ClTemplateDirectConv2d::get_name() const
-{
- return "direct_conv2d";
-}
-
-std::string ClTemplateDirectConv2d::get_component_code(const ComponentGroup &comp_group) const
-{
- ARM_COMPUTE_UNUSED(comp_group);
-
- const auto channel_idx = get_data_layout_dimension_index(_src->data_layout(), DataLayoutDimension::CHANNEL);
- const auto k0 = adjust_vec_size(_settings.direct_conv_descriptor().k0, _src->dimension(channel_idx));
- const bool leftover_loop = (_src->dimension(channel_idx) % k0) != 0;
-
- std::string code = R"_(
-//------------------ START KERNEL {{meta_kernel_id}} ---------------------
-// IN_0(src) {{src}}
-// IN_1(wei) {{weight}}
-)_";
- if (_bias && _bias->has_valid_id())
- {
- code += R"_(
-// IN_1(bia) {{bias}}
-)_";
- }
- code += R"_(
-// OUT(dst, accum) {{dst}}
-
-TILE(uint, M0, 1, g_dst_indirect_y);
-
-{
-#define _IWEI_WIDTH {{WEI_WIDTH}}
-#define _IWEI_HEIGHT {{WEI_HEIGHT}}
-#define _ISRC_WIDTH {{SRC_WIDTH}}
-#define _ISRC_HEIGHT {{SRC_HEIGHT}}
-#define _ISRC_CHANNELS {{SRC_CHANNELS}}
-#define _IDST_WIDTH {{DST_WIDTH}}
-#define _IDST_HEIGHT {{DST_HEIGHT}}
-#define _IDST_CHANNELS {{DST_CHANNELS}}
-#define _IY_MULTIPLIER (_IWEI_WIDTH * _IWEI_HEIGHT)
-
- TILE(int, M0, 1, xi);
- TILE(int, M0, 1, yi);
-
- // Convert the linear index to coordinate
- LOOP_UNROLLING(int, i, 0, 1, M0,
- {
- xi[0].s[i] = ((g_ind_1 + i) % _IDST_WIDTH) * {{STRIDE_X}};
- yi[0].s[i] = ((g_ind_1 + i) / _IDST_WIDTH) * {{STRIDE_Y}};
- xi[0].s[i] -= {{PAD_LEFT}};
- yi[0].s[i] -= {{PAD_TOP}};
- })
-
- LOOP_UNROLLING(int, i, 0, 1, M0,
- {
- {{dst}}[i].v = 0;
- })
-
- for(int i = 0; i < (_IWEI_WIDTH * _IWEI_HEIGHT); ++i)
- {
- int xk = i % _IWEI_WIDTH;
- int yk = i / _IWEI_WIDTH;
-
- TILE(int, 1, M0, my);
-
- LOOP_UNROLLING(int, i, 0, 1, M0,
- {
- int x_s = xi[0].s[i] + xk;
- int y_s = yi[0].s[i] + yk;
- my[0].s[i] = x_s + y_s *_ISRC_WIDTH;
- my[0].s[i] = my[0].s[i] + g_ind_2 * (int)(_ISRC_WIDTH * _ISRC_HEIGHT);
- my[0].s[i] = select(-1, my[0].s[i], x_s >= 0);
- my[0].s[i] = select(-1, my[0].s[i], x_s < _ISRC_WIDTH);
- my[0].s[i] = select(-1, my[0].s[i], y_s >= 0);
- my[0].s[i] = select(-1, my[0].s[i], y_s < _ISRC_HEIGHT);
- })
-
- int ck = 0;
- for(; ck <= (_ISRC_CHANNELS - K0); ck += K0)
- {
- TILE({{SRC_DATA_TYPE}}, M0, K0, a);
- TILE({{WEI_DATA_TYPE}}, N0, K0, b);
-
- LOOP_UNROLLING(int, i, 0, 1, M0,
- {
- a[i].v = {{ZERO_VALUE}};
- })
-
- LOOP_UNROLLING(int, i, 0, 1, N0,
- {
- b[i].v = {{ZERO_VALUE}};
- })
-
- T_LOAD2D_INDIRECT({{SRC_DATA_TYPE}}, M0, K0, {{SRC_TENSOR_TYPE}}, {{src}}, ck, {{src}}_stride_y, my, a);
-
- T_LOAD({{WEI_DATA_TYPE}}, N0, K0, {{WEI_TENSOR_TYPE}}, {{weight}}, ck, g_ind_0 * _IY_MULTIPLIER + i, _IY_MULTIPLIER, {{weight}}_stride_y, b);
-
- T_MMUL({{SRC_DATA_TYPE}}, {{WEI_DATA_TYPE}}, {{ACC_DATA_TYPE}}, M0, N0, K0, NT, T, a, b, {{dst}});
- }
-)_";
-
- if (leftover_loop)
- {
- code += R"_(
- for(; ck < _ISRC_CHANNELS; ++ck)
- {
- TILE({{SRC_DATA_TYPE}}, M0, 1, a);
- TILE({{WEI_DATA_TYPE}}, N0, 1, b);
-
- LOOP_UNROLLING(int, i, 0, 1, M0,
- {
- a[i].v = {{ZERO_VALUE}};
- })
-
- LOOP_UNROLLING(int, i, 0, 1, N0,
- {
- b[i].v = {{ZERO_VALUE}};
- })
-
- T_LOAD2D_INDIRECT({{SRC_DATA_TYPE}}, M0, 1, {{SRC_TENSOR_TYPE}}, {{src}}, ck, {{src}}_stride_y, my, a);
-
- T_LOAD({{WEI_DATA_TYPE}}, N0, 1, BUFFER, {{weight}}, ck, g_ind_0 * _IY_MULTIPLIER + i, _IY_MULTIPLIER, {{weight}}_stride_y, b);
-
- T_MMUL({{SRC_DATA_TYPE}}, {{WEI_DATA_TYPE}}, {{ACC_DATA_TYPE}}, M0, N0, 1, NT, T, a, b, {{dst}});
- }
- )_";
- }
-
- code += R"_(
-#undef _I_WEI_WIDTH
-#undef _I_WEI_HEIGHT
-#undef _ISRC_WIDTH
-#undef _ISRC_HEIGHT
-#undef _ISRC_CHANNELS
-#undef _IDST_WIDTH
-#undef _IDST_HEIGHT
-#undef _IDST_CHANNELS
-#undef _IY_MULTIPLIER
-
- }
-)_";
-
- if (_bias && _bias->has_valid_id())
- {
- code += R"_(
- TILE({{BIA_DATA_TYPE}}, 1, N0, bias0);
-
- T_LOAD({{BIA_DATA_TYPE}}, 1, N0, BUFFER, {{bias}}, g_ind_0, 0, 1, 0, bias0);
-
- T_ELTWISE_BROADCAST_ADD_X({{ACC_DATA_TYPE}}, M0, N0, {{dst}}, bias0, {{dst}});
- )_";
- }
-
- code += R"_(
- LOOP_UNROLLING(int, i, 0, 1, M0,
- {
- g_dst_indirect_y[i].v = (uint)min(g_ind_1 + i, (int)({{DST_WIDTH}} * {{DST_HEIGHT}}) - 1);
- g_dst_indirect_y[i].v += g_ind_2 * (int)({{DST_WIDTH}} * {{DST_HEIGHT}});
- })
-}
-//------------------ END KERNEL {{meta_kernel_id}} ---------------------
-)_";
- return code;
-}
-
-void ClTemplateDirectConv2d::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
-{
- vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "src");
-
- const GpuKernelArgumentInfo::Type weight_type = _settings.export_to_cl_image()
- ? GpuKernelArgumentInfo::Type::Tensor_4D_t_Image
- : GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer;
- vtable.declare_variable(comp_group, _weight, GpuKernelArgumentInfo(weight_type), "weight");
-
- if (_bias && _bias->has_valid_id()) // optional bias
- {
- vtable.declare_variable(comp_group, _bias, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Vector), "bias");
- }
- vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(common_tensor_type), "dst");
-}
-
-TagLUT ClTemplateDirectConv2d::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
-{
- TagLUT lut{};
- // Arguments and global shared variables
- lut["src"] = vtable.get_variable(_src);
- lut["weight"] = vtable.get_variable(_weight);
-
- if (_bias && _bias->has_valid_id()) // optional bias
- {
- lut["bias"] = vtable.get_variable(_bias);
- lut["BIA_DATA_TYPE"] = get_cl_type_from_data_type(_bias->data_type());
- }
- lut["dst"] = vtable.get_variable(_dst);
-
- const auto dst_argument = vtable.get_variable(comp_group.get_any_dst_tensor());
- lut["arg_dst"] = dst_argument.uniq_name;
-
- // Local build options
- lut["meta_kernel_id"] = id();
- lut["ACC_DATA_TYPE"] = _src->data_type();
- lut["SRC_DATA_TYPE"] = _src->data_type();
- lut["WEI_DATA_TYPE"] = _weight->data_type();
-
- lut["SRC_TENSOR_TYPE"] = "BUFFER";
- switch (vtable.get_variable(_weight).kernel_argument_info.type)
- {
- case GpuKernelArgumentInfo::Type::Image_Export_To_ClImage2D:
- case GpuKernelArgumentInfo::Type::Image_3D_Export_To_ClImage2D:
- case GpuKernelArgumentInfo::Type::Tensor_4D_t_Image:
- {
- lut["WEI_TENSOR_TYPE"] = "IMAGE";
- break;
- }
- default:
- {
- lut["WEI_TENSOR_TYPE"] = "BUFFER";
- break;
- }
- }
- const auto width_idx = 1;
- const auto height_idx = 2;
- const auto channel_idx = 0;
-
- lut["SRC_WIDTH"] = _src->dimension(width_idx);
- lut["SRC_HEIGHT"] = _src->dimension(height_idx);
- lut["SRC_CHANNELS"] = _src->dimension(channel_idx);
-
- lut["WEI_WIDTH"] = _weight->dimension(width_idx);
- lut["WEI_HEIGHT"] = _weight->dimension(height_idx);
-
- lut["DST_WIDTH"] = _dst->dimension(width_idx);
- lut["DST_HEIGHT"] = _dst->dimension(height_idx);
- lut["DST_CHANNELS"] = _dst->dimension(channel_idx);
-
- lut["STRIDE_X"] = _attributes.stride().x();
- lut["STRIDE_Y"] = _attributes.stride().y();
-
- lut["PAD_LEFT"] = _attributes.pad().left;
- lut["PAD_TOP"] = _attributes.pad().top;
-
- lut["ZERO_VALUE"] = 0;
-
- return lut;
-}
-
-CLBuildOptions ClTemplateDirectConv2d::get_build_options(const ComponentGroup &comp_group) const
-{
- const unsigned int channel_idx = get_data_layout_dimension_index(_src->data_layout(), DataLayoutDimension::CHANNEL);
-
- const auto root_window = comp_group.get_root_component()->template_writer()->get_window();
- const unsigned int n0 = root_window.x().step();
- const unsigned int m0 = root_window.y().step();
- const unsigned int k0 = adjust_vec_size(_settings.direct_conv_descriptor().k0, _src->dimension(channel_idx));
- const unsigned int partial_store_n0 = _dst->dimension(0) % n0;
-
- CLBuildOptions build_opts{};
- if (_settings.fast_relaxed_math())
- {
- build_opts.add_option("-cl-fast-relaxed-math");
- }
- else
- {
- // -cl-fast-relaxed-math also sets -cl-finite-math-only and -cl-unsafe-math-optimizations
- // to disable -cl-finite-math-only, we only include -cl-unsafe-math-optimizations
- build_opts.add_option("-cl-unsafe-math-optimizations");
- }
-
- build_opts.add_option("-DN0=" + support::cpp11::to_string(n0));
- build_opts.add_option("-DM0=" + support::cpp11::to_string(m0));
- build_opts.add_option("-DK0=" + support::cpp11::to_string(k0));
- build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0));
-
- return build_opts;
-}
-
-std::string ClTemplateDirectConv2d::get_config_id() const
-{
- const DataType data_type = _src->data_type();
- const DataLayout data_layout = _src->data_layout();
-
- const unsigned int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
- const unsigned int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
-
- const unsigned int kernel_size = _weight->dimension(width_idx);
-
- std::string config_id{};
- config_id += lower_string(string_from_data_type(data_type));
- config_id += "_";
- config_id += support::cpp11::to_string(kernel_size);
- config_id += "_";
- config_id += support::cpp11::to_string(_attributes.stride().x());
- config_id += "_";
- config_id += support::cpp11::to_string(_attributes.stride().y());
- config_id += "_";
- config_id += support::cpp11::to_string(_dst->dimension(width_idx));
- config_id += "_";
- config_id += support::cpp11::to_string(_dst->dimension(height_idx));
- config_id += "_";
- config_id += lower_string(string_from_data_layout(data_layout));
- return config_id;
-}
-
-std::set<std::string> ClTemplateDirectConv2d::get_headers_list() const
-{
- return std::set<std::string>{"helpers.h", "tile_helpers.h"};
-}
-
-Window ClTemplateDirectConv2d::get_window() const
-{
- ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized");
-
- const auto output_shape = _dst->tensor_shape();
- const auto desc = _settings.direct_conv_descriptor();
-
- const unsigned int n0 = adjust_vec_size(desc.n0, output_shape[0]);
- const unsigned int m0 = adjust_vec_size(desc.m0, output_shape[1] * output_shape[2]);
-
- // Create and configure kernel window
- Window win = calculate_max_window(output_shape, Steps(n0, m0));
-
- const size_t dim_y_collapsed = ceil_to_multiple(output_shape[1] * output_shape[2], m0);
- win.set(Window::DimY, Window::Dimension(0, dim_y_collapsed, m0));
- win.set(Window::DimZ, Window::Dimension(0, output_shape.total_size_upper(3), 1));
-
- return win;
-}
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h
deleted file mode 100644
index 03c8cd2f15..0000000000
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Copyright (c) 2022-2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEDIRECTCONV2D
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEDIRECTCONV2D
-
-#include "arm_compute/core/experimental/Types.h"
-#include "arm_compute/dynamic_fusion/sketch/attributes/Conv2dAttributes.h"
-
-#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-class ClTemplateDirectConv2d final : public IGpuTemplateComponentWriter
-{
-public:
- using Attributes = ClComponentDirectConv2d::Attributes;
- using Settings = ClComponentDirectConv2d::Settings;
- /** Constructor
- *
- * Similar to @ref ClComponentDirectConv2d::validate()
- *
- * @param[in] id Component id
- * @param[in] tensors Tensor arguments to the components
- * @param[in] attributes Component attributes
- * @param[in] settings Component settings
- */
- ClTemplateDirectConv2d(ComponentId id,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes,
- const Settings &settings);
- /** Destructor */
- ~ClTemplateDirectConv2d() override = default;
- /** Prevent instances of this class from being copy constructed */
- ClTemplateDirectConv2d(const ClTemplateDirectConv2d &direct_conv2d) = delete;
- /** Prevent instances of this class from being copied */
- ClTemplateDirectConv2d &operator=(const ClTemplateDirectConv2d &direct_conv2d) = delete;
- /** Allow instances of this class to be move constructed */
- ClTemplateDirectConv2d(ClTemplateDirectConv2d &&direct_conv2d) = default;
- /** Allow instances of this class to be moved */
- ClTemplateDirectConv2d &operator=(ClTemplateDirectConv2d &&direct_conv2d) = default;
- /** Generate kernel component name */
- std::string get_name() const override;
- /** Generate kernel component code template
- *
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return std::string Component code
- */
- std::string get_component_code(const ComponentGroup &comp_group) const override;
- /** Declare all variables used by the component in the @p vtable
- *
- * @param[out] vtable Variable table
- * @param[in] comp_group Component group of which the component is a part of
- */
- void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
- /** Generate the tag look-up table used to instantiate the component code.
- *
- * @param[in] vtable Variable table
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return TagLUT Tag lookup table
- */
- TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
- /** Generate the build options used in the component
- *
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return CLBuildOptions Build options
- */
- CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override;
- /** Generate the component config id string used for tuning */
- std::string get_config_id() const override;
- /** Generate the header list used in the component */
- std::set<std::string> get_headers_list() const override;
- /** Generate the execution window for the component */
- Window get_window() const override;
-
-private:
- const ITensorInfo *_src;
- const ITensorInfo *_weight;
- const ITensorInfo *_bias;
- const ITensorInfo *_dst;
- Attributes _attributes;
- Settings _settings;
-};
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEDIRECTCONV2D */
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp
deleted file mode 100644
index 78bff3c3f3..0000000000
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp
+++ /dev/null
@@ -1,274 +0,0 @@
-/*
- * Copyright (c) 2022-2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "ClTemplateElementwiseBinary.h"
-
-#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "arm_compute/core/utils/StringUtils.h"
-
-#include "src/core/helpers/WindowHelpers.h"
-#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
-#include "support/StringSupport.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-constexpr unsigned int vector_size_byte_opencl = 16;
-
-ClTemplateElementwiseBinary::ClTemplateElementwiseBinary(ComponentId id,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes)
- : IGpuTemplateComponentWriter{id, tensors}, _lhs{}, _rhs{}, _dst{}, _attributes{attributes}
-{
- _lhs = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
- _rhs = this->tensors().get_const_tensor(TensorType::ACL_SRC_1);
- _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
- ARM_COMPUTE_ERROR_ON_NULLPTR(_lhs, _rhs, _dst);
-}
-
-std::string ClTemplateElementwiseBinary::get_name() const
-{
- return "elementwise_binary";
-}
-
-std::string ClTemplateElementwiseBinary::get_component_code(const ComponentGroup &comp_group) const
-{
- std::string code;
- const bool is_root = (comp_group.get_root_component()->id() == this->id());
- const bool is_lhs_input = comp_group.is_input_tensor(_lhs);
- const bool is_rhs_input = comp_group.is_input_tensor(_rhs);
-
- code =
- R"_(
- //------------------ START KERNEL {{meta_kernel_id}} {{ELTWISE_OP}} ---------------------
-)_";
-
- if (is_root)
- {
- code +=
- R"_(
- TILE(uint, M0, 1, g_dst_indirect_y);
-)_";
- }
-
- if (is_lhs_input)
- {
- code +=
- R"_(
- TILE({{DATA_TYPE}}, {{lhs_m0}}, N0, {{lhs}});
-)_";
- }
-
- if (is_rhs_input)
- {
- code +=
- R"_(
- TILE({{DATA_TYPE}}, {{rhs_m0}}, N0, {{rhs}});
-)_";
- }
-
- code +=
- R"_(
- {
-)_";
-
- if (is_lhs_input)
- {
- code +=
- R"_(
- {{lhs}}_offset_first_element_in_bytes += g_ind_2 * {{lhs}}_stride_w;
- T_LOAD({{DATA_TYPE}}, {{lhs_m0}}, {{lhs_n0}}, BUFFER, {{lhs}}, {{lhs_start_ind_0}}, {{lhs_start_ind_1}}, 1, {{lhs}}_stride_y, {{lhs}});
-)_";
- }
-
- if (is_rhs_input)
- {
- code +=
- R"_(
- {{rhs}}_offset_first_element_in_bytes += g_ind_2 * {{rhs}}_stride_w;
- T_LOAD({{DATA_TYPE}}, {{rhs_m0}}, {{rhs_n0}}, BUFFER, {{rhs}}, {{rhs_start_ind_0}}, {{rhs_start_ind_1}}, 1, {{rhs}}_stride_y, {{rhs}});
-)_";
- }
-
- code +=
- R"_(
- T_ELTWISE_{{BROADCAST_OP}}{{ELTWISE_OP}}({{DATA_TYPE}}, M0, N0, {{lhs}}, {{rhs}}, {{dst}});
-)_";
-
- if (is_root)
- {
- // Calculate the destination indirect Y
- code +=
- R"_(
- LOOP_UNROLLING(int, i, 0, 1, M0,
- {
- g_dst_indirect_y[i].v = (uint)min(g_ind_1 + i, (int)({{arg_dst}}_w * {{arg_dst}}_h) - 1);
- g_dst_indirect_y[i].v += g_ind_2 * (int)({{arg_dst}}_w * {{arg_dst}}_h);
- })
-)_";
- }
-
- code +=
- R"_(
- }
- //------------------ END KERNEL {{meta_kernel_id}} {{ELTWISE_OP}} ---------------------
-)_";
-
- return code;
-}
-
-void ClTemplateElementwiseBinary::declare_variables(GpuKernelVariableTable &vtable,
- const ComponentGroup &comp_group) const
-{
- vtable.declare_variable(comp_group, _lhs, GpuKernelArgumentInfo(common_tensor_type), "lhs");
-
- vtable.declare_variable(comp_group, _rhs, GpuKernelArgumentInfo(common_tensor_type), "rhs");
-
- vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(common_tensor_type), "dst");
-}
-
-TagLUT ClTemplateElementwiseBinary::get_tag_lut(const GpuKernelVariableTable &vtable,
- const ComponentGroup &comp_group) const
-{
- TagLUT lut{};
-
- // Local build options
- lut["meta_kernel_id"] = id();
- lut["DATA_TYPE"] = get_cl_type_from_data_type(_lhs->data_type());
- // Arguments and global shared variables
-
- lut["lhs"] = vtable.get_variable(_lhs);
- lut["rhs"] = vtable.get_variable(_rhs);
- lut["dst"] = vtable.get_variable(_dst);
- lut["arg_dst"] = vtable.get_variable(comp_group.get_any_dst_tensor());
-
- switch (_attributes.operation())
- {
- case Attributes::ElementwiseOp::Add:
- lut["ELTWISE_OP"] = "ADD";
- break;
- case Attributes::ElementwiseOp::Sub:
- lut["ELTWISE_OP"] = "SUB";
- break;
- case Attributes::ElementwiseOp::Mul:
- lut["ELTWISE_OP"] = "MUL";
- break;
- default:
- ARM_COMPUTE_ERROR("Arithmetic Operation not supported");
- }
-
- ARM_COMPUTE_ERROR_ON(comp_group.is_intermediate_tensor(_lhs) &&
- detail::have_different_dimensions(_lhs->tensor_shape(), _dst->tensor_shape(), 0));
- ARM_COMPUTE_ERROR_ON(comp_group.is_intermediate_tensor(_rhs) &&
- detail::have_different_dimensions(_rhs->tensor_shape(), _dst->tensor_shape(), 0));
-
- // Set broadcast parameters
- // PRE: All tensors are broadcast-compatible
- const auto &lhs_dims = _lhs->tensor_shape();
- const auto &rhs_dims = _rhs->tensor_shape();
- const auto &dst_dims = _dst->tensor_shape();
-
- const auto lhs_broadcast_x = dst_dims[0] != 1 && lhs_dims[0] == 1;
- const auto rhs_broadcast_x = dst_dims[0] != 1 && rhs_dims[0] == 1;
- const auto lhs_broadcast_y = dst_dims[1] != 1 && lhs_dims[1] == 1;
- const auto rhs_broadcast_y = dst_dims[1] != 1 && rhs_dims[1] == 1;
- const auto lhs_broadcast_z = dst_dims[2] != 1 && lhs_dims[2] == 1;
- const auto rhs_broadcast_z = dst_dims[2] != 1 && rhs_dims[2] == 1;
-
- const auto lhs_broadcast_yz = lhs_broadcast_y && lhs_broadcast_z;
- const auto rhs_broadcast_yz = rhs_broadcast_y && rhs_broadcast_z;
-
- lut["lhs_n0"] = (lhs_broadcast_x) ? "1" : "N0";
- lut["lhs_start_ind_0"] = (lhs_broadcast_x) ? "0" : "g_ind_0";
- lut["rhs_n0"] = (rhs_broadcast_x) ? "1" : "N0";
- lut["rhs_start_ind_0"] = (rhs_broadcast_x) ? "0" : "g_ind_0";
-
- lut["lhs_m0"] = (lhs_broadcast_yz) ? "1" : "M0";
- lut["lhs_start_ind_1"] = (lhs_broadcast_yz) ? "0" : "g_ind_1";
- lut["rhs_m0"] = (rhs_broadcast_yz) ? "1" : "M0";
- lut["rhs_start_ind_1"] = (rhs_broadcast_yz) ? "0" : "g_ind_1";
-
- lut["BROADCAST_OP"] = (lhs_broadcast_yz) ? "BROADCAST_LHS_X_" : (rhs_broadcast_yz) ? "BROADCAST_RHS_X_" : "";
-
- return lut;
-}
-
-CLBuildOptions ClTemplateElementwiseBinary::get_build_options(const ComponentGroup &comp_group) const
-{
- CLBuildOptions build_opts{};
- /// NOTE: For now tile sizes (n0, m0) are set by the execution window. This may change in the future
- const auto root_window = comp_group.get_root_component()->template_writer()->get_window();
- const unsigned int n0 = root_window.x().step();
- const unsigned int m0 = root_window.y().step();
- const unsigned int partial_store_n0 = _dst->dimension(0) % n0;
-
- build_opts.add_option("-DM0=" + support::cpp11::to_string(m0));
- build_opts.add_option("-DN0=" + support::cpp11::to_string(n0));
- build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(_lhs->data_type()));
- build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0));
-
- return build_opts;
-}
-
-std::string ClTemplateElementwiseBinary::get_config_id() const
-{
- std::string config_id{};
- config_id += lower_string(string_from_data_type(_dst->data_type()));
- config_id += "_";
- config_id += support::cpp11::to_string(_dst->dimension(0));
- config_id += "_";
- config_id += support::cpp11::to_string(_dst->dimension(1));
- config_id += "_";
- config_id += lower_string(string_from_data_layout(_dst->data_layout()));
-
- return config_id;
-}
-
-std::set<std::string> ClTemplateElementwiseBinary::get_headers_list() const
-{
- return std::set<std::string>{"helpers.h", "tile_helpers.h"};
-}
-
-Window ClTemplateElementwiseBinary::get_window() const
-{
- ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized");
-
- TensorShape output_shape = _dst->tensor_shape();
- // Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) and upper dimensions unchanged
- // This is in line with the collapsing convention used by operators like Conv2d
- output_shape.collapse(2U, 1U);
- const unsigned int num_elems_processed_per_iteration =
- adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
- Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
-
- return win;
-}
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.h
deleted file mode 100644
index 991c0eca44..0000000000
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.h
+++ /dev/null
@@ -1,115 +0,0 @@
-/*
- * Copyright (c) 2022-2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEELEMENTWISEBINARY
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEELEMENTWISEBINARY
-
-#include "arm_compute/core/experimental/Types.h"
-
-#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-class ClTemplateElementwiseBinary final : public IGpuTemplateComponentWriter
-{
-public:
- using Attributes = ClComponentElementwiseBinary::Attributes;
-
- /** Constructor
- *
- * Similar to @ref ClComponentElementwiseBinary::validate()
- *
- * @param[in] id Component id
- * @param[in] tensors Tensor arguments to the components
- * @param[in] attributes Component attributes
- */
- ClTemplateElementwiseBinary(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes);
- /** Prevent instances of this class from being copy constructed */
- ClTemplateElementwiseBinary(const ClTemplateElementwiseBinary &elementwise) = delete;
- /** Prevent instances of this class from being copied */
- ClTemplateElementwiseBinary &operator=(const ClTemplateElementwiseBinary &elementwise) = delete;
- /** Allow instances of this class to be move constructed */
- ClTemplateElementwiseBinary(ClTemplateElementwiseBinary &&elementwise) = default;
- /** Allow instances of this class to be moved */
- ClTemplateElementwiseBinary &operator=(ClTemplateElementwiseBinary &&elementwise) = default;
-
- /** Generate kernel component name */
- std::string get_name() const override;
-
- /** Generate kernel component code template
- *
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return std::string Component code
- */
- std::string get_component_code(const ComponentGroup &comp_group) const override;
-
- /** Declare all variables used by the component in the @p vtable
- *
- * @param[out] vtable Variable table
- * @param[in] comp_group Component group of which the component is a part of
- */
- void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
-
- /** Generate the tag look-up table used to instantiate the component code.
- *
- * @param[in] vtable Variable table
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return TagLUT Tag lookup table
- */
- TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
-
- /** Generate the build options used in the component
- *
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return CLBuildOptions Build options
- */
- CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override;
-
- /** Generate the component config id string used for tuning */
- std::string get_config_id() const override;
-
- /** Generate the header list used in the component */
- std::set<std::string> get_headers_list() const override;
-
- /** Generate the execution window for the component */
- Window get_window() const override;
-
-private:
- const ITensorInfo *_lhs;
- const ITensorInfo *_rhs;
- const ITensorInfo *_dst;
- Attributes _attributes;
-};
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEELEMENTWISEBINARY */
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.cpp
deleted file mode 100644
index 522c33a022..0000000000
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.cpp
+++ /dev/null
@@ -1,267 +0,0 @@
-/*
- * Copyright (c) 2022-2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.h"
-
-#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
-#include "arm_compute/core/utils/StringUtils.h"
-
-#include "src/core/helpers/WindowHelpers.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
-#include "support/StringSupport.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-namespace
-{
-constexpr unsigned int serial_vector_size = 8;
-} // namespace
-ClTemplateLogits1DMaxShiftExpSum::ClTemplateLogits1DMaxShiftExpSum(ComponentId id,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes)
- : IGpuTemplateComponentWriter{id, tensors}, _src{}, _sum{}, _dst{}, _attributes{attributes}
-{
- _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
- _sum = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
- _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_1);
- ARM_COMPUTE_ERROR_ON_NULLPTR(_src);
- ARM_COMPUTE_ERROR_ON_NULLPTR(_sum);
- ARM_COMPUTE_ERROR_ON_NULLPTR(_dst);
-}
-
-std::string ClTemplateLogits1DMaxShiftExpSum::get_name() const
-{
- return "logits_1d_max_shift_exp_sum";
-}
-
-std::string ClTemplateLogits1DMaxShiftExpSum::get_component_code(const ComponentGroup &comp_group) const
-{
- ARM_COMPUTE_UNUSED(comp_group);
-
- std::string code = R"_(
-//------------------ START KERNEL {{meta_kernel_id}} ---------------------
-#define VEC_TYPE VEC_DATA_TYPE({{DATA_TYPE}}, N0)
-#define SELECT_TYPE SELECT_VEC_DATA_TYPE({{DATA_TYPE}}, N0)
-{
- __global uchar *src_addr = {{src}}_ptr + {{src}}_offset_first_element_in_bytes + g_ind_1 * {{src}}_stride_y + g_ind_2 * {{src}}_stride_z;
- __global uchar *dst_addr = {{dst}}_ptr + {{dst}}_offset_first_element_in_bytes + g_ind_1 * {{dst}}_stride_y + g_ind_2 * {{dst}}_stride_z;
- Image sum = CONVERT_TENSOR3D_TO_IMAGE_STRUCT({{sum}});
- VEC_TYPE max_val_vec = (VEC_TYPE)({{MINVAL}});
-)_";
-
- const bool beta_defined = (_attributes.beta() != 1.f);
-
- if (beta_defined)
- {
- code += R"_(
- VEC_TYPE beta = (VEC_TYPE){{BETA}};
-)_";
- }
-
- constexpr unsigned int _serial_vector_size = 8;
- const unsigned int reduction_dim_size = _src->dimension(0);
- const unsigned int vector_size = adjust_vec_size(_serial_vector_size, reduction_dim_size);
- const bool non_multiple_of_n0 = ((reduction_dim_size % vector_size) != 0);
-
- if (non_multiple_of_n0)
- {
- code += R"_(
- VEC_TYPE data = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)src_addr);
- SELECT_TYPE widx = (SELECT_TYPE)PARTIAL_N0 > VEC_OFFS(SELECT_DATA_TYPE({{DATA_TYPE}}), N0);
- max_val_vec = max(max_val_vec, select((VEC_TYPE)({{MINVAL}}), data, widx));
-)_";
- }
-
- code += R"_(
- for(uint i = PARTIAL_N0; i < {{SRC_WIDTH}}; i += N0)
- {
- VEC_TYPE data = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(src_addr + i * sizeof({{DATA_TYPE}})));
- max_val_vec = max(data, max_val_vec);
- }
-
- {{DATA_TYPE}} max_val = MAX_REDUCE(max_val_vec, N0);
- VEC_TYPE sum1D = 0;
-)_";
-
- if (non_multiple_of_n0)
- {
- code += R"_(
- data -= max_val;
-)_";
- if (beta_defined)
- {
- code += R"_(
- data *= beta;
-)_";
- }
-
- if (_attributes.is_log_softmax())
- {
- code += R"_(
- VSTORE_PARTIAL(N0, PARTIAL_N0)
- (data, 0, (__global {{DATA_TYPE}} *)dst_addr);
- data = exp(data);
- data = select(0, data, widx);
-)_";
- }
- else
- {
- code += R"_(
- data = exp(data);
- data = select(0, data, widx);
- VSTORE_PARTIAL(N0, PARTIAL_N0)
- (data, 0, (__global {{DATA_TYPE}} *)dst_addr);
-)_";
- }
-
- code += R"_(
- sum1D += data;
-)_";
- }
- code += R"_(
- for(uint i = PARTIAL_N0; i < {{SRC_WIDTH}}; i += N0)
- {
- VEC_TYPE data = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(src_addr + i * sizeof({{DATA_TYPE}})));
- data -= max_val;
-)_";
-
- if (beta_defined)
- {
- code += R"_(
- data *= beta;
-)_";
- }
-
- if (_attributes.is_log_softmax())
- {
- code += R"_(
- VSTORE(N0)
- (data, 0, (__global {{DATA_TYPE}} *)(dst_addr + i * sizeof({{DATA_TYPE}})));
- data = exp(data);
-)_";
- }
- else
- {
- code += R"_(
- data = exp(data);
- VSTORE(N0)
- (data, 0, (__global {{DATA_TYPE}} *)(dst_addr + i * sizeof({{DATA_TYPE}})));
-)_";
- }
-
- code += R"_(
- sum1D += data;
- }
-)_";
-
- code += R"_(
- *((__global {{DATA_TYPE}} *)sum.ptr) = SUM_REDUCE(sum1D, N0);
-}
-//------------------ END KERNEL {{meta_kernel_id}} ---------------------
-)_";
-
- return code;
-}
-
-void ClTemplateLogits1DMaxShiftExpSum::declare_variables(GpuKernelVariableTable &vtable,
- const ComponentGroup &comp_group) const
-{
- vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D), "src");
-
- vtable.declare_variable(comp_group, _sum, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D), "sum");
-
- vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D), "dst");
-}
-
-TagLUT ClTemplateLogits1DMaxShiftExpSum::get_tag_lut(const GpuKernelVariableTable &vtable,
- const ComponentGroup &comp_group) const
-{
- ARM_COMPUTE_UNUSED(comp_group);
-
- TagLUT lut{};
-
- // Arguments and global shared variables
- lut["src"] = vtable.get_variable(_src);
- lut["sum"] = vtable.get_variable(_sum);
- lut["dst"] = vtable.get_variable(_dst);
-
- // Local build options
- lut["meta_kernel_id"] = id();
-
- const DataType data_type = _src->data_type();
-
- lut["DATA_TYPE"] = get_cl_type_from_data_type(data_type);
- lut["BETA"] = float_to_string_with_full_precision(_attributes.beta());
- lut["MINVAL"] = (data_type == DataType::F16) ? std::string("-HALF_MAX") : std::string("-FLT_MAX");
- lut["SRC_WIDTH"] = support::cpp11::to_string(_src->dimension(0));
-
- return lut;
-}
-
-CLBuildOptions ClTemplateLogits1DMaxShiftExpSum::get_build_options(const ComponentGroup &comp_group) const
-{
- ARM_COMPUTE_UNUSED(comp_group);
- CLBuildOptions build_opts{};
-
- const unsigned int reduction_dim_size = _src->dimension(0);
- const unsigned int vector_size = adjust_vec_size(serial_vector_size, reduction_dim_size);
-
- build_opts.add_option("-DN0=" + support::cpp11::to_string(vector_size));
- build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string((reduction_dim_size % vector_size)));
-
- return build_opts;
-}
-
-std::string ClTemplateLogits1DMaxShiftExpSum::get_config_id() const
-{
- std::string config_id = get_name();
-
- config_id += "_";
- config_id += support::cpp11::to_string(_src->dimension(0));
- config_id += "_";
- config_id += string_from_data_type(_src->data_type());
-
- return config_id;
-}
-
-std::set<std::string> ClTemplateLogits1DMaxShiftExpSum::get_headers_list() const
-{
- return std::set<std::string>{"helpers.h", "tile_helpers.h"};
-}
-
-Window ClTemplateLogits1DMaxShiftExpSum::get_window() const
-{
- ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized");
-
- Window win = calculate_max_window(*_dst, Steps(_src->dimension(0)));
- return win.collapse(win, Window::DimZ);
-}
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.h
deleted file mode 100644
index ac9ddaa9d4..0000000000
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.h
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATELOGITS1DMAXSHIFTEXPSUM
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATELOGITS1DMAXSHIFTEXPSUM
-
-#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-class ClTemplateLogits1DMaxShiftExpSum final : public IGpuTemplateComponentWriter
-{
-public:
- using Attributes = ClComponentLogits1DMaxShiftExpSum::Attributes;
-
- /** Constructor
- *
- * @param[in] id Component id
- * @param[in] tensors Tensor arguments to the components
- * @param[in] attributes Component attributes
- */
- ClTemplateLogits1DMaxShiftExpSum(ComponentId id,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes);
- /** Prevent instances of this class from being copy constructed */
- ClTemplateLogits1DMaxShiftExpSum(const ClTemplateLogits1DMaxShiftExpSum &) = delete;
- /** Prevent instances of this class from being copied */
- ClTemplateLogits1DMaxShiftExpSum &operator=(const ClTemplateLogits1DMaxShiftExpSum &) = delete;
- /** Allow instances of this class to be move constructed */
- ClTemplateLogits1DMaxShiftExpSum(ClTemplateLogits1DMaxShiftExpSum &&) = default;
- /** Allow instances of this class to be moved */
- ClTemplateLogits1DMaxShiftExpSum &operator=(ClTemplateLogits1DMaxShiftExpSum &&) = default;
- /** Generate kernel component name */
- std::string get_name() const override;
- /** Generate kernel component code template
- *
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return std::string Component code
- */
- std::string get_component_code(const ComponentGroup &comp_group) const override;
- /** Declare all variables used by the component in the @p vtable
- *
- * @param[out] vtable Variable table
- * @param[in] comp_group Component group of which the component is a part of
- */
- void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
- /** Generate the tag look-up table used to instantiate the component code.
- *
- * @param[in] vtable Variable table
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return TagLUT Tag lookup table
- */
- TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
- /** Generate the build options used in the component
- *
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return CLBuildOptions Build options
- */
- CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override;
- /** Generate the component config id string used for tuning */
- std::string get_config_id() const override;
- /** Generate the header list used in the component */
- std::set<std::string> get_headers_list() const override;
- /** Generate the execution window for the component */
- Window get_window() const override;
-
-private:
- const ITensorInfo *_src; // input
- const ITensorInfo *_sum; // exponentiated and summed input
- const ITensorInfo *_dst; // exponentiated input
- Attributes _attributes;
-};
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATELOGITS1DMAXSHIFTEXPSUM */
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.cpp
deleted file mode 100644
index 7d7c3e6673..0000000000
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.cpp
+++ /dev/null
@@ -1,171 +0,0 @@
-/*
- * Copyright (c) 2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.h"
-
-#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
-
-#include "src/core/helpers/WindowHelpers.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
-#include "support/StringSupport.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-ClTemplateLogits1DNorm::ClTemplateLogits1DNorm(ComponentId id,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes)
- : IGpuTemplateComponentWriter{id, tensors}, _src{}, _sum{}, _dst{}, _attributes{attributes}
-{
- _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
- _sum = this->tensors().get_const_tensor(TensorType::ACL_SRC_1);
- _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
- ARM_COMPUTE_ERROR_ON_NULLPTR(_src);
- ARM_COMPUTE_ERROR_ON_NULLPTR(_sum);
- ARM_COMPUTE_ERROR_ON_NULLPTR(_dst);
-}
-
-std::string ClTemplateLogits1DNorm::get_name() const
-{
- return "logits_1d_norm";
-}
-
-std::string ClTemplateLogits1DNorm::get_component_code(const ComponentGroup &comp_group) const
-{
- ARM_COMPUTE_UNUSED(comp_group);
-
- std::string code = R"_(
-//------------------ START KERNEL {{meta_kernel_id}} ---------------------
-{
- const int x_offs = g_ind_0 * sizeof({{DATA_TYPE}});
- __global uchar *src_addr = {{src}}_ptr + {{src}}_offset_first_element_in_bytes + x_offs + g_ind_1 * {{src}}_stride_y + g_ind_2 * {{src}}_stride_z;
- __global uchar *dst_addr = {{dst}}_ptr + {{dst}}_offset_first_element_in_bytes + x_offs + g_ind_1 * {{dst}}_stride_y + g_ind_2 * {{dst}}_stride_z;
- Image sum = CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP({{sum}});
-)_";
- // Load max value of 1D logits vector (row)
- code += R"_(
- {{DATA_TYPE}} sum_val = *((__global {{DATA_TYPE}} *)offset(&sum, 0, g_ind_1));
- VEC_DATA_TYPE({{DATA_TYPE}}, N0)
- data0 = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)src_addr);
-)_";
-
- if (_attributes.is_log_softmax())
- {
- code += R"_(
- sum_val = log(sum_val);
- data0 -= sum_val;
-)_";
- }
- else
- {
- code += R"_(
- data0 /= sum_val;
-)_";
- }
-
- code += R"_(
- STORE_VECTOR_SELECT(data, {{DATA_TYPE}}, dst_addr, N0, PARTIAL_N0, PARTIAL_N0 != 0 && g_ind_0 == 0);
-}
-//------------------ END KERNEL {{meta_kernel_id}} ---------------------
-)_";
-
- return code;
-}
-
-void ClTemplateLogits1DNorm::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
-{
- vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D), "src");
-
- vtable.declare_variable(comp_group, _sum, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D), "sum");
-
- vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D), "dst");
-}
-
-TagLUT ClTemplateLogits1DNorm::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
-{
- ARM_COMPUTE_UNUSED(comp_group);
-
- TagLUT lut{};
-
- // Arguments and global shared variables
- lut["src"] = vtable.get_variable(_src);
- lut["sum"] = vtable.get_variable(_sum);
- lut["dst"] = vtable.get_variable(_dst);
-
- // Local build options
- lut["meta_kernel_id"] = id();
-
- const DataType data_type = _src->data_type();
-
- lut["DATA_TYPE"] = get_cl_type_from_data_type(data_type);
-
- return lut;
-}
-
-CLBuildOptions ClTemplateLogits1DNorm::get_build_options(const ComponentGroup &comp_group) const
-{
- ARM_COMPUTE_UNUSED(comp_group);
- CLBuildOptions build_opts{};
-
- const auto root_window = comp_group.get_root_component()->template_writer()->get_window();
- const unsigned int n0 = root_window.x().step();
- build_opts.add_option("-DN0=" + support::cpp11::to_string(n0));
- build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string((_src->dimension(0) % n0)));
-
- return build_opts;
-}
-
-std::string ClTemplateLogits1DNorm::get_config_id() const
-{
- std::string config_id = get_name();
-
- config_id += "_";
- config_id += support::cpp11::to_string(_src->dimension(0));
- config_id += "_";
- config_id += string_from_data_type(_src->data_type());
-
- return config_id;
-}
-
-std::set<std::string> ClTemplateLogits1DNorm::get_headers_list() const
-{
- return std::set<std::string>{"helpers.h", "tile_helpers.h"};
-}
-
-Window ClTemplateLogits1DNorm::get_window() const
-{
- ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized");
- constexpr unsigned int serial_vector_size = 16;
- const unsigned int vector_size = adjust_vec_size(serial_vector_size, _src->dimension(0));
-
- Window win = calculate_max_window(*_src, Steps(vector_size));
- return win.collapse(win, Window::DimZ);
-}
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.h
deleted file mode 100644
index 5a74be5842..0000000000
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright (c) 2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATELOGITS1DNORM
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATELOGITS1DNORM
-
-#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-class ClTemplateLogits1DNorm final : public IGpuTemplateComponentWriter
-{
-public:
- using Attributes = ClComponentLogits1DNorm::Attributes;
-
- /** Constructor
- *
- * @param[in] id Component id
- * @param[in] tensors Tensor arguments to the components
- * @param[in] attributes Component attributes
- */
- ClTemplateLogits1DNorm(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes);
- /** Prevent instances of this class from being copy constructed */
- ClTemplateLogits1DNorm(const ClTemplateLogits1DNorm &) = delete;
- /** Prevent instances of this class from being copied */
- ClTemplateLogits1DNorm &operator=(const ClTemplateLogits1DNorm &) = delete;
- /** Allow instances of this class to be move constructed */
- ClTemplateLogits1DNorm(ClTemplateLogits1DNorm &&) = default;
- /** Allow instances of this class to be moved */
- ClTemplateLogits1DNorm &operator=(ClTemplateLogits1DNorm &&) = default;
- /** Generate kernel component name */
- std::string get_name() const override;
- /** Generate kernel component code template
- *
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return std::string Component code
- */
- std::string get_component_code(const ComponentGroup &comp_group) const override;
- /** Declare all variables used by the component in the @p vtable
- *
- * @param[out] vtable Variable table
- * @param[in] comp_group Component group of which the component is a part of
- */
- void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
- /** Generate the tag look-up table used to instantiate the component code.
- *
- * @param[in] vtable Variable table
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return TagLUT Tag lookup table
- */
- TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
- /** Generate the build options used in the component
- *
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return CLBuildOptions Build options
- */
- CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override;
- /** Generate the component config id string used for tuning */
- std::string get_config_id() const override;
- /** Generate the header list used in the component */
- std::set<std::string> get_headers_list() const override;
- /** Generate the execution window for the component */
- Window get_window() const override;
-
-private:
- const ITensorInfo *_src; // exponentiated input
- const ITensorInfo *_sum; // exponentiated and summed input
- const ITensorInfo *_dst; // normalization of input with _sum
-
- Attributes _attributes;
-};
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATELOGITS1DNORM */
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp
deleted file mode 100644
index 8936db6abe..0000000000
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp
+++ /dev/null
@@ -1,470 +0,0 @@
-/*
- * Copyright (c) 2023-2024 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "ClTemplatePool2d.h"
-
-#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "arm_compute/core/utils/StringUtils.h"
-
-#include "src/core/helpers/WindowHelpers.h"
-#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
-#include "support/StringSupport.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-namespace
-{
-// Shape indexes for NHWC Datalayout
-constexpr static int32_t height_idx = 2;
-constexpr static int32_t width_idx = 1;
-constexpr static int32_t channel_idx = 0;
-} // namespace
-ClTemplatePool2d::ClTemplatePool2d(ComponentId id,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes,
- const Settings &settings)
- : IGpuTemplateComponentWriter{id, tensors}, _src{}, _dst{}, _attributes{attributes}, _settings{settings}
-{
- _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
- _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
- ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst);
-}
-
-std::string ClTemplatePool2d::get_name() const
-{
- return "pool2d";
-}
-
-std::string ClTemplatePool2d::get_component_code(const ComponentGroup &comp_group) const
-{
- ARM_COMPUTE_UNUSED(comp_group);
-
- // Condition to use 2x2 optimized kernel
- if (_attributes.pool_size() == Size2D(2, 2))
- {
- return get_2x2_kernel_code();
- }
- else
- {
- return get_MxN_kernel_code();
- }
-}
-
-std::string ClTemplatePool2d::get_MxN_kernel_code() const
-{
- const auto pool_type = _attributes.pool_type();
- const bool fp_mixed_precision = (_src->data_type() == DataType::F16) && pool_type != PoolingType::MAX;
-
- // Define pool op macro.
- std::string pool_op = (pool_type == PoolingType::AVG) ? R"_(#define POOL_OP(x,y) ((x) + (y)))_"
- : R"_(#define POOL_OP(x,y) (fmax((x), (y))) )_";
-
- // Kernel start
- // Note: If C is not multiple of N0, we shift back of PARTIAL_N0 elements to compute the leftover elements for get_global_id(0) == 0
- // Note: If C is less than N0, N0 should be SHRINKED to the closest smaller N0. This operation is performed on the host side
- std::string code = R"_(
-//------------------ START KERNEL {{meta_kernel_id}} ---------------------
-// IN_0(src) {{src}}
-// OUT(dst, accum) {{dst}}
-
-{
- const int idx_out_c = g_ind_0;
- const int idx_out_w = g_ind_1;
-)_";
-
- // Add macro for POOL_OP
- code += "\n" + pool_op + "\n";
-
- code += R"_(
- const int idx_out_h = g_ind_2 % {{DST_HEIGHT}};
- const int idx_out_n = g_ind_2 / {{DST_HEIGHT}};
-)_";
-
- // Define common variables.
- code += R"_(
- __global unsigned char *in_base_ptr = {{src}}_ptr + {{src}}_offset_first_element_in_bytes + idx_out_c * sizeof({{DATA_TYPE}}) + idx_out_n * {{src}}_stride_w;
-
- __global unsigned char *out_base_ptr = {{dst}}_ptr + {{dst}}_offset_first_element_in_bytes + idx_out_c * sizeof({{DATA_TYPE}}) + idx_out_w * {{dst}}_stride_y + idx_out_h * {{dst}}_stride_z + idx_out_n * {{dst}}_stride_w;
-
- VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)
- res0 = {{INITIAL_VALUE}};
-
- const int idx_in_w = idx_out_w * {{STRIDE_X}} - {{PAD_X}};
- const int idx_in_h = idx_out_h * {{STRIDE_Y}} - {{PAD_Y}};
-
- const int pool_x_s = max((int)0, -idx_in_w);
- const int pool_x_e = min((int){{POOL_SIZE_X}}, (int){{SRC_WIDTH}} - idx_in_w);
- const int pool_y_s = max((int)0, -idx_in_h);
- const int pool_y_e = min((int){{POOL_SIZE_Y}}, (int){{SRC_HEIGHT}} - idx_in_h);
-)_";
-
- // Determine filter size depending on if padding is excluded or not
- if (_attributes.exclude_padding())
- {
- code += R"_(
- const int filter_size = (pool_y_e - pool_y_s) * (pool_x_e - pool_x_s);
-)_";
- }
- else
- {
- code += R"_(
- const int filter_size = {{POOL_SIZE_X}} * {{POOL_SIZE_Y}};
-)_";
- }
-
- // Loop through pool size
- // if global pooling
- if (_attributes.pool_size().x() == _src->dimension(width_idx) &&
- _attributes.pool_size().y() == _src->dimension(height_idx))
- {
- // Begin loop
- code += R"_(
- // Global pooling path
- for(int y = 0; y < {{POOL_SIZE_Y}}; ++y)
- {
- #pragma unroll 8
- for(int x = 0; x < {{POOL_SIZE_X}}; ++x)
- {
- VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)
- data0;
-)_";
- }
- else // if local pooling size
- {
- code += R"_(
- for(int y = pool_y_s; y < pool_y_e; ++y)
- {
- #pragma unroll 8
- for(int x = pool_x_s; x < pool_x_e; ++x)
- {
- VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)
- data0;
-)_";
- } // end else
-
- // if condition inside loop - use 32bit acc if mixed_precision.
- // End loop through pooling section.
- if (fp_mixed_precision)
- {
- // In case of FP_MIXED_PRECISION, ACC_DATA_TYPE is != DATA_TYPE
- code += R"_(
- data0 = CONVERT(VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + (x + idx_in_w) * {{src}}_stride_y + (y + idx_in_h) * {{src}}_stride_z)), VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0));
- res0 = POOL_OP(res0, data0);
- }
- }
-)_";
- }
- else // load data, compute result and end loop
- {
- code += R"_(
- data0 = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + (x + idx_in_w) * {{src}}_stride_y + (y + idx_in_h) * {{src}}_stride_z));
- res0 = POOL_OP(res0, data0);
- }
- }
-)_";
- }
-
- // For Pool AVG ONLY, divide pool output by filter size
- if (pool_type == PoolingType::AVG)
- {
- code += R"_(
- res0 /= (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0))filter_size;
-)_";
- }
-
- // If mixed precision convert datatype before storing. Then end kernel.
- if (fp_mixed_precision)
- {
- code += R"_(
- VEC_DATA_TYPE({{DATA_TYPE}}, N0)
- res_converted0 = CONVERT(res0, VEC_DATA_TYPE({{DATA_TYPE}}, N0));
- STORE_VECTOR_SELECT(res_converted, {{DATA_TYPE}}, out_base_ptr, N0, PARTIAL_N0, (PARTIAL_N0 != 0) && g_ind_0 == 0);
-)_";
- }
- else
- {
- // Store data
- code += R"_(
- STORE_VECTOR_SELECT(res, {{DATA_TYPE}}, out_base_ptr, N0, PARTIAL_N0, (PARTIAL_N0 != 0) && g_ind_0 == 0);
-)_";
- }
-
- code += R"_(
-//------------------ END KERNEL {{meta_kernel_id}} ---------------------
-}
-)_";
-
- return code;
-}
-
-std::string ClTemplatePool2d::get_2x2_kernel_code() const
-{
- const auto pool_type = _attributes.pool_type();
- const bool fp_mixed_precision = (_src->data_type() == DataType::F16) && pool_type != PoolingType::MAX;
- std::string pool_op = (pool_type == PoolingType::AVG) ? R"_(#define POOL_OP(x,y) ((x) + (y)))_"
- : R"_(#define POOL_OP(x,y) (fmax((x), (y))) )_";
-
- std::string code = R"_(
-//------------------ START KERNEL {{meta_kernel_id}} ---------------------
-// IN_0(src) {{src}}
-// OUT(dst, accum) {{dst}}
-
-#define SELECT_TYPE SELECT_VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)
-
-{
- const int idx_out_c = g_ind_0;
- const int idx_out_w = g_ind_1;
-)_";
-
- // Add pool op macro
- code += "\n" + pool_op + "\n";
-
- // If batch size != 1, the batch size dimension is collapsed over the height dimension
- code += R"_(
- const int idx_out_h = g_ind_2 % {{DST_HEIGHT}};
- const int idx_out_n = g_ind_2 / {{DST_HEIGHT}};
-)_";
-
- code += R"_(
- const int idx_in_w = idx_out_w * {{STRIDE_X}} - {{PAD_X}};
- const int idx_in_h = idx_out_h * {{STRIDE_Y}} - {{PAD_Y}};
-
- __global unsigned char *in_base_ptr = {{src}}_ptr + {{src}}_offset_first_element_in_bytes + idx_out_c * sizeof({{DATA_TYPE}}) + idx_out_n * {{src}}_stride_w;
- __global unsigned char *out_base_ptr = {{dst}}_ptr + {{dst}}_offset_first_element_in_bytes + idx_out_c * sizeof({{DATA_TYPE}}) + idx_out_w * {{dst}}_stride_y + idx_out_h * {{dst}}_stride_z + idx_out_n *
- {{dst}}_stride_w;
- const int pool_x_s = max((int)0, -idx_in_w);
- const int pool_x_e = min((int)2, (int){{SRC_WIDTH}} - idx_in_w);
- const int pool_y_s = max((int)0, -idx_in_h);
- const int pool_y_e = min((int)2, (int){{SRC_HEIGHT}} - idx_in_h);
-
- const int filter_size = (pool_x_e - pool_x_s) * (pool_y_e - pool_y_s);
- const int x0 = pool_x_s + idx_in_w;
- const int y0 = pool_y_s + idx_in_h;
- const int x1 = pool_x_e - 1 + idx_in_w;
- const int y1 = pool_y_e - 1 + idx_in_h;
-
- REPEAT_VAR_INIT_TO_CONST(4, VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0), data, 0);
-)_";
-
- if (fp_mixed_precision)
- {
- // In case of FP_MIXED_PRECISION, ACC_DATA_TYPE is != DATA_TYPE
- code += R"_(
- data0 = CONVERT(VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x0 * {{src}}_stride_y + y0 * {{src}}_stride_z)), VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0));
- data1 = CONVERT(VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x1 * {{src}}_stride_y + y0 * {{src}}_stride_z)), VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0));
- data2 = CONVERT(VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x0 * {{src}}_stride_y + y1 * {{src}}_stride_z)), VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0));
- data3 = CONVERT(VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x1 * {{src}}_stride_y + y1 * {{src}}_stride_z)), VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0));
-)_";
- }
- else
- {
- code += R"_(
- data0 = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x0 * {{src}}_stride_y + y0 * {{src}}_stride_z));
- data1 = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x1 * {{src}}_stride_y + y0 * {{src}}_stride_z));
- data2 = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x0 * {{src}}_stride_y + y1 * {{src}}_stride_z));
- data3 = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x1 * {{src}}_stride_y + y1 * {{src}}_stride_z));
-)_";
- }
-
- if (pool_type != PoolingType::MAX)
- {
- // Make invalid the values loaded if the x or y coordinate was clamped (out-of-bound)
- code += R"_(
- if(filter_size != 4)
- {
- SELECT_TYPE cond_w_s = (SELECT_TYPE)idx_in_w < (SELECT_TYPE)0;
- SELECT_TYPE cond_w_e = (SELECT_TYPE)idx_in_w >= (SELECT_TYPE)({{SRC_WIDTH}} - 1);
- SELECT_TYPE cond_h_s = (SELECT_TYPE)idx_in_h < (SELECT_TYPE)0;
- SELECT_TYPE cond_h_e = (SELECT_TYPE)idx_in_h >= (SELECT_TYPE)({{SRC_HEIGHT}} - 1);
-
- data0 = select(data0, (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)){{INITIAL_VALUE}}, (SELECT_TYPE)(cond_w_s | cond_h_s));
- data1 = select(data1, (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)){{INITIAL_VALUE}}, (SELECT_TYPE)(cond_w_e | cond_h_s));
- data2 = select(data2, (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)){{INITIAL_VALUE}}, (SELECT_TYPE)(cond_w_s | cond_h_e));
- data3 = select(data3, (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)){{INITIAL_VALUE}}, (SELECT_TYPE)(cond_w_e | cond_h_e));
- }
-)_";
- }
-
- code += R"_(
- VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)
- res0 = data0;
- res0 = POOL_OP(res0, data1);
- res0 = POOL_OP(res0, data2);
- res0 = POOL_OP(res0, data3);
-)_";
-
- if (pool_type == PoolingType::AVG)
- {
- // If avg pooling divide result accordingly.
- if (_attributes.exclude_padding())
- {
- code += R"_(
- res0 /= (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0))filter_size;
-)_";
- }
- else
- {
- code += R"_(
- res0 /= (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0))4;
-)_";
- }
- }
-
- // Store result
- if (fp_mixed_precision)
- {
- code += R"_(
- VEC_DATA_TYPE({{DATA_TYPE}}, N0)
- res_converted0 = CONVERT(res0, VEC_DATA_TYPE({{DATA_TYPE}}, N0));
- STORE_VECTOR_SELECT(res_converted, {{DATA_TYPE}}, out_base_ptr, N0, PARTIAL_N0, (PARTIAL_N0 != 0) && g_ind_0 == 0);
-)_";
- }
- else
- {
- code += R"_(
- STORE_VECTOR_SELECT(res, {{DATA_TYPE}}, out_base_ptr, N0, PARTIAL_N0, (PARTIAL_N0 != 0) && g_ind_0 == 0);
-)_";
- }
-
- code += R"_(
- //------------------ END KERNEL {{meta_kernel_id}} ---------------------
-}
-#undef SELECT_TYPE
-)_";
-
- return code;
-}
-
-void ClTemplatePool2d::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
-{
- vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "src");
-
- vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "dst");
-}
-
-TagLUT ClTemplatePool2d::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
-{
- ARM_COMPUTE_UNUSED(comp_group);
-
- TagLUT lut{};
- // Arguments and global shared variables
- lut["src"] = vtable.get_variable(_src);
- lut["dst"] = vtable.get_variable(_dst);
-
- // Local build options
- lut["meta_kernel_id"] = id();
-
- // Retrieve relevant data
- const auto padding = _attributes.pad();
- const auto stride = _attributes.stride();
- const auto pool_size = _attributes.pool_size();
- const auto data_type = _src->data_type();
- const auto use_fp_mixed_precision =
- (_src->data_type() == DataType::F16) && _attributes.pool_type() != PoolingType::MAX;
- const std::string max_initial_value =
- _settings.use_inf_as_limit() ? "(-INFINITY)"
- : float_to_string_with_full_precision(std::numeric_limits<float>::lowest());
-
- // pool specific
- lut["STRIDE_X"] = stride.x();
- lut["STRIDE_Y"] = stride.y();
- lut["PAD_X"] = padding.left;
- lut["PAD_Y"] = padding.top;
- lut["POOL_SIZE_X"] = pool_size.width;
- lut["POOL_SIZE_Y"] = pool_size.height;
-
- // Datatypes and variables
- lut["ACC_DATA_TYPE"] = get_cl_type_from_data_type(
- (use_fp_mixed_precision) ? (DataType::F32) : (data_type)); // Type of accumulators to use.
- lut["DATA_TYPE"] = get_cl_type_from_data_type(data_type);
- lut["SRC_WIDTH"] = _src->dimension(width_idx);
- lut["SRC_HEIGHT"] = _src->dimension(height_idx);
- lut["INITIAL_VALUE"] = (_attributes.pool_type() == PoolingType::MAX) ? max_initial_value : std::string("0");
-
- // Tensor specific data
- lut["DST_HEIGHT"] = _dst->dimension(height_idx);
-
- return lut;
-}
-
-CLBuildOptions ClTemplatePool2d::get_build_options(const ComponentGroup &comp_group) const
-{
- const auto root_window = comp_group.get_root_component()->template_writer()->get_window();
- const unsigned int n0 = root_window.x().step();
- const unsigned int partial_store_n0 = _dst->dimension(0) % n0;
-
- CLBuildOptions build_opts{};
- build_opts.add_option("-DN0=" + support::cpp11::to_string(n0));
- build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0));
-
- return build_opts;
-}
-
-std::string ClTemplatePool2d::get_config_id() const
-{
- const DataType data_type = _src->data_type();
- const DataLayout data_layout = _src->data_layout();
-
- std::string config_id{};
- config_id += "pooling_layer_2d_";
- config_id += lower_string(string_from_data_type(data_type));
- config_id += "_";
- config_id += lower_string(string_from_data_layout(data_layout));
- config_id += "_";
- config_id += support::cpp11::to_string(_dst->dimension(width_idx));
- config_id += "_";
- config_id += support::cpp11::to_string(_dst->dimension(height_idx));
- config_id += "_";
- config_id += support::cpp11::to_string(_dst->dimension(channel_idx));
-
- return config_id;
-}
-
-std::set<std::string> ClTemplatePool2d::get_headers_list() const
-{
- return std::set<std::string>{"helpers.h", "tile_helpers.h", "repeat.h"};
-}
-
-Window ClTemplatePool2d::get_window() const
-{
- ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized");
- const auto output_shape = _dst->tensor_shape();
- const unsigned int vec_size = adjust_vec_size(((_dst->data_type() == DataType::F32) ? 2 : 4), _dst->dimension(0));
-
- // Create and configure kernel window
- auto win = calculate_max_window(output_shape, Steps(vec_size));
- win = win.collapse_if_possible(win, Window::DimZ); // collapse window on batch size.
- return win;
-}
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.h
deleted file mode 100644
index d1d3c01669..0000000000
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.h
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * Copyright (c) 2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEPOOL2D
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEPOOL2D
-
-#include "arm_compute/core/experimental/Types.h"
-#include "arm_compute/dynamic_fusion/sketch/attributes/Pool2dAttributes.h"
-#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h"
-
-#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-class ClTemplatePool2d final : public IGpuTemplateComponentWriter
-{
-public:
- using Attributes = ClComponentPool2d::Attributes;
- using Settings = ClComponentPool2d::Settings;
- /** Constructor
- *
- * @param[in] id Component id
- * @param[in] tensors Tensor arguments to the components
- * @param[in] attributes Component attributes
- * @param[in] settings Component settings
- */
- ClTemplatePool2d(ComponentId id,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes,
- const Settings &settings);
-
- /** Prevent instances of this class from being copy constructed */
- ClTemplatePool2d(const ClTemplatePool2d &direct_conv2d) = delete;
-
- /** Prevent instances of this class from being copied */
- ClTemplatePool2d &operator=(const ClTemplatePool2d &direct_conv2d) = delete;
-
- /** Allow instances of this class to be move constructed */
- ClTemplatePool2d(ClTemplatePool2d &&direct_conv2d) = default;
-
- /** Allow instances of this class to be moved */
- ClTemplatePool2d &operator=(ClTemplatePool2d &&direct_conv2d) = default;
-
- /** Generate kernel component name */
- std::string get_name() const override;
-
- /** Generate kernel component code template
- *
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return std::string Component code
- */
- std::string get_component_code(const ComponentGroup &comp_group) const override;
- /** Declare all variables used by the component in the @p vtable
- *
- * @param[out] vtable Variable table
- * @param[in] comp_group Component group of which the component is a part of
- */
- void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
- /** Generate the tag look-up table used to instantiate the component code.
- *
- * @param[in] vtable Variable table
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return TagLUT Tag lookup table
- */
- TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
- /** Generate the build options used in the component
- *
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return CLBuildOptions Build options
- */
- CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override;
-
- /** Generate the component config id string used for tuning */
- std::string get_config_id() const override;
-
- /** Generate the header list used in the component */
- std::set<std::string> get_headers_list() const override;
-
- /** Generate the execution window for the component */
- Window get_window() const override;
-
-private:
- /** Generate pooling kernel template code optimized for 2x2 pooling
- *
- * @return std::String Component code
- */
- std::string get_2x2_kernel_code() const;
-
- /** Generate generalised pooling kernel template code for MxN pooling
- *
- * @return std::String Component code
- */
- std::string get_MxN_kernel_code() const;
-
- const ITensorInfo *_src;
- const ITensorInfo *_dst;
- Attributes _attributes;
- Settings _settings;
-};
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEPOOL2D */
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.cpp
deleted file mode 100644
index c882353fcb..0000000000
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.cpp
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- * Copyright (c) 2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "ClTemplateReshape.h"
-
-#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
-#include "arm_compute/core/utils/StringUtils.h"
-
-#include "src/core/helpers/WindowHelpers.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-constexpr unsigned int vector_size_byte_opencl = 16;
-
-ClTemplateReshape::ClTemplateReshape(ComponentId id, const ArgumentPack<ITensorInfo> &tensors)
- : IGpuTemplateComponentWriter{id, tensors}, _src{}, _dst{}
-{
- _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
- _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
- ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst);
-}
-
-std::string ClTemplateReshape::get_name() const
-{
- return "reshape";
-}
-
-std::string ClTemplateReshape::get_component_code(const ComponentGroup &comp_group) const
-{
- ARM_COMPUTE_UNUSED(comp_group);
- std::string code;
-
- code = R"_(
-//------------------ START KERNEL {{meta_kernel_id}} ---------------------
-
-// IN(src) {{src}}
-// OUT(dst, accum) {{dst}}
-
-TILE(uint, M0, 1, g_dst_indirect_y);
-{
- __global uchar * base_src_ptr = {{src}}_ptr + {{src}}_offset_first_element_in_bytes;
- const int tile_vertical_idx = g_ind_1 * {{arg_dst}}_c + g_ind_2 * {{arg_dst}}_c * {{arg_dst}}_w;
- LOOP_UNROLLING(int, _m0, 0, 1, M0,
- {
- const int row_idx = _m0 * {{arg_dst}}_c + tile_vertical_idx;
- const int tile_horizontal_idx = g_ind_0 + row_idx;
- LOOP_UNROLLING(int, _n0, 0, 1, N0,
- {
- {{src}}_ptr = base_src_ptr;
- const int linear_idx = tile_horizontal_idx + _n0;
- const int in_id_x = linear_idx % {{src}}_c;
- const int in_id_y = (linear_idx / {{src}}_c) % {{src}}_w;
- const int in_id_z = linear_idx / ({{src}}_c * {{src}}_w);
- {{src}}_ptr += in_id_x * sizeof({{DATA_TYPE}}) + in_id_y * {{src}}_stride_y + in_id_z * {{src}}_stride_z;
- {{dst}}[_m0].s[_n0] = *((__global {{DATA_TYPE}} *){{src}}_ptr);
- })
- })
-
- LOOP_UNROLLING(int, i, 0, 1, M0,
- {
- g_dst_indirect_y[i].v = (uint)min((int)(g_ind_1 + i), (int)({{arg_dst}}_w) - 1);
- g_dst_indirect_y[i].v += (int)(g_ind_2 % {{arg_dst}}_h) * (int)({{arg_dst}}_w);
- g_dst_indirect_y[i].v += (int)(g_ind_2 / {{arg_dst}}_h) * (int)({{arg_dst}}_w * {{arg_dst}}_h);
- })
-}
-//------------------ END KERNEL {{meta_kernel_id}} ---------------------
-)_";
- return code;
-}
-
-void ClTemplateReshape::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
-{
- vtable.declare_variable(comp_group, _src,
- GpuKernelArgumentInfo(common_tensor_type), // GpuKernelArgumentInfo::Type::Image_3D
- "src");
-
- vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(common_tensor_type), "dst");
-}
-
-TagLUT ClTemplateReshape::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
-{
- ARM_COMPUTE_UNUSED(comp_group);
- TagLUT lut{};
-
- // Arguments and global shared variables
- lut["src"] = vtable.get_variable(_src);
- lut["dst"] = vtable.get_variable(_dst);
- lut["arg_dst"] = vtable.get_variable(comp_group.get_any_dst_tensor());
- lut["meta_kernel_id"] = id();
- lut["DATA_TYPE"] = get_cl_type_from_data_type(_dst->data_type());
-
- return lut;
-}
-
-CLBuildOptions ClTemplateReshape::get_build_options(const ComponentGroup &comp_group) const
-{
- CLBuildOptions build_opts{};
- const auto root_window = comp_group.get_root_component()->template_writer()->get_window();
- const unsigned int n0 = root_window.x().step();
- const unsigned int m0 = root_window.y().step();
- const unsigned int partial_store_n0 = _dst->dimension(0) % n0;
- build_opts.add_option("-DN0=" + support::cpp11::to_string(n0));
- build_opts.add_option("-DM0=" + support::cpp11::to_string(m0));
- build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0));
-
- return build_opts;
-}
-
-std::string ClTemplateReshape::get_config_id() const
-{
- std::string config_id{};
- config_id += lower_string(string_from_data_type(_dst->data_type()));
- config_id += "_";
- config_id += support::cpp11::to_string(_dst->dimension(0));
- config_id += "_";
- config_id += support::cpp11::to_string(_dst->dimension(1));
-
- return config_id;
-}
-
-std::set<std::string> ClTemplateReshape::get_headers_list() const
-{
- return std::set<std::string>{"helpers.h", "tile_helpers.h"};
-}
-
-Window ClTemplateReshape::get_window() const
-{
- ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized");
- const unsigned int n0 = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
- Window win = calculate_max_window(*_dst, Steps(n0));
- return win.collapse(win, Window::DimZ);
-}
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.h
deleted file mode 100644
index 838a21db6d..0000000000
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.h
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Copyright (c) 2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATERESHAPE
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATERESHAPE
-
-#include "arm_compute/core/experimental/Types.h"
-
-#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-class ClTemplateReshape final : public IGpuTemplateComponentWriter
-{
-public:
- /** Constructor
- *
- * @param[in] id Component id
- * @param[in] tensors Tensor arguments to the components
- */
- ClTemplateReshape(ComponentId id, const ArgumentPack<ITensorInfo> &tensors);
- /** Prevent instances of this class from being copy constructed */
- ClTemplateReshape(const ClTemplateReshape &reshape) = delete;
- /** Prevent instances of this class from being copied */
- ClTemplateReshape &operator=(const ClTemplateReshape &reshape) = delete;
- /** Allow instances of this class to be move constructed */
- ClTemplateReshape(ClTemplateReshape &&reshape) = default;
- /** Allow instances of this class to be moved */
- ClTemplateReshape &operator=(ClTemplateReshape &&reshape) = default;
-
- /** Generate kernel component name */
- std::string get_name() const override;
-
- /** Generate kernel component code template
- *
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return std::string Component code
- */
- std::string get_component_code(const ComponentGroup &comp_group) const override;
-
- /** Declare all variables used by the component in the @p vtable
- *
- * @param[out] vtable Variable table
- * @param[in] comp_group Component group of which the component is a part of
- */
- void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
-
- /** Generate the tag look-up table used to instantiate the component code.
- *
- * @param[in] vtable Variable table
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return TagLUT Tag lookup table
- */
- TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
-
- /** Generate the build options used in the component
- *
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return CLBuildOptions Build options
- */
- CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override;
-
- /** Generate the component config id string used for tuning */
- std::string get_config_id() const override;
-
- /** Generate the header list used in the component */
- std::set<std::string> get_headers_list() const override;
-
- /** Generate the execution window for the component */
- Window get_window() const override;
-
-private:
- const ITensorInfo *_src;
- const ITensorInfo *_dst;
-};
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATERESHAPE */
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.cpp
deleted file mode 100644
index 846c712ceb..0000000000
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.cpp
+++ /dev/null
@@ -1,279 +0,0 @@
-/*
- * Copyright (c) 2022-2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "ClTemplateResize.h"
-
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
-#include "arm_compute/core/utils/StringUtils.h"
-
-#include "src/core/helpers/WindowHelpers.h"
-#include "src/core/utils/ScaleUtils.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-ClTemplateResize::ClTemplateResize(ComponentId id,
- const ArgumentPack<ITensorInfo> &tensors,
- const ClTemplateResize::Attributes &attributes)
- : IGpuTemplateComponentWriter{id, tensors}, _src{}, _dst{}, _attributes{attributes}
-{
- _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
- _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
-
- ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst);
-}
-
-std::string ClTemplateResize::get_name() const
-{
- return _attributes.interpolation_policy() == InterpolationPolicy::BILINEAR ? "resize_bilinear" : "resize_nearest";
-}
-
-std::string ClTemplateResize::get_component_code(const IGpuTemplateComponentWriter::ComponentGroup &comp_group) const
-{
- ARM_COMPUTE_UNUSED(comp_group);
-
- std::string code = R"_(
-//------------------ START KERNEL {{meta_kernel_id}} ---------------------
-TILE(uint, 1, 1, g_dst_indirect_y);
-{
- const int yo = g_ind_2 % {{arg_dst}}_h;
- const int bout = g_ind_2 / {{arg_dst}}_h;
-)_";
-
- if (_attributes.interpolation_policy() == InterpolationPolicy::NEAREST_NEIGHBOR)
- {
- if (_attributes.sampling_policy() == SamplingPolicy::TOP_LEFT)
- {
- code += R"_(
- float xi_f = (g_ind_1 * {{SCALE_X}});
- float yi_f = (yo * {{SCALE_Y}});
-)_";
- }
- else
- {
- code += R"_(
- float xi_f = ((g_ind_1 + 0.5f) * {{SCALE_X}});
- float yi_f = ((yo + 0.5f) * {{SCALE_Y}});
-)_";
- }
-
- if (_attributes.align_corners())
- {
- code += R"_(
- xi_f = round(xi_f);
- yi_f = round(yi_f);
-)_";
- }
-
- code += R"_(
- const int xi0 = clamp((int)xi_f, 0, (int){{src}}_w - 1);
- const int yi0 = clamp((int)yi_f, 0, (int){{src}}_h - 1);
-
- T_LOAD_NHWC_WITH_DILATION({{SRC_DATA_TYPE}}, 1, 1, N0, {{SRC_TENSOR_TYPE}}, {{src}}, bout, yi0, xi0, g_ind_0, {{src}}_w, {{src}}_h, 1, 1, false, {{dst}});
-)_";
- }
- else if (_attributes.interpolation_policy() == InterpolationPolicy::BILINEAR)
- {
- if (_attributes.sampling_policy() == SamplingPolicy::TOP_LEFT)
- {
- code += R"_(
- float xi_f = (g_ind_1 * {{SCALE_X}});
- float yi_f = (yo * {{SCALE_Y}});
-)_";
- }
- else
- {
- code += R"_(
- float xi_f = ((g_ind_1 + 0.5f) * {{SCALE_X}} - 0.5f);
- float yi_f = ((yo + 0.5f) * {{SCALE_Y}} - 0.5f);
-)_";
- }
-
- code += R"_(
- const int xi = (int)floor(xi_f);
- const int yi = (int)floor(yi_f);
-
- TILE({{SRC_DATA_TYPE}}, 1, N0, in00);
- TILE({{SRC_DATA_TYPE}}, 1, N0, in01);
- TILE({{SRC_DATA_TYPE}}, 1, N0, in10);
- TILE({{SRC_DATA_TYPE}}, 1, N0, in11);
-
- in00[0].v = {{CONSTANT_VALUE}};
- in01[0].v = {{CONSTANT_VALUE}};
- in10[0].v = {{CONSTANT_VALUE}};
- in11[0].v = {{CONSTANT_VALUE}};
-
- const int xi0 = clamp(xi, 0, (int){{src}}_w - 1);
- const int yi0 = clamp(yi, 0, (int){{src}}_h - 1);
- const int xi1 = clamp(xi + 1, 0, (int){{src}}_w - 1);
- const int yi1 = clamp(yi + 1, 0, (int){{src}}_h - 1);
-
- T_LOAD_NHWC_WITH_DILATION({{SRC_DATA_TYPE}}, 1, 1, N0, {{SRC_TENSOR_TYPE}}, {{src}}, bout, yi0, xi0, g_ind_0, {{src}}_w, {{src}}_h, 1, 1, false, in00);
- T_LOAD_NHWC_WITH_DILATION({{SRC_DATA_TYPE}}, 1, 1, N0, {{SRC_TENSOR_TYPE}}, {{src}}, bout, yi0, xi1, g_ind_0, {{src}}_w, {{src}}_h, 1, 1, false, in01);
- T_LOAD_NHWC_WITH_DILATION({{SRC_DATA_TYPE}}, 1, 1, N0, {{SRC_TENSOR_TYPE}}, {{src}}, bout, yi1, xi0, g_ind_0, {{src}}_w, {{src}}_h, 1, 1, false, in10);
- T_LOAD_NHWC_WITH_DILATION({{SRC_DATA_TYPE}}, 1, 1, N0, {{SRC_TENSOR_TYPE}}, {{src}}, bout, yi1, xi1, g_ind_0, {{src}}_w, {{src}}_h, 1, 1, false, in11);
-)_";
-
- if (is_data_type_float(_src->data_type()))
- {
- code += R"_(
- const {{SRC_DATA_TYPE}} a = ({{SRC_DATA_TYPE}})(xi_f - (float)xi);
- const {{SRC_DATA_TYPE}} b = ({{SRC_DATA_TYPE}})(1.f - a);
- const {{SRC_DATA_TYPE}} a1 = ({{SRC_DATA_TYPE}})(yi_f - (float)yi);
- const {{SRC_DATA_TYPE}} b1 = ({{SRC_DATA_TYPE}})(1.f - a1);
-
- // Calculate the output
- {{dst}}[0].v = ((in00[0].v * b * b1) + (in01[0].v * a * b1) + (in10[0].v * b * a1) + (in11[0].v * a * a1));
-)_";
- }
- else
- {
- code += R"_(
- const float a = (xi_f - (float)xi);
- const float b = (1.f - a);
- const float a1 = (yi_f - (float)yi);
- const float b1 = (1.f - a1);
-
- {{dst}}[0].v = CONVERT_SAT(
- (CONVERT(in00[0].v, VEC_DATA_TYPE(float, N0)) * b * b1) +
- (CONVERT(in01[0].v, VEC_DATA_TYPE(float, N0)) * a * b1) +
- (CONVERT(in10[0].v, VEC_DATA_TYPE(float, N0)) * b * a1) +
- (CONVERT(in11[0].v, VEC_DATA_TYPE(float, N0)) * a * a1), VEC_DATA_TYPE({{DST_DATA_TYPE}}, N0));
-)_";
- }
- }
- else
- {
- ARM_COMPUTE_ERROR("Unsupported interpolation policy");
- }
-
- code += R"_(
- g_dst_indirect_y[0].v = g_ind_1 + (yo * (int)({{arg_dst}}_w)) + bout * (int)({{arg_dst}}_w * {{arg_dst}}_h);
-}
-//------------------ END KERNEL {{meta_kernel_id}} ---------------------
-)_";
-
- return code;
-}
-
-void ClTemplateResize::declare_variables(GpuKernelVariableTable &vtable,
- const IGpuTemplateComponentWriter::ComponentGroup &comp_group) const
-{
- vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "src");
-
- vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "dst");
-}
-
-TagLUT ClTemplateResize::get_tag_lut(const GpuKernelVariableTable &vtable,
- const IGpuTemplateComponentWriter::ComponentGroup &comp_group) const
-{
- TagLUT lut{};
-
- // Arguments and global shared variables
- lut["src"] = vtable.get_variable(_src);
- lut["dst"] = vtable.get_variable(_dst);
-
- const auto dst_argument = vtable.get_variable(comp_group.get_any_dst_tensor());
- lut["arg_dst"] = dst_argument.uniq_name;
-
- // Local build options
- lut["meta_kernel_id"] = id();
- lut["SRC_DATA_TYPE"] = get_cl_type_from_data_type(_src->data_type());
- lut["SRC_TENSOR_TYPE"] = "BUFFER";
- lut["DST_DATA_TYPE"] = get_cl_type_from_data_type(_dst->data_type());
- lut["CONSTANT_VALUE"] = string_from_pixel_value(0, _src->data_type());
-
- const float scale_x =
- scale_utils::calculate_resize_ratio(_src->dimension(1), _dst->dimension(1), _attributes.align_corners());
- const float scale_y =
- scale_utils::calculate_resize_ratio(_src->dimension(2), _dst->dimension(2), _attributes.align_corners());
-
- lut["SCALE_X"] = float_to_string_with_full_precision(scale_x);
- lut["SCALE_Y"] = float_to_string_with_full_precision(scale_y);
-
- return lut;
-}
-
-CLBuildOptions ClTemplateResize::get_build_options(const IGpuTemplateComponentWriter::ComponentGroup &comp_group) const
-{
- const Window root_window = comp_group.get_root_component()->template_writer()->get_window();
- const unsigned int n0 = root_window.x().step();
- const unsigned int m0 = root_window.y().step();
- const unsigned int partial_n0 = _dst->dimension(0) % n0;
-
- CLBuildOptions build_opts;
-
- build_opts.add_option("-DN0=" + support::cpp11::to_string(n0));
- build_opts.add_option("-DM0=" + support::cpp11::to_string(m0));
- build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_n0));
-
- return build_opts;
-}
-
-std::string ClTemplateResize::get_config_id() const
-{
- std::string config_id{};
-
- config_id += "resize_";
- config_id +=
- (_attributes.interpolation_policy() == InterpolationPolicy::NEAREST_NEIGHBOR ? "NEAREST_NEIGHBOR" : "");
- config_id += (_attributes.interpolation_policy() == InterpolationPolicy::BILINEAR ? "BILINEAR" : "");
- config_id += "_";
- config_id += (_attributes.sampling_policy() == SamplingPolicy::CENTER ? "center" : "topleft");
- config_id += "_";
- config_id += support::cpp11::to_string(_dst->dimension(0));
- config_id += "_";
- config_id += support::cpp11::to_string(_dst->dimension(1));
- config_id += "_";
- config_id += support::cpp11::to_string(_dst->dimension(2));
- config_id += "_";
- config_id += support::cpp11::to_string(_dst->dimension(3));
-
- return config_id;
-}
-
-std::set<std::string> ClTemplateResize::get_headers_list() const
-{
- return std::set<std::string>{"helpers.h", "tile_helpers.h"};
-}
-
-Window ClTemplateResize::get_window() const
-{
- ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized");
-
- const unsigned int n0 = adjust_vec_size(16 / _src->element_size(), _src->dimension(0));
- Window win = calculate_max_window(*_dst, Steps(n0));
- return win.collapse(win, Window::DimZ);
-}
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.h
deleted file mode 100644
index 4c69007185..0000000000
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.h
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATERESIZE
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATERESIZE
-
-#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-class ClTemplateResize final : public IGpuTemplateComponentWriter
-{
-public:
- using Attributes = ClComponentResize::Attributes;
-
- /** Constructor
- *
- * @param[in] id Component id
- * @param[in] tensors Tensor arguments to the components
- * @param[in] attributes Component attributes
- */
- ClTemplateResize(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes);
-
- /** Destructor */
- ~ClTemplateResize() override = default;
-
- /** Prevent instances of this class from being copy constructed */
- ClTemplateResize(const ClTemplateResize &resize) = delete;
-
- /** Prevent instances of this class from being copied */
- ClTemplateResize &operator=(const ClTemplateResize &resize) = delete;
-
- /** Allow instances of this class to be move constructed */
- ClTemplateResize(ClTemplateResize &&resize) = default;
-
- /** Allow instances of this class to be moved */
- ClTemplateResize &operator=(ClTemplateResize &&resize) = default;
-
- /** Generate kernel component name */
- std::string get_name() const override;
-
- /** Generate kernel component code template
- *
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return std::string Component code
- */
- std::string get_component_code(const ComponentGroup &comp_group) const override;
-
- /** Declare all variables used by the component in the @p vtable
- *
- * @param[out] vtable Variable table
- * @param[in] comp_group Component group of which the component is a part of
- */
- void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
-
- /** Generate the tag look-up table used to instantiate the component code.
- *
- * @param[in] vtable Variable table
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return TagLUT Tag lookup table
- */
- TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
-
- /** Generate the build options used in the component
- *
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return CLBuildOptions Build options
- */
- CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override;
-
- /** Generate the component config id string used for tuning */
- std::string get_config_id() const override;
-
- /** Generate the header list used in the component */
- std::set<std::string> get_headers_list() const override;
-
- /** Generate the execution window for the component */
- Window get_window() const override;
-
-private:
- const ITensorInfo *_src;
- const ITensorInfo *_dst;
- Attributes _attributes;
-};
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATERESIZE */
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp
deleted file mode 100644
index d0ec91e0a9..0000000000
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "ClTemplateStore.h"
-
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-ClTemplateStore::ClTemplateStore(ComponentId id, const ArgumentPack<ITensorInfo> &tensors)
- : IGpuTemplateComponentWriter{id, tensors}, _src{}, _dst{}
-{
- _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
- _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
-}
-
-std::string ClTemplateStore::get_name() const
-{
- return "store";
-}
-
-std::string ClTemplateStore::get_component_code(const ComponentGroup &comp_group) const
-{
- ARM_COMPUTE_UNUSED(comp_group);
-
- return R"_(
-//------------------ START KERNEL {{meta_kernel_id}} STORE ---------------------
-{
- bool x_cond = PARTIAL_N0 != 0 && get_global_id(0) == 0;
-
- T_STORE_INDIRECT_WIDTH_SELECT({{DST_DATA_TYPE}}, M0, N0, PARTIAL_N0, {{DST_TENSOR_TYPE}}, {{dst}}, g_ind_0, {{dst}}_stride_y, x_cond, {{src}}, g_dst_indirect_y);
-//------------------ END KERNEL {{meta_kernel_id}} STORE ---------------------
-}
-
-)_";
-}
-
-void ClTemplateStore::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
-{
- vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "src");
- vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "dst");
-}
-
-TagLUT ClTemplateStore::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
-{
- TagLUT lut{};
-
- // Arguments and global shared variables
- lut["src"] = vtable.get_variable(_src);
- lut["dst"] = vtable.get_variable(_dst);
-
- // Local build options
- lut["meta_kernel_id"] = id();
- lut["DST_TENSOR_TYPE"] = "BUFFER";
- lut["DST_DATA_TYPE"] = _dst->data_type();
-
- ARM_COMPUTE_UNUSED(comp_group);
- return lut;
-}
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h
deleted file mode 100644
index b8c82ceadd..0000000000
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATESTORE
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATESTORE
-
-#include "arm_compute/core/experimental/Types.h"
-
-#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-class ClTemplateStore final : public IGpuTemplateComponentWriter
-{
-public:
- /** Constructor
- *
- * @param[in] id Component id
- * @param[in] tensors Tensor arguments to the components
- */
- ClTemplateStore(ComponentId id, const ArgumentPack<ITensorInfo> &tensors);
- /** Prevent instances of this class from being copy constructed */
- ClTemplateStore(const ClTemplateStore &store) = delete;
- /** Prevent instances of this class from being copied */
- ClTemplateStore &operator=(const ClTemplateStore &store) = delete;
- /** Allow instances of this class to be move constructed */
- ClTemplateStore(ClTemplateStore &&store) = default;
- /** Allow instances of this class to be moved */
- ClTemplateStore &operator=(ClTemplateStore &&store) = default;
- /** Generate kernel component name */
- std::string get_name() const override;
- /** Generate kernel component code template
- *
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return std::string Component code
- */
- std::string get_component_code(const ComponentGroup &comp_group) const override;
- /** Declare all variables used by the component in the @p vtable
- *
- * @param[out] vtable Variable table
- * @param[in] comp_group Component group of which the component is a part of
- */
- void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
- /** Generate the tag look-up table used to instantiate the component code.
- *
- * @param[in] vtable Variable table
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return TagLUT Tag lookup table
- */
- TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
-
-private:
- const ITensorInfo *_src;
- const ITensorInfo *_dst;
-};
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATESTORE */
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp
deleted file mode 100644
index d3d7c8db83..0000000000
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp
+++ /dev/null
@@ -1,325 +0,0 @@
-/*
- * Copyright (c) 2022-2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "ClTemplateWriter.h"
-
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-
-#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-/// @note: some tags can be unused since they could be used only for the macros, or only for the component code
-std::string ClTemplateWriter::replace_tags(const std::string &code_template, const TagLUT &tags)
-{
- std::string replaced_code = "";
- bool scanning_pattern = false;
- std::string pattern_found = "";
- for (size_t i = 0; i < code_template.size() - 1; ++i)
- {
- if (!scanning_pattern)
- {
- if (code_template[i] == '{' && code_template[i + 1] == '{')
- {
- i += 1;
- scanning_pattern = true;
- pattern_found = "";
- }
- else
- {
- replaced_code += code_template[i];
- }
- }
- else
- {
- if (code_template[i] == '}' && code_template[i + 1] == '}')
- {
- i += 1;
- scanning_pattern = false;
- std::string err = "Pattern " + pattern_found + " not found in tags";
- ARM_COMPUTE_ERROR_ON_MSG(tags.find(pattern_found) == tags.end(), err.c_str());
- replaced_code += tags.find(pattern_found)->second.value;
- }
- else
- {
- pattern_found += code_template[i];
- }
- }
- }
-
- return replaced_code;
-}
-ClTemplateWriter::~ClTemplateWriter()
-{
-}
-ClTemplateWriter::ClTemplateWriter(const GpuKernelComponentGroup &components) : _components{components}
-{
-}
-std::string ClTemplateWriter::get_name()
-{
- return write_kernel_name();
-}
-std::string ClTemplateWriter::get_code()
-{
- return write_code();
-}
-std::string ClTemplateWriter::get_config_id()
-{
- std::string config_id = get_name();
- for (const auto &comp : _components)
- {
- config_id += "--" + comp->template_writer()->get_config_id() + "--";
- }
-
- return config_id;
-}
-
-CLBuildOptions ClTemplateWriter::get_build_options()
-{
- CLBuildOptions build_opts{};
-
- for (const auto &comp : _components)
- {
- build_opts.add_options(comp->template_writer()->get_build_options(_components).options());
- }
-
- return build_opts;
-}
-
-Window ClTemplateWriter::get_window() const
-{
- const auto root_comp = _components.get_root_component();
- ARM_COMPUTE_ERROR_ON_MSG(root_comp == nullptr, "No root component found");
- return root_comp->template_writer()->get_window();
-}
-
-std::map<ITensorInfo::Id, GpuKernelArgument> ClTemplateWriter::get_tensors()
-{
- // Assemble GpuKernelArguments
- std::map<ITensorInfo::Id, GpuKernelArgument> tensors;
- for (const auto t : _components.get_argument_tensors())
- {
- tensors.emplace(t->id(), GpuKernelArgument{*t, _vtable.get_variable(t).kernel_argument_info});
- }
- return tensors;
-}
-
-std::string ClTemplateWriter::write_code()
-{
- ARM_COMPUTE_ERROR_ON_MSG(_components.empty(), "No components found");
-
- // These data structures will hold the data from all the components in the blueprint
- std::set<std::string> headers_list{};
- std::set<std::string> additional_macros{};
- std::vector<std::string> component_codes{}; // vector because order matters
-
- // Pass 1: Declare all kernel variables
- for (auto &component : _components)
- {
- component->template_writer()->declare_variables(_vtable, _components);
- }
- // Pass 2: Generate component codes
- for (auto &component : _components)
- {
- const auto component_writer = component->template_writer();
- auto curr_headers_list = component_writer->get_headers_list();
- auto curr_additional_macros = component_writer->get_additional_macros();
- auto curr_component_code = component_writer->get_component_code(_components);
- const auto var_lut = component_writer->get_tag_lut(
- _vtable,
- _components); // Ideally can be merged with get_component_code once we have finer-grained code generation technique
- component_codes.push_back(replace_tags(curr_component_code, var_lut));
-
- headers_list.insert(curr_headers_list.begin(), curr_headers_list.end());
- if (!additional_macros.empty()) // Some components might not have any
- {
- additional_macros.insert(replace_tags(curr_additional_macros, var_lut));
- }
- }
-
- // Step 3: Assemble the data gathered by traversing the graph into the string "code"
- std::string code = "";
-
- for (auto &header : headers_list)
- {
-#if defined(EMBEDDED_KERNELS)
- code += CLKernelLibrary::get().get_program(header).first;
-#else // defined(EMBEDDED_KERNELS)
- code += "#include \"" + header + "\"\n";
-#endif // defined(EMBEDDED_KERNELS)
- }
-
- for (auto &macros : additional_macros)
- {
- code += macros;
- }
-
- auto arguments = _components.get_argument_tensors();
- std::sort(arguments.begin(), arguments.end(),
- [](const ITensorInfo *l, const ITensorInfo *r) { return l->id() < r->id(); });
- code += write_kernel_signature(_vtable.get_variable_list(arguments));
-
- code += "\n{\n\n";
-
- code += " //------------------ START KERNEL_BUILDER_COORDINATE ---------------------\n\n";
- code += write_global_section();
- code += " //------------------ END KERNEL_BUILDER_COORDINATE ---------------------\n";
-
- {
- const auto tiles = _components.get_tiles();
- std::stringstream tiles_ss;
-
- tiles_ss << " //------------------ START TILE DECLARATION ---------------------\n";
-
- for (auto tile : tiles)
- {
- const auto var = _vtable.get_variable(tile);
- const auto data_type = get_cl_type_from_data_type(tile->data_type());
- const auto var_name = var.uniq_name;
-
- tiles_ss << " TILE(" << data_type << ", M0, N0, " << var_name << ");\n";
- }
-
- tiles_ss << " //------------------ END TILE DECLARATION ---------------------\n";
-
- code += tiles_ss.str();
- }
-
- for (const auto &component_code : component_codes)
- {
- code += component_code;
- code += "\n";
- }
-
- code += "}\n";
-
- return code;
-}
-std::string ClTemplateWriter::write_global_section() const
-{
- const auto dst_info = _components.get_any_dst_tensor();
- const auto dst_w = dst_info->dimension(0);
- const auto tile_w = std::max(1, get_window().x().step());
- const auto tile_h = std::max(1, get_window().y().step());
- auto leftover_w = dst_w % tile_w;
-
- std::string code = "";
- code += std::string(" int g_ind_0 = GET_SPATIAL_IDX(0, ") + std::to_string(tile_w) + ", " +
- std::to_string(leftover_w) + ");\n";
- code += std::string(" int g_ind_1 = GET_SPATIAL_IDX(1, ") + std::to_string(tile_h) + ", " + "0);\n";
- code += std::string(" int g_ind_2 = GET_SPATIAL_IDX(2, 1, 0);\n\n");
-
- code += " const bool g_cond_x = (g_ind_0 == 0);\n";
- code += " const bool g_cond_y = (g_ind_1 == 0);\n";
-
- return code;
-}
-std::string ClTemplateWriter::write_argument_declaration(const GpuKernelVariableTable::TensorVariable &var) const
-{
- std::string code;
- switch (var.kernel_argument_info.type)
- {
- case GpuKernelArgumentInfo::Type::Vector:
- {
- code += "\n VECTOR_DECLARATION(" + var.uniq_name + ")";
- break;
- }
- case GpuKernelArgumentInfo::Type::Image:
- {
- code += "\n IMAGE_DECLARATION(" + var.uniq_name + ")";
- break;
- }
- case GpuKernelArgumentInfo::Type::Image_3D:
- {
- code += "\n IMAGE_DECLARATION(" + var.uniq_name + "),";
- code += "\n unsigned int " + var.uniq_name + "_stride_z";
- break;
- }
- case GpuKernelArgumentInfo::Type::Image_3D_Export_To_ClImage2D:
- {
- code += "\n __read_only image2d_t " + var.uniq_name + "_img,";
- code += "\n unsigned int " + var.uniq_name + "_stride_z";
- break;
- }
- case GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer:
- {
- code += "\n TENSOR4D_T(" + var.uniq_name + ", BUFFER)";
- break;
- }
- case GpuKernelArgumentInfo::Type::Tensor_4D_t_Image:
- {
- code += "\n TENSOR4D_T(" + var.uniq_name + ", IMAGE)";
- break;
- }
- case GpuKernelArgumentInfo::Type::Tensor_3D:
- {
- code += "\n TENSOR3D_DECLARATION(" + var.uniq_name + ")";
- break;
- }
- default:
- {
- ARM_COMPUTE_ERROR("Unsupported declaration generation for GpuKernelArgumentInfo::Type");
- }
- }
- return code;
-}
-std::string ClTemplateWriter::write_kernel_signature(const GpuKernelVariableTable::VariableList &argument_list) const
-{
- std::string code = "\n__kernel void " + write_kernel_name() + "(";
-
- for (int i = 0; i < static_cast<int>(argument_list.size()) - 1; ++i)
- {
- code += write_argument_declaration(argument_list[i]) + ",";
- }
- if (static_cast<int>(argument_list.size()) - 1 >= 0)
- {
- code += write_argument_declaration(argument_list[argument_list.size() - 1]);
- }
-
- code += ')';
-
- return code;
-}
-std::string ClTemplateWriter::write_kernel_name() const
-{
- if (_components.empty())
- {
- return "empty_kernel";
- }
- std::string name = _components.empty() ? "" : _components[0]->template_writer()->get_name();
- for (size_t i = 1; i < _components.size(); ++i)
- {
- name += "___";
- name += _components[i]->template_writer()->get_name();
- }
-
- return name;
-}
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.h
deleted file mode 100644
index 83f617b6c6..0000000000
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.h
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEWRITER
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEWRITER
-
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
-#include "src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h"
-
-#include <map>
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-/** Use a templated-string-based method to write kernel code
- * It stitches the component code templates together based on the valid fusion configuration.
- * It then instantiates the actual kernel code from the template and the generated tag lookup table.
- */
-class ClTemplateWriter : public IGpuKernelWriter
-{
-public:
- /** Instantiates a kernel code string from the kernel code template
- * @note: some tags can be unused since they could be used only for the macros, or only for the component code
- *
- * @param[in] code_template Kernel code template
- * @param[in] tags Tag lookup table
- *
- * @return std::string Instantiated kernel string
- */
- static std::string replace_tags(const std::string &code_template, const TagLUT &tags);
- /** Default constructor */
- ClTemplateWriter() = default;
- /** Constructor
- *
- * @param[in] components Kernel component group from which the kernel will be generated
- */
- ClTemplateWriter(const GpuKernelComponentGroup &components);
- /** Destructor */
- ~ClTemplateWriter() override;
- /** Generate kernel name */
- std::string get_name() override;
- /** Generate kernel code */
- std::string get_code() override;
- /** Generate build options */
- CLBuildOptions get_build_options() override;
- /** Generate config id string of the entire kernel. This is used for tuning */
- std::string get_config_id() override;
- /** Generate execution window */
- Window get_window() const override;
- /** Get the kernel argument lists of the kernel*/
- std::map<ITensorInfo::Id, GpuKernelArgument> get_tensors() override;
-
-private:
- std::string write_kernel_name() const;
- std::string write_code();
- std::string write_global_section() const;
- std::string write_argument_declaration(const GpuKernelVariableTable::TensorVariable &var) const;
- std::string write_kernel_signature(const GpuKernelVariableTable::VariableList &argument_list) const;
-
-private:
- GpuKernelComponentGroup _components{};
- GpuKernelVariableTable _vtable{};
-};
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEWRITER */