aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGunes Bayir <gunes.bayir@arm.com>2024-02-07 15:34:45 +0000
committerGunes Bayir <gunes.bayir@arm.com>2024-02-09 15:59:45 +0000
commit0ee13afc4429411de9a05ba4c2ff8a580784b568 (patch)
treec9ee1acf684d52b92ffb7500b0b65eee8377ce45 /src
parenta3e1b50588b89a2c0c67da2679728a422fc16402 (diff)
downloadComputeLibrary-0ee13afc4429411de9a05ba4c2ff8a580784b568.tar.gz
Remove CKW prototype and Template Writer
Gpu code in dynamic fusion is now written by stable CKW. We do not need CKW protoype and the older writer implementation, i.e. TemplateWriter. It also removes the need for the flag -DACL_INTERNAL_TEST_CKW_IN_DF to compile and test dynamic fusion operator. Resolves: COMPMID-6715 Signed-off-by: Gunes Bayir <gunes.bayir@arm.com> Change-Id: I9f9453311e79d9be612bd4754240d832f98503e8 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/11116 Benchmark: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Jakub Sujak <jakub.sujak@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src')
-rw-r--r--src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp139
-rw-r--r--src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.h24
-rw-r--r--src/dynamic_fusion/sketch/gpu/GpuKernelArgument.cpp37
-rw-r--r--src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h99
-rw-r--r--src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h16
-rw-r--r--src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp22
-rw-r--r--src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.h10
-rw-r--r--src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h71
-rw-r--r--src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h17
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h3
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.h10
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h12
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp14
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h22
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp16
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h22
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.cpp14
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.h19
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp15
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h22
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp14
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h23
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.cpp93
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.h130
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.cpp95
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.h127
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.cpp5
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.cpp12
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.h16
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.cpp10
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.h13
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.cpp19
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.h20
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp17
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h22
-rw-r--r--src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp3
-rw-r--r--src/dynamic_fusion/sketch/gpu/operators/GpuMatMul.cpp4
-rw-r--r--src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp11
-rw-r--r--src/dynamic_fusion/sketch/gpu/operators/GpuTanh.cpp3
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.cpp114
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h135
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h140
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp181
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h120
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.cpp212
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.h103
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.cpp364
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.h112
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp393
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h116
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp274
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.h115
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.cpp267
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.h107
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.cpp171
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.h106
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp470
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.h132
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.cpp161
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.h107
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.cpp279
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.h120
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp89
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h86
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp325
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.h92
66 files changed, 110 insertions, 6022 deletions
diff --git a/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp b/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp
index 9ca20fa152..eab5cddd07 100644
--- a/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp
+++ b/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,12 +26,11 @@
#include "arm_compute/core/CL/ICLTensor.h"
#include "src/core/CL/CLUtils.h"
-#ifdef ACL_INTERNAL_TEST_CKW_IN_DF
#include "src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.h"
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
#include "src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h"
#include "src/gpu/cl/ClKernelLibrary.h"
#include "support/Cast.h"
+
namespace arm_compute
{
namespace experimental
@@ -61,128 +60,6 @@ void ClKernelRuntime::configure(const ClCompileContext &compile_ctx, const GpuKe
_arguments = code.arguments();
}
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-
-inline void ClKernelRuntime::add_tensor_argument(unsigned int &idx,
- const GpuKernelArgumentInfo &arg,
- const ICLTensor *tensor,
- const Window &arg_slice,
- std::vector<cl::Image2D> &cl_images)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(tensor);
-
- switch (arg.type)
- {
- case GpuKernelArgumentInfo::Type::Scalar:
- {
- ARM_COMPUTE_ERROR("Unsupported yet");
- break;
- }
-
- case GpuKernelArgumentInfo::Type::Vector:
- {
- add_1D_tensor_argument(idx, tensor, arg_slice);
- break;
- }
-
- case GpuKernelArgumentInfo::Type::Image:
- {
- add_2D_tensor_argument(idx, tensor, arg_slice);
- break;
- }
- case GpuKernelArgumentInfo::Type::Image_Reinterpret_As_3D:
- {
- add_2D_tensor_argument(idx, tensor, arg_slice);
- const unsigned int total_cross_plane_pad = tensor->info()->padding().top + tensor->info()->padding().bottom;
- _kernel.setArg<cl_uint>(idx++, static_cast<unsigned int>(total_cross_plane_pad));
- break;
- }
- case GpuKernelArgumentInfo::Type::Image_Export_To_ClImage2D:
- {
- const TensorShape shape2d(tensor->info()->dimension(0) / 4, tensor->info()->dimension(1) *
- tensor->info()->dimension(2) *
- tensor->info()->dimension(3));
- const size_t image_row_pitch = tensor->info()->strides_in_bytes()[1];
- cl::Image2D tensor_image2d =
- create_image2d_from_buffer(CLKernelLibrary::get().context(), tensor->cl_buffer(), shape2d,
- tensor->info()->data_type(), image_row_pitch, CLImage2DType::ReadOnly);
- cl_images.push_back(tensor_image2d);
- _kernel.setArg(idx++, tensor_image2d);
- break;
- }
-
- case GpuKernelArgumentInfo::Type::Image_3D:
- {
- add_2D_tensor_argument(idx, tensor, arg_slice);
- _kernel.setArg<cl_uint>(idx++, static_cast<unsigned int>(tensor->info()->strides_in_bytes()[2]));
- break;
- }
- case GpuKernelArgumentInfo::Type::Image_3D_Export_To_ClImage2D:
- {
- const TensorShape shape2d(tensor->info()->dimension(0) / 4, tensor->info()->dimension(1) *
- tensor->info()->dimension(2) *
- tensor->info()->dimension(3));
- const size_t image_row_pitch = tensor->info()->strides_in_bytes()[1];
- cl::Image2D tensor_image2d =
- create_image2d_from_buffer(CLKernelLibrary::get().context(), tensor->cl_buffer(), shape2d,
- tensor->info()->data_type(), image_row_pitch, CLImage2DType::ReadOnly);
- cl_images.push_back(tensor_image2d);
- _kernel.setArg(idx++, tensor_image2d);
- _kernel.setArg<cl_uint>(idx++, static_cast<unsigned int>(tensor->info()->strides_in_bytes()[2]));
- break;
- }
-
- case GpuKernelArgumentInfo::Type::Tensor_3D:
- {
- add_3D_tensor_argument(idx, tensor, arg_slice);
- break;
- }
-
- case GpuKernelArgumentInfo::Type::Tensor_4D:
- {
- add_4D_tensor_argument(idx, tensor, arg_slice);
- break;
- }
- case GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer:
- {
- add_4d_tensor_nhwc_argument(idx, tensor);
- break;
- }
- case GpuKernelArgumentInfo::Type::Tensor_4D_t_Image:
- {
- const size_t image_w = tensor->info()->dimension(0) / 4;
- const size_t image_h = tensor->info()->tensor_shape().total_size_upper(1);
- const size_t image_stride_y = tensor->info()->strides_in_bytes()[1];
-
- cl::Image2D tensor_image2d = create_image2d_from_buffer(
- CLKernelLibrary::get().context(), tensor->cl_buffer(), TensorShape(image_w, image_h),
- tensor->info()->data_type(), image_stride_y, CLImage2DType::ReadOnly);
- cl_images.push_back(tensor_image2d);
-
- _kernel.setArg(idx++, tensor_image2d);
- add_4d_tensor_nhwc_argument(idx, tensor);
- break;
- }
- case GpuKernelArgumentInfo::Type::Tensor_Special_0:
- {
- const ITensorInfo *info = tensor->info();
- const Strides &strides = info->strides_in_bytes();
-
- _kernel.setArg(idx++, tensor->cl_buffer());
- const size_t dim1xdim2 = info->tensor_shape()[1] * info->tensor_shape()[2];
- _kernel.setArg<cl_int>(idx++, static_cast<int32_t>(dim1xdim2));
- const size_t stride1 = strides[1];
- _kernel.setArg<cl_int>(idx++, static_cast<int32_t>(stride1));
- break;
- }
- default:
- {
- ARM_COMPUTE_ERROR("Unsupported");
- }
- }
-}
-
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
inline void ClKernelRuntime::add_kernel_argument(unsigned int &idx,
const GpuKernelArgumentBinding &arg,
const ICLTensor *tensor,
@@ -234,7 +111,6 @@ inline void ClKernelRuntime::add_kernel_argument(unsigned int
}
}
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
void ClKernelRuntime::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue)
{
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
@@ -253,17 +129,7 @@ void ClKernelRuntime::run_op(ITensorPack &tensors, const Window &window, cl::Com
// Set kernel arguments
// CLImages created from tensor arguments. Need to be retained until enqueue
std::vector<cl::Image2D> cl_images;
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- for (auto id_arg : _arguments)
- {
- const auto arg = id_arg.second;
- auto tensor = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(id_arg.first));
- ARM_COMPUTE_ERROR_ON_NULLPTR(tensor);
- ARM_COMPUTE_ERROR_ON_NULLPTR(tensor->info());
- add_tensor_argument(idx, *arg.kernel_argument_info(), tensor, slice, cl_images);
- }
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
for (const auto &arg : _arguments)
{
auto tensor = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(arg.id()));
@@ -271,7 +137,6 @@ void ClKernelRuntime::run_op(ITensorPack &tensors, const Window &window, cl::Com
ARM_COMPUTE_ERROR_ON_NULLPTR(tensor->info());
add_kernel_argument(idx, arg, tensor, cl_images);
}
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
// Dispatch kernel
enqueue(queue, *this, slice, lws_hint(), use_dummy_work_items);
diff --git a/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.h b/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.h
index e78567eb9d..148e4db581 100644
--- a/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.h
+++ b/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef SRC_DYNAMIC_FUSION_RUNTIME_GPU_CL_CLKERNELRUNTIME
-#define SRC_DYNAMIC_FUSION_RUNTIME_GPU_CL_CLKERNELRUNTIME
+#ifndef ACL_SRC_DYNAMIC_FUSION_RUNTIME_GPU_CL_CLKERNELRUNTIME_H
+#define ACL_SRC_DYNAMIC_FUSION_RUNTIME_GPU_CL_CLKERNELRUNTIME_H
#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h"
@@ -59,21 +59,6 @@ public:
virtual void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
private:
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- /** Set a kernel tensor argument
- *
- * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
- * @param[in] arg Kernel argument descriptor accompanying @p tensor
- * @param[in] tensor Tensor to set as an argument of the object's kernel
- * @param[in] arg_slice Window the kernel will be run on
- * @param[out] cl_images Extra cl images created from the tensor (will need to be retained until the kernel is enqueued)
- */
- inline void add_tensor_argument(unsigned int &idx,
- const GpuKernelArgumentInfo &arg,
- const ICLTensor *tensor,
- const Window &arg_slice,
- std::vector<cl::Image2D> &cl_images);
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
/** Set a kernel argument as part of a tensor
*
* @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
@@ -85,7 +70,6 @@ private:
const GpuKernelArgumentBinding &arg,
const ICLTensor *tensor,
std::vector<cl::Image2D> &cl_images);
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
private:
GpuKernelArgumentList _arguments{};
@@ -94,4 +78,4 @@ private:
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute
-#endif /* SRC_DYNAMIC_FUSION_RUNTIME_GPU_CL_CLKERNELRUNTIME */
+#endif // ACL_SRC_DYNAMIC_FUSION_RUNTIME_GPU_CL_CLKERNELRUNTIME_H
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.cpp b/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.cpp
deleted file mode 100644
index 9cecfc2ffd..0000000000
--- a/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.cpp
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-bool operator==(const GpuKernelArgumentInfo &info0, const GpuKernelArgumentInfo &info1)
-{
- return info0.type == info1.type;
-}
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h b/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h
index 03817173f4..c923bf9c16 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELARGUMENT
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELARGUMENT
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELARGUMENT_H
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELARGUMENT_H
#include "arm_compute/core/TensorInfo.h"
@@ -32,96 +32,6 @@ namespace experimental
{
namespace dynamic_fusion
{
-/** Contain information required to set up a kernel argument at run time
- * @deprecated To be removed along with ClTemplateWriter
- */
-struct GpuKernelArgumentInfo
-{
- /** Enumerate all the tensor arguments variants used by all kernel implementations. */
- enum class Type : int
- {
- Scalar,
-
- Vector,
-
- Image,
- Image_Reinterpret_As_3D,
- Image_Export_To_ClImage2D,
-
- Image_3D, // 3D Tensor represented as a 2D Image + stride_z
- Image_3D_Export_To_ClImage2D,
-
- Tensor_3D,
- Tensor_4D,
- Tensor_4D_t_Buffer,
- Tensor_4D_t_Image,
-
- Tensor_Special_0,
- };
- /** Default constructor */
- GpuKernelArgumentInfo() = default;
- /** Constructor */
- GpuKernelArgumentInfo(Type type) : type{type}
- {
- }
- Type type{Type::Tensor_4D_t_Buffer};
-};
-bool operator==(const GpuKernelArgumentInfo &info0, const GpuKernelArgumentInfo &info1);
-/** Kernel argument information linked with its corresponding @ref ITensorInfo
- * @deprecated To be removed along with ClTemplateWriter
- */
-class GpuKernelArgument
-{
-public:
- /** Constructor
- *
- * @param[in] tensor_info Associated @ref ITensorInfo
- * @param[in] kernel_arg_info Associated @ref GpuKernelArgumentInfo
- */
- GpuKernelArgument(const ITensorInfo &tensor_info, const GpuKernelArgumentInfo &kernel_arg_info)
- : _tensor_info{tensor_info}, _kernel_arg_info{kernel_arg_info}
- {
- }
- /** Get workload tensor id */
- ITensorInfo::Id id() const
- {
- return _tensor_info.id();
- }
- /** Get associated @ref ITensorInfo */
- ITensorInfo *tensor_info()
- {
- return &_tensor_info;
- }
- /** Get associated @ref ITensorInfo */
- const ITensorInfo *tensor_info() const
- {
- return &_tensor_info;
- }
- /** Get associated @ref GpuKernelArgumentInfo */
- GpuKernelArgumentInfo *kernel_argument_info()
- {
- return &_kernel_arg_info;
- }
- /** Get associated @ref GpuKernelArgumentInfo */
- const GpuKernelArgumentInfo *kernel_argument_info() const
- {
- return &_kernel_arg_info;
- }
- /** Check if the associated workload tensor has valid id
- *
- * @return true if has valid id
- * @return false otherwise
- */
- bool has_valid_id() const
- {
- return _tensor_info.has_valid_id();
- }
-
-private:
- TensorInfo _tensor_info{};
- GpuKernelArgumentInfo _kernel_arg_info{};
-};
-#ifdef ACL_INTERNAL_TEST_CKW_IN_DF
/** Describe how the tensor runtime memory can be accessed
*
* Please see documentation under @ref GpuKernelArgumentBinding
@@ -243,9 +153,8 @@ private:
};
Value _value;
};
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELARGUMENT */
+#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELARGUMENT_H
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h b/src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h
index 24812cd8a7..11d916eec9 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,19 +21,15 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELSOURCECODE
-#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELSOURCECODE
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELSOURCECODE_H
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELSOURCECODE_H
#include "arm_compute/core/CL/CLCompileContext.h"
#include "arm_compute/core/Window.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-#include <map>
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
#include <deque>
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
#include <string>
namespace arm_compute
@@ -43,11 +39,7 @@ namespace experimental
namespace dynamic_fusion
{
/** The argument list of a @ref GpuKernelSourceCode */
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-using GpuKernelArgumentList = std::map<ITensorInfo::Id, GpuKernelArgument>;
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
using GpuKernelArgumentList = std::deque<GpuKernelArgumentBinding>;
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
/** Container of kernel code to be compiled and run in a @ref GpuUnitWorkload
*/
@@ -132,4 +124,4 @@ private:
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute
-#endif /* ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELSOURCECODE */
+#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELSOURCECODE_H
diff --git a/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp b/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp
index 502ceab807..725a46e91c 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp
+++ b/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,14 +26,10 @@
#include "arm_compute/core/experimental/Types.h"
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h"
#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h"
#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
#include "src/dynamic_fusion/sketch/gpu/GpuComponentServices.h"
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.h"
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h"
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
namespace arm_compute
{
@@ -41,8 +37,8 @@ namespace experimental
{
namespace dynamic_fusion
{
-GpuLogicalKernel::GpuLogicalKernel(GpuComponentServices *services, const GpuKernelComponentGroup &components)
- : _comp_group{components}, _store_components{}
+GpuLogicalKernel::GpuLogicalKernel(GpuComponentServices *services, GpuKernelComponentGroup components) // NOLINT
+ : _comp_group{std::move(components)}, _store_components{}
{
ARM_COMPUTE_UNUSED(services);
}
@@ -50,19 +46,11 @@ GpuLogicalKernel::GpuLogicalKernel(GpuComponentServices *services, const GpuKern
GpuKernelSourceCode GpuLogicalKernel::write_kernel_code()
{
GpuKernelSourceCode code;
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- ClTemplateWriter writer{_comp_group};
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
- GpuCkwDriver writer{_comp_group};
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
+ GpuCkwDriver writer{_comp_group};
code.name(writer.get_name());
code.code(writer.get_code());
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- code.arguments(writer.get_tensors());
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
code.arguments(writer.get_kernel_arguments());
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
code.build_options(writer.get_build_options());
code.config_id(writer.get_config_id());
code.window(writer.get_window());
diff --git a/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.h b/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.h
index 1fd40f0acd..e2bc83b286 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.h
+++ b/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022, 2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_GPULOGICALKERNEL
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPULOGICALKERNEL
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPULOGICALKERNEL_H
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPULOGICALKERNEL_H
#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h"
@@ -52,7 +52,7 @@ public:
* @param[in] services @ref GpuComponentServices to be used
* @param[in] components Component group from which this logical kernel is initialized
*/
- explicit GpuLogicalKernel(GpuComponentServices *services, const GpuKernelComponentGroup &components);
+ explicit GpuLogicalKernel(GpuComponentServices *services, GpuKernelComponentGroup components); // NOLINT
/** Allow instances of this class to be copy constructed */
GpuLogicalKernel(const GpuLogicalKernel &) = default;
/** Allow instances of this class to be copied */
@@ -71,4 +71,4 @@ private:
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_GPULOGICALKERNEL */
+#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPULOGICALKERNEL_H
diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h
index 43bcc47fa0..5d75bcaaa0 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h
+++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSOURCECODE
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSOURCECODE
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSOURCECODE_H
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSOURCECODE_H
#include "arm_compute/core/experimental/Types.h"
#include "arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h"
@@ -36,7 +36,6 @@ namespace experimental
{
namespace dynamic_fusion
{
-#ifdef ACL_INTERNAL_TEST_CKW_IN_DF
namespace
{
/** Extract kernel arguments of one tensor from a flat list of kernel arguments.
@@ -70,7 +69,6 @@ GpuKernelArgumentList extract_kernel_args_for_one_tensor(GpuKernelArgumentList &
return tensor_kargs;
}
} // namespace
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
/** Uniquely identifies a @ref GpuUnitWorkload within a @ref GpuWorkloadSourceCode */
using UnitWorkloadId = int32_t;
@@ -83,25 +81,11 @@ class GpuWorkloadArgument
public:
/** Default constructor */
GpuWorkloadArgument() = default;
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
/** Constructor
*
- * @param[in] tensor_info @ref ITensorInfo of the workload argument
- * @param[in] mem_desc @ref MemoryDescriptor of the workload argument
- * @param[in] kernel_arg_info @ref GpuKernelArgumentInfo of the workload argument
- */
- GpuWorkloadArgument(const ITensorInfo &tensor_info,
- const MemoryDescriptor &mem_desc,
- const GpuKernelArgumentInfo &kernel_arg_info)
- : _tensor_info{tensor_info}, _mem_desc{mem_desc}, _kernel_arg_info{kernel_arg_info}
- {
- }
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
- /** Constructor
- *
- * @param[in] tensor_info @ref ITensorInfo of the workload argument
- * @param[in] mem_desc @ref MemoryDescriptor of the workload argument
- * @param[in] kernel_arg_list @ref GpuKernelArgumentList of the workload argument
+ * @param[in] tensor_info @ref ITensorInfo of the workload argument
+ * @param[in] mem_desc @ref MemoryDescriptor of the workload argument
+ * @param[in] kernel_args @ref GpuKernelArgumentList of the workload argument
*/
GpuWorkloadArgument(const ITensorInfo &tensor_info,
const MemoryDescriptor &mem_desc,
@@ -109,7 +93,6 @@ public:
: _tensor_info{tensor_info}, _mem_desc{mem_desc}, _kernel_args{kernel_args}
{
}
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
/** Get tensor id within workload */
ITensorInfo::Id id() const
{
@@ -135,18 +118,6 @@ public:
{
return &_mem_desc;
}
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- /** Get @ref GpuKernelArgumentInfo of the argument */
- GpuKernelArgumentInfo *kernel_argument_info()
- {
- return &_kernel_arg_info;
- }
- /** Get @ref GpuKernelArgumentInfo of the argument */
- const GpuKernelArgumentInfo *kernel_argument_info() const
- {
- return &_kernel_arg_info;
- }
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
/** Get @ref GpuKernelArgumentList of the workload tensor */
GpuKernelArgumentList *kernel_argument_list()
{
@@ -157,7 +128,6 @@ public:
{
return &_kernel_args;
}
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
/** Check if the workload argument has valid id
*
* @return true If has valid id
@@ -169,13 +139,9 @@ public:
}
private:
- TensorInfo _tensor_info{};
- MemoryDescriptor _mem_desc{};
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- GpuKernelArgumentInfo _kernel_arg_info{};
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
+ TensorInfo _tensor_info{};
+ MemoryDescriptor _mem_desc{};
GpuKernelArgumentList _kernel_args{};
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
};
/** Describes when a unit workload is run.
@@ -259,22 +225,7 @@ public:
const auto uwk_id = static_cast<UnitWorkloadId>(_unit_workloads.size());
const auto unit_work = GpuUnitWorkload(uwk_id, kernel_code, stage);
_unit_workloads.push_back(unit_work);
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- ARM_COMPUTE_UNUSED(context);
- // Assemble kernel argument with memory descriptor to form workload argument
- for (const auto &id_arg : kernel_code.arguments())
- {
- const auto arg_id = id_arg.first;
- const auto arg = id_arg.second;
- _workload_arguments[arg_id] =
- GpuWorkloadArgument{*arg.tensor_info(), mem_map.at(arg_id), *arg.kernel_argument_info()};
- if (_tensor_uwork_map.find(arg_id) == _tensor_uwork_map.end())
- {
- _tensor_uwork_map[arg_id] = std::set<UnitWorkloadId>();
- }
- _tensor_uwork_map[arg_id].insert(uwk_id);
- }
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
+
GpuKernelArgumentList flat_kernel_args = kernel_code.arguments();
GpuKernelArgumentList tensor_kargs{};
while (true)
@@ -296,7 +247,7 @@ public:
_tensor_uwork_map[tensor_id].insert(uwk_id);
}
}
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
+
return uwk_id;
}
/** Get a unit workload from its id */
@@ -346,4 +297,4 @@ private:
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSOURCECODE */
+#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSOURCECODE_H
diff --git a/src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h b/src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h
index ad474674f9..84972501de 100644
--- a/src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h
+++ b/src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_IGPUKERNELWRITER
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_IGPUKERNELWRITER
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_IGPUKERNELWRITER_H
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_IGPUKERNELWRITER_H
#include "arm_compute/core/CL/CLCompileContext.h"
#include "arm_compute/core/Window.h"
@@ -62,23 +62,14 @@ public:
virtual std::string get_config_id() = 0;
/** Generate execution window */
virtual Window get_window() const = 0;
- /** Get the kernel argument lists of the kernel
- * @deprecated To be removed along with ClTemplateWriter
- */
- virtual std::map<ITensorInfo::Id, GpuKernelArgument> get_tensors()
- {
- return {};
- }
-#ifdef ACL_INTERNAL_TEST_CKW_IN_DF
/** Get the flat list of arguments of the kernel*/
virtual GpuKernelArgumentList get_kernel_arguments()
{
return {};
}
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
};
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_IGPUKERNELWRITER */
+#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_IGPUKERNELWRITER_H
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h
index b80ce0d816..f8770920b7 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h
@@ -24,15 +24,12 @@
#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_GPUCKWDRIVER_H
#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_GPUCKWDRIVER_H
-#include "ckw/Kernel.h"
-
#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
#include "src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h"
#include "compute_kernel_writer/include/ckw/Kernel.h"
#include "compute_kernel_writer/include/ckw/KernelArgument.h"
-#include <map>
#include <string>
namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.h
index f1f0e6747b..c9ce7eb269 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.h
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2023 Arm Limited.
+ * Copyright (c) 2023-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWSTORE
-#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWSTORE
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWSTORE_H
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWSTORE_H
#include "src/core/common/Macros.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/IGpuCkwComponentDriver.h"
@@ -33,8 +33,6 @@ namespace experimental
{
namespace dynamic_fusion
{
-/** An interface used by @ref ClTemplateWriter to write source code for a kernel component
- */
class GpuCkwStore : public IGpuCkwComponentDriver
{
public:
@@ -61,4 +59,4 @@ private:
} // namespace experimental
} // namespace arm_compute
-#endif /* ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWSTORE */
+#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWSTORE_H
diff --git a/src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h b/src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h
index 4b8eea2f57..6678c929e9 100644
--- a/src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h
+++ b/src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_IGPUKERNELCOMPONENT
-#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_IGPUKERNELCOMPONENT
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_IGPUKERNELCOMPONENT_H
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_IGPUKERNELCOMPONENT_H
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h"
@@ -100,10 +100,6 @@ public:
return _properties;
}
/** Get writer for the component */
- virtual const IGpuTemplateComponentWriter *template_writer() const
- {
- return nullptr;
- }
virtual const IGpuCkwComponentDriver *ckw_component_driver() const
{
return nullptr;
@@ -119,4 +115,4 @@ private:
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute
-#endif /* ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_IGPUKERNELCOMPONENT */
+#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_IGPUKERNELCOMPONENT_H
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp
index fdf528a65d..e316bdf46d 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,11 +24,7 @@
#include "ClComponentActivation.h"
#include "src/core/CL/CLValidate.h"
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h"
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.h"
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
namespace arm_compute
{
@@ -69,11 +65,7 @@ ClComponentActivation::ClComponentActivation(ComponentId
const ArgumentPack<ITensorInfo> &tensors,
const Attributes &attributes)
: IGpuKernelComponent{id, properties, tensors},
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- _component_writer{std::make_unique<ClTemplateActivation>(id, tensors, attributes)}
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
_component_writer{std::make_unique<GpuCkwActivation>(id, tensors, attributes)}
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
{
}
@@ -81,11 +73,7 @@ ClComponentActivation::~ClComponentActivation()
{
}
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-const IGpuTemplateComponentWriter *ClComponentActivation::template_writer() const
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
const IGpuCkwComponentDriver *ClComponentActivation::ckw_component_driver() const
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
{
return _component_writer.get();
}
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h
index 02c854356a..b8185158f3 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTACTIVATION
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTACTIVATION
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTACTIVATION_H
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTACTIVATION_H
#include "arm_compute/function_info/ActivationLayerInfo.h"
@@ -41,11 +41,7 @@ template <typename T>
class ArgumentPack;
/** Forward declaration */
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-class ClTemplateActivation;
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
class GpuCkwActivation;
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
class ClComponentActivation final : public IGpuKernelComponent
{
@@ -106,11 +102,7 @@ public:
ClComponentActivation &operator=(ClComponentActivation &&component) = default;
/** Get writer for the component */
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- const IGpuTemplateComponentWriter *template_writer() const override;
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
- const IGpuCkwComponentDriver *ckw_component_driver() const override;
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
+ const IGpuCkwComponentDriver *ckw_component_driver() const override;
/** Get component type */
GpuComponentType type() const override
@@ -119,13 +111,9 @@ public:
}
private:
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- std::unique_ptr<ClTemplateActivation> _component_writer;
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
std::unique_ptr<GpuCkwActivation> _component_writer;
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
};
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTACTIVATION */
+#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTACTIVATION_H
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp
index b1636795a3..e1850d78c4 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -27,11 +27,7 @@
#include "src/core/CL/CLValidate.h"
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.h"
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.h"
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
namespace arm_compute
{
@@ -72,22 +68,16 @@ ClComponentCast::ClComponentCast(ComponentId id,
const Attributes &attributes,
const Settings &settings)
: IGpuKernelComponent{id, properties, tensors},
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- _component_writer{std::make_unique<ClTemplateCast>(id, tensors, attributes)}
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
_component_writer{std::make_unique<GpuCkwCast>(id, tensors, attributes)}
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
{
ARM_COMPUTE_UNUSED(attributes, settings);
}
+
ClComponentCast::~ClComponentCast()
{
}
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-const IGpuTemplateComponentWriter *ClComponentCast::template_writer() const
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
+
const IGpuCkwComponentDriver *ClComponentCast::ckw_component_driver() const
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
{
return _component_writer.get();
}
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h
index ed77b1203b..201dacc288 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTCAST
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTCAST
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTCAST_H
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTCAST_H
#include "arm_compute/dynamic_fusion/sketch/attributes/CastAttributes.h"
@@ -49,11 +49,7 @@ private:
};
/** Forward declaration */
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-class ClTemplateCast;
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
class GpuCkwCast;
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
class ClComponentCast final : public IGpuKernelComponent
{
@@ -120,11 +116,7 @@ public:
/** Allow instances of this class to be moved */
ClComponentCast &operator=(ClComponentCast &&component) = default;
/** Get writer for the component */
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- const IGpuTemplateComponentWriter *template_writer() const override;
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
const IGpuCkwComponentDriver *ckw_component_driver() const override;
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
/** Get component type */
GpuComponentType type() const override
{
@@ -132,14 +124,10 @@ public:
}
private:
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- std::unique_ptr<ClTemplateCast> _component_writer;
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
- std::unique_ptr<GpuCkwCast> _component_writer;
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
+ std::unique_ptr<GpuCkwCast> _component_writer;
};
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTCAST */
+#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTCAST_H
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.cpp
index ca8037c393..7cd23d6115 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -28,11 +28,7 @@
#include "arm_compute/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.h"
#include "src/core/CL/CLValidate.h"
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.h"
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwDepthwiseConv2d.h"
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
namespace arm_compute
{
@@ -212,22 +208,14 @@ ClComponentDepthwiseConv2d::ClComponentDepthwiseConv2d(ComponentId
const Attributes &attributes,
const Settings &settings)
: IGpuKernelComponent{id, properties, tensors},
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- _component_writer{std::make_unique<ClTemplateDepthwiseConv2d>(id, tensors, attributes, settings)}
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
_component_writer{std::make_unique<GpuCkwDepthwiseConv2d>(id, tensors, attributes, settings)}
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
{
ARM_COMPUTE_UNUSED(attributes, settings);
}
ClComponentDepthwiseConv2d::~ClComponentDepthwiseConv2d()
{
}
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-const IGpuTemplateComponentWriter *ClComponentDepthwiseConv2d::template_writer() const
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
const IGpuCkwComponentDriver *ClComponentDepthwiseConv2d::ckw_component_driver() const
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
{
return _component_writer.get();
}
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.h
index 01168e9ded..7526361f1c 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.h
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -44,11 +44,7 @@ class ArgumentPack;
class DepthwiseConv2dAttributes;
/** Forward declaration */
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-class ClTemplateDepthwiseConv2d;
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
class GpuCkwDepthwiseConv2d;
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
/** Component specific settings
*/
@@ -161,13 +157,8 @@ public:
ClComponentDepthwiseConv2d(ClComponentDepthwiseConv2d &&component) = default;
/** Allow instances of this class to be moved */
ClComponentDepthwiseConv2d &operator=(ClComponentDepthwiseConv2d &&component) = default;
- /** Get template writer for the component */
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- const IGpuTemplateComponentWriter *template_writer() const override;
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
- const IGpuCkwComponentDriver *ckw_component_driver() const override;
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
-
+ /** Get writer for the component */
+ const IGpuCkwComponentDriver *ckw_component_driver() const override;
/** Get component type */
GpuComponentType type() const override
{
@@ -175,11 +166,7 @@ public:
}
private:
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- std::unique_ptr<ClTemplateDepthwiseConv2d> _component_writer;
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
std::unique_ptr<GpuCkwDepthwiseConv2d> _component_writer;
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
};
} // namespace dynamic_fusion
} // namespace experimental
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp
index 98f3d6a882..783a17df30 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -28,12 +28,7 @@
#include "arm_compute/dynamic_fusion/sketch/attributes/Conv2dAttributes.h"
#include "src/core/CL/CLValidate.h"
-
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h"
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwDirectConv2d.h"
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
namespace arm_compute
{
@@ -153,11 +148,7 @@ ClComponentDirectConv2d::ClComponentDirectConv2d(ComponentId
const Attributes &attributes,
const Settings &settings)
: IGpuKernelComponent{id, properties, tensors},
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- _component_writer{std::make_unique<ClTemplateDirectConv2d>(id, tensors, attributes, settings)}
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
_component_writer{std::make_unique<GpuCkwDirectConv2d>(id, tensors, attributes, settings)}
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
{
}
@@ -165,11 +156,7 @@ ClComponentDirectConv2d::~ClComponentDirectConv2d()
{
}
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-const IGpuTemplateComponentWriter *ClComponentDirectConv2d::template_writer() const
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
const IGpuCkwComponentDriver *ClComponentDirectConv2d::ckw_component_driver() const
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
{
return _component_writer.get();
}
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h
index d6d9705d3c..c50b0fa0ce 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTDIRECTCONV2D
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTDIRECTCONV2D
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTDIRECTCONV2D_H
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTDIRECTCONV2D_H
#include "arm_compute/core/Error.h"
#include "arm_compute/core/KernelDescriptors.h"
@@ -68,11 +68,7 @@ private:
};
/** Forward declaration */
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-class ClTemplateDirectConv2d;
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
class GpuCkwDirectConv2d;
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
class ClComponentDirectConv2d final : public IGpuKernelComponent
{
@@ -139,11 +135,7 @@ public:
/** Allow instances of this class to be moved */
ClComponentDirectConv2d &operator=(ClComponentDirectConv2d &&component) = default;
/** Get writer for the component */
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- const IGpuTemplateComponentWriter *template_writer() const override;
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
- const IGpuCkwComponentDriver *ckw_component_driver() const override;
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
+ const IGpuCkwComponentDriver *ckw_component_driver() const override;
/** Get component type */
GpuComponentType type() const override
{
@@ -151,13 +143,9 @@ public:
}
private:
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- std::unique_ptr<ClTemplateDirectConv2d> _component_writer;
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
std::unique_ptr<GpuCkwDirectConv2d> _component_writer;
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
};
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTDIRECTCONV2D */
+#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTDIRECTCONV2D_H
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp
index 5b136427e4..209c73dbee 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,11 +26,7 @@
#include "arm_compute/core/Validate.h"
#include "src/core/CL/CLValidate.h"
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.h"
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.h"
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
namespace arm_compute
{
@@ -117,19 +113,11 @@ ClComponentElementwiseBinary::ClComponentElementwiseBinary(ComponentId
const ArgumentPack<ITensorInfo> &tensors,
const Attributes &attributes)
: IGpuKernelComponent{id, properties, tensors},
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- _component_writer{std::make_unique<ClTemplateElementwiseBinary>(id, tensors, attributes)}
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
_component_writer{std::make_unique<GpuCkwElementwiseBinary>(id, tensors, attributes)}
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
{
}
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-const IGpuTemplateComponentWriter *ClComponentElementwiseBinary::template_writer() const
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
const IGpuCkwComponentDriver *ClComponentElementwiseBinary::ckw_component_driver() const
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
{
return _component_writer.get();
}
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h
index 7589b9732c..a4395a6219 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTELEMENTWISEBINARY
-#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTELEMENTWISEBINARY
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTELEMENTWISEBINARY_H
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTELEMENTWISEBINARY_H
#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
#include "src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h"
@@ -40,11 +40,7 @@ template <typename T>
class ArgumentPack;
/** Forward declaration */
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-class ClTemplateElementwiseBinary;
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
class GpuCkwElementwiseBinary;
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
class ClComponentElementwiseBinary final : public IGpuKernelComponent
{
@@ -105,12 +101,7 @@ public:
/** Allow instances of this class to be moved */
ClComponentElementwiseBinary &operator=(ClComponentElementwiseBinary &&component) = default;
/** Get writer for the component */
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- const IGpuTemplateComponentWriter *template_writer() const override;
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
- const IGpuCkwComponentDriver *ckw_component_driver() const override;
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
-
+ const IGpuCkwComponentDriver *ckw_component_driver() const override;
/** Get component type */
GpuComponentType type() const override
{
@@ -118,13 +109,9 @@ public:
}
private:
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- std::unique_ptr<ClTemplateElementwiseBinary> _component_writer;
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
std::unique_ptr<GpuCkwElementwiseBinary> _component_writer;
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
};
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute
-#endif /* ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTELEMENTWISEBINARY */
+#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTELEMENTWISEBINARY_H
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.cpp
deleted file mode 100644
index 27c13bd654..0000000000
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.cpp
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * Copyright (c) 2022-2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/dynamic_fusion/sketch/attributes/SoftmaxAttributes.h"
-
-#include "src/core/CL/CLValidate.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-Status ClComponentLogits1DMaxShiftExpSum::validate(const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes)
-{
- ARM_COMPUTE_UNUSED(properties, attributes);
-
- const ITensorInfo *src = tensors.get_const_tensor(TensorType::ACL_SRC_0);
- const ITensorInfo *sum = tensors.get_const_tensor(TensorType::ACL_DST_0);
- const ITensorInfo *dst = tensors.get_const_tensor(TensorType::ACL_DST_1);
-
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src);
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(sum);
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(dst);
-
- // 1. Check validity
- // All tensor infos are initialized
- ARM_COMPUTE_RETURN_ERROR_ON(src->tensor_shape().total_size() == 0);
- ARM_COMPUTE_RETURN_ERROR_ON(sum->tensor_shape().total_size() == 0);
- ARM_COMPUTE_RETURN_ERROR_ON(dst->tensor_shape().total_size() == 0);
-
- // Check for mismatches in shapes and data types
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst, sum);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(src, dst);
-
- // Device requirements are met
- ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src);
-
- // 2. Check support level
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F16, DataType::F32);
-
- return Status{};
-}
-
-ClComponentLogits1DMaxShiftExpSum::ClComponentLogits1DMaxShiftExpSum(ComponentId id,
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes)
- : IGpuKernelComponent{id, properties, tensors},
- _component_writer{std::make_unique<ClTemplateLogits1DMaxShiftExpSum>(id, tensors, attributes)}
-{
-}
-
-ClComponentLogits1DMaxShiftExpSum::~ClComponentLogits1DMaxShiftExpSum()
-{
-}
-
-const IGpuTemplateComponentWriter *ClComponentLogits1DMaxShiftExpSum::template_writer() const
-{
- return _component_writer.get();
-}
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.h
deleted file mode 100644
index 91ab5de3b5..0000000000
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.h
+++ /dev/null
@@ -1,130 +0,0 @@
-/*
- * Copyright (c) 2022-2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTLOGITS1DMAXSHIFTEXPSUM
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTLOGITS1DMAXSHIFTEXPSUM
-
-#include "arm_compute/dynamic_fusion/sketch/attributes/SoftmaxAttributes.h"
-
-#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
-
-namespace arm_compute
-{
-/** Forward declaration */
-class ITensorInfo;
-namespace experimental
-{
-namespace dynamic_fusion
-{
-/** Forward declaration */
-template <typename T>
-class ArgumentPack;
-
-/** Forward declaration */
-class ClTemplateLogits1DMaxShiftExpSum;
-
-/** Component to calculate max-shifted exponentials and their sum
- *
- * 1D example:
- * input: [x1, x2, ... , xn], shape: (1 x d)
- *
- * Let max(x1...xn) = m
- *
- * (output) sum: [exp(x1-m) + ... + exp(xn-m)], shape: (1 x 1)
- * (output) dst: [exp(x1-m) ... exp(xn-m)], shape: (1 x d)
- *
- * This component is used by the softmax operator. The subsequent
- * operation normalizes dst with sum, therefore the max-shifting
- * since exp(m) will be cancelled in numerator and denominator.
-*/
-class ClComponentLogits1DMaxShiftExpSum final : public IGpuKernelComponent
-{
-public:
- /** Attributes are a set of backend-agnostic parameters that define what a component does */
- using Attributes = SoftmaxAttributes;
-
- /** Validate the component
- *
- * @param[in] properties Component properties @ref Properties
- * @param[in] tensors Tensor arguments to the component
- * @param[in] attributes Component attributes @ref Attributes
- *
- * @return Status Validation results
- *
- * Tensor argument names:
- * - ACL_SRC_0: Input
- * - ACL_DST_0: Output
- * - ACL_DST_1: Output
- *
- * Tensor argument constness:
- * - ACL_SRC_0: Const
- * - ACL_DST_0: Const
- * - ACL_DST_1: Const
- *
- * Valid data layouts:
- * - All
- *
- ** Valid data type configurations:
- * |ACL_SRC_0 |ACL_DST_0 |ACL_DST_1 |
- * |:----------|:----------|:----------|
- * |F16 | F16 | F16 |
- * |F32 | F32 | F32 |
- */
- static Status
- validate(const Properties &properties, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes);
-
- /** Constructor
- *
- * Similar to @ref ClComponentLogits1DMaxShiftExpSum::validate()
- */
- ClComponentLogits1DMaxShiftExpSum(ComponentId id,
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes);
-
- /** Destructor */
- ~ClComponentLogits1DMaxShiftExpSum() override;
- /** Prevent instances of this class from being copy constructed */
- ClComponentLogits1DMaxShiftExpSum(const ClComponentLogits1DMaxShiftExpSum &component) = delete;
- /** Prevent instances of this class from being copied */
- ClComponentLogits1DMaxShiftExpSum &operator=(const ClComponentLogits1DMaxShiftExpSum &component) = delete;
- /** Allow instances of this class to be move constructed */
- ClComponentLogits1DMaxShiftExpSum(ClComponentLogits1DMaxShiftExpSum &&component) = default;
- /** Allow instances of this class to be moved */
- ClComponentLogits1DMaxShiftExpSum &operator=(ClComponentLogits1DMaxShiftExpSum &&component) = default;
- /** Get template writer for the component */
- const IGpuTemplateComponentWriter *template_writer() const override;
- /** Get component type */
- GpuComponentType type() const override
- {
- return GpuComponentType::Unfusable;
- }
-
-private:
- std::unique_ptr<ClTemplateLogits1DMaxShiftExpSum> _component_writer;
-};
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTLOGITS1DMAXSHIFTEXPSUM */
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.cpp
deleted file mode 100644
index fb2544385c..0000000000
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.cpp
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Copyright (c) 2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/dynamic_fusion/sketch/attributes/SoftmaxAttributes.h"
-
-#include "src/core/CL/CLValidate.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-Status ClComponentLogits1DNorm::validate(const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes)
-{
- ARM_COMPUTE_UNUSED(properties, attributes);
-
- const ITensorInfo *src = tensors.get_const_tensor(TensorType::ACL_SRC_0);
- const ITensorInfo *sum = tensors.get_const_tensor(TensorType::ACL_SRC_1);
- const ITensorInfo *dst = tensors.get_const_tensor(TensorType::ACL_DST_0);
-
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src);
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(sum);
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(dst);
-
- // 1. Check validity
- // All tensor infos are initialized
- ARM_COMPUTE_RETURN_ERROR_ON(src->tensor_shape().total_size() == 0);
- ARM_COMPUTE_RETURN_ERROR_ON(sum->tensor_shape().total_size() == 0);
- ARM_COMPUTE_RETURN_ERROR_ON(dst->tensor_shape().total_size() == 0);
-
- // Check for mismatches in shapes and data types
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst, sum);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(src, dst);
-
- ARM_COMPUTE_RETURN_ERROR_ON(attributes.is_log_softmax() && !is_data_type_float(src->data_type()));
-
- // Device requirements are met
- ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src);
-
- // 2. Check support level
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F16, DataType::F32);
-
- return Status{};
-}
-
-ClComponentLogits1DNorm::ClComponentLogits1DNorm(ComponentId id,
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes)
- : IGpuKernelComponent{id, properties, tensors},
- _component_writer{std::make_unique<ClTemplateLogits1DNorm>(id, tensors, attributes)}
-{
-}
-
-ClComponentLogits1DNorm::~ClComponentLogits1DNorm()
-{
-}
-
-const IGpuTemplateComponentWriter *ClComponentLogits1DNorm::template_writer() const
-{
- return _component_writer.get();
-}
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.h
deleted file mode 100644
index 74c0273604..0000000000
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.h
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * Copyright (c) 2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTLOGITS1DNORM
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTLOGITS1DNORM
-
-#include "arm_compute/dynamic_fusion/sketch/attributes/SoftmaxAttributes.h"
-
-#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
-
-namespace arm_compute
-{
-/** Forward declaration */
-class ITensorInfo;
-namespace experimental
-{
-namespace dynamic_fusion
-{
-/** Forward declaration */
-template <typename T>
-class ArgumentPack;
-
-/** Forward declaration */
-class ClTemplateLogits1DNorm;
-
-/** Component to calculate the final step of the Softmax Layer
- * where each logit value is multiplied by the inverse of the sum of the logits.
- *
- * 1D example:
- *
- * (input) src: [x1 x2 ... xn], shape: (1 x d)
- * (input) sum: [x1 + x2 + ... + xn], shape: (1 x 1)
- * (output) dst: [x1/sum x2/sum ... xn/sum], shape: (1 x d)
- *
- * This component is used by the softmax operator to get the final result.
-*/
-class ClComponentLogits1DNorm final : public IGpuKernelComponent
-{
-public:
- /** Attributes are a set of backend-agnostic parameters that define what a component does */
- using Attributes = SoftmaxAttributes;
-
- /** Validate the component
- *
- * @param[in] properties Component properties @ref Properties
- * @param[in] tensors Tensor arguments to the component
- * @param[in] attributes Component attributes @ref Attributes
- *
- * @return Status Validation results
- *
- * Tensor argument names:
- * - ACL_SRC_0: Input
- * - ACL_SRC_1: Input
- * - ACL_DST_0: Output
- *
- * Tensor argument constness:
- * - ACL_SRC_0: Const
- * - ACL_SRC_1: Const
- * - ACL_DST_0: Const
- *
- * Valid data layouts:
- * - All
- *
- ** Valid data type configurations:
- * |ACL_SRC_0 |ACL_SRC_1 |ACL_DST_0 |
- * |:----------|:----------|:----------|
- * |F16 | F16 | F16 |
- * |F32 | F32 | F32 |
- */
- static Status
- validate(const Properties &properties, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes);
-
- /** Constructor
- *
- * Similar to @ref ClComponentLogits1DNorm::validate()
- */
- ClComponentLogits1DNorm(ComponentId id,
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes);
-
- /** Destructor */
- ~ClComponentLogits1DNorm() override;
- /** Prevent instances of this class from being copy constructed */
- ClComponentLogits1DNorm(const ClComponentLogits1DNorm &component) = delete;
- /** Prevent instances of this class from being copied */
- ClComponentLogits1DNorm &operator=(const ClComponentLogits1DNorm &component) = delete;
- /** Allow instances of this class to be move constructed */
- ClComponentLogits1DNorm(ClComponentLogits1DNorm &&component) = default;
- /** Allow instances of this class to be moved */
- ClComponentLogits1DNorm &operator=(ClComponentLogits1DNorm &&component) = default;
- /** Get template writer for the component */
- const IGpuTemplateComponentWriter *template_writer() const override;
- /** Get component type */
- GpuComponentType type() const override
- {
- return GpuComponentType::Unfusable;
- }
-
-private:
- std::unique_ptr<ClTemplateLogits1DNorm> _component_writer;
-};
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTLOGITS1DNORM */
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.cpp
index f238d42d98..53ac8da41f 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2023 Arm Limited.
+ * Copyright (c) 2023-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,7 +21,6 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifdef ACL_INTERNAL_TEST_CKW_IN_DF
#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.h"
@@ -147,5 +146,3 @@ const IGpuCkwComponentDriver *ClComponentMatMul::ckw_component_driver() const
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute
-
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.cpp
index 5544963b3f..6e7243dc04 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.cpp
@@ -30,7 +30,6 @@
#include "src/core/CL/CLValidate.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.h"
#include "src/dynamic_fusion/utils/Utils.h"
#include <memory>
@@ -93,27 +92,16 @@ ClComponentPool2d::ClComponentPool2d(ComponentId id,
const Attributes &attributes,
const Settings &settings)
: IGpuKernelComponent{id, properties, tensors},
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- _component_writer{std::make_unique<ClTemplatePool2d>(id, tensors, attributes, settings)}
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
_component_writer{std::make_unique<GpuCkwPool2d>(id, tensors, attributes, settings)}
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
{
}
ClComponentPool2d::~ClComponentPool2d()
{
}
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-const IGpuTemplateComponentWriter *ClComponentPool2d::template_writer() const
-{
- return _component_writer.get();
-}
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
const IGpuCkwComponentDriver *ClComponentPool2d::ckw_component_driver() const
{
return _component_writer.get();
}
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.h
index 98fed65004..d33e601f18 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.h
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2023 Arm Limited.
+ * Copyright (c) 2023-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -42,11 +42,7 @@ class ArgumentPack;
class Pool2dAttributes;
/** Forward declaration */
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-class ClTemplatePool2d;
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
class GpuCkwPool2d;
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
class ClComponentPool2d final : public IGpuKernelComponent
{
@@ -116,13 +112,9 @@ public:
/** Allow instances of this class to be moved */
ClComponentPool2d &operator=(ClComponentPool2d &&component) = default;
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- /** Get template writer for the component */
- const IGpuTemplateComponentWriter *template_writer() const override;
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
+
/** Get GPU kernel writer for the component */
const IGpuCkwComponentDriver *ckw_component_driver() const override;
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
/** Get component type */
GpuComponentType type() const override
@@ -131,11 +123,7 @@ public:
}
private:
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- std::unique_ptr<ClTemplatePool2d> _component_writer;
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
std::unique_ptr<GpuCkwPool2d> _component_writer;
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
};
} // namespace dynamic_fusion
} // namespace experimental
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.cpp
index 0ece9de970..dce85c424e 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2023 Arm Limited.
+ * Copyright (c) 2023-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -27,7 +27,6 @@
#include "arm_compute/core/Validate.h"
#include "src/core/CL/CLValidate.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.h"
namespace arm_compute
{
@@ -54,15 +53,16 @@ Status ClComponentReshape::validate(const ArgumentPack<ITensorInfo> &tensors)
ClComponentReshape::ClComponentReshape(ComponentId id,
const Properties &properties,
const ArgumentPack<ITensorInfo> &tensors)
- : IGpuKernelComponent{id, properties, tensors}, _component_writer{std::make_unique<ClTemplateReshape>(id, tensors)}
+ : IGpuKernelComponent{id, properties, tensors}
{
}
ClComponentReshape::~ClComponentReshape()
{
}
-const IGpuTemplateComponentWriter *ClComponentReshape::template_writer() const
+const IGpuCkwComponentDriver *ClComponentReshape::ckw_component_driver() const
{
- return _component_writer.get();
+ /* NOT IMPLEMENTED */
+ return nullptr;
}
} // namespace dynamic_fusion
} // namespace experimental
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.h
index 78163d6603..fd0f966da1 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.h
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2023 Arm Limited.
+ * Copyright (c) 2023-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTRESHAPE
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTRESHAPE
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTRESHAPE_H
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTRESHAPE_H
#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
@@ -85,8 +85,8 @@ public:
ClComponentReshape(ClComponentReshape &&component) = default;
/** Allow instances of this class to be moved */
ClComponentReshape &operator=(ClComponentReshape &&component) = default;
- /** Get template writer for the component */
- const IGpuTemplateComponentWriter *template_writer() const override;
+ /** Get writer for the component */
+ const IGpuCkwComponentDriver *ckw_component_driver() const override;
/** Get component type */
GpuComponentType type() const override
{
@@ -94,10 +94,9 @@ public:
}
private:
- std::unique_ptr<ClTemplateReshape> _component_writer;
};
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTRESHAPE */
+#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTRESHAPE_H
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.cpp
index b05eb04698..411eeca802 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -29,12 +29,7 @@
#include "src/core/CL/CLValidate.h"
#include "src/core/utils/ScaleUtils.h"
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
-
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.h"
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwResize.h"
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
namespace arm_compute
{
@@ -43,11 +38,7 @@ namespace experimental
namespace dynamic_fusion
{
/** Forward declaration */
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-class ClTemplateResize;
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
class GpuCkwResize;
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
Status ClComponentResize::validate(const IGpuKernelComponent::Properties &properties,
const ArgumentPack<ITensorInfo> &tensors,
@@ -82,11 +73,7 @@ ClComponentResize::ClComponentResize(ComponentId id,
const ArgumentPack<ITensorInfo> &tensors,
const ClComponentResize::Attributes &attributes)
: IGpuKernelComponent{id, properties, tensors},
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- _component_writer{std::make_unique<ClTemplateResize>(id, tensors, attributes)}
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
_component_writer{std::make_unique<GpuCkwResize>(id, tensors, attributes)}
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
{
}
@@ -94,11 +81,7 @@ ClComponentResize::~ClComponentResize()
{
}
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-const IGpuTemplateComponentWriter *ClComponentResize::template_writer() const
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
const IGpuCkwComponentDriver *ClComponentResize::ckw_component_driver() const
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
{
return _component_writer.get();
}
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.h
index 29276c3257..9a1169c45f 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.h
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -22,8 +22,8 @@
* SOFTWARE.
*/
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTRESIZE
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTRESIZE
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTRESIZE_H
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTRESIZE_H
#include "arm_compute/dynamic_fusion/sketch/attributes/ResizeAttributes.h"
@@ -42,11 +42,7 @@ template <typename T>
class ArgumentPack;
/** Forward declaration */
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-class ClTemplateResize;
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
class GpuCkwResize;
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
class ClComponentResize final : public IGpuKernelComponent
{
@@ -111,11 +107,7 @@ public:
ClComponentResize &operator=(ClComponentResize &&component) = default;
/** Get writer for the component */
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- const IGpuTemplateComponentWriter *template_writer() const override;
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
const IGpuCkwComponentDriver *ckw_component_driver() const override;
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
/** Get component type */
GpuComponentType type() const override
@@ -124,15 +116,11 @@ public:
}
private:
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- std::unique_ptr<ClTemplateResize> _component_writer;
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
std::unique_ptr<GpuCkwResize> _component_writer;
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
};
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTRESIZE */
+#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTRESIZE_H
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp
index dcbecaff35..3db6c5cd2d 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,11 +24,7 @@
#include "ClComponentStore.h"
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h"
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.h"
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
#include <memory>
@@ -46,22 +42,13 @@ Status ClComponentStore::validate(const Properties &properties, const ArgumentPa
ClComponentStore::ClComponentStore(ComponentId id,
const Properties &properties,
const ArgumentPack<ITensorInfo> &tensors)
- : IGpuKernelComponent{id, properties, tensors},
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- _component_writer{std::make_unique<ClTemplateStore>(id, tensors)}
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
- _component_writer{std::make_unique<GpuCkwStore>(id, tensors)}
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
+ : IGpuKernelComponent{id, properties, tensors}, _component_writer{std::make_unique<GpuCkwStore>(id, tensors)}
{
}
ClComponentStore::~ClComponentStore()
{
}
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-const IGpuTemplateComponentWriter *ClComponentStore::template_writer() const
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
const IGpuCkwComponentDriver *ClComponentStore::ckw_component_driver() const
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
{
return _component_writer.get();
}
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h
index 948785c480..2c1dd0f6fc 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTSTORE
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTSTORE
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTSTORE_H
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTSTORE_H
#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
@@ -39,11 +39,7 @@ namespace dynamic_fusion
/** Forward declaration */
template <typename T>
class ArgumentPack;
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-class ClTemplateStore;
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
class GpuCkwStore;
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
class ClComponentStore final : public IGpuKernelComponent
{
@@ -88,11 +84,7 @@ public:
/** Allow instances of this class to be moved */
ClComponentStore &operator=(ClComponentStore &&component) = default;
/** Get writer for the component */
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- const IGpuTemplateComponentWriter *template_writer() const override;
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
const IGpuCkwComponentDriver *ckw_component_driver() const override;
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
/** Get component type */
GpuComponentType type() const override
{
@@ -100,13 +92,9 @@ public:
}
private:
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- std::unique_ptr<ClTemplateStore> _component_writer;
-#else //ACL_INTERNAL_TEST_CKW_IN_DF
- std::unique_ptr<GpuCkwStore> _component_writer;
-#endif //ACL_INTERNAL_TEST_CKW_IN_DF
+ std::unique_ptr<GpuCkwStore> _component_writer;
};
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTSTORE */
+#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTSTORE_H
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp
index 697b7d4e1f..4d6e7f81bb 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -30,7 +30,6 @@
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h"
#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h"
namespace arm_compute
{
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuMatMul.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuMatMul.cpp
index e24629a036..2997b28ec1 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuMatMul.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuMatMul.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2023 Arm Limited.
+ * Copyright (c) 2023-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,7 +21,6 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifdef ACL_INTERNAL_TEST_CKW_IN_DF
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuMatMul.h"
@@ -244,4 +243,3 @@ ITensorInfo *GpuMatMul::create_op(GpuWorkloadSketch &sketch,
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp
index 431c9110fc..d385752201 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp
@@ -28,8 +28,6 @@
#include "src/common/utils/Log.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
-#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.h"
-#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.h"
#include "src/dynamic_fusion/sketch/gpu/GpuOperatorProperties.h"
#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
@@ -88,9 +86,8 @@ Status GpuSoftmax::is_supported_op(const GpuWorkloadContext &context,
arguments_norm.add_const_tensor(ACL_SRC_1, &sum);
arguments_norm.add_const_tensor(ACL_DST_0, &dst_info_to_validate);
- ARM_COMPUTE_RETURN_ON_ERROR(
- ClComponentLogits1DMaxShiftExpSum::validate(properties, arguments_exp_sum, attributes));
- ARM_COMPUTE_RETURN_ON_ERROR(ClComponentLogits1DNorm::validate(properties, arguments_norm, attributes));
+ ARM_COMPUTE_UNUSED(properties, attributes);
+ return Status(ErrorCode::RUNTIME_ERROR, "GpuSoftmax is not implemented");
}
else
{
@@ -177,8 +174,8 @@ void GpuSoftmax::create_op(GpuWorkloadSketch &sketch, ITensorInfo *src, ITensorI
arguments_norm.add_const_tensor(ACL_SRC_1, sum);
arguments_norm.add_const_tensor(ACL_DST_0, dst);
- comp_graph.add_new_component<ClComponentLogits1DMaxShiftExpSum>(properties, arguments_exp_sum, attributes);
- comp_graph.add_new_component<ClComponentLogits1DNorm>(properties, arguments_norm, attributes);
+ // Add to component graph -- NOT IMPLEMENTED
+ ARM_COMPUTE_UNUSED(comp_graph, attributes);
}
}
else
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuTanh.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuTanh.cpp
index bf0f274c5c..b9d01966b3 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuTanh.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuTanh.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2023 Arm Limited.
+ * Copyright (c) 2023-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -31,7 +31,6 @@
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h"
#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h"
namespace arm_compute
{
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.cpp
deleted file mode 100644
index 775b0a0c8c..0000000000
--- a/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.cpp
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * Copyright (c) 2022-2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "GpuKernelVariableTable.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/ITensorInfo.h"
-
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-void GpuKernelVariableTable::declare_variable(const GpuKernelComponentGroup &comp_group,
- const ITensorInfo *tensor,
- GpuKernelArgumentInfo argument_info,
- const std::string &alias)
-{
- ARM_COMPUTE_ERROR_ON_MSG(!tensor->has_valid_id(), "Tensor info with valid id expected");
-
- // Do not re-declare if the variable associated with the tensor has already been declared
- auto it = _vars.find(tensor->id());
-
- if (it != _vars.end())
- {
- ARM_COMPUTE_ERROR_ON(!(it->second.kernel_argument_info == argument_info));
- return;
- }
-
- const auto target = comp_group.get_tile_for_tensor(tensor);
-
- if (target != tensor)
- {
- // If the tensor uses a shared tile, don't declare another variable.
- it = _vars.find(target->id());
-
- ARM_COMPUTE_ERROR_ON_MSG(it == _vars.end(), "The variable used for this tensor must have been declared.");
-
- _vars[tensor->id()] = it->second;
- }
- else
- {
- // Declare variable associated with the tensor
- std::stringstream ss;
- ss << alias << "_t" << abs(tensor->id());
- const auto uniq_name = ss.str();
- TensorVariable var{tensor->id(), uniq_name, argument_info};
-
- _vars.emplace(tensor->id(), var);
- }
-}
-
-GpuKernelVariableTable::TensorVariable GpuKernelVariableTable::get_variable(const ITensorInfo *tensor) const
-{
- const auto var = _vars.at(tensor->id());
- return var;
-}
-
-GpuKernelVariableTable::VariableList
-GpuKernelVariableTable::get_variable_list(const std::vector<const ITensorInfo *> &tensors) const
-{
- VariableList vars{};
- for (const auto &tensor : tensors)
- {
- if (!tensor->has_valid_id())
- {
- continue;
- }
- vars.push_back(get_variable(tensor));
- }
- return vars;
-}
-
-TagVal::TagVal(const GpuKernelVariableTable::TensorVariable &var) : value{var.uniq_name}
-{
-}
-
-TagVal::TagVal(const std::string &val) : value{val}
-{
-}
-
-TagVal::TagVal(const char *val) : value{std::string(val)}
-{
-}
-
-TagVal::TagVal(const DataType &data_type) : value{get_cl_type_from_data_type(data_type)}
-{
-}
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h b/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h
deleted file mode 100644
index c17f131ada..0000000000
--- a/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * Copyright (c) 2022-2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_GPUKERNELVARIABLETABLE
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_GPUKERNELVARIABLETABLE
-
-#include "arm_compute/core/ITensorInfo.h"
-
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
-#include "support/AclRequires.h"
-#include "support/StringSupport.h"
-
-#include <set>
-#include <string>
-#include <type_traits>
-#include <unordered_map>
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-class GpuKernelComponentGroup;
-
-/** A table of all the variables used in the kernel.
- * Each kernel has exactly one variable table.
- */
-class GpuKernelVariableTable
-{
-public:
- /** A tensor variable whose main purposes are:
- * - Hold the newly assigned @ref GpuKernelArgumentInfo for the associated tensor info
- * - Hold the generated variable name for the associated tensor info
- */
- struct TensorVariable
- {
- public:
- TensorVariable() = default;
- TensorVariable(const TensorVariable &) = default;
- TensorVariable &operator=(const TensorVariable &) = default;
- ITensorInfo::Id id{ITensorInfo::invalid_tensor_id};
- std::string uniq_name{"empty"}; // Unique name, also the final variable name used in the built code
- GpuKernelArgumentInfo kernel_argument_info{};
- bool has_valid_id() const
- {
- return id != ITensorInfo::invalid_tensor_id;
- }
- };
- using VariableList = std::vector<TensorVariable>;
-
-public:
- /** Declare a @ref TensorVariable for a corresponding tensor info.
- *
- * @param[in] comp_group Component group the tensor belongs to
- * @param[in] tensor Tensor info with which the new variable is associated
- * @param[in] argument_info Kernel argument information
- * @param[in] alias Alias for the variable. Will be used as part of the variable name
- */
- void declare_variable(const GpuKernelComponentGroup &comp_group,
- const ITensorInfo *tensor,
- GpuKernelArgumentInfo argument_info,
- const std::string &alias = "unnamed");
- /** Get the @ref TensorVariable associated with @p tensor
- *
- * @param[in] tensor Tensor info to be queried
- *
- * @return TensorVariable
- */
- TensorVariable get_variable(const ITensorInfo *tensor) const;
- /** Get the @ref TensorVariable list associated with @p tensors
- * @note Empty tensors are skipped
- *
- * @param[in] tensors List of tensor infos to be queried
- *
- * @return VariableList
- */
- VariableList get_variable_list(const std::vector<const ITensorInfo *> &tensors) const;
-
-private:
- std::map<ITensorInfo::Id, TensorVariable> _vars{};
-};
-
-/** A tag value will substitute a tag in a string template during its instantiation */
-struct TagVal
-{
- /** Default constructor */
- TagVal() = default;
- /** Construct a @ref TagVal from a @ref GpuKernelVariableTable::TensorVariable */
- TagVal(const GpuKernelVariableTable::TensorVariable &var);
- /** Construct a @ref TagVal from an integral type */
- template <typename T, ARM_COMPUTE_REQUIRES_TA(std::is_integral<T>::value)>
- TagVal(T val) : value{support::cpp11::to_string(val)}
- {
- }
- /** Construct a @ref TagVal from a string */
- TagVal(const std::string &val);
- /** Construct a @ref TagVal from a c-style string */
- TagVal(const char *val);
- /** Construct a @ref TagVal from a @ref DataType */
- TagVal(const DataType &data_type);
- /** Get the value of the TagVal as a converted string */
- std::string value{};
-};
-
-/** A tag used in a string template is a placeholder string to be substituted by real values during template instantiation */
-using Tag = std::string;
-
-/** Tag lookup table. It is used to instantiate a string template */
-using TagLUT = std::unordered_map<Tag, TagVal>;
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_GPUKERNELVARIABLETABLE */
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h b/src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h
deleted file mode 100644
index 9d0b4f592a..0000000000
--- a/src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_IGPUTEMPLATECOMPONENTWRITER
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_IGPUTEMPLATECOMPONENTWRITER
-
-#include "arm_compute/core/CL/CLCompileContext.h"
-#include "arm_compute/core/ITensorInfo.h"
-#include "arm_compute/core/Window.h"
-
-#include "src/dynamic_fusion/sketch/ArgumentPack.h"
-#include "src/dynamic_fusion/sketch/gpu/components/Types.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-/** Forward declaration */
-class GpuKernelComponentGroup;
-class GpuKernelVariableTable;
-
-/** An interface used by @ref ClTemplateWriter to write source code for a kernel component
- */
-class IGpuTemplateComponentWriter
-{
-public:
- using ComponentGroup = GpuKernelComponentGroup;
-
- /**For now all kernel intermeditate/destination tensors are expected to be of type Tensor_4D_t_Buffer*/
- static constexpr GpuKernelArgumentInfo::Type common_tensor_type = GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer;
-
-public:
- /** Constructor
- *
- * @param[in] id Component id
- * @param[in] tensors Tensor arguments to the components
- */
- IGpuTemplateComponentWriter(ComponentId id, const ArgumentPack<ITensorInfo> &tensors) : _id{id}, _tensors{tensors}
- {
- }
- /** Destructor */
- virtual ~IGpuTemplateComponentWriter()
- {
- }
- /** Generate kernel component name */
- virtual std::string get_name() const = 0;
- /** Generate kernel component code template
- *
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return std::string Component code
- */
- virtual std::string get_component_code(const ComponentGroup &comp_group) const = 0;
- /** Declare all variables used by the component in the @p vtable
- *
- * @param[out] vtable Variable table
- * @param[in] comp_group Component group of which the component is a part of
- */
- virtual void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const = 0;
- /** Generate the tag look-up table used to instantiate the component code.
- *
- * @param[in] vtable Variable table
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return TagLUT Tag lookup table
- */
- virtual TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const = 0;
- /** Generate additional macros used in the component */
- virtual std::string get_additional_macros() const
- {
- return "";
- }
- /** Generate the build options used in the component
- *
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return CLBuildOptions Build options
- */
- virtual CLBuildOptions get_build_options(const ComponentGroup &comp_group) const
- {
- ARM_COMPUTE_UNUSED(comp_group);
- return CLBuildOptions{};
- }
- /** Generate the component config id string used for tuning */
- virtual std::string get_config_id() const
- {
- return "";
- }
- /** Generate the header list used in the component */
- virtual std::set<std::string> get_headers_list() const
- {
- return std::set<std::string>{};
- }
- /** Generate the execution window for the component */
- virtual Window get_window() const
- {
- return Window{};
- }
- /** Get tensor arguments */
- ArgumentPack<ITensorInfo> tensors() const
- {
- return _tensors;
- }
- /** Get component id */
- ComponentId id() const
- {
- return _id;
- }
-
-private:
- ComponentId _id{-1};
- ArgumentPack<ITensorInfo> _tensors{};
-};
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_IGPUTEMPLATECOMPONENTWRITER */
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp
deleted file mode 100644
index c165fb5f33..0000000000
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp
+++ /dev/null
@@ -1,181 +0,0 @@
-/*
- * Copyright (c) 2022-2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "ClTemplateActivation.h"
-
-#include "arm_compute/core/utils/ActivationFunctionUtils.h"
-#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
-#include "arm_compute/core/utils/StringUtils.h"
-
-#include "src/core/helpers/WindowHelpers.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
-#include "support/StringSupport.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-ClTemplateActivation::ClTemplateActivation(ComponentId id,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes)
- : IGpuTemplateComponentWriter{id, tensors}, _src{}, _dst{}, _attributes{attributes}
-{
- _src = this->tensors().get_const_tensor(TensorType::ACL_SRC);
- _dst = this->tensors().get_const_tensor(TensorType::ACL_DST);
- ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst);
-}
-
-std::string ClTemplateActivation::get_name() const
-{
- return "activation";
-}
-
-std::string ClTemplateActivation::get_component_code(const ComponentGroup &comp_group) const
-{
- std::string code;
- const bool is_root = (comp_group.get_root_component()->id() == this->id());
-
- code = R"_(
-//------------------ START KERNEL {{meta_kernel_id}} ---------------------
-)_";
- if (is_root)
- {
- code += R"_(
-// IN(src) {{src}}
-// OUT(dst, accum) {{dst}}
-
-TILE({{DATA_TYPE}}, M0, N0, {{src}});
-TILE(uint, M0, 1, g_dst_indirect_y);
-{
- {{src}}_offset_first_element_in_bytes += g_ind_2 * {{src}}_stride_z;
-
- T_LOAD({{DATA_TYPE}}, M0, N0, {{TENSOR_TYPE}}, {{src}}, g_ind_0, g_ind_1, 1, {{src}}_stride_y, {{src}});
-
- T_ACTIVATION({{DATA_TYPE}}, M0, N0, {{ACT}}, {{A_VAL}}, {{B_VAL}}, {{src}}, {{dst}});
-}
-
-LOOP_UNROLLING(int, i, 0, 1, M0,
-{
- g_dst_indirect_y[i].v = (uint)min((int)(g_ind_1 + i), (int)({{arg_dst}}_w) - 1);
- g_dst_indirect_y[i].v += (int)(g_ind_2 % {{arg_dst}}_h) * (int)({{arg_dst}}_w);
- g_dst_indirect_y[i].v += (int)(g_ind_2 / {{arg_dst}}_h) * (int)({{arg_dst}}_w * {{arg_dst}}_h);
-})
-)_";
- }
- else
- {
- code += R"_(
-// IN/OUT(src, accum) {{src}}
-
-{
- T_ACTIVATION({{DATA_TYPE}}, M0, N0, {{ACT}}, {{A_VAL}}, {{B_VAL}}, {{src}}, {{dst}});
-}
-)_";
- }
- code += R"_(
-//------------------ END KERNEL {{meta_kernel_id}} ---------------------
-)_";
- return code;
-}
-
-void ClTemplateActivation::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
-{
- vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "src");
-
- vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "dst");
-}
-
-TagLUT ClTemplateActivation::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
-{
- ARM_COMPUTE_UNUSED(comp_group);
-
- TagLUT lut{};
- // Arguments and global shared variables
- lut["src"] = vtable.get_variable(_src);
- lut["dst"] = vtable.get_variable(_dst);
-
- const auto dst_argument = vtable.get_variable(comp_group.get_any_dst_tensor());
- lut["arg_dst"] = dst_argument.uniq_name;
-
- // Local build options
- lut["meta_kernel_id"] = id();
- lut["DATA_TYPE"] = get_cl_type_from_data_type(_src->data_type());
- lut["TENSOR_TYPE"] = "BUFFER";
-
- const auto f_act = lower_string(string_from_activation_func(_attributes.activation()));
-
- lut["ACT"] = f_act;
- lut["A_VAL"] = float_to_string_with_full_precision(_attributes.a());
- lut["B_VAL"] = float_to_string_with_full_precision(_attributes.b());
-
- return lut;
-}
-
-CLBuildOptions ClTemplateActivation::get_build_options(const ComponentGroup &comp_group) const
-{
- /// NOTE: For now tile sizes (n0, m0) are set by the execution window. This may change in the future
- const auto root_window = comp_group.get_root_component()->template_writer()->get_window();
- const unsigned int n0 = root_window.x().step();
- const unsigned int m0 = root_window.y().step();
- const unsigned int partial_store_n0 = _dst->dimension(0) % n0;
-
- CLBuildOptions build_opts;
- build_opts.add_option("-DN0=" + support::cpp11::to_string(n0));
- build_opts.add_option("-DM0=" + support::cpp11::to_string(m0));
- build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0));
-
- return build_opts;
-}
-
-std::string ClTemplateActivation::get_config_id() const
-{
- std::string config_id{};
- config_id += "activation_";
- config_id += lower_string(string_from_data_type(_src->data_type()));
- config_id += "_";
- config_id += support::cpp11::to_string(_src->dimension(0));
- config_id += "_";
- config_id += support::cpp11::to_string(_src->dimension(1));
- return config_id;
-}
-
-std::set<std::string> ClTemplateActivation::get_headers_list() const
-{
- return std::set<std::string>{"helpers.h", "tile_helpers.h", "activation_float_helpers.h"};
-}
-
-Window ClTemplateActivation::get_window() const
-{
- ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized");
- const unsigned int n0 = adjust_vec_size(16 / _dst->element_size(), _dst->dimension(0));
- Window win = calculate_max_window(*_dst, Steps(n0));
- return win.collapse(win, Window::DimZ);
-}
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h
deleted file mode 100644
index 88ee370342..0000000000
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * Copyright (c) 2022-2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEACTIVATION
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEACTIVATION
-
-#include "arm_compute/core/experimental/Types.h"
-#include "arm_compute/function_info/ActivationLayerInfo.h"
-
-#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-class ClTemplateActivation final : public IGpuTemplateComponentWriter
-{
-public:
- using Attributes = ClComponentActivation::Attributes;
-
- /** Constructor
- *
- * @param[in] id Component id
- * @param[in] tensors Tensor arguments to the components
- * @param[in] attributes Component attributes
- */
- ClTemplateActivation(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes);
-
- /** Destructor */
- ~ClTemplateActivation() override = default;
-
- /** Prevent instances of this class from being copy constructed */
- ClTemplateActivation(const ClTemplateActivation &activation) = delete;
-
- /** Prevent instances of this class from being copied */
- ClTemplateActivation &operator=(const ClTemplateActivation &activation) = delete;
-
- /** Allow instances of this class to be move constructed */
- ClTemplateActivation(ClTemplateActivation &&activation) = default;
-
- /** Allow instances of this class to be moved */
- ClTemplateActivation &operator=(ClTemplateActivation &&activation) = default;
-
- /** Generate kernel component name */
- std::string get_name() const override;
-
- /** Generate kernel component code template
- *
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return std::string Component code
- */
- std::string get_component_code(const ComponentGroup &comp_group) const override;
-
- /** Declare all variables used by the component in the @p vtable
- *
- * @param[out] vtable Variable table
- * @param[in] comp_group Component group of which the component is a part of
- */
- void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
-
- /** Generate the tag look-up table used to instantiate the component code.
- *
- * @param[in] vtable Variable table
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return TagLUT Tag lookup table
- */
- TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
-
- /** Generate the build options used in the component
- *
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return CLBuildOptions Build options
- */
- CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override;
-
- /** Generate the component config id string used for tuning */
- std::string get_config_id() const override;
-
- /** Generate the header list used in the component */
- std::set<std::string> get_headers_list() const override;
-
- /** Generate the execution window for the component */
- Window get_window() const override;
-
-private:
- const ITensorInfo *_src;
- const ITensorInfo *_dst;
- Attributes _attributes;
-};
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEACTIVATION */
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.cpp
deleted file mode 100644
index 0da3a73801..0000000000
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.cpp
+++ /dev/null
@@ -1,212 +0,0 @@
-/*
- * Copyright (c) 2022-2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "ClTemplateCast.h"
-
-#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
-#include "arm_compute/core/utils/StringUtils.h"
-
-#include "src/core/helpers/WindowHelpers.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-ClTemplateCast::ClTemplateCast(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes)
- : IGpuTemplateComponentWriter{id, tensors}, _src{}, _dst{}, _attributes{attributes}
-{
- _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
- _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
-
- ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst);
-}
-
-std::string ClTemplateCast::get_name() const
-{
- const size_t src_size = data_size_from_type(_src->data_type());
- const size_t dst_size = data_size_from_type(_dst->data_type());
-
- return (src_size >= dst_size) ? "cast_down" : "cast_up";
-}
-
-std::string ClTemplateCast::get_component_code(const ComponentGroup &comp_group) const
-{
- ARM_COMPUTE_UNUSED(comp_group);
-
- const std::string kernel_name = get_name();
- const auto is_root = (comp_group.get_root_component()->id() == this->id());
-
- std::string code = R"_(
-//------------------ START KERNEL {{meta_kernel_id}} CAST ---------------------
-)_";
-
- if (is_root)
- {
- code += R"_(
-// IN_0(src) {{src}}
-// OUT(dst, accum) {{dst}}
-
-TILE(uint, M0, 1, g_dst_indirect_y);
-{
- {{src}}_offset_first_element_in_bytes += get_global_id(2) * {{src}}_stride_z;
-
- TILE({{DATA_TYPE_IN}}, M0, N0, {{tmp}});
- T_LOAD({{DATA_TYPE_IN}}, M0, N0, BUFFER, {{src}}, g_ind_0, g_ind_1, 1, {{src}}_stride_y, {{tmp}});
-)_";
- }
-
- code += R"_(
- LOOP_UNROLLING(int, m0, 0, 1, M0,
- {
-)_";
-
- if (kernel_name == "cast_down" && is_data_type_quantized(_src->data_type()))
- {
- code += R"_(
- {{tmp}}[m0].v ^= (VEC_DATA_TYPE({{DATA_TYPE_IN}}, N0))0x80;
-)_";
- }
-
- if (kernel_name == "cast_down" &&
- (is_data_type_float(_src->data_type()) || _attributes.convert_policy() == ConvertPolicy::SATURATE))
- {
- code += R"_(
- {{dst}}[m0].v = CONVERT_SAT({{tmp}}[m0].v, VEC_DATA_TYPE({{DATA_TYPE_OUT}}, N0));
-)_";
- }
- else
- {
- code += R"_(
- {{dst}}[m0].v = CONVERT({{tmp}}[m0].v, VEC_DATA_TYPE({{DATA_TYPE_OUT}}, N0));
-)_";
- }
-
- code += R"_(
- })
-)_";
-
- if (is_root)
- {
- code += R"_(
- LOOP_UNROLLING(int, i, 0, 1, M0,
- {
- g_dst_indirect_y[i].v = (uint)min((int)(g_ind_1 + i), (int)({{arg_dst}}_w) - 1);
- g_dst_indirect_y[i].v += (int)(g_ind_2 % {{arg_dst}}_h) * (int)({{arg_dst}}_w);
- g_dst_indirect_y[i].v += (int)(g_ind_2 / {{arg_dst}}_h) * (int)({{arg_dst}}_w * {{arg_dst}}_h);
- })
-}
-)_";
- }
-
- code += R"_(
-//------------------ END KERNEL {{meta_kernel_id}} CAST ---------------------
-)_";
-
- return code;
-}
-
-void ClTemplateCast::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
-{
- vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "src");
-
- vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "dst");
-}
-
-TagLUT ClTemplateCast::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
-{
- const auto is_root = (comp_group.get_root_component()->id() == this->id());
-
- TagLUT lut{};
-
- // Arguments and global shared variables
- lut["src"] = vtable.get_variable(_src);
- lut["dst"] = vtable.get_variable(_dst);
- lut["tmp"] = (is_root) ? lut["src"].value + "_in_data" : lut["src"];
-
- const auto dst_argument = vtable.get_variable(comp_group.get_any_dst_tensor());
- lut["arg_dst"] = dst_argument.uniq_name;
-
- // Local build options
- lut["meta_kernel_id"] = id();
-
- lut["DATA_TYPE_IN"] = get_cl_type_from_data_type(_src->data_type());
- lut["DATA_TYPE_OUT"] = get_cl_type_from_data_type(_dst->data_type());
-
- return lut;
-}
-
-CLBuildOptions ClTemplateCast::get_build_options(const ComponentGroup &comp_group) const
-{
- ARM_COMPUTE_UNUSED(comp_group);
-
- const auto root_window = comp_group.get_root_component()->template_writer()->get_window();
- const unsigned int n0 = root_window.x().step();
- const unsigned int m0 = root_window.y().step();
-
- // Set build options
- CLBuildOptions build_opts{};
- build_opts.add_option("-DN0=" + support::cpp11::to_string(n0));
- build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(_src->dimension(0) % n0));
- build_opts.add_option("-DM0=" + support::cpp11::to_string(m0));
-
- return build_opts;
-}
-
-std::string ClTemplateCast::get_config_id() const
-{
- std::string config_id{};
-
- config_id += "_";
- config_id += lower_string(string_from_data_type(_src->data_type()));
- config_id += "_";
- config_id += lower_string(string_from_data_type(_dst->data_type()));
- config_id += "_";
- config_id += support::cpp11::to_string(_src->dimension(0));
- config_id += "_";
- config_id += support::cpp11::to_string(_src->dimension(1));
-
- return config_id;
-}
-
-std::set<std::string> ClTemplateCast::get_headers_list() const
-{
- return std::set<std::string>{"helpers.h", "tile_helpers.h"};
-}
-
-Window ClTemplateCast::get_window() const
-{
- ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized");
-
- const unsigned int n0 = adjust_vec_size(16 / _dst->element_size(), _dst->dimension(0));
- Window win = calculate_max_window(*_dst, Steps(n0));
- return win.collapse(win, Window::DimZ);
-}
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.h
deleted file mode 100644
index 3adca4edc9..0000000000
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.h
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATECAST
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATECAST
-
-#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-class ClTemplateCast final : public IGpuTemplateComponentWriter
-{
-public:
- using Attributes = ClComponentCast::Attributes;
-
- /** Constructor
- *
- * @param[in] id Component id
- * @param[in] tensors Tensor arguments to the components
- * @param[in] attributes Component attributes
- */
- ClTemplateCast(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes);
- /** Prevent instances of this class from being copy constructed */
- ClTemplateCast(const ClTemplateCast &cast) = delete;
- /** Prevent instances of this class from being copied */
- ClTemplateCast &operator=(const ClTemplateCast &cast) = delete;
- /** Allow instances of this class to be move constructed */
- ClTemplateCast(ClTemplateCast &&cast) = default;
- /** Allow instances of this class to be moved */
- ClTemplateCast &operator=(ClTemplateCast &&cast) = default;
- /** Generate kernel component name */
- std::string get_name() const override;
- /** Generate kernel component code template
- *
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return std::string Component code
- */
- std::string get_component_code(const ComponentGroup &comp_group) const override;
- /** Declare all variables used by the component in the @p vtable
- *
- * @param[out] vtable Variable table
- * @param[in] comp_group Component group of which the component is a part of
- */
- void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
- /** Generate the tag look-up table used to instantiate the component code.
- *
- * @param[in] vtable Variable table
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return TagLUT Tag lookup table
- */
- TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
- /** Generate the build options used in the component
- *
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return CLBuildOptions Build options
- */
- CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override;
- /** Generate the component config id string used for tuning */
- std::string get_config_id() const override;
- /** Generate the header list used in the component */
- std::set<std::string> get_headers_list() const override;
- /** Generate the execution window for the component */
- Window get_window() const override;
-
-private:
- const ITensorInfo *_src;
- const ITensorInfo *_dst;
- Attributes _attributes;
-};
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATECAST */
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.cpp
deleted file mode 100644
index 8380620ab2..0000000000
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.cpp
+++ /dev/null
@@ -1,364 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "ClTemplateDepthwiseConv2d.h"
-
-#include "src/core/helpers/WindowHelpers.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-ClTemplateDepthwiseConv2d::ClTemplateDepthwiseConv2d(ComponentId id,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes,
- const Settings &settings)
- : IGpuTemplateComponentWriter{id, tensors},
- _src{},
- _weight{},
- _bias{},
- _dst{},
- _attributes{attributes},
- _settings{settings}
-{
- _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
- _weight = this->tensors().get_const_tensor(TensorType::ACL_SRC_1);
- if (this->tensors().get_const_tensor(TensorType::ACL_SRC_2))
- {
- _bias = this->tensors().get_const_tensor(TensorType::ACL_SRC_2);
- }
- _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
- ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _weight, _dst);
-}
-
-std::string ClTemplateDepthwiseConv2d::get_name() const
-{
- return "depthwise_conv2d";
-}
-
-std::string ClTemplateDepthwiseConv2d::get_component_code(const ComponentGroup &comp_group) const
-{
- ARM_COMPUTE_UNUSED(comp_group);
-
- constexpr int height_idx = 2; // Data Layout is NHWC
-
- std::string code = R"_(
-//------------------ START KERNEL {{meta_kernel_id}} ---------------------
-// IN_0(src) {{src}}
-// IN_1(wei) {{weight}}
-)_";
-
- if (_bias != nullptr && _bias->has_valid_id())
- {
- code += R"_(
-// IN_1(bia) {{bias}}
-)_";
- }
-
- code += R"_(
-// OUT(dst, accum) {{dst}}
-
-TILE(uint, M0, 1, g_dst_indirect_y);
-
-{
-#define _IWEI_WIDTH {{WEI_WIDTH}}
-#define _IWEI_HEIGHT {{WEI_HEIGHT}}
-#define _IDST_WIDTH {{arg_dst}}_w
-#define _IDST_HEIGHT {{arg_dst}}_h
-#define _IM0_A M0_A
-#define _IN0_A N0_A
-#define _IM0_B _IWEI_WIDTH
-#define _IN0_B N0
-#define _IBOUNDARY_CHECK (!((_IWEI_WIDTH == 1 && _IWEI_HEIGHT == 1 && {{PAD_LEFT}} == 0 && {{PAD_TOP}} == 0 && M0 == 1)))
-)_";
-
- code += R"_(
- const int yo = g_ind_2 % {{arg_dst}}_h;
- const int bout = g_ind_2 / {{arg_dst}}_h;
-)_";
-
- code += R"_(
-
- int xi = g_ind_1 * {{STRIDE_X}};
- int yi = yo * {{STRIDE_Y}};
- xi -= {{PAD_LEFT}};
- yi -= {{PAD_TOP}};
-
- LOOP_UNROLLING(int, i, 0, 1, M0,
- {
- {{dst}}[i].v = 0;
- })
-)_";
-
- if (_weight->dimension(height_idx) < 5)
- {
- code += R"_(
- LOOP_UNROLLING(int, yk, 0, 1, _IWEI_HEIGHT,
-)_";
- }
- else
- {
- code += R"_(
- for(int yk = 0; yk < _IWEI_HEIGHT; ++yk)
-)_";
- }
-
- code += R"_(
- {
- TILE({{SRC_DATA_TYPE}}, _IM0_A, _IN0_A, a);
-
- LOOP_UNROLLING(int, i, 0, 1, _IM0_A,
- {
- a[i].v = 0;
- })
-
- T_LOAD_NHWC_WITH_DILATION({{SRC_DATA_TYPE}}, 1, _IM0_A, _IN0_A, {{SRC_TENSOR_TYPE}}, {{src}}, bout, yi + yk * {{DILATION_Y}}, xi, (g_ind_0 / {{DEPTH_MULTIPLIER}}), {{src}}_w, {{src}}_h, {{DILATION_X}}, 1, _IBOUNDARY_CHECK, a);
-
- TILE({{WEI_DATA_TYPE}}, _IM0_B, _IN0_B, b);
-
- T_LOAD({{WEI_DATA_TYPE}}, _IM0_B, _IN0_B, {{WEI_TENSOR_TYPE}}, {{weight}}, g_ind_0, yk * _IM0_B, 1, {{weight}}_stride_y, b);
-
- LOOP_UNROLLING(int, m0, 0, 1, M0,
- {
- LOOP_UNROLLING(int, xk, 0, 1, _IWEI_WIDTH,
- {
-)_";
-
- if (!_settings.is_fma_available())
- {
- code += R"_(
- {{dst}}[m0].v += a[xk + m0].v * b[xk].v;
-)_";
- }
- else
- {
- code += R"_(
- {{dst}}[m0].v = fma(a[xk + m0].v, b[xk].v, {{dst}}[m0].v);
-)_";
- }
-
- code += R"_(
- })
- })
- }
-)_";
-
- if (_weight->dimension(height_idx) < 5)
- {
- code += R"_(
- )
-)_";
- }
-
- if (_bias && _bias->has_valid_id())
- {
- code += R"_(
- TILE({{BIA_DATA_TYPE}}, 1, N0, {{bias}});
-
- T_LOAD({{BIA_DATA_TYPE}}, 1, N0, BUFFER, {{bias}}, g_ind_0, 0, 0, 0, {{bias}});
-
- T_ELTWISE_BROADCAST_ADD_X({{ACC_DATA_TYPE}}, M0, N0, {{dst}}, {{bias}}, {{dst}});
-)_";
- }
-
- code += R"_(
- LOOP_UNROLLING(int, i, 0, 1, M0,
- {
- g_dst_indirect_y[i].v = (uint)min((int)(g_ind_1 + i), (int)({{arg_dst}}_w) - 1);
- g_dst_indirect_y[i].v += (int)(g_ind_2 % {{arg_dst}}_h) * (int)({{arg_dst}}_w);
- g_dst_indirect_y[i].v += (int)(g_ind_2 / {{arg_dst}}_h) * (int)({{arg_dst}}_w * {{arg_dst}}_h);
- })
-}
-//------------------ END KERNEL {{meta_kernel_id}} ---------------------
-)_";
-
- return code;
-}
-
-void ClTemplateDepthwiseConv2d::declare_variables(GpuKernelVariableTable &vtable,
- const ComponentGroup &comp_group) const
-{
- const GpuKernelArgumentInfo::Type input_type = _settings.export_input_to_cl_image()
- ? GpuKernelArgumentInfo::Type::Tensor_4D_t_Image
- : GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer;
-
- vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(input_type), "src");
-
- const GpuKernelArgumentInfo::Type weight_type = _settings.export_weights_to_cl_image()
- ? GpuKernelArgumentInfo::Type::Tensor_4D_t_Image
- : GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer;
-
- vtable.declare_variable(comp_group, _weight, GpuKernelArgumentInfo(weight_type), "weight");
-
- if (_bias != nullptr && _bias->has_valid_id()) // optional bias
- {
- vtable.declare_variable(comp_group, _bias, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Vector), "bias");
- }
- vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "dst");
-}
-
-TagLUT ClTemplateDepthwiseConv2d::get_tag_lut(const GpuKernelVariableTable &vtable,
- const ComponentGroup &comp_group) const
-{
- TagLUT lut{};
-
- // Arguments and global shared variables
- lut["src"] = vtable.get_variable(_src);
- lut["weight"] = vtable.get_variable(_weight);
-
- if (_bias != nullptr && _bias->has_valid_id()) // optional bias
- {
- lut["bias"] = vtable.get_variable(_bias);
- lut["BIA_DATA_TYPE"] = get_cl_type_from_data_type(_bias->data_type());
- }
- lut["dst"] = vtable.get_variable(_dst);
-
- const auto dst_argument = vtable.get_variable(comp_group.get_any_dst_tensor());
- lut["arg_dst"] = dst_argument.uniq_name;
-
- // Local build options
- lut["meta_kernel_id"] = id();
- lut["ACC_DATA_TYPE"] = _src->data_type();
- lut["SRC_DATA_TYPE"] = _src->data_type();
- lut["WEI_DATA_TYPE"] = _weight->data_type();
-
- switch (vtable.get_variable(_src).kernel_argument_info.type)
- {
- case GpuKernelArgumentInfo::Type::Image_Export_To_ClImage2D:
- case GpuKernelArgumentInfo::Type::Image_3D_Export_To_ClImage2D:
- case GpuKernelArgumentInfo::Type::Tensor_4D_t_Image:
- lut["SRC_TENSOR_TYPE"] = "IMAGE";
- break;
- default:
- lut["SRC_TENSOR_TYPE"] = "BUFFER";
- break;
- }
-
- switch (vtable.get_variable(_weight).kernel_argument_info.type)
- {
- case GpuKernelArgumentInfo::Type::Image_Export_To_ClImage2D:
- case GpuKernelArgumentInfo::Type::Image_3D_Export_To_ClImage2D:
- case GpuKernelArgumentInfo::Type::Tensor_4D_t_Image:
- lut["WEI_TENSOR_TYPE"] = "IMAGE";
- break;
- default:
- lut["WEI_TENSOR_TYPE"] = "BUFFER";
- break;
- }
-
- // Data Layout is NHWC
- constexpr int width_idx = 1;
- constexpr int height_idx = 2;
-
- lut["WEI_WIDTH"] = _weight->dimension(width_idx);
- lut["WEI_HEIGHT"] = _weight->dimension(height_idx);
-
- lut["STRIDE_X"] = _attributes.stride().x();
- lut["STRIDE_Y"] = _attributes.stride().y();
-
- lut["PAD_LEFT"] = _attributes.pad().left;
- lut["PAD_TOP"] = _attributes.pad().top;
-
- lut["DILATION_X"] = _attributes.dilation().x();
- lut["DILATION_Y"] = _attributes.dilation().y();
-
- lut["DEPTH_MULTIPLIER"] = _attributes.depth_multiplier();
-
- return lut;
-}
-
-CLBuildOptions ClTemplateDepthwiseConv2d::get_build_options(const ComponentGroup &comp_group) const
-{
- ARM_COMPUTE_UNUSED(comp_group);
-
- constexpr unsigned int width_idx = 1; // Data Layout is NHWC
-
- const unsigned int n0 = _settings.n0();
- const unsigned int m0 = _settings.m0();
- const unsigned int m0_a = _weight->dimension(width_idx) + m0 - 1;
- const unsigned int n0_a = _attributes.depth_multiplier() > 1 ? 1 : n0;
- const unsigned int partial_store_n0 = _dst->dimension(0) % n0;
-
- CLBuildOptions build_opts{};
-
- if (_settings.fast_relaxed_math())
- {
- build_opts.add_option("-cl-fast-relaxed-math");
- }
- else
- {
- // -cl-fast-relaxed-math also sets -cl-finite-math-only and -cl-unsafe-math-optimizations
- // to disable -cl-finite-math-only, we only include -cl-unsafe-math-optimizations
- build_opts.add_option("-cl-unsafe-math-optimizations");
- }
-
- build_opts.add_option("-DN0=" + support::cpp11::to_string(n0));
- build_opts.add_option("-DM0=" + support::cpp11::to_string(m0));
- build_opts.add_option("-DN0_A=" + support::cpp11::to_string(n0_a));
- build_opts.add_option("-DM0_A=" + support::cpp11::to_string(m0_a));
- build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0));
-
- return build_opts;
-}
-
-std::string ClTemplateDepthwiseConv2d::get_config_id() const
-{
- std::string config_id{};
-
- config_id += support::cpp11::to_string(_src->dimension(0));
- config_id += "_";
- config_id += support::cpp11::to_string(_src->dimension(1));
- config_id += "_";
- config_id += support::cpp11::to_string(_src->dimension(2));
- config_id += "_";
- config_id += support::cpp11::to_string(_dst->dimension(0));
- config_id += "_";
- config_id += support::cpp11::to_string(_dst->dimension(1));
- config_id += "_";
- config_id += support::cpp11::to_string(_dst->dimension(2));
- config_id += "_";
- config_id += string_from_data_type(_src->data_type());
-
- return config_id;
-}
-
-std::set<std::string> ClTemplateDepthwiseConv2d::get_headers_list() const
-{
- return std::set<std::string>{"helpers.h", "tile_helpers.h"};
-}
-
-Window ClTemplateDepthwiseConv2d::get_window() const
-{
- ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized");
-
- Window win = calculate_max_window(*_dst, Steps(_settings.n0(), _settings.m0()));
- return win.collapse(win, Window::DimZ);
-}
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.h
deleted file mode 100644
index 5d04c687c3..0000000000
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.h
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEDEPTHWISECONV2D
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEDEPTHWISECONV2D
-
-#include "arm_compute/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.h"
-
-#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-class ClTemplateDepthwiseConv2d final : public IGpuTemplateComponentWriter
-{
-public:
- using Attributes = ClComponentDepthwiseConv2d::Attributes;
- using Settings = ClComponentDepthwiseConv2d::Settings;
- /** Constructor
- *
- * Similar to @ref ClComponentDepthwiseConv2d::validate()
- *
- * @param[in] id Component id
- * @param[in] tensors Tensor arguments to the components
- * @param[in] attributes Component attributes
- * @param[in] settings Component settings
- */
- ClTemplateDepthwiseConv2d(ComponentId id,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes,
- const Settings &settings);
- /** Prevent instances of this class from being copy constructed */
- ClTemplateDepthwiseConv2d(const ClTemplateDepthwiseConv2d &depthwise_conv2d) = delete;
- /** Prevent instances of this class from being copied */
- ClTemplateDepthwiseConv2d &operator=(const ClTemplateDepthwiseConv2d &depthwise_conv2d) = delete;
- /** Allow instances of this class to be move constructed */
- ClTemplateDepthwiseConv2d(ClTemplateDepthwiseConv2d &&depthwise_conv2d) = default;
- /** Allow instances of this class to be moved */
- ClTemplateDepthwiseConv2d &operator=(ClTemplateDepthwiseConv2d &&depthwise_conv2d) = default;
- /** Generate kernel component name */
- std::string get_name() const override;
- /** Generate kernel component code template
- *
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return std::string Component code
- */
- std::string get_component_code(const ComponentGroup &comp_group) const override;
- /** Declare all variables used by the component in the @p vtable
- *
- * @param[out] vtable Variable table
- * @param[in] comp_group Component group of which the component is a part of
- */
- void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
- /** Generate the tag look-up table used to instantiate the component code.
- *
- * @param[in] vtable Variable table
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return TagLUT Tag lookup table
- */
- TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
- /** Generate the build options used in the component
- *
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return CLBuildOptions Build options
- */
- CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override;
- /** Generate the component config id string used for tuning */
- std::string get_config_id() const override;
- /** Generate the header list used in the component */
- std::set<std::string> get_headers_list() const override;
- /** Generate the execution window for the component */
- Window get_window() const override;
-
-private:
- const ITensorInfo *_src;
- const ITensorInfo *_weight;
- const ITensorInfo *_bias;
- const ITensorInfo *_dst;
- Attributes _attributes;
- Settings _settings;
-};
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEDEPTHWISECONV2D */
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp
deleted file mode 100644
index f6a7a58d1d..0000000000
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp
+++ /dev/null
@@ -1,393 +0,0 @@
-/*
- * Copyright (c) 2022-2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "ClTemplateDirectConv2d.h"
-
-#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "arm_compute/core/utils/StringUtils.h"
-
-#include "src/core/helpers/WindowHelpers.h"
-#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
-#include "support/StringSupport.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-ClTemplateDirectConv2d::ClTemplateDirectConv2d(ComponentId id,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes,
- const Settings &settings)
- : IGpuTemplateComponentWriter{id, tensors},
- _src{},
- _weight{},
- _bias{},
- _dst{},
- _attributes{attributes},
- _settings{settings}
-{
- _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
- _weight = this->tensors().get_const_tensor(TensorType::ACL_SRC_1);
- if (this->tensors().get_const_tensor(TensorType::ACL_SRC_2))
- {
- _bias = this->tensors().get_const_tensor(TensorType::ACL_SRC_2);
- }
- _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
- ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _weight, _dst);
-}
-
-std::string ClTemplateDirectConv2d::get_name() const
-{
- return "direct_conv2d";
-}
-
-std::string ClTemplateDirectConv2d::get_component_code(const ComponentGroup &comp_group) const
-{
- ARM_COMPUTE_UNUSED(comp_group);
-
- const auto channel_idx = get_data_layout_dimension_index(_src->data_layout(), DataLayoutDimension::CHANNEL);
- const auto k0 = adjust_vec_size(_settings.direct_conv_descriptor().k0, _src->dimension(channel_idx));
- const bool leftover_loop = (_src->dimension(channel_idx) % k0) != 0;
-
- std::string code = R"_(
-//------------------ START KERNEL {{meta_kernel_id}} ---------------------
-// IN_0(src) {{src}}
-// IN_1(wei) {{weight}}
-)_";
- if (_bias && _bias->has_valid_id())
- {
- code += R"_(
-// IN_1(bia) {{bias}}
-)_";
- }
- code += R"_(
-// OUT(dst, accum) {{dst}}
-
-TILE(uint, M0, 1, g_dst_indirect_y);
-
-{
-#define _IWEI_WIDTH {{WEI_WIDTH}}
-#define _IWEI_HEIGHT {{WEI_HEIGHT}}
-#define _ISRC_WIDTH {{SRC_WIDTH}}
-#define _ISRC_HEIGHT {{SRC_HEIGHT}}
-#define _ISRC_CHANNELS {{SRC_CHANNELS}}
-#define _IDST_WIDTH {{DST_WIDTH}}
-#define _IDST_HEIGHT {{DST_HEIGHT}}
-#define _IDST_CHANNELS {{DST_CHANNELS}}
-#define _IY_MULTIPLIER (_IWEI_WIDTH * _IWEI_HEIGHT)
-
- TILE(int, M0, 1, xi);
- TILE(int, M0, 1, yi);
-
- // Convert the linear index to coordinate
- LOOP_UNROLLING(int, i, 0, 1, M0,
- {
- xi[0].s[i] = ((g_ind_1 + i) % _IDST_WIDTH) * {{STRIDE_X}};
- yi[0].s[i] = ((g_ind_1 + i) / _IDST_WIDTH) * {{STRIDE_Y}};
- xi[0].s[i] -= {{PAD_LEFT}};
- yi[0].s[i] -= {{PAD_TOP}};
- })
-
- LOOP_UNROLLING(int, i, 0, 1, M0,
- {
- {{dst}}[i].v = 0;
- })
-
- for(int i = 0; i < (_IWEI_WIDTH * _IWEI_HEIGHT); ++i)
- {
- int xk = i % _IWEI_WIDTH;
- int yk = i / _IWEI_WIDTH;
-
- TILE(int, 1, M0, my);
-
- LOOP_UNROLLING(int, i, 0, 1, M0,
- {
- int x_s = xi[0].s[i] + xk;
- int y_s = yi[0].s[i] + yk;
- my[0].s[i] = x_s + y_s *_ISRC_WIDTH;
- my[0].s[i] = my[0].s[i] + g_ind_2 * (int)(_ISRC_WIDTH * _ISRC_HEIGHT);
- my[0].s[i] = select(-1, my[0].s[i], x_s >= 0);
- my[0].s[i] = select(-1, my[0].s[i], x_s < _ISRC_WIDTH);
- my[0].s[i] = select(-1, my[0].s[i], y_s >= 0);
- my[0].s[i] = select(-1, my[0].s[i], y_s < _ISRC_HEIGHT);
- })
-
- int ck = 0;
- for(; ck <= (_ISRC_CHANNELS - K0); ck += K0)
- {
- TILE({{SRC_DATA_TYPE}}, M0, K0, a);
- TILE({{WEI_DATA_TYPE}}, N0, K0, b);
-
- LOOP_UNROLLING(int, i, 0, 1, M0,
- {
- a[i].v = {{ZERO_VALUE}};
- })
-
- LOOP_UNROLLING(int, i, 0, 1, N0,
- {
- b[i].v = {{ZERO_VALUE}};
- })
-
- T_LOAD2D_INDIRECT({{SRC_DATA_TYPE}}, M0, K0, {{SRC_TENSOR_TYPE}}, {{src}}, ck, {{src}}_stride_y, my, a);
-
- T_LOAD({{WEI_DATA_TYPE}}, N0, K0, {{WEI_TENSOR_TYPE}}, {{weight}}, ck, g_ind_0 * _IY_MULTIPLIER + i, _IY_MULTIPLIER, {{weight}}_stride_y, b);
-
- T_MMUL({{SRC_DATA_TYPE}}, {{WEI_DATA_TYPE}}, {{ACC_DATA_TYPE}}, M0, N0, K0, NT, T, a, b, {{dst}});
- }
-)_";
-
- if (leftover_loop)
- {
- code += R"_(
- for(; ck < _ISRC_CHANNELS; ++ck)
- {
- TILE({{SRC_DATA_TYPE}}, M0, 1, a);
- TILE({{WEI_DATA_TYPE}}, N0, 1, b);
-
- LOOP_UNROLLING(int, i, 0, 1, M0,
- {
- a[i].v = {{ZERO_VALUE}};
- })
-
- LOOP_UNROLLING(int, i, 0, 1, N0,
- {
- b[i].v = {{ZERO_VALUE}};
- })
-
- T_LOAD2D_INDIRECT({{SRC_DATA_TYPE}}, M0, 1, {{SRC_TENSOR_TYPE}}, {{src}}, ck, {{src}}_stride_y, my, a);
-
- T_LOAD({{WEI_DATA_TYPE}}, N0, 1, BUFFER, {{weight}}, ck, g_ind_0 * _IY_MULTIPLIER + i, _IY_MULTIPLIER, {{weight}}_stride_y, b);
-
- T_MMUL({{SRC_DATA_TYPE}}, {{WEI_DATA_TYPE}}, {{ACC_DATA_TYPE}}, M0, N0, 1, NT, T, a, b, {{dst}});
- }
- )_";
- }
-
- code += R"_(
-#undef _I_WEI_WIDTH
-#undef _I_WEI_HEIGHT
-#undef _ISRC_WIDTH
-#undef _ISRC_HEIGHT
-#undef _ISRC_CHANNELS
-#undef _IDST_WIDTH
-#undef _IDST_HEIGHT
-#undef _IDST_CHANNELS
-#undef _IY_MULTIPLIER
-
- }
-)_";
-
- if (_bias && _bias->has_valid_id())
- {
- code += R"_(
- TILE({{BIA_DATA_TYPE}}, 1, N0, bias0);
-
- T_LOAD({{BIA_DATA_TYPE}}, 1, N0, BUFFER, {{bias}}, g_ind_0, 0, 1, 0, bias0);
-
- T_ELTWISE_BROADCAST_ADD_X({{ACC_DATA_TYPE}}, M0, N0, {{dst}}, bias0, {{dst}});
- )_";
- }
-
- code += R"_(
- LOOP_UNROLLING(int, i, 0, 1, M0,
- {
- g_dst_indirect_y[i].v = (uint)min(g_ind_1 + i, (int)({{DST_WIDTH}} * {{DST_HEIGHT}}) - 1);
- g_dst_indirect_y[i].v += g_ind_2 * (int)({{DST_WIDTH}} * {{DST_HEIGHT}});
- })
-}
-//------------------ END KERNEL {{meta_kernel_id}} ---------------------
-)_";
- return code;
-}
-
-void ClTemplateDirectConv2d::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
-{
- vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "src");
-
- const GpuKernelArgumentInfo::Type weight_type = _settings.export_to_cl_image()
- ? GpuKernelArgumentInfo::Type::Tensor_4D_t_Image
- : GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer;
- vtable.declare_variable(comp_group, _weight, GpuKernelArgumentInfo(weight_type), "weight");
-
- if (_bias && _bias->has_valid_id()) // optional bias
- {
- vtable.declare_variable(comp_group, _bias, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Vector), "bias");
- }
- vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(common_tensor_type), "dst");
-}
-
-TagLUT ClTemplateDirectConv2d::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
-{
- TagLUT lut{};
- // Arguments and global shared variables
- lut["src"] = vtable.get_variable(_src);
- lut["weight"] = vtable.get_variable(_weight);
-
- if (_bias && _bias->has_valid_id()) // optional bias
- {
- lut["bias"] = vtable.get_variable(_bias);
- lut["BIA_DATA_TYPE"] = get_cl_type_from_data_type(_bias->data_type());
- }
- lut["dst"] = vtable.get_variable(_dst);
-
- const auto dst_argument = vtable.get_variable(comp_group.get_any_dst_tensor());
- lut["arg_dst"] = dst_argument.uniq_name;
-
- // Local build options
- lut["meta_kernel_id"] = id();
- lut["ACC_DATA_TYPE"] = _src->data_type();
- lut["SRC_DATA_TYPE"] = _src->data_type();
- lut["WEI_DATA_TYPE"] = _weight->data_type();
-
- lut["SRC_TENSOR_TYPE"] = "BUFFER";
- switch (vtable.get_variable(_weight).kernel_argument_info.type)
- {
- case GpuKernelArgumentInfo::Type::Image_Export_To_ClImage2D:
- case GpuKernelArgumentInfo::Type::Image_3D_Export_To_ClImage2D:
- case GpuKernelArgumentInfo::Type::Tensor_4D_t_Image:
- {
- lut["WEI_TENSOR_TYPE"] = "IMAGE";
- break;
- }
- default:
- {
- lut["WEI_TENSOR_TYPE"] = "BUFFER";
- break;
- }
- }
- const auto width_idx = 1;
- const auto height_idx = 2;
- const auto channel_idx = 0;
-
- lut["SRC_WIDTH"] = _src->dimension(width_idx);
- lut["SRC_HEIGHT"] = _src->dimension(height_idx);
- lut["SRC_CHANNELS"] = _src->dimension(channel_idx);
-
- lut["WEI_WIDTH"] = _weight->dimension(width_idx);
- lut["WEI_HEIGHT"] = _weight->dimension(height_idx);
-
- lut["DST_WIDTH"] = _dst->dimension(width_idx);
- lut["DST_HEIGHT"] = _dst->dimension(height_idx);
- lut["DST_CHANNELS"] = _dst->dimension(channel_idx);
-
- lut["STRIDE_X"] = _attributes.stride().x();
- lut["STRIDE_Y"] = _attributes.stride().y();
-
- lut["PAD_LEFT"] = _attributes.pad().left;
- lut["PAD_TOP"] = _attributes.pad().top;
-
- lut["ZERO_VALUE"] = 0;
-
- return lut;
-}
-
-CLBuildOptions ClTemplateDirectConv2d::get_build_options(const ComponentGroup &comp_group) const
-{
- const unsigned int channel_idx = get_data_layout_dimension_index(_src->data_layout(), DataLayoutDimension::CHANNEL);
-
- const auto root_window = comp_group.get_root_component()->template_writer()->get_window();
- const unsigned int n0 = root_window.x().step();
- const unsigned int m0 = root_window.y().step();
- const unsigned int k0 = adjust_vec_size(_settings.direct_conv_descriptor().k0, _src->dimension(channel_idx));
- const unsigned int partial_store_n0 = _dst->dimension(0) % n0;
-
- CLBuildOptions build_opts{};
- if (_settings.fast_relaxed_math())
- {
- build_opts.add_option("-cl-fast-relaxed-math");
- }
- else
- {
- // -cl-fast-relaxed-math also sets -cl-finite-math-only and -cl-unsafe-math-optimizations
- // to disable -cl-finite-math-only, we only include -cl-unsafe-math-optimizations
- build_opts.add_option("-cl-unsafe-math-optimizations");
- }
-
- build_opts.add_option("-DN0=" + support::cpp11::to_string(n0));
- build_opts.add_option("-DM0=" + support::cpp11::to_string(m0));
- build_opts.add_option("-DK0=" + support::cpp11::to_string(k0));
- build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0));
-
- return build_opts;
-}
-
-std::string ClTemplateDirectConv2d::get_config_id() const
-{
- const DataType data_type = _src->data_type();
- const DataLayout data_layout = _src->data_layout();
-
- const unsigned int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
- const unsigned int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
-
- const unsigned int kernel_size = _weight->dimension(width_idx);
-
- std::string config_id{};
- config_id += lower_string(string_from_data_type(data_type));
- config_id += "_";
- config_id += support::cpp11::to_string(kernel_size);
- config_id += "_";
- config_id += support::cpp11::to_string(_attributes.stride().x());
- config_id += "_";
- config_id += support::cpp11::to_string(_attributes.stride().y());
- config_id += "_";
- config_id += support::cpp11::to_string(_dst->dimension(width_idx));
- config_id += "_";
- config_id += support::cpp11::to_string(_dst->dimension(height_idx));
- config_id += "_";
- config_id += lower_string(string_from_data_layout(data_layout));
- return config_id;
-}
-
-std::set<std::string> ClTemplateDirectConv2d::get_headers_list() const
-{
- return std::set<std::string>{"helpers.h", "tile_helpers.h"};
-}
-
-Window ClTemplateDirectConv2d::get_window() const
-{
- ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized");
-
- const auto output_shape = _dst->tensor_shape();
- const auto desc = _settings.direct_conv_descriptor();
-
- const unsigned int n0 = adjust_vec_size(desc.n0, output_shape[0]);
- const unsigned int m0 = adjust_vec_size(desc.m0, output_shape[1] * output_shape[2]);
-
- // Create and configure kernel window
- Window win = calculate_max_window(output_shape, Steps(n0, m0));
-
- const size_t dim_y_collapsed = ceil_to_multiple(output_shape[1] * output_shape[2], m0);
- win.set(Window::DimY, Window::Dimension(0, dim_y_collapsed, m0));
- win.set(Window::DimZ, Window::Dimension(0, output_shape.total_size_upper(3), 1));
-
- return win;
-}
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h
deleted file mode 100644
index 03c8cd2f15..0000000000
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Copyright (c) 2022-2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEDIRECTCONV2D
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEDIRECTCONV2D
-
-#include "arm_compute/core/experimental/Types.h"
-#include "arm_compute/dynamic_fusion/sketch/attributes/Conv2dAttributes.h"
-
-#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-class ClTemplateDirectConv2d final : public IGpuTemplateComponentWriter
-{
-public:
- using Attributes = ClComponentDirectConv2d::Attributes;
- using Settings = ClComponentDirectConv2d::Settings;
- /** Constructor
- *
- * Similar to @ref ClComponentDirectConv2d::validate()
- *
- * @param[in] id Component id
- * @param[in] tensors Tensor arguments to the components
- * @param[in] attributes Component attributes
- * @param[in] settings Component settings
- */
- ClTemplateDirectConv2d(ComponentId id,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes,
- const Settings &settings);
- /** Destructor */
- ~ClTemplateDirectConv2d() override = default;
- /** Prevent instances of this class from being copy constructed */
- ClTemplateDirectConv2d(const ClTemplateDirectConv2d &direct_conv2d) = delete;
- /** Prevent instances of this class from being copied */
- ClTemplateDirectConv2d &operator=(const ClTemplateDirectConv2d &direct_conv2d) = delete;
- /** Allow instances of this class to be move constructed */
- ClTemplateDirectConv2d(ClTemplateDirectConv2d &&direct_conv2d) = default;
- /** Allow instances of this class to be moved */
- ClTemplateDirectConv2d &operator=(ClTemplateDirectConv2d &&direct_conv2d) = default;
- /** Generate kernel component name */
- std::string get_name() const override;
- /** Generate kernel component code template
- *
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return std::string Component code
- */
- std::string get_component_code(const ComponentGroup &comp_group) const override;
- /** Declare all variables used by the component in the @p vtable
- *
- * @param[out] vtable Variable table
- * @param[in] comp_group Component group of which the component is a part of
- */
- void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
- /** Generate the tag look-up table used to instantiate the component code.
- *
- * @param[in] vtable Variable table
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return TagLUT Tag lookup table
- */
- TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
- /** Generate the build options used in the component
- *
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return CLBuildOptions Build options
- */
- CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override;
- /** Generate the component config id string used for tuning */
- std::string get_config_id() const override;
- /** Generate the header list used in the component */
- std::set<std::string> get_headers_list() const override;
- /** Generate the execution window for the component */
- Window get_window() const override;
-
-private:
- const ITensorInfo *_src;
- const ITensorInfo *_weight;
- const ITensorInfo *_bias;
- const ITensorInfo *_dst;
- Attributes _attributes;
- Settings _settings;
-};
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEDIRECTCONV2D */
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp
deleted file mode 100644
index 78bff3c3f3..0000000000
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp
+++ /dev/null
@@ -1,274 +0,0 @@
-/*
- * Copyright (c) 2022-2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "ClTemplateElementwiseBinary.h"
-
-#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "arm_compute/core/utils/StringUtils.h"
-
-#include "src/core/helpers/WindowHelpers.h"
-#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
-#include "support/StringSupport.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-constexpr unsigned int vector_size_byte_opencl = 16;
-
-ClTemplateElementwiseBinary::ClTemplateElementwiseBinary(ComponentId id,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes)
- : IGpuTemplateComponentWriter{id, tensors}, _lhs{}, _rhs{}, _dst{}, _attributes{attributes}
-{
- _lhs = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
- _rhs = this->tensors().get_const_tensor(TensorType::ACL_SRC_1);
- _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
- ARM_COMPUTE_ERROR_ON_NULLPTR(_lhs, _rhs, _dst);
-}
-
-std::string ClTemplateElementwiseBinary::get_name() const
-{
- return "elementwise_binary";
-}
-
-std::string ClTemplateElementwiseBinary::get_component_code(const ComponentGroup &comp_group) const
-{
- std::string code;
- const bool is_root = (comp_group.get_root_component()->id() == this->id());
- const bool is_lhs_input = comp_group.is_input_tensor(_lhs);
- const bool is_rhs_input = comp_group.is_input_tensor(_rhs);
-
- code =
- R"_(
- //------------------ START KERNEL {{meta_kernel_id}} {{ELTWISE_OP}} ---------------------
-)_";
-
- if (is_root)
- {
- code +=
- R"_(
- TILE(uint, M0, 1, g_dst_indirect_y);
-)_";
- }
-
- if (is_lhs_input)
- {
- code +=
- R"_(
- TILE({{DATA_TYPE}}, {{lhs_m0}}, N0, {{lhs}});
-)_";
- }
-
- if (is_rhs_input)
- {
- code +=
- R"_(
- TILE({{DATA_TYPE}}, {{rhs_m0}}, N0, {{rhs}});
-)_";
- }
-
- code +=
- R"_(
- {
-)_";
-
- if (is_lhs_input)
- {
- code +=
- R"_(
- {{lhs}}_offset_first_element_in_bytes += g_ind_2 * {{lhs}}_stride_w;
- T_LOAD({{DATA_TYPE}}, {{lhs_m0}}, {{lhs_n0}}, BUFFER, {{lhs}}, {{lhs_start_ind_0}}, {{lhs_start_ind_1}}, 1, {{lhs}}_stride_y, {{lhs}});
-)_";
- }
-
- if (is_rhs_input)
- {
- code +=
- R"_(
- {{rhs}}_offset_first_element_in_bytes += g_ind_2 * {{rhs}}_stride_w;
- T_LOAD({{DATA_TYPE}}, {{rhs_m0}}, {{rhs_n0}}, BUFFER, {{rhs}}, {{rhs_start_ind_0}}, {{rhs_start_ind_1}}, 1, {{rhs}}_stride_y, {{rhs}});
-)_";
- }
-
- code +=
- R"_(
- T_ELTWISE_{{BROADCAST_OP}}{{ELTWISE_OP}}({{DATA_TYPE}}, M0, N0, {{lhs}}, {{rhs}}, {{dst}});
-)_";
-
- if (is_root)
- {
- // Calculate the destination indirect Y
- code +=
- R"_(
- LOOP_UNROLLING(int, i, 0, 1, M0,
- {
- g_dst_indirect_y[i].v = (uint)min(g_ind_1 + i, (int)({{arg_dst}}_w * {{arg_dst}}_h) - 1);
- g_dst_indirect_y[i].v += g_ind_2 * (int)({{arg_dst}}_w * {{arg_dst}}_h);
- })
-)_";
- }
-
- code +=
- R"_(
- }
- //------------------ END KERNEL {{meta_kernel_id}} {{ELTWISE_OP}} ---------------------
-)_";
-
- return code;
-}
-
-void ClTemplateElementwiseBinary::declare_variables(GpuKernelVariableTable &vtable,
- const ComponentGroup &comp_group) const
-{
- vtable.declare_variable(comp_group, _lhs, GpuKernelArgumentInfo(common_tensor_type), "lhs");
-
- vtable.declare_variable(comp_group, _rhs, GpuKernelArgumentInfo(common_tensor_type), "rhs");
-
- vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(common_tensor_type), "dst");
-}
-
-TagLUT ClTemplateElementwiseBinary::get_tag_lut(const GpuKernelVariableTable &vtable,
- const ComponentGroup &comp_group) const
-{
- TagLUT lut{};
-
- // Local build options
- lut["meta_kernel_id"] = id();
- lut["DATA_TYPE"] = get_cl_type_from_data_type(_lhs->data_type());
- // Arguments and global shared variables
-
- lut["lhs"] = vtable.get_variable(_lhs);
- lut["rhs"] = vtable.get_variable(_rhs);
- lut["dst"] = vtable.get_variable(_dst);
- lut["arg_dst"] = vtable.get_variable(comp_group.get_any_dst_tensor());
-
- switch (_attributes.operation())
- {
- case Attributes::ElementwiseOp::Add:
- lut["ELTWISE_OP"] = "ADD";
- break;
- case Attributes::ElementwiseOp::Sub:
- lut["ELTWISE_OP"] = "SUB";
- break;
- case Attributes::ElementwiseOp::Mul:
- lut["ELTWISE_OP"] = "MUL";
- break;
- default:
- ARM_COMPUTE_ERROR("Arithmetic Operation not supported");
- }
-
- ARM_COMPUTE_ERROR_ON(comp_group.is_intermediate_tensor(_lhs) &&
- detail::have_different_dimensions(_lhs->tensor_shape(), _dst->tensor_shape(), 0));
- ARM_COMPUTE_ERROR_ON(comp_group.is_intermediate_tensor(_rhs) &&
- detail::have_different_dimensions(_rhs->tensor_shape(), _dst->tensor_shape(), 0));
-
- // Set broadcast parameters
- // PRE: All tensors are broadcast-compatible
- const auto &lhs_dims = _lhs->tensor_shape();
- const auto &rhs_dims = _rhs->tensor_shape();
- const auto &dst_dims = _dst->tensor_shape();
-
- const auto lhs_broadcast_x = dst_dims[0] != 1 && lhs_dims[0] == 1;
- const auto rhs_broadcast_x = dst_dims[0] != 1 && rhs_dims[0] == 1;
- const auto lhs_broadcast_y = dst_dims[1] != 1 && lhs_dims[1] == 1;
- const auto rhs_broadcast_y = dst_dims[1] != 1 && rhs_dims[1] == 1;
- const auto lhs_broadcast_z = dst_dims[2] != 1 && lhs_dims[2] == 1;
- const auto rhs_broadcast_z = dst_dims[2] != 1 && rhs_dims[2] == 1;
-
- const auto lhs_broadcast_yz = lhs_broadcast_y && lhs_broadcast_z;
- const auto rhs_broadcast_yz = rhs_broadcast_y && rhs_broadcast_z;
-
- lut["lhs_n0"] = (lhs_broadcast_x) ? "1" : "N0";
- lut["lhs_start_ind_0"] = (lhs_broadcast_x) ? "0" : "g_ind_0";
- lut["rhs_n0"] = (rhs_broadcast_x) ? "1" : "N0";
- lut["rhs_start_ind_0"] = (rhs_broadcast_x) ? "0" : "g_ind_0";
-
- lut["lhs_m0"] = (lhs_broadcast_yz) ? "1" : "M0";
- lut["lhs_start_ind_1"] = (lhs_broadcast_yz) ? "0" : "g_ind_1";
- lut["rhs_m0"] = (rhs_broadcast_yz) ? "1" : "M0";
- lut["rhs_start_ind_1"] = (rhs_broadcast_yz) ? "0" : "g_ind_1";
-
- lut["BROADCAST_OP"] = (lhs_broadcast_yz) ? "BROADCAST_LHS_X_" : (rhs_broadcast_yz) ? "BROADCAST_RHS_X_" : "";
-
- return lut;
-}
-
-CLBuildOptions ClTemplateElementwiseBinary::get_build_options(const ComponentGroup &comp_group) const
-{
- CLBuildOptions build_opts{};
- /// NOTE: For now tile sizes (n0, m0) are set by the execution window. This may change in the future
- const auto root_window = comp_group.get_root_component()->template_writer()->get_window();
- const unsigned int n0 = root_window.x().step();
- const unsigned int m0 = root_window.y().step();
- const unsigned int partial_store_n0 = _dst->dimension(0) % n0;
-
- build_opts.add_option("-DM0=" + support::cpp11::to_string(m0));
- build_opts.add_option("-DN0=" + support::cpp11::to_string(n0));
- build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(_lhs->data_type()));
- build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0));
-
- return build_opts;
-}
-
-std::string ClTemplateElementwiseBinary::get_config_id() const
-{
- std::string config_id{};
- config_id += lower_string(string_from_data_type(_dst->data_type()));
- config_id += "_";
- config_id += support::cpp11::to_string(_dst->dimension(0));
- config_id += "_";
- config_id += support::cpp11::to_string(_dst->dimension(1));
- config_id += "_";
- config_id += lower_string(string_from_data_layout(_dst->data_layout()));
-
- return config_id;
-}
-
-std::set<std::string> ClTemplateElementwiseBinary::get_headers_list() const
-{
- return std::set<std::string>{"helpers.h", "tile_helpers.h"};
-}
-
-Window ClTemplateElementwiseBinary::get_window() const
-{
- ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized");
-
- TensorShape output_shape = _dst->tensor_shape();
- // Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) and upper dimensions unchanged
- // This is in line with the collapsing convention used by operators like Conv2d
- output_shape.collapse(2U, 1U);
- const unsigned int num_elems_processed_per_iteration =
- adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
- Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
-
- return win;
-}
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.h
deleted file mode 100644
index 991c0eca44..0000000000
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.h
+++ /dev/null
@@ -1,115 +0,0 @@
-/*
- * Copyright (c) 2022-2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEELEMENTWISEBINARY
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEELEMENTWISEBINARY
-
-#include "arm_compute/core/experimental/Types.h"
-
-#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-class ClTemplateElementwiseBinary final : public IGpuTemplateComponentWriter
-{
-public:
- using Attributes = ClComponentElementwiseBinary::Attributes;
-
- /** Constructor
- *
- * Similar to @ref ClComponentElementwiseBinary::validate()
- *
- * @param[in] id Component id
- * @param[in] tensors Tensor arguments to the components
- * @param[in] attributes Component attributes
- */
- ClTemplateElementwiseBinary(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes);
- /** Prevent instances of this class from being copy constructed */
- ClTemplateElementwiseBinary(const ClTemplateElementwiseBinary &elementwise) = delete;
- /** Prevent instances of this class from being copied */
- ClTemplateElementwiseBinary &operator=(const ClTemplateElementwiseBinary &elementwise) = delete;
- /** Allow instances of this class to be move constructed */
- ClTemplateElementwiseBinary(ClTemplateElementwiseBinary &&elementwise) = default;
- /** Allow instances of this class to be moved */
- ClTemplateElementwiseBinary &operator=(ClTemplateElementwiseBinary &&elementwise) = default;
-
- /** Generate kernel component name */
- std::string get_name() const override;
-
- /** Generate kernel component code template
- *
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return std::string Component code
- */
- std::string get_component_code(const ComponentGroup &comp_group) const override;
-
- /** Declare all variables used by the component in the @p vtable
- *
- * @param[out] vtable Variable table
- * @param[in] comp_group Component group of which the component is a part of
- */
- void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
-
- /** Generate the tag look-up table used to instantiate the component code.
- *
- * @param[in] vtable Variable table
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return TagLUT Tag lookup table
- */
- TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
-
- /** Generate the build options used in the component
- *
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return CLBuildOptions Build options
- */
- CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override;
-
- /** Generate the component config id string used for tuning */
- std::string get_config_id() const override;
-
- /** Generate the header list used in the component */
- std::set<std::string> get_headers_list() const override;
-
- /** Generate the execution window for the component */
- Window get_window() const override;
-
-private:
- const ITensorInfo *_lhs;
- const ITensorInfo *_rhs;
- const ITensorInfo *_dst;
- Attributes _attributes;
-};
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEELEMENTWISEBINARY */
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.cpp
deleted file mode 100644
index 522c33a022..0000000000
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.cpp
+++ /dev/null
@@ -1,267 +0,0 @@
-/*
- * Copyright (c) 2022-2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.h"
-
-#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
-#include "arm_compute/core/utils/StringUtils.h"
-
-#include "src/core/helpers/WindowHelpers.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
-#include "support/StringSupport.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-namespace
-{
-constexpr unsigned int serial_vector_size = 8;
-} // namespace
-ClTemplateLogits1DMaxShiftExpSum::ClTemplateLogits1DMaxShiftExpSum(ComponentId id,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes)
- : IGpuTemplateComponentWriter{id, tensors}, _src{}, _sum{}, _dst{}, _attributes{attributes}
-{
- _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
- _sum = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
- _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_1);
- ARM_COMPUTE_ERROR_ON_NULLPTR(_src);
- ARM_COMPUTE_ERROR_ON_NULLPTR(_sum);
- ARM_COMPUTE_ERROR_ON_NULLPTR(_dst);
-}
-
-std::string ClTemplateLogits1DMaxShiftExpSum::get_name() const
-{
- return "logits_1d_max_shift_exp_sum";
-}
-
-std::string ClTemplateLogits1DMaxShiftExpSum::get_component_code(const ComponentGroup &comp_group) const
-{
- ARM_COMPUTE_UNUSED(comp_group);
-
- std::string code = R"_(
-//------------------ START KERNEL {{meta_kernel_id}} ---------------------
-#define VEC_TYPE VEC_DATA_TYPE({{DATA_TYPE}}, N0)
-#define SELECT_TYPE SELECT_VEC_DATA_TYPE({{DATA_TYPE}}, N0)
-{
- __global uchar *src_addr = {{src}}_ptr + {{src}}_offset_first_element_in_bytes + g_ind_1 * {{src}}_stride_y + g_ind_2 * {{src}}_stride_z;
- __global uchar *dst_addr = {{dst}}_ptr + {{dst}}_offset_first_element_in_bytes + g_ind_1 * {{dst}}_stride_y + g_ind_2 * {{dst}}_stride_z;
- Image sum = CONVERT_TENSOR3D_TO_IMAGE_STRUCT({{sum}});
- VEC_TYPE max_val_vec = (VEC_TYPE)({{MINVAL}});
-)_";
-
- const bool beta_defined = (_attributes.beta() != 1.f);
-
- if (beta_defined)
- {
- code += R"_(
- VEC_TYPE beta = (VEC_TYPE){{BETA}};
-)_";
- }
-
- constexpr unsigned int _serial_vector_size = 8;
- const unsigned int reduction_dim_size = _src->dimension(0);
- const unsigned int vector_size = adjust_vec_size(_serial_vector_size, reduction_dim_size);
- const bool non_multiple_of_n0 = ((reduction_dim_size % vector_size) != 0);
-
- if (non_multiple_of_n0)
- {
- code += R"_(
- VEC_TYPE data = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)src_addr);
- SELECT_TYPE widx = (SELECT_TYPE)PARTIAL_N0 > VEC_OFFS(SELECT_DATA_TYPE({{DATA_TYPE}}), N0);
- max_val_vec = max(max_val_vec, select((VEC_TYPE)({{MINVAL}}), data, widx));
-)_";
- }
-
- code += R"_(
- for(uint i = PARTIAL_N0; i < {{SRC_WIDTH}}; i += N0)
- {
- VEC_TYPE data = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(src_addr + i * sizeof({{DATA_TYPE}})));
- max_val_vec = max(data, max_val_vec);
- }
-
- {{DATA_TYPE}} max_val = MAX_REDUCE(max_val_vec, N0);
- VEC_TYPE sum1D = 0;
-)_";
-
- if (non_multiple_of_n0)
- {
- code += R"_(
- data -= max_val;
-)_";
- if (beta_defined)
- {
- code += R"_(
- data *= beta;
-)_";
- }
-
- if (_attributes.is_log_softmax())
- {
- code += R"_(
- VSTORE_PARTIAL(N0, PARTIAL_N0)
- (data, 0, (__global {{DATA_TYPE}} *)dst_addr);
- data = exp(data);
- data = select(0, data, widx);
-)_";
- }
- else
- {
- code += R"_(
- data = exp(data);
- data = select(0, data, widx);
- VSTORE_PARTIAL(N0, PARTIAL_N0)
- (data, 0, (__global {{DATA_TYPE}} *)dst_addr);
-)_";
- }
-
- code += R"_(
- sum1D += data;
-)_";
- }
- code += R"_(
- for(uint i = PARTIAL_N0; i < {{SRC_WIDTH}}; i += N0)
- {
- VEC_TYPE data = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(src_addr + i * sizeof({{DATA_TYPE}})));
- data -= max_val;
-)_";
-
- if (beta_defined)
- {
- code += R"_(
- data *= beta;
-)_";
- }
-
- if (_attributes.is_log_softmax())
- {
- code += R"_(
- VSTORE(N0)
- (data, 0, (__global {{DATA_TYPE}} *)(dst_addr + i * sizeof({{DATA_TYPE}})));
- data = exp(data);
-)_";
- }
- else
- {
- code += R"_(
- data = exp(data);
- VSTORE(N0)
- (data, 0, (__global {{DATA_TYPE}} *)(dst_addr + i * sizeof({{DATA_TYPE}})));
-)_";
- }
-
- code += R"_(
- sum1D += data;
- }
-)_";
-
- code += R"_(
- *((__global {{DATA_TYPE}} *)sum.ptr) = SUM_REDUCE(sum1D, N0);
-}
-//------------------ END KERNEL {{meta_kernel_id}} ---------------------
-)_";
-
- return code;
-}
-
-void ClTemplateLogits1DMaxShiftExpSum::declare_variables(GpuKernelVariableTable &vtable,
- const ComponentGroup &comp_group) const
-{
- vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D), "src");
-
- vtable.declare_variable(comp_group, _sum, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D), "sum");
-
- vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D), "dst");
-}
-
-TagLUT ClTemplateLogits1DMaxShiftExpSum::get_tag_lut(const GpuKernelVariableTable &vtable,
- const ComponentGroup &comp_group) const
-{
- ARM_COMPUTE_UNUSED(comp_group);
-
- TagLUT lut{};
-
- // Arguments and global shared variables
- lut["src"] = vtable.get_variable(_src);
- lut["sum"] = vtable.get_variable(_sum);
- lut["dst"] = vtable.get_variable(_dst);
-
- // Local build options
- lut["meta_kernel_id"] = id();
-
- const DataType data_type = _src->data_type();
-
- lut["DATA_TYPE"] = get_cl_type_from_data_type(data_type);
- lut["BETA"] = float_to_string_with_full_precision(_attributes.beta());
- lut["MINVAL"] = (data_type == DataType::F16) ? std::string("-HALF_MAX") : std::string("-FLT_MAX");
- lut["SRC_WIDTH"] = support::cpp11::to_string(_src->dimension(0));
-
- return lut;
-}
-
-CLBuildOptions ClTemplateLogits1DMaxShiftExpSum::get_build_options(const ComponentGroup &comp_group) const
-{
- ARM_COMPUTE_UNUSED(comp_group);
- CLBuildOptions build_opts{};
-
- const unsigned int reduction_dim_size = _src->dimension(0);
- const unsigned int vector_size = adjust_vec_size(serial_vector_size, reduction_dim_size);
-
- build_opts.add_option("-DN0=" + support::cpp11::to_string(vector_size));
- build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string((reduction_dim_size % vector_size)));
-
- return build_opts;
-}
-
-std::string ClTemplateLogits1DMaxShiftExpSum::get_config_id() const
-{
- std::string config_id = get_name();
-
- config_id += "_";
- config_id += support::cpp11::to_string(_src->dimension(0));
- config_id += "_";
- config_id += string_from_data_type(_src->data_type());
-
- return config_id;
-}
-
-std::set<std::string> ClTemplateLogits1DMaxShiftExpSum::get_headers_list() const
-{
- return std::set<std::string>{"helpers.h", "tile_helpers.h"};
-}
-
-Window ClTemplateLogits1DMaxShiftExpSum::get_window() const
-{
- ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized");
-
- Window win = calculate_max_window(*_dst, Steps(_src->dimension(0)));
- return win.collapse(win, Window::DimZ);
-}
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.h
deleted file mode 100644
index ac9ddaa9d4..0000000000
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.h
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATELOGITS1DMAXSHIFTEXPSUM
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATELOGITS1DMAXSHIFTEXPSUM
-
-#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-class ClTemplateLogits1DMaxShiftExpSum final : public IGpuTemplateComponentWriter
-{
-public:
- using Attributes = ClComponentLogits1DMaxShiftExpSum::Attributes;
-
- /** Constructor
- *
- * @param[in] id Component id
- * @param[in] tensors Tensor arguments to the components
- * @param[in] attributes Component attributes
- */
- ClTemplateLogits1DMaxShiftExpSum(ComponentId id,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes);
- /** Prevent instances of this class from being copy constructed */
- ClTemplateLogits1DMaxShiftExpSum(const ClTemplateLogits1DMaxShiftExpSum &) = delete;
- /** Prevent instances of this class from being copied */
- ClTemplateLogits1DMaxShiftExpSum &operator=(const ClTemplateLogits1DMaxShiftExpSum &) = delete;
- /** Allow instances of this class to be move constructed */
- ClTemplateLogits1DMaxShiftExpSum(ClTemplateLogits1DMaxShiftExpSum &&) = default;
- /** Allow instances of this class to be moved */
- ClTemplateLogits1DMaxShiftExpSum &operator=(ClTemplateLogits1DMaxShiftExpSum &&) = default;
- /** Generate kernel component name */
- std::string get_name() const override;
- /** Generate kernel component code template
- *
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return std::string Component code
- */
- std::string get_component_code(const ComponentGroup &comp_group) const override;
- /** Declare all variables used by the component in the @p vtable
- *
- * @param[out] vtable Variable table
- * @param[in] comp_group Component group of which the component is a part of
- */
- void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
- /** Generate the tag look-up table used to instantiate the component code.
- *
- * @param[in] vtable Variable table
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return TagLUT Tag lookup table
- */
- TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
- /** Generate the build options used in the component
- *
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return CLBuildOptions Build options
- */
- CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override;
- /** Generate the component config id string used for tuning */
- std::string get_config_id() const override;
- /** Generate the header list used in the component */
- std::set<std::string> get_headers_list() const override;
- /** Generate the execution window for the component */
- Window get_window() const override;
-
-private:
- const ITensorInfo *_src; // input
- const ITensorInfo *_sum; // exponentiated and summed input
- const ITensorInfo *_dst; // exponentiated input
- Attributes _attributes;
-};
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATELOGITS1DMAXSHIFTEXPSUM */
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.cpp
deleted file mode 100644
index 7d7c3e6673..0000000000
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.cpp
+++ /dev/null
@@ -1,171 +0,0 @@
-/*
- * Copyright (c) 2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.h"
-
-#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
-
-#include "src/core/helpers/WindowHelpers.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
-#include "support/StringSupport.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-ClTemplateLogits1DNorm::ClTemplateLogits1DNorm(ComponentId id,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes)
- : IGpuTemplateComponentWriter{id, tensors}, _src{}, _sum{}, _dst{}, _attributes{attributes}
-{
- _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
- _sum = this->tensors().get_const_tensor(TensorType::ACL_SRC_1);
- _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
- ARM_COMPUTE_ERROR_ON_NULLPTR(_src);
- ARM_COMPUTE_ERROR_ON_NULLPTR(_sum);
- ARM_COMPUTE_ERROR_ON_NULLPTR(_dst);
-}
-
-std::string ClTemplateLogits1DNorm::get_name() const
-{
- return "logits_1d_norm";
-}
-
-std::string ClTemplateLogits1DNorm::get_component_code(const ComponentGroup &comp_group) const
-{
- ARM_COMPUTE_UNUSED(comp_group);
-
- std::string code = R"_(
-//------------------ START KERNEL {{meta_kernel_id}} ---------------------
-{
- const int x_offs = g_ind_0 * sizeof({{DATA_TYPE}});
- __global uchar *src_addr = {{src}}_ptr + {{src}}_offset_first_element_in_bytes + x_offs + g_ind_1 * {{src}}_stride_y + g_ind_2 * {{src}}_stride_z;
- __global uchar *dst_addr = {{dst}}_ptr + {{dst}}_offset_first_element_in_bytes + x_offs + g_ind_1 * {{dst}}_stride_y + g_ind_2 * {{dst}}_stride_z;
- Image sum = CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP({{sum}});
-)_";
- // Load max value of 1D logits vector (row)
- code += R"_(
- {{DATA_TYPE}} sum_val = *((__global {{DATA_TYPE}} *)offset(&sum, 0, g_ind_1));
- VEC_DATA_TYPE({{DATA_TYPE}}, N0)
- data0 = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)src_addr);
-)_";
-
- if (_attributes.is_log_softmax())
- {
- code += R"_(
- sum_val = log(sum_val);
- data0 -= sum_val;
-)_";
- }
- else
- {
- code += R"_(
- data0 /= sum_val;
-)_";
- }
-
- code += R"_(
- STORE_VECTOR_SELECT(data, {{DATA_TYPE}}, dst_addr, N0, PARTIAL_N0, PARTIAL_N0 != 0 && g_ind_0 == 0);
-}
-//------------------ END KERNEL {{meta_kernel_id}} ---------------------
-)_";
-
- return code;
-}
-
-void ClTemplateLogits1DNorm::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
-{
- vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D), "src");
-
- vtable.declare_variable(comp_group, _sum, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D), "sum");
-
- vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D), "dst");
-}
-
-TagLUT ClTemplateLogits1DNorm::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
-{
- ARM_COMPUTE_UNUSED(comp_group);
-
- TagLUT lut{};
-
- // Arguments and global shared variables
- lut["src"] = vtable.get_variable(_src);
- lut["sum"] = vtable.get_variable(_sum);
- lut["dst"] = vtable.get_variable(_dst);
-
- // Local build options
- lut["meta_kernel_id"] = id();
-
- const DataType data_type = _src->data_type();
-
- lut["DATA_TYPE"] = get_cl_type_from_data_type(data_type);
-
- return lut;
-}
-
-CLBuildOptions ClTemplateLogits1DNorm::get_build_options(const ComponentGroup &comp_group) const
-{
- ARM_COMPUTE_UNUSED(comp_group);
- CLBuildOptions build_opts{};
-
- const auto root_window = comp_group.get_root_component()->template_writer()->get_window();
- const unsigned int n0 = root_window.x().step();
- build_opts.add_option("-DN0=" + support::cpp11::to_string(n0));
- build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string((_src->dimension(0) % n0)));
-
- return build_opts;
-}
-
-std::string ClTemplateLogits1DNorm::get_config_id() const
-{
- std::string config_id = get_name();
-
- config_id += "_";
- config_id += support::cpp11::to_string(_src->dimension(0));
- config_id += "_";
- config_id += string_from_data_type(_src->data_type());
-
- return config_id;
-}
-
-std::set<std::string> ClTemplateLogits1DNorm::get_headers_list() const
-{
- return std::set<std::string>{"helpers.h", "tile_helpers.h"};
-}
-
-Window ClTemplateLogits1DNorm::get_window() const
-{
- ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized");
- constexpr unsigned int serial_vector_size = 16;
- const unsigned int vector_size = adjust_vec_size(serial_vector_size, _src->dimension(0));
-
- Window win = calculate_max_window(*_src, Steps(vector_size));
- return win.collapse(win, Window::DimZ);
-}
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.h
deleted file mode 100644
index 5a74be5842..0000000000
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright (c) 2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATELOGITS1DNORM
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATELOGITS1DNORM
-
-#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-class ClTemplateLogits1DNorm final : public IGpuTemplateComponentWriter
-{
-public:
- using Attributes = ClComponentLogits1DNorm::Attributes;
-
- /** Constructor
- *
- * @param[in] id Component id
- * @param[in] tensors Tensor arguments to the components
- * @param[in] attributes Component attributes
- */
- ClTemplateLogits1DNorm(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes);
- /** Prevent instances of this class from being copy constructed */
- ClTemplateLogits1DNorm(const ClTemplateLogits1DNorm &) = delete;
- /** Prevent instances of this class from being copied */
- ClTemplateLogits1DNorm &operator=(const ClTemplateLogits1DNorm &) = delete;
- /** Allow instances of this class to be move constructed */
- ClTemplateLogits1DNorm(ClTemplateLogits1DNorm &&) = default;
- /** Allow instances of this class to be moved */
- ClTemplateLogits1DNorm &operator=(ClTemplateLogits1DNorm &&) = default;
- /** Generate kernel component name */
- std::string get_name() const override;
- /** Generate kernel component code template
- *
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return std::string Component code
- */
- std::string get_component_code(const ComponentGroup &comp_group) const override;
- /** Declare all variables used by the component in the @p vtable
- *
- * @param[out] vtable Variable table
- * @param[in] comp_group Component group of which the component is a part of
- */
- void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
- /** Generate the tag look-up table used to instantiate the component code.
- *
- * @param[in] vtable Variable table
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return TagLUT Tag lookup table
- */
- TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
- /** Generate the build options used in the component
- *
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return CLBuildOptions Build options
- */
- CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override;
- /** Generate the component config id string used for tuning */
- std::string get_config_id() const override;
- /** Generate the header list used in the component */
- std::set<std::string> get_headers_list() const override;
- /** Generate the execution window for the component */
- Window get_window() const override;
-
-private:
- const ITensorInfo *_src; // exponentiated input
- const ITensorInfo *_sum; // exponentiated and summed input
- const ITensorInfo *_dst; // normalization of input with _sum
-
- Attributes _attributes;
-};
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATELOGITS1DNORM */
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp
deleted file mode 100644
index 8936db6abe..0000000000
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp
+++ /dev/null
@@ -1,470 +0,0 @@
-/*
- * Copyright (c) 2023-2024 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "ClTemplatePool2d.h"
-
-#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "arm_compute/core/utils/StringUtils.h"
-
-#include "src/core/helpers/WindowHelpers.h"
-#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
-#include "support/StringSupport.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-namespace
-{
-// Shape indexes for NHWC Datalayout
-constexpr static int32_t height_idx = 2;
-constexpr static int32_t width_idx = 1;
-constexpr static int32_t channel_idx = 0;
-} // namespace
-ClTemplatePool2d::ClTemplatePool2d(ComponentId id,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes,
- const Settings &settings)
- : IGpuTemplateComponentWriter{id, tensors}, _src{}, _dst{}, _attributes{attributes}, _settings{settings}
-{
- _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
- _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
- ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst);
-}
-
-std::string ClTemplatePool2d::get_name() const
-{
- return "pool2d";
-}
-
-std::string ClTemplatePool2d::get_component_code(const ComponentGroup &comp_group) const
-{
- ARM_COMPUTE_UNUSED(comp_group);
-
- // Condition to use 2x2 optimized kernel
- if (_attributes.pool_size() == Size2D(2, 2))
- {
- return get_2x2_kernel_code();
- }
- else
- {
- return get_MxN_kernel_code();
- }
-}
-
-std::string ClTemplatePool2d::get_MxN_kernel_code() const
-{
- const auto pool_type = _attributes.pool_type();
- const bool fp_mixed_precision = (_src->data_type() == DataType::F16) && pool_type != PoolingType::MAX;
-
- // Define pool op macro.
- std::string pool_op = (pool_type == PoolingType::AVG) ? R"_(#define POOL_OP(x,y) ((x) + (y)))_"
- : R"_(#define POOL_OP(x,y) (fmax((x), (y))) )_";
-
- // Kernel start
- // Note: If C is not multiple of N0, we shift back of PARTIAL_N0 elements to compute the leftover elements for get_global_id(0) == 0
- // Note: If C is less than N0, N0 should be SHRINKED to the closest smaller N0. This operation is performed on the host side
- std::string code = R"_(
-//------------------ START KERNEL {{meta_kernel_id}} ---------------------
-// IN_0(src) {{src}}
-// OUT(dst, accum) {{dst}}
-
-{
- const int idx_out_c = g_ind_0;
- const int idx_out_w = g_ind_1;
-)_";
-
- // Add macro for POOL_OP
- code += "\n" + pool_op + "\n";
-
- code += R"_(
- const int idx_out_h = g_ind_2 % {{DST_HEIGHT}};
- const int idx_out_n = g_ind_2 / {{DST_HEIGHT}};
-)_";
-
- // Define common variables.
- code += R"_(
- __global unsigned char *in_base_ptr = {{src}}_ptr + {{src}}_offset_first_element_in_bytes + idx_out_c * sizeof({{DATA_TYPE}}) + idx_out_n * {{src}}_stride_w;
-
- __global unsigned char *out_base_ptr = {{dst}}_ptr + {{dst}}_offset_first_element_in_bytes + idx_out_c * sizeof({{DATA_TYPE}}) + idx_out_w * {{dst}}_stride_y + idx_out_h * {{dst}}_stride_z + idx_out_n * {{dst}}_stride_w;
-
- VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)
- res0 = {{INITIAL_VALUE}};
-
- const int idx_in_w = idx_out_w * {{STRIDE_X}} - {{PAD_X}};
- const int idx_in_h = idx_out_h * {{STRIDE_Y}} - {{PAD_Y}};
-
- const int pool_x_s = max((int)0, -idx_in_w);
- const int pool_x_e = min((int){{POOL_SIZE_X}}, (int){{SRC_WIDTH}} - idx_in_w);
- const int pool_y_s = max((int)0, -idx_in_h);
- const int pool_y_e = min((int){{POOL_SIZE_Y}}, (int){{SRC_HEIGHT}} - idx_in_h);
-)_";
-
- // Determine filter size depending on if padding is excluded or not
- if (_attributes.exclude_padding())
- {
- code += R"_(
- const int filter_size = (pool_y_e - pool_y_s) * (pool_x_e - pool_x_s);
-)_";
- }
- else
- {
- code += R"_(
- const int filter_size = {{POOL_SIZE_X}} * {{POOL_SIZE_Y}};
-)_";
- }
-
- // Loop through pool size
- // if global pooling
- if (_attributes.pool_size().x() == _src->dimension(width_idx) &&
- _attributes.pool_size().y() == _src->dimension(height_idx))
- {
- // Begin loop
- code += R"_(
- // Global pooling path
- for(int y = 0; y < {{POOL_SIZE_Y}}; ++y)
- {
- #pragma unroll 8
- for(int x = 0; x < {{POOL_SIZE_X}}; ++x)
- {
- VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)
- data0;
-)_";
- }
- else // if local pooling size
- {
- code += R"_(
- for(int y = pool_y_s; y < pool_y_e; ++y)
- {
- #pragma unroll 8
- for(int x = pool_x_s; x < pool_x_e; ++x)
- {
- VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)
- data0;
-)_";
- } // end else
-
- // if condition inside loop - use 32bit acc if mixed_precision.
- // End loop through pooling section.
- if (fp_mixed_precision)
- {
- // In case of FP_MIXED_PRECISION, ACC_DATA_TYPE is != DATA_TYPE
- code += R"_(
- data0 = CONVERT(VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + (x + idx_in_w) * {{src}}_stride_y + (y + idx_in_h) * {{src}}_stride_z)), VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0));
- res0 = POOL_OP(res0, data0);
- }
- }
-)_";
- }
- else // load data, compute result and end loop
- {
- code += R"_(
- data0 = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + (x + idx_in_w) * {{src}}_stride_y + (y + idx_in_h) * {{src}}_stride_z));
- res0 = POOL_OP(res0, data0);
- }
- }
-)_";
- }
-
- // For Pool AVG ONLY, divide pool output by filter size
- if (pool_type == PoolingType::AVG)
- {
- code += R"_(
- res0 /= (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0))filter_size;
-)_";
- }
-
- // If mixed precision convert datatype before storing. Then end kernel.
- if (fp_mixed_precision)
- {
- code += R"_(
- VEC_DATA_TYPE({{DATA_TYPE}}, N0)
- res_converted0 = CONVERT(res0, VEC_DATA_TYPE({{DATA_TYPE}}, N0));
- STORE_VECTOR_SELECT(res_converted, {{DATA_TYPE}}, out_base_ptr, N0, PARTIAL_N0, (PARTIAL_N0 != 0) && g_ind_0 == 0);
-)_";
- }
- else
- {
- // Store data
- code += R"_(
- STORE_VECTOR_SELECT(res, {{DATA_TYPE}}, out_base_ptr, N0, PARTIAL_N0, (PARTIAL_N0 != 0) && g_ind_0 == 0);
-)_";
- }
-
- code += R"_(
-//------------------ END KERNEL {{meta_kernel_id}} ---------------------
-}
-)_";
-
- return code;
-}
-
-std::string ClTemplatePool2d::get_2x2_kernel_code() const
-{
- const auto pool_type = _attributes.pool_type();
- const bool fp_mixed_precision = (_src->data_type() == DataType::F16) && pool_type != PoolingType::MAX;
- std::string pool_op = (pool_type == PoolingType::AVG) ? R"_(#define POOL_OP(x,y) ((x) + (y)))_"
- : R"_(#define POOL_OP(x,y) (fmax((x), (y))) )_";
-
- std::string code = R"_(
-//------------------ START KERNEL {{meta_kernel_id}} ---------------------
-// IN_0(src) {{src}}
-// OUT(dst, accum) {{dst}}
-
-#define SELECT_TYPE SELECT_VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)
-
-{
- const int idx_out_c = g_ind_0;
- const int idx_out_w = g_ind_1;
-)_";
-
- // Add pool op macro
- code += "\n" + pool_op + "\n";
-
- // If batch size != 1, the batch size dimension is collapsed over the height dimension
- code += R"_(
- const int idx_out_h = g_ind_2 % {{DST_HEIGHT}};
- const int idx_out_n = g_ind_2 / {{DST_HEIGHT}};
-)_";
-
- code += R"_(
- const int idx_in_w = idx_out_w * {{STRIDE_X}} - {{PAD_X}};
- const int idx_in_h = idx_out_h * {{STRIDE_Y}} - {{PAD_Y}};
-
- __global unsigned char *in_base_ptr = {{src}}_ptr + {{src}}_offset_first_element_in_bytes + idx_out_c * sizeof({{DATA_TYPE}}) + idx_out_n * {{src}}_stride_w;
- __global unsigned char *out_base_ptr = {{dst}}_ptr + {{dst}}_offset_first_element_in_bytes + idx_out_c * sizeof({{DATA_TYPE}}) + idx_out_w * {{dst}}_stride_y + idx_out_h * {{dst}}_stride_z + idx_out_n *
- {{dst}}_stride_w;
- const int pool_x_s = max((int)0, -idx_in_w);
- const int pool_x_e = min((int)2, (int){{SRC_WIDTH}} - idx_in_w);
- const int pool_y_s = max((int)0, -idx_in_h);
- const int pool_y_e = min((int)2, (int){{SRC_HEIGHT}} - idx_in_h);
-
- const int filter_size = (pool_x_e - pool_x_s) * (pool_y_e - pool_y_s);
- const int x0 = pool_x_s + idx_in_w;
- const int y0 = pool_y_s + idx_in_h;
- const int x1 = pool_x_e - 1 + idx_in_w;
- const int y1 = pool_y_e - 1 + idx_in_h;
-
- REPEAT_VAR_INIT_TO_CONST(4, VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0), data, 0);
-)_";
-
- if (fp_mixed_precision)
- {
- // In case of FP_MIXED_PRECISION, ACC_DATA_TYPE is != DATA_TYPE
- code += R"_(
- data0 = CONVERT(VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x0 * {{src}}_stride_y + y0 * {{src}}_stride_z)), VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0));
- data1 = CONVERT(VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x1 * {{src}}_stride_y + y0 * {{src}}_stride_z)), VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0));
- data2 = CONVERT(VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x0 * {{src}}_stride_y + y1 * {{src}}_stride_z)), VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0));
- data3 = CONVERT(VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x1 * {{src}}_stride_y + y1 * {{src}}_stride_z)), VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0));
-)_";
- }
- else
- {
- code += R"_(
- data0 = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x0 * {{src}}_stride_y + y0 * {{src}}_stride_z));
- data1 = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x1 * {{src}}_stride_y + y0 * {{src}}_stride_z));
- data2 = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x0 * {{src}}_stride_y + y1 * {{src}}_stride_z));
- data3 = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x1 * {{src}}_stride_y + y1 * {{src}}_stride_z));
-)_";
- }
-
- if (pool_type != PoolingType::MAX)
- {
- // Make invalid the values loaded if the x or y coordinate was clamped (out-of-bound)
- code += R"_(
- if(filter_size != 4)
- {
- SELECT_TYPE cond_w_s = (SELECT_TYPE)idx_in_w < (SELECT_TYPE)0;
- SELECT_TYPE cond_w_e = (SELECT_TYPE)idx_in_w >= (SELECT_TYPE)({{SRC_WIDTH}} - 1);
- SELECT_TYPE cond_h_s = (SELECT_TYPE)idx_in_h < (SELECT_TYPE)0;
- SELECT_TYPE cond_h_e = (SELECT_TYPE)idx_in_h >= (SELECT_TYPE)({{SRC_HEIGHT}} - 1);
-
- data0 = select(data0, (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)){{INITIAL_VALUE}}, (SELECT_TYPE)(cond_w_s | cond_h_s));
- data1 = select(data1, (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)){{INITIAL_VALUE}}, (SELECT_TYPE)(cond_w_e | cond_h_s));
- data2 = select(data2, (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)){{INITIAL_VALUE}}, (SELECT_TYPE)(cond_w_s | cond_h_e));
- data3 = select(data3, (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)){{INITIAL_VALUE}}, (SELECT_TYPE)(cond_w_e | cond_h_e));
- }
-)_";
- }
-
- code += R"_(
- VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)
- res0 = data0;
- res0 = POOL_OP(res0, data1);
- res0 = POOL_OP(res0, data2);
- res0 = POOL_OP(res0, data3);
-)_";
-
- if (pool_type == PoolingType::AVG)
- {
- // If avg pooling divide result accordingly.
- if (_attributes.exclude_padding())
- {
- code += R"_(
- res0 /= (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0))filter_size;
-)_";
- }
- else
- {
- code += R"_(
- res0 /= (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0))4;
-)_";
- }
- }
-
- // Store result
- if (fp_mixed_precision)
- {
- code += R"_(
- VEC_DATA_TYPE({{DATA_TYPE}}, N0)
- res_converted0 = CONVERT(res0, VEC_DATA_TYPE({{DATA_TYPE}}, N0));
- STORE_VECTOR_SELECT(res_converted, {{DATA_TYPE}}, out_base_ptr, N0, PARTIAL_N0, (PARTIAL_N0 != 0) && g_ind_0 == 0);
-)_";
- }
- else
- {
- code += R"_(
- STORE_VECTOR_SELECT(res, {{DATA_TYPE}}, out_base_ptr, N0, PARTIAL_N0, (PARTIAL_N0 != 0) && g_ind_0 == 0);
-)_";
- }
-
- code += R"_(
- //------------------ END KERNEL {{meta_kernel_id}} ---------------------
-}
-#undef SELECT_TYPE
-)_";
-
- return code;
-}
-
-void ClTemplatePool2d::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
-{
- vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "src");
-
- vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "dst");
-}
-
-TagLUT ClTemplatePool2d::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
-{
- ARM_COMPUTE_UNUSED(comp_group);
-
- TagLUT lut{};
- // Arguments and global shared variables
- lut["src"] = vtable.get_variable(_src);
- lut["dst"] = vtable.get_variable(_dst);
-
- // Local build options
- lut["meta_kernel_id"] = id();
-
- // Retrieve relevant data
- const auto padding = _attributes.pad();
- const auto stride = _attributes.stride();
- const auto pool_size = _attributes.pool_size();
- const auto data_type = _src->data_type();
- const auto use_fp_mixed_precision =
- (_src->data_type() == DataType::F16) && _attributes.pool_type() != PoolingType::MAX;
- const std::string max_initial_value =
- _settings.use_inf_as_limit() ? "(-INFINITY)"
- : float_to_string_with_full_precision(std::numeric_limits<float>::lowest());
-
- // pool specific
- lut["STRIDE_X"] = stride.x();
- lut["STRIDE_Y"] = stride.y();
- lut["PAD_X"] = padding.left;
- lut["PAD_Y"] = padding.top;
- lut["POOL_SIZE_X"] = pool_size.width;
- lut["POOL_SIZE_Y"] = pool_size.height;
-
- // Datatypes and variables
- lut["ACC_DATA_TYPE"] = get_cl_type_from_data_type(
- (use_fp_mixed_precision) ? (DataType::F32) : (data_type)); // Type of accumulators to use.
- lut["DATA_TYPE"] = get_cl_type_from_data_type(data_type);
- lut["SRC_WIDTH"] = _src->dimension(width_idx);
- lut["SRC_HEIGHT"] = _src->dimension(height_idx);
- lut["INITIAL_VALUE"] = (_attributes.pool_type() == PoolingType::MAX) ? max_initial_value : std::string("0");
-
- // Tensor specific data
- lut["DST_HEIGHT"] = _dst->dimension(height_idx);
-
- return lut;
-}
-
-CLBuildOptions ClTemplatePool2d::get_build_options(const ComponentGroup &comp_group) const
-{
- const auto root_window = comp_group.get_root_component()->template_writer()->get_window();
- const unsigned int n0 = root_window.x().step();
- const unsigned int partial_store_n0 = _dst->dimension(0) % n0;
-
- CLBuildOptions build_opts{};
- build_opts.add_option("-DN0=" + support::cpp11::to_string(n0));
- build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0));
-
- return build_opts;
-}
-
-std::string ClTemplatePool2d::get_config_id() const
-{
- const DataType data_type = _src->data_type();
- const DataLayout data_layout = _src->data_layout();
-
- std::string config_id{};
- config_id += "pooling_layer_2d_";
- config_id += lower_string(string_from_data_type(data_type));
- config_id += "_";
- config_id += lower_string(string_from_data_layout(data_layout));
- config_id += "_";
- config_id += support::cpp11::to_string(_dst->dimension(width_idx));
- config_id += "_";
- config_id += support::cpp11::to_string(_dst->dimension(height_idx));
- config_id += "_";
- config_id += support::cpp11::to_string(_dst->dimension(channel_idx));
-
- return config_id;
-}
-
-std::set<std::string> ClTemplatePool2d::get_headers_list() const
-{
- return std::set<std::string>{"helpers.h", "tile_helpers.h", "repeat.h"};
-}
-
-Window ClTemplatePool2d::get_window() const
-{
- ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized");
- const auto output_shape = _dst->tensor_shape();
- const unsigned int vec_size = adjust_vec_size(((_dst->data_type() == DataType::F32) ? 2 : 4), _dst->dimension(0));
-
- // Create and configure kernel window
- auto win = calculate_max_window(output_shape, Steps(vec_size));
- win = win.collapse_if_possible(win, Window::DimZ); // collapse window on batch size.
- return win;
-}
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.h
deleted file mode 100644
index d1d3c01669..0000000000
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.h
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * Copyright (c) 2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEPOOL2D
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEPOOL2D
-
-#include "arm_compute/core/experimental/Types.h"
-#include "arm_compute/dynamic_fusion/sketch/attributes/Pool2dAttributes.h"
-#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h"
-
-#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-class ClTemplatePool2d final : public IGpuTemplateComponentWriter
-{
-public:
- using Attributes = ClComponentPool2d::Attributes;
- using Settings = ClComponentPool2d::Settings;
- /** Constructor
- *
- * @param[in] id Component id
- * @param[in] tensors Tensor arguments to the components
- * @param[in] attributes Component attributes
- * @param[in] settings Component settings
- */
- ClTemplatePool2d(ComponentId id,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes,
- const Settings &settings);
-
- /** Prevent instances of this class from being copy constructed */
- ClTemplatePool2d(const ClTemplatePool2d &direct_conv2d) = delete;
-
- /** Prevent instances of this class from being copied */
- ClTemplatePool2d &operator=(const ClTemplatePool2d &direct_conv2d) = delete;
-
- /** Allow instances of this class to be move constructed */
- ClTemplatePool2d(ClTemplatePool2d &&direct_conv2d) = default;
-
- /** Allow instances of this class to be moved */
- ClTemplatePool2d &operator=(ClTemplatePool2d &&direct_conv2d) = default;
-
- /** Generate kernel component name */
- std::string get_name() const override;
-
- /** Generate kernel component code template
- *
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return std::string Component code
- */
- std::string get_component_code(const ComponentGroup &comp_group) const override;
- /** Declare all variables used by the component in the @p vtable
- *
- * @param[out] vtable Variable table
- * @param[in] comp_group Component group of which the component is a part of
- */
- void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
- /** Generate the tag look-up table used to instantiate the component code.
- *
- * @param[in] vtable Variable table
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return TagLUT Tag lookup table
- */
- TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
- /** Generate the build options used in the component
- *
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return CLBuildOptions Build options
- */
- CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override;
-
- /** Generate the component config id string used for tuning */
- std::string get_config_id() const override;
-
- /** Generate the header list used in the component */
- std::set<std::string> get_headers_list() const override;
-
- /** Generate the execution window for the component */
- Window get_window() const override;
-
-private:
- /** Generate pooling kernel template code optimized for 2x2 pooling
- *
- * @return std::String Component code
- */
- std::string get_2x2_kernel_code() const;
-
- /** Generate generalised pooling kernel template code for MxN pooling
- *
- * @return std::String Component code
- */
- std::string get_MxN_kernel_code() const;
-
- const ITensorInfo *_src;
- const ITensorInfo *_dst;
- Attributes _attributes;
- Settings _settings;
-};
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEPOOL2D */
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.cpp
deleted file mode 100644
index c882353fcb..0000000000
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.cpp
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- * Copyright (c) 2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "ClTemplateReshape.h"
-
-#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
-#include "arm_compute/core/utils/StringUtils.h"
-
-#include "src/core/helpers/WindowHelpers.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-constexpr unsigned int vector_size_byte_opencl = 16;
-
-ClTemplateReshape::ClTemplateReshape(ComponentId id, const ArgumentPack<ITensorInfo> &tensors)
- : IGpuTemplateComponentWriter{id, tensors}, _src{}, _dst{}
-{
- _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
- _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
- ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst);
-}
-
-std::string ClTemplateReshape::get_name() const
-{
- return "reshape";
-}
-
-std::string ClTemplateReshape::get_component_code(const ComponentGroup &comp_group) const
-{
- ARM_COMPUTE_UNUSED(comp_group);
- std::string code;
-
- code = R"_(
-//------------------ START KERNEL {{meta_kernel_id}} ---------------------
-
-// IN(src) {{src}}
-// OUT(dst, accum) {{dst}}
-
-TILE(uint, M0, 1, g_dst_indirect_y);
-{
- __global uchar * base_src_ptr = {{src}}_ptr + {{src}}_offset_first_element_in_bytes;
- const int tile_vertical_idx = g_ind_1 * {{arg_dst}}_c + g_ind_2 * {{arg_dst}}_c * {{arg_dst}}_w;
- LOOP_UNROLLING(int, _m0, 0, 1, M0,
- {
- const int row_idx = _m0 * {{arg_dst}}_c + tile_vertical_idx;
- const int tile_horizontal_idx = g_ind_0 + row_idx;
- LOOP_UNROLLING(int, _n0, 0, 1, N0,
- {
- {{src}}_ptr = base_src_ptr;
- const int linear_idx = tile_horizontal_idx + _n0;
- const int in_id_x = linear_idx % {{src}}_c;
- const int in_id_y = (linear_idx / {{src}}_c) % {{src}}_w;
- const int in_id_z = linear_idx / ({{src}}_c * {{src}}_w);
- {{src}}_ptr += in_id_x * sizeof({{DATA_TYPE}}) + in_id_y * {{src}}_stride_y + in_id_z * {{src}}_stride_z;
- {{dst}}[_m0].s[_n0] = *((__global {{DATA_TYPE}} *){{src}}_ptr);
- })
- })
-
- LOOP_UNROLLING(int, i, 0, 1, M0,
- {
- g_dst_indirect_y[i].v = (uint)min((int)(g_ind_1 + i), (int)({{arg_dst}}_w) - 1);
- g_dst_indirect_y[i].v += (int)(g_ind_2 % {{arg_dst}}_h) * (int)({{arg_dst}}_w);
- g_dst_indirect_y[i].v += (int)(g_ind_2 / {{arg_dst}}_h) * (int)({{arg_dst}}_w * {{arg_dst}}_h);
- })
-}
-//------------------ END KERNEL {{meta_kernel_id}} ---------------------
-)_";
- return code;
-}
-
-void ClTemplateReshape::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
-{
- vtable.declare_variable(comp_group, _src,
- GpuKernelArgumentInfo(common_tensor_type), // GpuKernelArgumentInfo::Type::Image_3D
- "src");
-
- vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(common_tensor_type), "dst");
-}
-
-TagLUT ClTemplateReshape::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
-{
- ARM_COMPUTE_UNUSED(comp_group);
- TagLUT lut{};
-
- // Arguments and global shared variables
- lut["src"] = vtable.get_variable(_src);
- lut["dst"] = vtable.get_variable(_dst);
- lut["arg_dst"] = vtable.get_variable(comp_group.get_any_dst_tensor());
- lut["meta_kernel_id"] = id();
- lut["DATA_TYPE"] = get_cl_type_from_data_type(_dst->data_type());
-
- return lut;
-}
-
-CLBuildOptions ClTemplateReshape::get_build_options(const ComponentGroup &comp_group) const
-{
- CLBuildOptions build_opts{};
- const auto root_window = comp_group.get_root_component()->template_writer()->get_window();
- const unsigned int n0 = root_window.x().step();
- const unsigned int m0 = root_window.y().step();
- const unsigned int partial_store_n0 = _dst->dimension(0) % n0;
- build_opts.add_option("-DN0=" + support::cpp11::to_string(n0));
- build_opts.add_option("-DM0=" + support::cpp11::to_string(m0));
- build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0));
-
- return build_opts;
-}
-
-std::string ClTemplateReshape::get_config_id() const
-{
- std::string config_id{};
- config_id += lower_string(string_from_data_type(_dst->data_type()));
- config_id += "_";
- config_id += support::cpp11::to_string(_dst->dimension(0));
- config_id += "_";
- config_id += support::cpp11::to_string(_dst->dimension(1));
-
- return config_id;
-}
-
-std::set<std::string> ClTemplateReshape::get_headers_list() const
-{
- return std::set<std::string>{"helpers.h", "tile_helpers.h"};
-}
-
-Window ClTemplateReshape::get_window() const
-{
- ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized");
- const unsigned int n0 = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
- Window win = calculate_max_window(*_dst, Steps(n0));
- return win.collapse(win, Window::DimZ);
-}
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.h
deleted file mode 100644
index 838a21db6d..0000000000
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.h
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Copyright (c) 2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATERESHAPE
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATERESHAPE
-
-#include "arm_compute/core/experimental/Types.h"
-
-#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-class ClTemplateReshape final : public IGpuTemplateComponentWriter
-{
-public:
- /** Constructor
- *
- * @param[in] id Component id
- * @param[in] tensors Tensor arguments to the components
- */
- ClTemplateReshape(ComponentId id, const ArgumentPack<ITensorInfo> &tensors);
- /** Prevent instances of this class from being copy constructed */
- ClTemplateReshape(const ClTemplateReshape &reshape) = delete;
- /** Prevent instances of this class from being copied */
- ClTemplateReshape &operator=(const ClTemplateReshape &reshape) = delete;
- /** Allow instances of this class to be move constructed */
- ClTemplateReshape(ClTemplateReshape &&reshape) = default;
- /** Allow instances of this class to be moved */
- ClTemplateReshape &operator=(ClTemplateReshape &&reshape) = default;
-
- /** Generate kernel component name */
- std::string get_name() const override;
-
- /** Generate kernel component code template
- *
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return std::string Component code
- */
- std::string get_component_code(const ComponentGroup &comp_group) const override;
-
- /** Declare all variables used by the component in the @p vtable
- *
- * @param[out] vtable Variable table
- * @param[in] comp_group Component group of which the component is a part of
- */
- void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
-
- /** Generate the tag look-up table used to instantiate the component code.
- *
- * @param[in] vtable Variable table
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return TagLUT Tag lookup table
- */
- TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
-
- /** Generate the build options used in the component
- *
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return CLBuildOptions Build options
- */
- CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override;
-
- /** Generate the component config id string used for tuning */
- std::string get_config_id() const override;
-
- /** Generate the header list used in the component */
- std::set<std::string> get_headers_list() const override;
-
- /** Generate the execution window for the component */
- Window get_window() const override;
-
-private:
- const ITensorInfo *_src;
- const ITensorInfo *_dst;
-};
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATERESHAPE */
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.cpp
deleted file mode 100644
index 846c712ceb..0000000000
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.cpp
+++ /dev/null
@@ -1,279 +0,0 @@
-/*
- * Copyright (c) 2022-2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "ClTemplateResize.h"
-
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
-#include "arm_compute/core/utils/StringUtils.h"
-
-#include "src/core/helpers/WindowHelpers.h"
-#include "src/core/utils/ScaleUtils.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-ClTemplateResize::ClTemplateResize(ComponentId id,
- const ArgumentPack<ITensorInfo> &tensors,
- const ClTemplateResize::Attributes &attributes)
- : IGpuTemplateComponentWriter{id, tensors}, _src{}, _dst{}, _attributes{attributes}
-{
- _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
- _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
-
- ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst);
-}
-
-std::string ClTemplateResize::get_name() const
-{
- return _attributes.interpolation_policy() == InterpolationPolicy::BILINEAR ? "resize_bilinear" : "resize_nearest";
-}
-
-std::string ClTemplateResize::get_component_code(const IGpuTemplateComponentWriter::ComponentGroup &comp_group) const
-{
- ARM_COMPUTE_UNUSED(comp_group);
-
- std::string code = R"_(
-//------------------ START KERNEL {{meta_kernel_id}} ---------------------
-TILE(uint, 1, 1, g_dst_indirect_y);
-{
- const int yo = g_ind_2 % {{arg_dst}}_h;
- const int bout = g_ind_2 / {{arg_dst}}_h;
-)_";
-
- if (_attributes.interpolation_policy() == InterpolationPolicy::NEAREST_NEIGHBOR)
- {
- if (_attributes.sampling_policy() == SamplingPolicy::TOP_LEFT)
- {
- code += R"_(
- float xi_f = (g_ind_1 * {{SCALE_X}});
- float yi_f = (yo * {{SCALE_Y}});
-)_";
- }
- else
- {
- code += R"_(
- float xi_f = ((g_ind_1 + 0.5f) * {{SCALE_X}});
- float yi_f = ((yo + 0.5f) * {{SCALE_Y}});
-)_";
- }
-
- if (_attributes.align_corners())
- {
- code += R"_(
- xi_f = round(xi_f);
- yi_f = round(yi_f);
-)_";
- }
-
- code += R"_(
- const int xi0 = clamp((int)xi_f, 0, (int){{src}}_w - 1);
- const int yi0 = clamp((int)yi_f, 0, (int){{src}}_h - 1);
-
- T_LOAD_NHWC_WITH_DILATION({{SRC_DATA_TYPE}}, 1, 1, N0, {{SRC_TENSOR_TYPE}}, {{src}}, bout, yi0, xi0, g_ind_0, {{src}}_w, {{src}}_h, 1, 1, false, {{dst}});
-)_";
- }
- else if (_attributes.interpolation_policy() == InterpolationPolicy::BILINEAR)
- {
- if (_attributes.sampling_policy() == SamplingPolicy::TOP_LEFT)
- {
- code += R"_(
- float xi_f = (g_ind_1 * {{SCALE_X}});
- float yi_f = (yo * {{SCALE_Y}});
-)_";
- }
- else
- {
- code += R"_(
- float xi_f = ((g_ind_1 + 0.5f) * {{SCALE_X}} - 0.5f);
- float yi_f = ((yo + 0.5f) * {{SCALE_Y}} - 0.5f);
-)_";
- }
-
- code += R"_(
- const int xi = (int)floor(xi_f);
- const int yi = (int)floor(yi_f);
-
- TILE({{SRC_DATA_TYPE}}, 1, N0, in00);
- TILE({{SRC_DATA_TYPE}}, 1, N0, in01);
- TILE({{SRC_DATA_TYPE}}, 1, N0, in10);
- TILE({{SRC_DATA_TYPE}}, 1, N0, in11);
-
- in00[0].v = {{CONSTANT_VALUE}};
- in01[0].v = {{CONSTANT_VALUE}};
- in10[0].v = {{CONSTANT_VALUE}};
- in11[0].v = {{CONSTANT_VALUE}};
-
- const int xi0 = clamp(xi, 0, (int){{src}}_w - 1);
- const int yi0 = clamp(yi, 0, (int){{src}}_h - 1);
- const int xi1 = clamp(xi + 1, 0, (int){{src}}_w - 1);
- const int yi1 = clamp(yi + 1, 0, (int){{src}}_h - 1);
-
- T_LOAD_NHWC_WITH_DILATION({{SRC_DATA_TYPE}}, 1, 1, N0, {{SRC_TENSOR_TYPE}}, {{src}}, bout, yi0, xi0, g_ind_0, {{src}}_w, {{src}}_h, 1, 1, false, in00);
- T_LOAD_NHWC_WITH_DILATION({{SRC_DATA_TYPE}}, 1, 1, N0, {{SRC_TENSOR_TYPE}}, {{src}}, bout, yi0, xi1, g_ind_0, {{src}}_w, {{src}}_h, 1, 1, false, in01);
- T_LOAD_NHWC_WITH_DILATION({{SRC_DATA_TYPE}}, 1, 1, N0, {{SRC_TENSOR_TYPE}}, {{src}}, bout, yi1, xi0, g_ind_0, {{src}}_w, {{src}}_h, 1, 1, false, in10);
- T_LOAD_NHWC_WITH_DILATION({{SRC_DATA_TYPE}}, 1, 1, N0, {{SRC_TENSOR_TYPE}}, {{src}}, bout, yi1, xi1, g_ind_0, {{src}}_w, {{src}}_h, 1, 1, false, in11);
-)_";
-
- if (is_data_type_float(_src->data_type()))
- {
- code += R"_(
- const {{SRC_DATA_TYPE}} a = ({{SRC_DATA_TYPE}})(xi_f - (float)xi);
- const {{SRC_DATA_TYPE}} b = ({{SRC_DATA_TYPE}})(1.f - a);
- const {{SRC_DATA_TYPE}} a1 = ({{SRC_DATA_TYPE}})(yi_f - (float)yi);
- const {{SRC_DATA_TYPE}} b1 = ({{SRC_DATA_TYPE}})(1.f - a1);
-
- // Calculate the output
- {{dst}}[0].v = ((in00[0].v * b * b1) + (in01[0].v * a * b1) + (in10[0].v * b * a1) + (in11[0].v * a * a1));
-)_";
- }
- else
- {
- code += R"_(
- const float a = (xi_f - (float)xi);
- const float b = (1.f - a);
- const float a1 = (yi_f - (float)yi);
- const float b1 = (1.f - a1);
-
- {{dst}}[0].v = CONVERT_SAT(
- (CONVERT(in00[0].v, VEC_DATA_TYPE(float, N0)) * b * b1) +
- (CONVERT(in01[0].v, VEC_DATA_TYPE(float, N0)) * a * b1) +
- (CONVERT(in10[0].v, VEC_DATA_TYPE(float, N0)) * b * a1) +
- (CONVERT(in11[0].v, VEC_DATA_TYPE(float, N0)) * a * a1), VEC_DATA_TYPE({{DST_DATA_TYPE}}, N0));
-)_";
- }
- }
- else
- {
- ARM_COMPUTE_ERROR("Unsupported interpolation policy");
- }
-
- code += R"_(
- g_dst_indirect_y[0].v = g_ind_1 + (yo * (int)({{arg_dst}}_w)) + bout * (int)({{arg_dst}}_w * {{arg_dst}}_h);
-}
-//------------------ END KERNEL {{meta_kernel_id}} ---------------------
-)_";
-
- return code;
-}
-
-void ClTemplateResize::declare_variables(GpuKernelVariableTable &vtable,
- const IGpuTemplateComponentWriter::ComponentGroup &comp_group) const
-{
- vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "src");
-
- vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "dst");
-}
-
-TagLUT ClTemplateResize::get_tag_lut(const GpuKernelVariableTable &vtable,
- const IGpuTemplateComponentWriter::ComponentGroup &comp_group) const
-{
- TagLUT lut{};
-
- // Arguments and global shared variables
- lut["src"] = vtable.get_variable(_src);
- lut["dst"] = vtable.get_variable(_dst);
-
- const auto dst_argument = vtable.get_variable(comp_group.get_any_dst_tensor());
- lut["arg_dst"] = dst_argument.uniq_name;
-
- // Local build options
- lut["meta_kernel_id"] = id();
- lut["SRC_DATA_TYPE"] = get_cl_type_from_data_type(_src->data_type());
- lut["SRC_TENSOR_TYPE"] = "BUFFER";
- lut["DST_DATA_TYPE"] = get_cl_type_from_data_type(_dst->data_type());
- lut["CONSTANT_VALUE"] = string_from_pixel_value(0, _src->data_type());
-
- const float scale_x =
- scale_utils::calculate_resize_ratio(_src->dimension(1), _dst->dimension(1), _attributes.align_corners());
- const float scale_y =
- scale_utils::calculate_resize_ratio(_src->dimension(2), _dst->dimension(2), _attributes.align_corners());
-
- lut["SCALE_X"] = float_to_string_with_full_precision(scale_x);
- lut["SCALE_Y"] = float_to_string_with_full_precision(scale_y);
-
- return lut;
-}
-
-CLBuildOptions ClTemplateResize::get_build_options(const IGpuTemplateComponentWriter::ComponentGroup &comp_group) const
-{
- const Window root_window = comp_group.get_root_component()->template_writer()->get_window();
- const unsigned int n0 = root_window.x().step();
- const unsigned int m0 = root_window.y().step();
- const unsigned int partial_n0 = _dst->dimension(0) % n0;
-
- CLBuildOptions build_opts;
-
- build_opts.add_option("-DN0=" + support::cpp11::to_string(n0));
- build_opts.add_option("-DM0=" + support::cpp11::to_string(m0));
- build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_n0));
-
- return build_opts;
-}
-
-std::string ClTemplateResize::get_config_id() const
-{
- std::string config_id{};
-
- config_id += "resize_";
- config_id +=
- (_attributes.interpolation_policy() == InterpolationPolicy::NEAREST_NEIGHBOR ? "NEAREST_NEIGHBOR" : "");
- config_id += (_attributes.interpolation_policy() == InterpolationPolicy::BILINEAR ? "BILINEAR" : "");
- config_id += "_";
- config_id += (_attributes.sampling_policy() == SamplingPolicy::CENTER ? "center" : "topleft");
- config_id += "_";
- config_id += support::cpp11::to_string(_dst->dimension(0));
- config_id += "_";
- config_id += support::cpp11::to_string(_dst->dimension(1));
- config_id += "_";
- config_id += support::cpp11::to_string(_dst->dimension(2));
- config_id += "_";
- config_id += support::cpp11::to_string(_dst->dimension(3));
-
- return config_id;
-}
-
-std::set<std::string> ClTemplateResize::get_headers_list() const
-{
- return std::set<std::string>{"helpers.h", "tile_helpers.h"};
-}
-
-Window ClTemplateResize::get_window() const
-{
- ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized");
-
- const unsigned int n0 = adjust_vec_size(16 / _src->element_size(), _src->dimension(0));
- Window win = calculate_max_window(*_dst, Steps(n0));
- return win.collapse(win, Window::DimZ);
-}
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.h
deleted file mode 100644
index 4c69007185..0000000000
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.h
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATERESIZE
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATERESIZE
-
-#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-class ClTemplateResize final : public IGpuTemplateComponentWriter
-{
-public:
- using Attributes = ClComponentResize::Attributes;
-
- /** Constructor
- *
- * @param[in] id Component id
- * @param[in] tensors Tensor arguments to the components
- * @param[in] attributes Component attributes
- */
- ClTemplateResize(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes);
-
- /** Destructor */
- ~ClTemplateResize() override = default;
-
- /** Prevent instances of this class from being copy constructed */
- ClTemplateResize(const ClTemplateResize &resize) = delete;
-
- /** Prevent instances of this class from being copied */
- ClTemplateResize &operator=(const ClTemplateResize &resize) = delete;
-
- /** Allow instances of this class to be move constructed */
- ClTemplateResize(ClTemplateResize &&resize) = default;
-
- /** Allow instances of this class to be moved */
- ClTemplateResize &operator=(ClTemplateResize &&resize) = default;
-
- /** Generate kernel component name */
- std::string get_name() const override;
-
- /** Generate kernel component code template
- *
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return std::string Component code
- */
- std::string get_component_code(const ComponentGroup &comp_group) const override;
-
- /** Declare all variables used by the component in the @p vtable
- *
- * @param[out] vtable Variable table
- * @param[in] comp_group Component group of which the component is a part of
- */
- void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
-
- /** Generate the tag look-up table used to instantiate the component code.
- *
- * @param[in] vtable Variable table
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return TagLUT Tag lookup table
- */
- TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
-
- /** Generate the build options used in the component
- *
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return CLBuildOptions Build options
- */
- CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override;
-
- /** Generate the component config id string used for tuning */
- std::string get_config_id() const override;
-
- /** Generate the header list used in the component */
- std::set<std::string> get_headers_list() const override;
-
- /** Generate the execution window for the component */
- Window get_window() const override;
-
-private:
- const ITensorInfo *_src;
- const ITensorInfo *_dst;
- Attributes _attributes;
-};
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATERESIZE */
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp
deleted file mode 100644
index d0ec91e0a9..0000000000
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "ClTemplateStore.h"
-
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-ClTemplateStore::ClTemplateStore(ComponentId id, const ArgumentPack<ITensorInfo> &tensors)
- : IGpuTemplateComponentWriter{id, tensors}, _src{}, _dst{}
-{
- _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
- _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
-}
-
-std::string ClTemplateStore::get_name() const
-{
- return "store";
-}
-
-std::string ClTemplateStore::get_component_code(const ComponentGroup &comp_group) const
-{
- ARM_COMPUTE_UNUSED(comp_group);
-
- return R"_(
-//------------------ START KERNEL {{meta_kernel_id}} STORE ---------------------
-{
- bool x_cond = PARTIAL_N0 != 0 && get_global_id(0) == 0;
-
- T_STORE_INDIRECT_WIDTH_SELECT({{DST_DATA_TYPE}}, M0, N0, PARTIAL_N0, {{DST_TENSOR_TYPE}}, {{dst}}, g_ind_0, {{dst}}_stride_y, x_cond, {{src}}, g_dst_indirect_y);
-//------------------ END KERNEL {{meta_kernel_id}} STORE ---------------------
-}
-
-)_";
-}
-
-void ClTemplateStore::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
-{
- vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "src");
- vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "dst");
-}
-
-TagLUT ClTemplateStore::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
-{
- TagLUT lut{};
-
- // Arguments and global shared variables
- lut["src"] = vtable.get_variable(_src);
- lut["dst"] = vtable.get_variable(_dst);
-
- // Local build options
- lut["meta_kernel_id"] = id();
- lut["DST_TENSOR_TYPE"] = "BUFFER";
- lut["DST_DATA_TYPE"] = _dst->data_type();
-
- ARM_COMPUTE_UNUSED(comp_group);
- return lut;
-}
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h
deleted file mode 100644
index b8c82ceadd..0000000000
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATESTORE
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATESTORE
-
-#include "arm_compute/core/experimental/Types.h"
-
-#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-class ClTemplateStore final : public IGpuTemplateComponentWriter
-{
-public:
- /** Constructor
- *
- * @param[in] id Component id
- * @param[in] tensors Tensor arguments to the components
- */
- ClTemplateStore(ComponentId id, const ArgumentPack<ITensorInfo> &tensors);
- /** Prevent instances of this class from being copy constructed */
- ClTemplateStore(const ClTemplateStore &store) = delete;
- /** Prevent instances of this class from being copied */
- ClTemplateStore &operator=(const ClTemplateStore &store) = delete;
- /** Allow instances of this class to be move constructed */
- ClTemplateStore(ClTemplateStore &&store) = default;
- /** Allow instances of this class to be moved */
- ClTemplateStore &operator=(ClTemplateStore &&store) = default;
- /** Generate kernel component name */
- std::string get_name() const override;
- /** Generate kernel component code template
- *
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return std::string Component code
- */
- std::string get_component_code(const ComponentGroup &comp_group) const override;
- /** Declare all variables used by the component in the @p vtable
- *
- * @param[out] vtable Variable table
- * @param[in] comp_group Component group of which the component is a part of
- */
- void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
- /** Generate the tag look-up table used to instantiate the component code.
- *
- * @param[in] vtable Variable table
- * @param[in] comp_group Component group of which the component is a part of
- *
- * @return TagLUT Tag lookup table
- */
- TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
-
-private:
- const ITensorInfo *_src;
- const ITensorInfo *_dst;
-};
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATESTORE */
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp
deleted file mode 100644
index d3d7c8db83..0000000000
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp
+++ /dev/null
@@ -1,325 +0,0 @@
-/*
- * Copyright (c) 2022-2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "ClTemplateWriter.h"
-
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-
-#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-/// @note: some tags can be unused since they could be used only for the macros, or only for the component code
-std::string ClTemplateWriter::replace_tags(const std::string &code_template, const TagLUT &tags)
-{
- std::string replaced_code = "";
- bool scanning_pattern = false;
- std::string pattern_found = "";
- for (size_t i = 0; i < code_template.size() - 1; ++i)
- {
- if (!scanning_pattern)
- {
- if (code_template[i] == '{' && code_template[i + 1] == '{')
- {
- i += 1;
- scanning_pattern = true;
- pattern_found = "";
- }
- else
- {
- replaced_code += code_template[i];
- }
- }
- else
- {
- if (code_template[i] == '}' && code_template[i + 1] == '}')
- {
- i += 1;
- scanning_pattern = false;
- std::string err = "Pattern " + pattern_found + " not found in tags";
- ARM_COMPUTE_ERROR_ON_MSG(tags.find(pattern_found) == tags.end(), err.c_str());
- replaced_code += tags.find(pattern_found)->second.value;
- }
- else
- {
- pattern_found += code_template[i];
- }
- }
- }
-
- return replaced_code;
-}
-ClTemplateWriter::~ClTemplateWriter()
-{
-}
-ClTemplateWriter::ClTemplateWriter(const GpuKernelComponentGroup &components) : _components{components}
-{
-}
-std::string ClTemplateWriter::get_name()
-{
- return write_kernel_name();
-}
-std::string ClTemplateWriter::get_code()
-{
- return write_code();
-}
-std::string ClTemplateWriter::get_config_id()
-{
- std::string config_id = get_name();
- for (const auto &comp : _components)
- {
- config_id += "--" + comp->template_writer()->get_config_id() + "--";
- }
-
- return config_id;
-}
-
-CLBuildOptions ClTemplateWriter::get_build_options()
-{
- CLBuildOptions build_opts{};
-
- for (const auto &comp : _components)
- {
- build_opts.add_options(comp->template_writer()->get_build_options(_components).options());
- }
-
- return build_opts;
-}
-
-Window ClTemplateWriter::get_window() const
-{
- const auto root_comp = _components.get_root_component();
- ARM_COMPUTE_ERROR_ON_MSG(root_comp == nullptr, "No root component found");
- return root_comp->template_writer()->get_window();
-}
-
-std::map<ITensorInfo::Id, GpuKernelArgument> ClTemplateWriter::get_tensors()
-{
- // Assemble GpuKernelArguments
- std::map<ITensorInfo::Id, GpuKernelArgument> tensors;
- for (const auto t : _components.get_argument_tensors())
- {
- tensors.emplace(t->id(), GpuKernelArgument{*t, _vtable.get_variable(t).kernel_argument_info});
- }
- return tensors;
-}
-
-std::string ClTemplateWriter::write_code()
-{
- ARM_COMPUTE_ERROR_ON_MSG(_components.empty(), "No components found");
-
- // These data structures will hold the data from all the components in the blueprint
- std::set<std::string> headers_list{};
- std::set<std::string> additional_macros{};
- std::vector<std::string> component_codes{}; // vector because order matters
-
- // Pass 1: Declare all kernel variables
- for (auto &component : _components)
- {
- component->template_writer()->declare_variables(_vtable, _components);
- }
- // Pass 2: Generate component codes
- for (auto &component : _components)
- {
- const auto component_writer = component->template_writer();
- auto curr_headers_list = component_writer->get_headers_list();
- auto curr_additional_macros = component_writer->get_additional_macros();
- auto curr_component_code = component_writer->get_component_code(_components);
- const auto var_lut = component_writer->get_tag_lut(
- _vtable,
- _components); // Ideally can be merged with get_component_code once we have finer-grained code generation technique
- component_codes.push_back(replace_tags(curr_component_code, var_lut));
-
- headers_list.insert(curr_headers_list.begin(), curr_headers_list.end());
- if (!additional_macros.empty()) // Some components might not have any
- {
- additional_macros.insert(replace_tags(curr_additional_macros, var_lut));
- }
- }
-
- // Step 3: Assemble the data gathered by traversing the graph into the string "code"
- std::string code = "";
-
- for (auto &header : headers_list)
- {
-#if defined(EMBEDDED_KERNELS)
- code += CLKernelLibrary::get().get_program(header).first;
-#else // defined(EMBEDDED_KERNELS)
- code += "#include \"" + header + "\"\n";
-#endif // defined(EMBEDDED_KERNELS)
- }
-
- for (auto &macros : additional_macros)
- {
- code += macros;
- }
-
- auto arguments = _components.get_argument_tensors();
- std::sort(arguments.begin(), arguments.end(),
- [](const ITensorInfo *l, const ITensorInfo *r) { return l->id() < r->id(); });
- code += write_kernel_signature(_vtable.get_variable_list(arguments));
-
- code += "\n{\n\n";
-
- code += " //------------------ START KERNEL_BUILDER_COORDINATE ---------------------\n\n";
- code += write_global_section();
- code += " //------------------ END KERNEL_BUILDER_COORDINATE ---------------------\n";
-
- {
- const auto tiles = _components.get_tiles();
- std::stringstream tiles_ss;
-
- tiles_ss << " //------------------ START TILE DECLARATION ---------------------\n";
-
- for (auto tile : tiles)
- {
- const auto var = _vtable.get_variable(tile);
- const auto data_type = get_cl_type_from_data_type(tile->data_type());
- const auto var_name = var.uniq_name;
-
- tiles_ss << " TILE(" << data_type << ", M0, N0, " << var_name << ");\n";
- }
-
- tiles_ss << " //------------------ END TILE DECLARATION ---------------------\n";
-
- code += tiles_ss.str();
- }
-
- for (const auto &component_code : component_codes)
- {
- code += component_code;
- code += "\n";
- }
-
- code += "}\n";
-
- return code;
-}
-std::string ClTemplateWriter::write_global_section() const
-{
- const auto dst_info = _components.get_any_dst_tensor();
- const auto dst_w = dst_info->dimension(0);
- const auto tile_w = std::max(1, get_window().x().step());
- const auto tile_h = std::max(1, get_window().y().step());
- auto leftover_w = dst_w % tile_w;
-
- std::string code = "";
- code += std::string(" int g_ind_0 = GET_SPATIAL_IDX(0, ") + std::to_string(tile_w) + ", " +
- std::to_string(leftover_w) + ");\n";
- code += std::string(" int g_ind_1 = GET_SPATIAL_IDX(1, ") + std::to_string(tile_h) + ", " + "0);\n";
- code += std::string(" int g_ind_2 = GET_SPATIAL_IDX(2, 1, 0);\n\n");
-
- code += " const bool g_cond_x = (g_ind_0 == 0);\n";
- code += " const bool g_cond_y = (g_ind_1 == 0);\n";
-
- return code;
-}
-std::string ClTemplateWriter::write_argument_declaration(const GpuKernelVariableTable::TensorVariable &var) const
-{
- std::string code;
- switch (var.kernel_argument_info.type)
- {
- case GpuKernelArgumentInfo::Type::Vector:
- {
- code += "\n VECTOR_DECLARATION(" + var.uniq_name + ")";
- break;
- }
- case GpuKernelArgumentInfo::Type::Image:
- {
- code += "\n IMAGE_DECLARATION(" + var.uniq_name + ")";
- break;
- }
- case GpuKernelArgumentInfo::Type::Image_3D:
- {
- code += "\n IMAGE_DECLARATION(" + var.uniq_name + "),";
- code += "\n unsigned int " + var.uniq_name + "_stride_z";
- break;
- }
- case GpuKernelArgumentInfo::Type::Image_3D_Export_To_ClImage2D:
- {
- code += "\n __read_only image2d_t " + var.uniq_name + "_img,";
- code += "\n unsigned int " + var.uniq_name + "_stride_z";
- break;
- }
- case GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer:
- {
- code += "\n TENSOR4D_T(" + var.uniq_name + ", BUFFER)";
- break;
- }
- case GpuKernelArgumentInfo::Type::Tensor_4D_t_Image:
- {
- code += "\n TENSOR4D_T(" + var.uniq_name + ", IMAGE)";
- break;
- }
- case GpuKernelArgumentInfo::Type::Tensor_3D:
- {
- code += "\n TENSOR3D_DECLARATION(" + var.uniq_name + ")";
- break;
- }
- default:
- {
- ARM_COMPUTE_ERROR("Unsupported declaration generation for GpuKernelArgumentInfo::Type");
- }
- }
- return code;
-}
-std::string ClTemplateWriter::write_kernel_signature(const GpuKernelVariableTable::VariableList &argument_list) const
-{
- std::string code = "\n__kernel void " + write_kernel_name() + "(";
-
- for (int i = 0; i < static_cast<int>(argument_list.size()) - 1; ++i)
- {
- code += write_argument_declaration(argument_list[i]) + ",";
- }
- if (static_cast<int>(argument_list.size()) - 1 >= 0)
- {
- code += write_argument_declaration(argument_list[argument_list.size() - 1]);
- }
-
- code += ')';
-
- return code;
-}
-std::string ClTemplateWriter::write_kernel_name() const
-{
- if (_components.empty())
- {
- return "empty_kernel";
- }
- std::string name = _components.empty() ? "" : _components[0]->template_writer()->get_name();
- for (size_t i = 1; i < _components.size(); ++i)
- {
- name += "___";
- name += _components[i]->template_writer()->get_name();
- }
-
- return name;
-}
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.h
deleted file mode 100644
index 83f617b6c6..0000000000
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.h
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEWRITER
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEWRITER
-
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
-#include "src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h"
-
-#include <map>
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-/** Use a templated-string-based method to write kernel code
- * It stitches the component code templates together based on the valid fusion configuration.
- * It then instantiates the actual kernel code from the template and the generated tag lookup table.
- */
-class ClTemplateWriter : public IGpuKernelWriter
-{
-public:
- /** Instantiates a kernel code string from the kernel code template
- * @note: some tags can be unused since they could be used only for the macros, or only for the component code
- *
- * @param[in] code_template Kernel code template
- * @param[in] tags Tag lookup table
- *
- * @return std::string Instantiated kernel string
- */
- static std::string replace_tags(const std::string &code_template, const TagLUT &tags);
- /** Default constructor */
- ClTemplateWriter() = default;
- /** Constructor
- *
- * @param[in] components Kernel component group from which the kernel will be generated
- */
- ClTemplateWriter(const GpuKernelComponentGroup &components);
- /** Destructor */
- ~ClTemplateWriter() override;
- /** Generate kernel name */
- std::string get_name() override;
- /** Generate kernel code */
- std::string get_code() override;
- /** Generate build options */
- CLBuildOptions get_build_options() override;
- /** Generate config id string of the entire kernel. This is used for tuning */
- std::string get_config_id() override;
- /** Generate execution window */
- Window get_window() const override;
- /** Get the kernel argument lists of the kernel*/
- std::map<ITensorInfo::Id, GpuKernelArgument> get_tensors() override;
-
-private:
- std::string write_kernel_name() const;
- std::string write_code();
- std::string write_global_section() const;
- std::string write_argument_declaration(const GpuKernelVariableTable::TensorVariable &var) const;
- std::string write_kernel_signature(const GpuKernelVariableTable::VariableList &argument_list) const;
-
-private:
- GpuKernelComponentGroup _components{};
- GpuKernelVariableTable _vtable{};
-};
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEWRITER */