Remove CKW prototype and Template Writer

Gpu code in dynamic fusion is now written by stable CKW. We do not need CKW protoype and the older writer implementation, i.e. TemplateWriter. It also removes the need for the flag -DACL_INTERNAL_TEST_CKW_IN_DF to compile and test dynamic fusion operator. Resolves: COMPMID-6715 Signed-off-by: Gunes Bayir <gunes.bayir@arm.com> Change-Id: I9f9453311e79d9be612bd4754240d832f98503e8 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/11116 Benchmark: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Jakub Sujak <jakub.sujak@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
author: Gunes Bayir <gunes.bayir@arm.com> 2024-02-07 15:34:45 +0000
committer: Gunes Bayir <gunes.bayir@arm.com> 2024-02-09 15:59:45 +0000
commit: 0ee13afc4429411de9a05ba4c2ff8a580784b568 (patch)
tree: c9ee1acf684d52b92ffb7500b0b65eee8377ce45 /src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp
parent: a3e1b50588b89a2c0c67da2679728a422fc16402 (diff)
download: ComputeLibrary-0ee13afc4429411de9a05ba4c2ff8a580784b568.tar.gz
1 files changed, 2 insertions, 137 deletions
diff --git a/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp b/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp
index 9ca20fa152..eab5cddd07 100644
--- a/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp
+++ b/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -26,12 +26,11 @@
 #include "arm_compute/core/CL/ICLTensor.h"
 
 #include "src/core/CL/CLUtils.h"
-#ifdef ACL_INTERNAL_TEST_CKW_IN_DF
 #include "src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.h"
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
 #include "src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h"
 #include "src/gpu/cl/ClKernelLibrary.h"
 #include "support/Cast.h"
+
 namespace arm_compute
 {
 namespace experimental
@@ -61,128 +60,6 @@ void ClKernelRuntime::configure(const ClCompileContext &compile_ctx, const GpuKe
     _arguments = code.arguments();
 }
 
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-
-inline void ClKernelRuntime::add_tensor_argument(unsigned int                &idx,
-                                                 const GpuKernelArgumentInfo &arg,
-                                                 const ICLTensor             *tensor,
-                                                 const Window                &arg_slice,
-                                                 std::vector<cl::Image2D>    &cl_images)
-{
-    ARM_COMPUTE_ERROR_ON_NULLPTR(tensor);
-
-    switch (arg.type)
-    {
-        case GpuKernelArgumentInfo::Type::Scalar:
-        {
-            ARM_COMPUTE_ERROR("Unsupported yet");
-            break;
-        }
-
-        case GpuKernelArgumentInfo::Type::Vector:
-        {
-            add_1D_tensor_argument(idx, tensor, arg_slice);
-            break;
-        }
-
-        case GpuKernelArgumentInfo::Type::Image:
-        {
-            add_2D_tensor_argument(idx, tensor, arg_slice);
-            break;
-        }
-        case GpuKernelArgumentInfo::Type::Image_Reinterpret_As_3D:
-        {
-            add_2D_tensor_argument(idx, tensor, arg_slice);
-            const unsigned int total_cross_plane_pad = tensor->info()->padding().top + tensor->info()->padding().bottom;
-            _kernel.setArg<cl_uint>(idx++, static_cast<unsigned int>(total_cross_plane_pad));
-            break;
-        }
-        case GpuKernelArgumentInfo::Type::Image_Export_To_ClImage2D:
-        {
-            const TensorShape shape2d(tensor->info()->dimension(0) / 4, tensor->info()->dimension(1) *
-                                                                            tensor->info()->dimension(2) *
-                                                                            tensor->info()->dimension(3));
-            const size_t      image_row_pitch = tensor->info()->strides_in_bytes()[1];
-            cl::Image2D       tensor_image2d =
-                create_image2d_from_buffer(CLKernelLibrary::get().context(), tensor->cl_buffer(), shape2d,
-                                           tensor->info()->data_type(), image_row_pitch, CLImage2DType::ReadOnly);
-            cl_images.push_back(tensor_image2d);
-            _kernel.setArg(idx++, tensor_image2d);
-            break;
-        }
-
-        case GpuKernelArgumentInfo::Type::Image_3D:
-        {
-            add_2D_tensor_argument(idx, tensor, arg_slice);
-            _kernel.setArg<cl_uint>(idx++, static_cast<unsigned int>(tensor->info()->strides_in_bytes()[2]));
-            break;
-        }
-        case GpuKernelArgumentInfo::Type::Image_3D_Export_To_ClImage2D:
-        {
-            const TensorShape shape2d(tensor->info()->dimension(0) / 4, tensor->info()->dimension(1) *
-                                                                            tensor->info()->dimension(2) *
-                                                                            tensor->info()->dimension(3));
-            const size_t      image_row_pitch = tensor->info()->strides_in_bytes()[1];
-            cl::Image2D       tensor_image2d =
-                create_image2d_from_buffer(CLKernelLibrary::get().context(), tensor->cl_buffer(), shape2d,
-                                           tensor->info()->data_type(), image_row_pitch, CLImage2DType::ReadOnly);
-            cl_images.push_back(tensor_image2d);
-            _kernel.setArg(idx++, tensor_image2d);
-            _kernel.setArg<cl_uint>(idx++, static_cast<unsigned int>(tensor->info()->strides_in_bytes()[2]));
-            break;
-        }
-
-        case GpuKernelArgumentInfo::Type::Tensor_3D:
-        {
-            add_3D_tensor_argument(idx, tensor, arg_slice);
-            break;
-        }
-
-        case GpuKernelArgumentInfo::Type::Tensor_4D:
-        {
-            add_4D_tensor_argument(idx, tensor, arg_slice);
-            break;
-        }
-        case GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer:
-        {
-            add_4d_tensor_nhwc_argument(idx, tensor);
-            break;
-        }
-        case GpuKernelArgumentInfo::Type::Tensor_4D_t_Image:
-        {
-            const size_t image_w        = tensor->info()->dimension(0) / 4;
-            const size_t image_h        = tensor->info()->tensor_shape().total_size_upper(1);
-            const size_t image_stride_y = tensor->info()->strides_in_bytes()[1];
-
-            cl::Image2D tensor_image2d = create_image2d_from_buffer(
-                CLKernelLibrary::get().context(), tensor->cl_buffer(), TensorShape(image_w, image_h),
-                tensor->info()->data_type(), image_stride_y, CLImage2DType::ReadOnly);
-            cl_images.push_back(tensor_image2d);
-
-            _kernel.setArg(idx++, tensor_image2d);
-            add_4d_tensor_nhwc_argument(idx, tensor);
-            break;
-        }
-        case GpuKernelArgumentInfo::Type::Tensor_Special_0:
-        {
-            const ITensorInfo *info    = tensor->info();
-            const Strides     &strides = info->strides_in_bytes();
-
-            _kernel.setArg(idx++, tensor->cl_buffer());
-            const size_t dim1xdim2 = info->tensor_shape()[1] * info->tensor_shape()[2];
-            _kernel.setArg<cl_int>(idx++, static_cast<int32_t>(dim1xdim2));
-            const size_t stride1 = strides[1];
-            _kernel.setArg<cl_int>(idx++, static_cast<int32_t>(stride1));
-            break;
-        }
-        default:
-        {
-            ARM_COMPUTE_ERROR("Unsupported");
-        }
-    }
-}
-
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
 inline void ClKernelRuntime::add_kernel_argument(unsigned int                   &idx,
                                                  const GpuKernelArgumentBinding &arg,
                                                  const ICLTensor                *tensor,
@@ -234,7 +111,6 @@ inline void ClKernelRuntime::add_kernel_argument(unsigned int
     }
 }
 
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
 void ClKernelRuntime::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue)
 {
     ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
@@ -253,17 +129,7 @@ void ClKernelRuntime::run_op(ITensorPack &tensors, const Window &window, cl::Com
         // Set kernel arguments
         // CLImages created from tensor arguments. Need to be retained until enqueue
         std::vector<cl::Image2D> cl_images;
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-        for (auto id_arg : _arguments)
-        {
-            const auto arg    = id_arg.second;
-            auto       tensor = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(id_arg.first));
-            ARM_COMPUTE_ERROR_ON_NULLPTR(tensor);
-            ARM_COMPUTE_ERROR_ON_NULLPTR(tensor->info());
-            add_tensor_argument(idx, *arg.kernel_argument_info(), tensor, slice, cl_images);
-        }
 
-#else  // ACL_INTERNAL_TEST_CKW_IN_DF
         for (const auto &arg : _arguments)
         {
             auto tensor = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(arg.id()));
@@ -271,7 +137,6 @@ void ClKernelRuntime::run_op(ITensorPack &tensors, const Window &window, cl::Com
             ARM_COMPUTE_ERROR_ON_NULLPTR(tensor->info());
             add_kernel_argument(idx, arg, tensor, cl_images);
         }
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
 
         // Dispatch kernel
         enqueue(queue, *this, slice, lws_hint(), use_dummy_work_items);
author	Gunes Bayir <gunes.bayir@arm.com>	2024-02-07 15:34:45 +0000
committer	Gunes Bayir <gunes.bayir@arm.com>	2024-02-09 15:59:45 +0000
commit	0ee13afc4429411de9a05ba4c2ff8a580784b568 (patch)
tree	c9ee1acf684d52b92ffb7500b0b65eee8377ce45 /src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp
parent	a3e1b50588b89a2c0c67da2679728a422fc16402 (diff)
download	ComputeLibrary-0ee13afc4429411de9a05ba4c2ff8a580784b568.tar.gz