diff options
author | Gunes Bayir <gunes.bayir@arm.com> | 2024-02-07 15:34:45 +0000 |
---|---|---|
committer | Gunes Bayir <gunes.bayir@arm.com> | 2024-02-09 15:59:45 +0000 |
commit | 0ee13afc4429411de9a05ba4c2ff8a580784b568 (patch) | |
tree | c9ee1acf684d52b92ffb7500b0b65eee8377ce45 /src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp | |
parent | a3e1b50588b89a2c0c67da2679728a422fc16402 (diff) | |
download | ComputeLibrary-0ee13afc4429411de9a05ba4c2ff8a580784b568.tar.gz |
Remove CKW prototype and Template Writer
GPU code in dynamic fusion is now written by the stable CKW. We no longer need the CKW prototype or the older writer implementation, i.e. TemplateWriter.
It also removes the need for the -DACL_INTERNAL_TEST_CKW_IN_DF flag to compile and test dynamic fusion operators.
Resolves: COMPMID-6715
Signed-off-by: Gunes Bayir <gunes.bayir@arm.com>
Change-Id: I9f9453311e79d9be612bd4754240d832f98503e8
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/11116
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Jakub Sujak <jakub.sujak@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp')
-rw-r--r-- | src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp | 139 |
1 files changed, 2 insertions, 137 deletions
diff --git a/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp b/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp index 9ca20fa152..eab5cddd07 100644 --- a/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp +++ b/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -26,12 +26,11 @@ #include "arm_compute/core/CL/ICLTensor.h" #include "src/core/CL/CLUtils.h" -#ifdef ACL_INTERNAL_TEST_CKW_IN_DF #include "src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.h" -#endif // ACL_INTERNAL_TEST_CKW_IN_DF #include "src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h" #include "src/gpu/cl/ClKernelLibrary.h" #include "support/Cast.h" + namespace arm_compute { namespace experimental @@ -61,128 +60,6 @@ void ClKernelRuntime::configure(const ClCompileContext &compile_ctx, const GpuKe _arguments = code.arguments(); } -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - -inline void ClKernelRuntime::add_tensor_argument(unsigned int &idx, - const GpuKernelArgumentInfo &arg, - const ICLTensor *tensor, - const Window &arg_slice, - std::vector<cl::Image2D> &cl_images) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(tensor); - - switch (arg.type) - { - case GpuKernelArgumentInfo::Type::Scalar: - { - ARM_COMPUTE_ERROR("Unsupported yet"); - break; - } - - case GpuKernelArgumentInfo::Type::Vector: - { - add_1D_tensor_argument(idx, tensor, arg_slice); - break; - } - - case GpuKernelArgumentInfo::Type::Image: - { - add_2D_tensor_argument(idx, tensor, arg_slice); - break; - } - case GpuKernelArgumentInfo::Type::Image_Reinterpret_As_3D: - { - add_2D_tensor_argument(idx, tensor, arg_slice); - const unsigned int total_cross_plane_pad = tensor->info()->padding().top + tensor->info()->padding().bottom; - _kernel.setArg<cl_uint>(idx++, static_cast<unsigned int>(total_cross_plane_pad)); - break; - } - case 
GpuKernelArgumentInfo::Type::Image_Export_To_ClImage2D: - { - const TensorShape shape2d(tensor->info()->dimension(0) / 4, tensor->info()->dimension(1) * - tensor->info()->dimension(2) * - tensor->info()->dimension(3)); - const size_t image_row_pitch = tensor->info()->strides_in_bytes()[1]; - cl::Image2D tensor_image2d = - create_image2d_from_buffer(CLKernelLibrary::get().context(), tensor->cl_buffer(), shape2d, - tensor->info()->data_type(), image_row_pitch, CLImage2DType::ReadOnly); - cl_images.push_back(tensor_image2d); - _kernel.setArg(idx++, tensor_image2d); - break; - } - - case GpuKernelArgumentInfo::Type::Image_3D: - { - add_2D_tensor_argument(idx, tensor, arg_slice); - _kernel.setArg<cl_uint>(idx++, static_cast<unsigned int>(tensor->info()->strides_in_bytes()[2])); - break; - } - case GpuKernelArgumentInfo::Type::Image_3D_Export_To_ClImage2D: - { - const TensorShape shape2d(tensor->info()->dimension(0) / 4, tensor->info()->dimension(1) * - tensor->info()->dimension(2) * - tensor->info()->dimension(3)); - const size_t image_row_pitch = tensor->info()->strides_in_bytes()[1]; - cl::Image2D tensor_image2d = - create_image2d_from_buffer(CLKernelLibrary::get().context(), tensor->cl_buffer(), shape2d, - tensor->info()->data_type(), image_row_pitch, CLImage2DType::ReadOnly); - cl_images.push_back(tensor_image2d); - _kernel.setArg(idx++, tensor_image2d); - _kernel.setArg<cl_uint>(idx++, static_cast<unsigned int>(tensor->info()->strides_in_bytes()[2])); - break; - } - - case GpuKernelArgumentInfo::Type::Tensor_3D: - { - add_3D_tensor_argument(idx, tensor, arg_slice); - break; - } - - case GpuKernelArgumentInfo::Type::Tensor_4D: - { - add_4D_tensor_argument(idx, tensor, arg_slice); - break; - } - case GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer: - { - add_4d_tensor_nhwc_argument(idx, tensor); - break; - } - case GpuKernelArgumentInfo::Type::Tensor_4D_t_Image: - { - const size_t image_w = tensor->info()->dimension(0) / 4; - const size_t image_h = 
tensor->info()->tensor_shape().total_size_upper(1); - const size_t image_stride_y = tensor->info()->strides_in_bytes()[1]; - - cl::Image2D tensor_image2d = create_image2d_from_buffer( - CLKernelLibrary::get().context(), tensor->cl_buffer(), TensorShape(image_w, image_h), - tensor->info()->data_type(), image_stride_y, CLImage2DType::ReadOnly); - cl_images.push_back(tensor_image2d); - - _kernel.setArg(idx++, tensor_image2d); - add_4d_tensor_nhwc_argument(idx, tensor); - break; - } - case GpuKernelArgumentInfo::Type::Tensor_Special_0: - { - const ITensorInfo *info = tensor->info(); - const Strides &strides = info->strides_in_bytes(); - - _kernel.setArg(idx++, tensor->cl_buffer()); - const size_t dim1xdim2 = info->tensor_shape()[1] * info->tensor_shape()[2]; - _kernel.setArg<cl_int>(idx++, static_cast<int32_t>(dim1xdim2)); - const size_t stride1 = strides[1]; - _kernel.setArg<cl_int>(idx++, static_cast<int32_t>(stride1)); - break; - } - default: - { - ARM_COMPUTE_ERROR("Unsupported"); - } - } -} - -#else // ACL_INTERNAL_TEST_CKW_IN_DF inline void ClKernelRuntime::add_kernel_argument(unsigned int &idx, const GpuKernelArgumentBinding &arg, const ICLTensor *tensor, @@ -234,7 +111,6 @@ inline void ClKernelRuntime::add_kernel_argument(unsigned int } } -#endif // ACL_INTERNAL_TEST_CKW_IN_DF void ClKernelRuntime::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) { ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); @@ -253,17 +129,7 @@ void ClKernelRuntime::run_op(ITensorPack &tensors, const Window &window, cl::Com // Set kernel arguments // CLImages created from tensor arguments. 
Need to be retained until enqueue std::vector<cl::Image2D> cl_images; -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - for (auto id_arg : _arguments) - { - const auto arg = id_arg.second; - auto tensor = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(id_arg.first)); - ARM_COMPUTE_ERROR_ON_NULLPTR(tensor); - ARM_COMPUTE_ERROR_ON_NULLPTR(tensor->info()); - add_tensor_argument(idx, *arg.kernel_argument_info(), tensor, slice, cl_images); - } -#else // ACL_INTERNAL_TEST_CKW_IN_DF for (const auto &arg : _arguments) { auto tensor = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(arg.id())); @@ -271,7 +137,6 @@ void ClKernelRuntime::run_op(ITensorPack &tensors, const Window &window, cl::Com ARM_COMPUTE_ERROR_ON_NULLPTR(tensor->info()); add_kernel_argument(idx, arg, tensor, cl_images); } -#endif // ACL_INTERNAL_TEST_CKW_IN_DF // Dispatch kernel enqueue(queue, *this, slice, lws_hint(), use_dummy_work_items); |