diff options
Diffstat (limited to 'src/dynamic_fusion/runtime/gpu/cl')
-rw-r--r-- | src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp | 139 | ||||
-rw-r--r-- | src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.h | 24 |
2 files changed, 6 insertions, 157 deletions
diff --git a/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp b/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp index 9ca20fa152..eab5cddd07 100644 --- a/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp +++ b/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -26,12 +26,11 @@ #include "arm_compute/core/CL/ICLTensor.h" #include "src/core/CL/CLUtils.h" -#ifdef ACL_INTERNAL_TEST_CKW_IN_DF #include "src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.h" -#endif // ACL_INTERNAL_TEST_CKW_IN_DF #include "src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h" #include "src/gpu/cl/ClKernelLibrary.h" #include "support/Cast.h" + namespace arm_compute { namespace experimental @@ -61,128 +60,6 @@ void ClKernelRuntime::configure(const ClCompileContext &compile_ctx, const GpuKe _arguments = code.arguments(); } -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - -inline void ClKernelRuntime::add_tensor_argument(unsigned int &idx, - const GpuKernelArgumentInfo &arg, - const ICLTensor *tensor, - const Window &arg_slice, - std::vector<cl::Image2D> &cl_images) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(tensor); - - switch (arg.type) - { - case GpuKernelArgumentInfo::Type::Scalar: - { - ARM_COMPUTE_ERROR("Unsupported yet"); - break; - } - - case GpuKernelArgumentInfo::Type::Vector: - { - add_1D_tensor_argument(idx, tensor, arg_slice); - break; - } - - case GpuKernelArgumentInfo::Type::Image: - { - add_2D_tensor_argument(idx, tensor, arg_slice); - break; - } - case GpuKernelArgumentInfo::Type::Image_Reinterpret_As_3D: - { - add_2D_tensor_argument(idx, tensor, arg_slice); - const unsigned int total_cross_plane_pad = tensor->info()->padding().top + tensor->info()->padding().bottom; - _kernel.setArg<cl_uint>(idx++, static_cast<unsigned int>(total_cross_plane_pad)); - break; - } - case GpuKernelArgumentInfo::Type::Image_Export_To_ClImage2D: - { - const TensorShape shape2d(tensor->info()->dimension(0) / 4, tensor->info()->dimension(1) * - tensor->info()->dimension(2) * - tensor->info()->dimension(3)); - const size_t image_row_pitch = tensor->info()->strides_in_bytes()[1]; - cl::Image2D tensor_image2d = - create_image2d_from_buffer(CLKernelLibrary::get().context(), tensor->cl_buffer(), shape2d, - tensor->info()->data_type(), image_row_pitch, CLImage2DType::ReadOnly); - cl_images.push_back(tensor_image2d); - _kernel.setArg(idx++, tensor_image2d); - break; - } - - case GpuKernelArgumentInfo::Type::Image_3D: - { - add_2D_tensor_argument(idx, tensor, arg_slice); - _kernel.setArg<cl_uint>(idx++, static_cast<unsigned int>(tensor->info()->strides_in_bytes()[2])); - break; - } - case GpuKernelArgumentInfo::Type::Image_3D_Export_To_ClImage2D: - { - const TensorShape shape2d(tensor->info()->dimension(0) / 4, tensor->info()->dimension(1) * - tensor->info()->dimension(2) * - tensor->info()->dimension(3)); - const size_t image_row_pitch = tensor->info()->strides_in_bytes()[1]; - cl::Image2D tensor_image2d = - create_image2d_from_buffer(CLKernelLibrary::get().context(), tensor->cl_buffer(), shape2d, - tensor->info()->data_type(), image_row_pitch, CLImage2DType::ReadOnly); - cl_images.push_back(tensor_image2d); - _kernel.setArg(idx++, tensor_image2d); - _kernel.setArg<cl_uint>(idx++, static_cast<unsigned int>(tensor->info()->strides_in_bytes()[2])); - break; - } - - case GpuKernelArgumentInfo::Type::Tensor_3D: - { - add_3D_tensor_argument(idx, tensor, arg_slice); - break; - } - - case GpuKernelArgumentInfo::Type::Tensor_4D: - { - add_4D_tensor_argument(idx, tensor, arg_slice); - break; - } - case GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer: - { - add_4d_tensor_nhwc_argument(idx, tensor); - break; - } - case GpuKernelArgumentInfo::Type::Tensor_4D_t_Image: - { - const size_t image_w = tensor->info()->dimension(0) / 4; - const size_t image_h = tensor->info()->tensor_shape().total_size_upper(1); - const size_t image_stride_y = tensor->info()->strides_in_bytes()[1]; - - cl::Image2D tensor_image2d = create_image2d_from_buffer( - CLKernelLibrary::get().context(), tensor->cl_buffer(), TensorShape(image_w, image_h), - tensor->info()->data_type(), image_stride_y, CLImage2DType::ReadOnly); - cl_images.push_back(tensor_image2d); - - _kernel.setArg(idx++, tensor_image2d); - add_4d_tensor_nhwc_argument(idx, tensor); - break; - } - case GpuKernelArgumentInfo::Type::Tensor_Special_0: - { - const ITensorInfo *info = tensor->info(); - const Strides &strides = info->strides_in_bytes(); - - _kernel.setArg(idx++, tensor->cl_buffer()); - const size_t dim1xdim2 = info->tensor_shape()[1] * info->tensor_shape()[2]; - _kernel.setArg<cl_int>(idx++, static_cast<int32_t>(dim1xdim2)); - const size_t stride1 = strides[1]; - _kernel.setArg<cl_int>(idx++, static_cast<int32_t>(stride1)); - break; - } - default: - { - ARM_COMPUTE_ERROR("Unsupported"); - } - } -} - -#else // ACL_INTERNAL_TEST_CKW_IN_DF inline void ClKernelRuntime::add_kernel_argument(unsigned int &idx, const GpuKernelArgumentBinding &arg, const ICLTensor *tensor, @@ -234,7 +111,6 @@ inline void ClKernelRuntime::add_kernel_argument(unsigned int } } -#endif // ACL_INTERNAL_TEST_CKW_IN_DF void ClKernelRuntime::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) { ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); @@ -253,17 +129,7 @@ void ClKernelRuntime::run_op(ITensorPack &tensors, const Window &window, cl::Com // Set kernel arguments // CLImages created from tensor arguments. Need to be retained until enqueue std::vector<cl::Image2D> cl_images; -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - for (auto id_arg : _arguments) - { - const auto arg = id_arg.second; - auto tensor = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(id_arg.first)); - ARM_COMPUTE_ERROR_ON_NULLPTR(tensor); - ARM_COMPUTE_ERROR_ON_NULLPTR(tensor->info()); - add_tensor_argument(idx, *arg.kernel_argument_info(), tensor, slice, cl_images); - } -#else // ACL_INTERNAL_TEST_CKW_IN_DF for (const auto &arg : _arguments) { auto tensor = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(arg.id())); @@ -271,7 +137,6 @@ void ClKernelRuntime::run_op(ITensorPack &tensors, const Window &window, cl::Com ARM_COMPUTE_ERROR_ON_NULLPTR(tensor->info()); add_kernel_argument(idx, arg, tensor, cl_images); } -#endif // ACL_INTERNAL_TEST_CKW_IN_DF // Dispatch kernel enqueue(queue, *this, slice, lws_hint(), use_dummy_work_items); diff --git a/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.h b/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.h index e78567eb9d..148e4db581 100644 --- a/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.h +++ b/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef SRC_DYNAMIC_FUSION_RUNTIME_GPU_CL_CLKERNELRUNTIME -#define SRC_DYNAMIC_FUSION_RUNTIME_GPU_CL_CLKERNELRUNTIME +#ifndef ACL_SRC_DYNAMIC_FUSION_RUNTIME_GPU_CL_CLKERNELRUNTIME_H +#define ACL_SRC_DYNAMIC_FUSION_RUNTIME_GPU_CL_CLKERNELRUNTIME_H #include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h" #include "src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h" @@ -59,21 +59,6 @@ public: virtual void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; private: -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - /** Set a kernel tensor argument - * - * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set. - * @param[in] arg Kernel argument descriptor accompanying @p tensor - * @param[in] tensor Tensor to set as an argument of the object's kernel - * @param[in] arg_slice Window the kernel will be run on - * @param[out] cl_images Extra cl images created from the tensor (will need to be retained until the kernel is enqueued) - */ - inline void add_tensor_argument(unsigned int &idx, - const GpuKernelArgumentInfo &arg, - const ICLTensor *tensor, - const Window &arg_slice, - std::vector<cl::Image2D> &cl_images); -#else // ACL_INTERNAL_TEST_CKW_IN_DF /** Set a kernel argument as part of a tensor * * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set. @@ -85,7 +70,6 @@ private: const GpuKernelArgumentBinding &arg, const ICLTensor *tensor, std::vector<cl::Image2D> &cl_images); -#endif // ACL_INTERNAL_TEST_CKW_IN_DF private: GpuKernelArgumentList _arguments{}; @@ -94,4 +78,4 @@ private: } // namespace dynamic_fusion } // namespace experimental } // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_RUNTIME_GPU_CL_CLKERNELRUNTIME */ +#endif // ACL_SRC_DYNAMIC_FUSION_RUNTIME_GPU_CL_CLKERNELRUNTIME_H |