diff options
author | Gunes Bayir <gunes.bayir@arm.com> | 2022-03-28 21:32:33 +0100 |
---|---|---|
committer | SiCong Li <sicong.li@arm.com> | 2022-04-13 10:36:30 +0000 |
commit | 16c5697085c256c19fb8ba4bef6188d61f30a88b (patch) | |
tree | 609bfe2082c939ff37bdf6ef37bc22fc071bd934 /src/gpu/cl/kernels/experimental/dynamic_fusion | |
parent | 5d606cccaabdfc435734c9fb51e11f14f3724a23 (diff) | |
download | ComputeLibrary-16c5697085c256c19fb8ba4bef6188d61f30a88b.tar.gz |
Add DirectConvolution2D kernel component for dynamic fusion
Resolves: COMPMID-5156
Change-Id: I438da924cb80d3bce72106b06ca7181e0606bd01
Signed-off-by: Gunes Bayir <gunes.bayir@arm.com>
Signed-off-by: Giorgio Arena <giorgio.arena@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/7399
Reviewed-by: SiCong Li <sicong.li@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/gpu/cl/kernels/experimental/dynamic_fusion')
-rw-r--r-- | src/gpu/cl/kernels/experimental/dynamic_fusion/ClCompositeKernel.cpp | 25 |
1 file changed, 20 insertions, 5 deletions
diff --git a/src/gpu/cl/kernels/experimental/dynamic_fusion/ClCompositeKernel.cpp b/src/gpu/cl/kernels/experimental/dynamic_fusion/ClCompositeKernel.cpp index 05912dfd81..472cfb9df0 100644 --- a/src/gpu/cl/kernels/experimental/dynamic_fusion/ClCompositeKernel.cpp +++ b/src/gpu/cl/kernels/experimental/dynamic_fusion/ClCompositeKernel.cpp @@ -66,7 +66,6 @@ inline void ClCompositeKernel::add_tensor_argument(unsigned int &idx, const ClKe ARM_COMPUTE_ERROR("Unsupported yet"); break; } - case TensorArgType::Vector: { add_1D_tensor_argument(idx, tensor, arg_slice); @@ -93,7 +92,6 @@ inline void ClCompositeKernel::add_tensor_argument(unsigned int &idx, const ClKe _kernel.setArg(idx++, tensor_image2d); break; } - case TensorArgType::Image_3D: { add_2D_tensor_argument(idx, tensor, arg_slice); @@ -109,18 +107,34 @@ inline void ClCompositeKernel::add_tensor_argument(unsigned int &idx, const ClKe _kernel.setArg<cl_uint>(idx++, static_cast<unsigned int>(tensor->info()->strides_in_bytes()[2])); break; } - case TensorArgType::Tensor_3D: { add_3D_tensor_argument(idx, tensor, arg_slice); break; } - case TensorArgType::Tensor_4D: { add_4D_tensor_argument(idx, tensor, arg_slice); break; } + case TensorArgType::Tensor_4D_t_Buffer: + { + add_4d_tensor_nhwc_argument(idx, tensor); + break; + } + case TensorArgType::Tensor_4D_t_Image: + { + const size_t image_w = tensor->info()->dimension(0) / 4; + const size_t image_h = tensor->info()->tensor_shape().total_size_upper(1); + const size_t image_stride_y = tensor->info()->strides_in_bytes()[1]; + + cl::Image2D tensor_cl_image = create_image2d_from_buffer(CLKernelLibrary::get().context(), tensor->cl_buffer(), + TensorShape(image_w, image_h), tensor->info()->data_type(), image_stride_y); + + _kernel.setArg(idx++, tensor_cl_image); + add_4d_tensor_nhwc_argument(idx, tensor); + break; + } default: { ARM_COMPUTE_ERROR("Unsupported"); @@ -140,6 +154,7 @@ void ClCompositeKernel::run_composite_op(TensorBinding &tensors, const Window &w Window 
slice_fixed_z = slice; slice_fixed_z.set(Window::DimX, Window::Dimension(0, 1, 1)); slice_fixed_z.set(Window::DimY, Window::Dimension(0, 1, 1)); + unsigned int idx = 0; do { @@ -162,7 +177,7 @@ void ClCompositeKernel::run_composite_op(TensorBinding &tensors, const Window &w bool use_dummy_work_items = false; enqueue(queue, *this, slice, lws_hint(), use_dummy_work_items); } - while(window.slide_window_slice_3D(slice)); + while(!exec_desc.skip_sliding_window && window.slide_window_slice_3D(slice)); } Status bind_arguments(ITensorPack &, const ClKernelCode &, const TensorBinding &) |