aboutsummaryrefslogtreecommitdiff
path: root/src/gpu/cl/kernels/experimental/dynamic_fusion/ClCompositeKernel.cpp
diff options
context:
space:
mode:
author Gunes Bayir <gunes.bayir@arm.com> 2022-03-28 21:32:33 +0100
committer SiCong Li <sicong.li@arm.com> 2022-04-13 10:36:30 +0000
commit 16c5697085c256c19fb8ba4bef6188d61f30a88b (patch)
tree 609bfe2082c939ff37bdf6ef37bc22fc071bd934 /src/gpu/cl/kernels/experimental/dynamic_fusion/ClCompositeKernel.cpp
parent 5d606cccaabdfc435734c9fb51e11f14f3724a23 (diff)
download ComputeLibrary-16c5697085c256c19fb8ba4bef6188d61f30a88b.tar.gz
Add DirectConvolution2D kernel component for dynamic fusion
Resolves: COMPMID-5156
Change-Id: I438da924cb80d3bce72106b06ca7181e0606bd01
Signed-off-by: Gunes Bayir <gunes.bayir@arm.com>
Signed-off-by: Giorgio Arena <giorgio.arena@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/7399
Reviewed-by: SiCong Li <sicong.li@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/gpu/cl/kernels/experimental/dynamic_fusion/ClCompositeKernel.cpp')
-rw-r--r-- src/gpu/cl/kernels/experimental/dynamic_fusion/ClCompositeKernel.cpp 25
1 files changed, 20 insertions, 5 deletions
diff --git a/src/gpu/cl/kernels/experimental/dynamic_fusion/ClCompositeKernel.cpp b/src/gpu/cl/kernels/experimental/dynamic_fusion/ClCompositeKernel.cpp
index 05912dfd81..472cfb9df0 100644
--- a/src/gpu/cl/kernels/experimental/dynamic_fusion/ClCompositeKernel.cpp
+++ b/src/gpu/cl/kernels/experimental/dynamic_fusion/ClCompositeKernel.cpp
@@ -66,7 +66,6 @@ inline void ClCompositeKernel::add_tensor_argument(unsigned int &idx, const ClKe
ARM_COMPUTE_ERROR("Unsupported yet");
break;
}
-
case TensorArgType::Vector:
{
add_1D_tensor_argument(idx, tensor, arg_slice);
@@ -93,7 +92,6 @@ inline void ClCompositeKernel::add_tensor_argument(unsigned int &idx, const ClKe
_kernel.setArg(idx++, tensor_image2d);
break;
}
-
case TensorArgType::Image_3D:
{
add_2D_tensor_argument(idx, tensor, arg_slice);
@@ -109,18 +107,34 @@ inline void ClCompositeKernel::add_tensor_argument(unsigned int &idx, const ClKe
_kernel.setArg<cl_uint>(idx++, static_cast<unsigned int>(tensor->info()->strides_in_bytes()[2]));
break;
}
-
case TensorArgType::Tensor_3D:
{
add_3D_tensor_argument(idx, tensor, arg_slice);
break;
}
-
case TensorArgType::Tensor_4D:
{
add_4D_tensor_argument(idx, tensor, arg_slice);
break;
}
+ case TensorArgType::Tensor_4D_t_Buffer:
+ {
+ add_4d_tensor_nhwc_argument(idx, tensor);
+ break;
+ }
+ case TensorArgType::Tensor_4D_t_Image:
+ {
+ const size_t image_w = tensor->info()->dimension(0) / 4;
+ const size_t image_h = tensor->info()->tensor_shape().total_size_upper(1);
+ const size_t image_stride_y = tensor->info()->strides_in_bytes()[1];
+
+ cl::Image2D tensor_cl_image = create_image2d_from_buffer(CLKernelLibrary::get().context(), tensor->cl_buffer(),
+ TensorShape(image_w, image_h), tensor->info()->data_type(), image_stride_y);
+
+ _kernel.setArg(idx++, tensor_cl_image);
+ add_4d_tensor_nhwc_argument(idx, tensor);
+ break;
+ }
default:
{
ARM_COMPUTE_ERROR("Unsupported");
@@ -140,6 +154,7 @@ void ClCompositeKernel::run_composite_op(TensorBinding &tensors, const Window &w
Window slice_fixed_z = slice;
slice_fixed_z.set(Window::DimX, Window::Dimension(0, 1, 1));
slice_fixed_z.set(Window::DimY, Window::Dimension(0, 1, 1));
+
unsigned int idx = 0;
do
{
@@ -162,7 +177,7 @@ void ClCompositeKernel::run_composite_op(TensorBinding &tensors, const Window &w
bool use_dummy_work_items = false;
enqueue(queue, *this, slice, lws_hint(), use_dummy_work_items);
}
- while(window.slide_window_slice_3D(slice));
+ while(!exec_desc.skip_sliding_window && window.slide_window_slice_3D(slice));
}
Status bind_arguments(ITensorPack &, const ClKernelCode &, const TensorBinding &)