From 17975a61c5d7cbdc37c11d38e23eab8afa43f27c Mon Sep 17 00:00:00 2001 From: Adnan AlSinan Date: Mon, 8 Nov 2021 17:46:39 +0000 Subject: Improve start-up time for ClScale - Add macro guard for different kernels in scale.cl - Rework TENSOR4D to the new format - Pass scale_x and scale_y at runtime Resolves COMPMID-4886 Signed-off-by: Adnan AlSinan Change-Id: Ib904a703d511fb8260618057ac92e5ea9efeee2b Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6619 Reviewed-by: Gian Marco Iodice Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins --- src/core/CL/ICLKernel.cpp | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'src/core/CL/ICLKernel.cpp') diff --git a/src/core/CL/ICLKernel.cpp b/src/core/CL/ICLKernel.cpp index 9ba17d0e03..eb750cbd34 100644 --- a/src/core/CL/ICLKernel.cpp +++ b/src/core/CL/ICLKernel.cpp @@ -116,6 +116,33 @@ void ICLKernel::add_tensor_argument(unsigned &idx, const ICLTensor *tensor, cons ARM_COMPUTE_UNUSED(idx_start); } +void ICLKernel::add_4d_tensor_nhwc_argument(unsigned int &idx, const ICLTensor *tensor) +{ + ARM_COMPUTE_ERROR_ON(tensor == nullptr); + + const ITensorInfo *info = tensor->info(); + ARM_COMPUTE_ERROR_ON(info == nullptr); + const Strides &strides = info->strides_in_bytes(); + + // Tensor pointer + _kernel.setArg(idx++, tensor->cl_buffer()); + + // Add stride_y, stride_z and stride_w + _kernel.setArg(idx++, strides[1]); + _kernel.setArg(idx++, strides[2]); + _kernel.setArg(idx++, strides[3]); + + // Tensor dimensions + _kernel.setArg(idx++, info->dimension(0)); + _kernel.setArg(idx++, info->dimension(1)); + _kernel.setArg(idx++, info->dimension(2)); + _kernel.setArg(idx++, info->dimension(3)); + + // Offset of first element + unsigned int offset_first_element = info->offset_first_element_in_bytes(); + _kernel.setArg(idx++, offset_first_element); +} + #ifndef DOXYGEN_SKIP_THIS template void ICLKernel::add_tensor_argument<1>(unsigned &idx, const ICLTensor *tensor, const Window &window); 
template void ICLKernel::add_tensor_argument<2>(unsigned &idx, const ICLTensor *tensor, const Window &window); -- cgit v1.2.1