aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/ICLKernel.cpp
diff options
context:
space:
mode:
authorAdnan AlSinan <adnan.alsinan@arm.com>2021-11-08 17:46:39 +0000
committerAdnan AlSinan <adnan.alsinan@arm.com>2021-11-09 14:42:08 +0000
commit17975a61c5d7cbdc37c11d38e23eab8afa43f27c (patch)
tree2b9a82377dc90399dfb1a2e9e70e2dabb9735987 /src/core/CL/ICLKernel.cpp
parenta3b13b500e61a02d1e73148ccd97d054ef7c22ec (diff)
downloadComputeLibrary-17975a61c5d7cbdc37c11d38e23eab8afa43f27c.tar.gz
Improve start-up time for ClScale
- Add macro guard for different kernels in scale.cl - Rework TENSOR4D to the new format - Pass scale_x and scale_y at runtime Resolves COMPMID-4886 Signed-off-by: Adnan AlSinan <adnan.alsinan@arm.com> Change-Id: Ib904a703d511fb8260618057ac92e5ea9efeee2b Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6619 Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/ICLKernel.cpp')
-rw-r--r--src/core/CL/ICLKernel.cpp27
1 files changed, 27 insertions, 0 deletions
diff --git a/src/core/CL/ICLKernel.cpp b/src/core/CL/ICLKernel.cpp
index 9ba17d0e03..eb750cbd34 100644
--- a/src/core/CL/ICLKernel.cpp
+++ b/src/core/CL/ICLKernel.cpp
@@ -116,6 +116,33 @@ void ICLKernel::add_tensor_argument(unsigned &idx, const ICLTensor *tensor, cons
ARM_COMPUTE_UNUSED(idx_start);
}
+void ICLKernel::add_4d_tensor_nhwc_argument(unsigned int &idx, const ICLTensor *tensor)
+{ // Sets nine consecutive arguments on _kernel describing `tensor`, starting at `idx`; `idx` is a reference and ends up advanced by nine.
+ ARM_COMPUTE_ERROR_ON(tensor == nullptr);
+ // Argument order: buffer, strides[1], strides[2], strides[3], dimension(0)..dimension(3), offset of the first element.
+ const ITensorInfo *info = tensor->info();
+ ARM_COMPUTE_ERROR_ON(info == nullptr);
+ const Strides &strides = info->strides_in_bytes();
+ // NOTE(review): this order presumably has to match the parameter list of the OpenCL kernel being launched - confirm against the .cl source.
+ // Tensor pointer
+ _kernel.setArg(idx++, tensor->cl_buffer());
+ // strides[0] is intentionally not passed; only indices 1..3 are forwarded.
+ // Add stride_y, stride_z and stride_w (taken from strides_in_bytes(), each passed as cl_uint)
+ _kernel.setArg<cl_uint>(idx++, strides[1]);
+ _kernel.setArg<cl_uint>(idx++, strides[2]);
+ _kernel.setArg<cl_uint>(idx++, strides[3]);
+ // NOTE(review): for an NHWC tensor, dimension(0)..dimension(3) are presumably C, W, H, N - confirm against ITensorInfo.
+ // Tensor dimensions (each passed as cl_uint)
+ _kernel.setArg<cl_uint>(idx++, info->dimension(0));
+ _kernel.setArg<cl_uint>(idx++, info->dimension(1));
+ _kernel.setArg<cl_uint>(idx++, info->dimension(2));
+ _kernel.setArg<cl_uint>(idx++, info->dimension(3));
+
+ // Offset of first element, as reported by offset_first_element_in_bytes() and passed as cl_uint
+ unsigned int offset_first_element = info->offset_first_element_in_bytes();
+ _kernel.setArg<cl_uint>(idx++, offset_first_element);
+}
+
#ifndef DOXYGEN_SKIP_THIS
template void ICLKernel::add_tensor_argument<1>(unsigned &idx, const ICLTensor *tensor, const Window &window);
template void ICLKernel::add_tensor_argument<2>(unsigned &idx, const ICLTensor *tensor, const Window &window);