aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/ICLKernel.h
diff options
context:
space:
mode:
authorAdnan AlSinan <adnan.alsinan@arm.com>2021-11-08 17:46:39 +0000
committerAdnan AlSinan <adnan.alsinan@arm.com>2021-11-09 14:42:08 +0000
commit17975a61c5d7cbdc37c11d38e23eab8afa43f27c (patch)
tree2b9a82377dc90399dfb1a2e9e70e2dabb9735987 /src/core/CL/ICLKernel.h
parenta3b13b500e61a02d1e73148ccd97d054ef7c22ec (diff)
downloadComputeLibrary-17975a61c5d7cbdc37c11d38e23eab8afa43f27c.tar.gz
Improve start-up time for ClScale
- Add macro guard for different kernels in scale.cl
- Rework TENSOR4D to the new format
- Pass scale_x and scale_y at runtime

Resolves COMPMID-4886

Signed-off-by: Adnan AlSinan <adnan.alsinan@arm.com>
Change-Id: Ib904a703d511fb8260618057ac92e5ea9efeee2b
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6619
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/ICLKernel.h')
-rw-r--r--src/core/CL/ICLKernel.h18
1 files changed, 18 insertions, 0 deletions
diff --git a/src/core/CL/ICLKernel.h b/src/core/CL/ICLKernel.h
index 3b3217d1d8..a7c979ef45 100644
--- a/src/core/CL/ICLKernel.h
+++ b/src/core/CL/ICLKernel.h
@@ -225,6 +225,24 @@ public:
{
add_tensor_argument<4>(idx, tensor, window);
}
+
+ /** Add the passed NHWC 4D tensor's parameters to the object's kernel's arguments.
+ *
+ * The tensor is described to the kernel by passing its strides, its dimensions and the offset to the first valid element in bytes.
+ * No execution window is taken by this function; only the tensor itself is supplied.
+ *
+ * @note num_arguments_per_4d_tensor_nhwc() reports the number of kernel arguments used per NHWC 4D tensor; presumably @p idx
+ * advances by that amount, but the definition of this function is not visible from this header - confirm against the implementation.
+ *
+ * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
+ * @param[in] tensor Tensor to set as an argument of the object's kernel.
+ */
+ void add_4d_tensor_nhwc_argument(unsigned int &idx, const ICLTensor *tensor);
+
+ /** Reports how many kernel arguments a single NHWC 4D Tensor object occupies.
+ *
+ * The value is a compile-time constant, so it can be used in constant expressions.
+ *
+ * @return The number of arguments enqueued per NHWC 4D Tensor object.
+ */
+ constexpr static unsigned int num_arguments_per_4d_tensor_nhwc()
+ {
+ // Fixed count of kernel arguments enqueued for one NHWC 4D tensor.
+ return 9u;
+ }
+
/** Returns the number of arguments enqueued per 1D array object.
*
* @return The number of arguments enqueued per 1D array object.