Remove OpenCL padding CLFloorKernel

Use of proper vector size with boundary checking loads and stores Resolves: COMPMID-3922 Change-Id: Ib631d499603b860fcfdbe3da903b866a125359a8 Signed-off-by: Manuel Bottini <manuel.bottini@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4789 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com> Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
author: Manuel Bottini <manuel.bottini@arm.com> 2021-01-08 15:57:11 +0000
committer: Manuel Bottini <manuel.bottini@arm.com> 2021-01-13 10:36:55 +0000
commit: e78ef6fda8f9dafec3a08af917a946ebe18944c5 (patch)
tree: a27ab5f827dad34d96c579bb068980866d936136 /src/core/CL/cl_kernels
parent: 5db75c350ca0c0d8965a894d7e1a371746a2102b (diff)
download: ComputeLibrary-e78ef6fda8f9dafec3a08af917a946ebe18944c5.tar.gz
1 files changed, 17 insertions, 7 deletions
diff --git a/src/core/CL/cl_kernels/floor.cl b/src/core/CL/cl_kernels/floor.cl
index 1988ba4e92..f6dd4edd2e 100644
--- a/src/core/CL/cl_kernels/floor.cl
+++ b/src/core/CL/cl_kernels/floor.cl
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -23,10 +23,13 @@
  */
 #include "helpers.h"
 
+#if defined(VEC_SIZE) && defined(VEC_SIZE_LEFTOVER)
+
 /** Perform a floor operation on an input tensor.
  *
- * @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float
- * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16
+ * @note Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float
+ * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16
+ * @note The leftover size in the X dimension should be given as a preprocessor argument using -DVEC_SIZE_LEFTOVER, computed as x_dimension % VEC_SIZE. e.g. -DVEC_SIZE_LEFTOVER=1
  * @note Can only take floating point data types.
  *
  * @param[in]  input_ptr                            Pointer to the source image. Supported data types: F16/F32
@@ -50,9 +53,16 @@ __kernel void floor_layer(
     TENSOR3D_DECLARATION(input),
     TENSOR3D_DECLARATION(output))
 {
-    Tensor3D input  = CONVERT_TO_TENSOR3D_STRUCT(input);
-    Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
+    // Offset computation
+    const uint x_offs = max((int)(get_global_id(0) * VEC_SIZE - (VEC_SIZE - VEC_SIZE_LEFTOVER) % VEC_SIZE), 0);
+
+    // Address computation
+    __global uchar *input_addr  = input_ptr + input_offset_first_element_in_bytes + x_offs * sizeof(DATA_TYPE) + get_global_id(1) * input_stride_y + get_global_id(2) * input_stride_z;
+    __global uchar *output_addr = output_ptr + output_offset_first_element_in_bytes + x_offs * sizeof(DATA_TYPE) + get_global_id(1) * output_stride_y + get_global_id(2) * output_stride_z;
+
+    VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
+    data0 = floor(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input_addr));
 
-    VSTORE(VEC_SIZE)
-    (floor(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input.ptr)), 0, (__global DATA_TYPE *)output.ptr);
+    STORE_VECTOR_SELECT(data, DATA_TYPE, output_addr, VEC_SIZE, VEC_SIZE_LEFTOVER, VEC_SIZE_LEFTOVER != 0 && get_global_id(0) == 0);
 }
+#endif // defined(VEC_SIZE) && defined(VEC_SIZE_LEFTOVER)
+\ No newline at end of file
author	Manuel Bottini <manuel.bottini@arm.com>	2021-01-08 15:57:11 +0000
committer	Manuel Bottini <manuel.bottini@arm.com>	2021-01-13 10:36:55 +0000
commit	e78ef6fda8f9dafec3a08af917a946ebe18944c5 (patch)
tree	a27ab5f827dad34d96c579bb068980866d936136 /src/core/CL/cl_kernels
parent	5db75c350ca0c0d8965a894d7e1a371746a2102b (diff)
download	ComputeLibrary-e78ef6fda8f9dafec3a08af917a946ebe18944c5.tar.gz