aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/cl_kernels
diff options
context:
space:
mode:
authorManuel Bottini <manuel.bottini@arm.com>2021-01-08 15:57:11 +0000
committerManuel Bottini <manuel.bottini@arm.com>2021-01-13 10:36:55 +0000
commite78ef6fda8f9dafec3a08af917a946ebe18944c5 (patch)
treea27ab5f827dad34d96c579bb068980866d936136 /src/core/CL/cl_kernels
parent5db75c350ca0c0d8965a894d7e1a371746a2102b (diff)
downloadComputeLibrary-e78ef6fda8f9dafec3a08af917a946ebe18944c5.tar.gz
Remove OpenCL padding CLFloorKernel
Use of proper vector size with boundary checking loads and stores Resolves: COMPMID-3922 Change-Id: Ib631d499603b860fcfdbe3da903b866a125359a8 Signed-off-by: Manuel Bottini <manuel.bottini@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4789 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com> Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels')
-rw-r--r--src/core/CL/cl_kernels/floor.cl24
1 files changed, 17 insertions, 7 deletions
diff --git a/src/core/CL/cl_kernels/floor.cl b/src/core/CL/cl_kernels/floor.cl
index 1988ba4e92..f6dd4edd2e 100644
--- a/src/core/CL/cl_kernels/floor.cl
+++ b/src/core/CL/cl_kernels/floor.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,10 +23,13 @@
*/
#include "helpers.h"
+#if defined(VEC_SIZE) && defined(VEC_SIZE_LEFTOVER)
+
/** Perform a floor operation on an input tensor.
*
- * @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float
- * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16
+ * @note Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float
+ * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16
+ * @note The leftover size in the X dimension shoud be given as preprocessor argument using -DVEC_SIZE_LEFTOVER is; x_dimension % VEC_SIZE. e.g. -DVEC_SIZE_LEFTOVER=1
* @note Can only take floating point data types.
*
* @param[in] input_ptr Pointer to the source image. Supported data types: F16/F32
@@ -50,9 +53,16 @@ __kernel void floor_layer(
TENSOR3D_DECLARATION(input),
TENSOR3D_DECLARATION(output))
{
- Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
- Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
+ // Offset computation
+ const uint x_offs = max((int)(get_global_id(0) * VEC_SIZE - (VEC_SIZE - VEC_SIZE_LEFTOVER) % VEC_SIZE), 0);
+
+ // Address computation
+ __global uchar *input_addr = input_ptr + input_offset_first_element_in_bytes + x_offs * sizeof(DATA_TYPE) + get_global_id(1) * input_stride_y + get_global_id(2) * input_stride_z;
+ __global uchar *output_addr = output_ptr + output_offset_first_element_in_bytes + x_offs * sizeof(DATA_TYPE) + get_global_id(1) * output_stride_y + get_global_id(2) * output_stride_z;
+
+ VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
+ data0 = floor(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input_addr));
- VSTORE(VEC_SIZE)
- (floor(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input.ptr)), 0, (__global DATA_TYPE *)output.ptr);
+ STORE_VECTOR_SELECT(data, DATA_TYPE, output_addr, VEC_SIZE, VEC_SIZE_LEFTOVER, VEC_SIZE_LEFTOVER != 0 && get_global_id(0) == 0);
}
+#endif // defined(VEC_SIZE) && defined(VEC_SIZE_LEFTOVER) \ No newline at end of file