diff options
author | Matthew Jackson <matthew.jackson@arm.com> | 2019-08-30 15:19:42 +0100 |
---|---|---|
committer | Matthew Jackson <matthew.jackson@arm.com> | 2019-09-02 13:54:07 +0000 |
commit | c2a60593436387d20ff142a619f4c3955a5cd41b (patch) | |
tree | 4b42d0e876e40b8439d826c75e57ca72303aaae6 /src/core/CL/cl_kernels | |
parent | c5f48adafede995cae6fcb2f44471c9bbcc8a125 (diff) | |
download | ComputeLibrary-c2a60593436387d20ff142a619f4c3955a5cd41b.tar.gz |
COMPMID-2639: CLPadLayer support for 4D padding
Add support for 4D padding to CLPadLayerKernel.
Add validation tests with 4D padding.
Change-Id: I5579cc441a155c03fa1d14c6e77ba8ec693a806d
Signed-off-by: Matthew Jackson <matthew.jackson@arm.com>
Reviewed-on: https://review.mlplatform.org/c/1847
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels')
-rw-r--r-- | src/core/CL/cl_kernels/pad_layer.cl | 32 |
1 files changed, 23 insertions, 9 deletions
diff --git a/src/core/CL/cl_kernels/pad_layer.cl b/src/core/CL/cl_kernels/pad_layer.cl index ace2f0d3a0..fac97d25d9 100644 --- a/src/core/CL/cl_kernels/pad_layer.cl +++ b/src/core/CL/cl_kernels/pad_layer.cl @@ -53,12 +53,15 @@ * @note Data type to use for the select instruction must be passed using the -DSELECT_DT compile flag, e.g. -DSELECT_DT=float * @note In case pad left is more than the vector size, the number of threads to skil alond the X axis must be passed using the * -DTHREADS_TO_SKIP_X compile flag, e.g. -DTHREADS_TO_SKIP_X=1. This is defined as (PAD_LEFT / VEC_SIZE) - * @note In pad also needs to be added to the top of the tensor, the following compile flags must be passed at compile time: + * @note If pad also needs to be added to the top of the tensor, the following compile flags must be passed at compile time: * -# -DPAD_TOP: Pad to add to the top of the input tensor (e.g. -DPAD_TOP=3) * -# -DSRC_HEIGHT: Input tensor's height (e.g. -DSRC_HEIGHT=127) - * @note In pad also needs to be added to the depth of the tensor, the following compile flags must be passed at compile time: + * @note If pad also needs to be added to the depth of the tensor, the following compile flags must be passed at compile time: * -# -DPAD_NEAR: Pad to add before the first plane of the input tensor (e.g. -DPAD_NEAR=3) * -# -DSRC_DEPTH: Input tensor's depth (e.g. -DSRC_DEPTH=32) + * @note If pad also needs to be added to the batch of the tensor, the following compile flags must be passed at compile time: + * -# -DPAD_BTOP: Pad to add before the first batch of the input tensor (e.g. -DPAD_BTOP=3) + * -# -DSRC_BATCH: Input tensor's batch size (e.g. -DSRC_BATCH=4) * * @param[in] src_ptr Pointer to the source image. Supported data types: U8, S8, QASYMM8, U16, S16, U32, S32, F16, F32 * @param[in] src_stride_x Stride of the source image in X dimension (in bytes) @@ -78,22 +81,30 @@ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image */ __kernel void pad_layer(TENSOR3D_DECLARATION(src), - TENSOR3D_DECLARATION(dst)) + TENSOR3D_DECLARATION(dst), + uint batch) { const int x = get_global_id(0); const int y = get_global_id(1); const int z = get_global_id(2); +#if defined(PAD_BTOP) || defined(PAD_NEAR) + uint cond = 0; +#if defined(PAD_BTOP) + cond |= batch < PAD_BTOP || batch >= (PAD_BTOP + SRC_BATCH); +#endif // defined(PAD_BTOP) + #if defined(PAD_NEAR) - if(z < PAD_NEAR || z >= (PAD_NEAR + SRC_DEPTH)) + cond |= z < PAD_NEAR || z >= (PAD_NEAR + SRC_DEPTH); +#endif // defined(PAD_NEAR) + if(cond) { Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst); - VSTORE(VEC_SIZE) - ((VEC_TYPE)CONST_VAL, 0, (__global DATA_TYPE *)dst.ptr); + VSTORE(VEC_SIZE)((VEC_TYPE)CONST_VAL, 0, (__global DATA_TYPE *)dst.ptr); } else { -#endif // defined(PAD_NEAR) +#endif // defined(PAD_BTOP) || defined(PAD_NEAR) Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src); Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst); @@ -115,6 +126,9 @@ __kernel void pad_layer(TENSOR3D_DECLARATION(src), #if defined(PAD_NEAR) src.ptr -= PAD_NEAR * src_step_z; #endif // defined(PAD_NEAR) +#if defined(PAD_BTOP) + src.ptr -= PAD_BTOP * SRC_DEPTH * src_step_z; +#endif // defined(PAD_BTOP) VEC_TYPE src_vals = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)src.ptr); @@ -125,8 +139,8 @@ __kernel void pad_layer(TENSOR3D_DECLARATION(src), #endif // defined(PAD_TOP) VSTORE(VEC_SIZE) (select(src_vals, (VEC_TYPE)CONST_VAL, CONVERT_SELECT(cond)), 0, (__global DATA_TYPE *)dst.ptr); -#if defined(PAD_NEAR) +#if defined(PAD_NEAR) || defined(PAD_BTOP) } -#endif // defined(PAD_NEAR) +#endif // defined(PAD_NEAR) || defined(PAD_BTOP) } #endif // defined(DATA_TYPE) && defined(VEC_SIZE) && defined(CONST_VAL) |