aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorMatthew Jackson <matthew.jackson@arm.com>2019-08-30 15:19:42 +0100
committerMatthew Jackson <matthew.jackson@arm.com>2019-09-02 13:54:07 +0000
commitc2a60593436387d20ff142a619f4c3955a5cd41b (patch)
tree4b42d0e876e40b8439d826c75e57ca72303aaae6 /src
parentc5f48adafede995cae6fcb2f44471c9bbcc8a125 (diff)
downloadComputeLibrary-c2a60593436387d20ff142a619f4c3955a5cd41b.tar.gz
COMPMID-2639: CLPadLayer support for 4D padding
Add support for 4D padding to CLPadLayerKernel. Add validation tests with 4D padding. Change-Id: I5579cc441a155c03fa1d14c6e77ba8ec693a806d Signed-off-by: Matthew Jackson <matthew.jackson@arm.com> Reviewed-on: https://review.mlplatform.org/c/1847 Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Giorgio Arena <giorgio.arena@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src')
-rw-r--r--src/core/CL/cl_kernels/pad_layer.cl32
-rw-r--r--src/core/CL/kernels/CLPadLayerKernel.cpp8
-rw-r--r--src/runtime/CL/functions/CLPadLayer.cpp2
3 files changed, 32 insertions, 10 deletions
diff --git a/src/core/CL/cl_kernels/pad_layer.cl b/src/core/CL/cl_kernels/pad_layer.cl
index ace2f0d3a0..fac97d25d9 100644
--- a/src/core/CL/cl_kernels/pad_layer.cl
+++ b/src/core/CL/cl_kernels/pad_layer.cl
@@ -53,12 +53,15 @@
* @note Data type to use for the select instruction must be passed using the -DSELECT_DT compile flag, e.g. -DSELECT_DT=float
* @note In case pad left is more than the vector size, the number of threads to skil alond the X axis must be passed using the
* -DTHREADS_TO_SKIP_X compile flag, e.g. -DTHREADS_TO_SKIP_X=1. This is defined as (PAD_LEFT / VEC_SIZE)
- * @note In pad also needs to be added to the top of the tensor, the following compile flags must be passed at compile time:
+ * @note If pad also needs to be added to the top of the tensor, the following compile flags must be passed at compile time:
* -# -DPAD_TOP: Pad to add to the top of the input tensor (e.g. -DPAD_TOP=3)
* -# -DSRC_HEIGHT: Input tensor's height (e.g. -DSRC_HEIGHT=127)
- * @note In pad also needs to be added to the depth of the tensor, the following compile flags must be passed at compile time:
+ * @note If pad also needs to be added to the depth of the tensor, the following compile flags must be passed at compile time:
* -# -DPAD_NEAR: Pad to add before the first plane of the input tensor (e.g. -DPAD_NEAR=3)
* -# -DSRC_DEPTH: Input tensor's depth (e.g. -DSRC_DEPTH=32)
+ * @note If pad also needs to be added to the batch of the tensor, the following compile flags must be passed at compile time:
+ * -# -DPAD_BTOP: Pad to add before the first batch of the input tensor (e.g. -DPAD_BTOP=3)
+ * -# -DSRC_BATCH: Input tensor's batch size (e.g. -DSRC_BATCH=4)
*
* @param[in] src_ptr Pointer to the source image. Supported data types: U8, S8, QASYMM8, U16, S16, U32, S32, F16, F32
* @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
@@ -78,22 +81,30 @@
* @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image
*/
__kernel void pad_layer(TENSOR3D_DECLARATION(src),
- TENSOR3D_DECLARATION(dst))
+ TENSOR3D_DECLARATION(dst),
+ uint batch)
{
const int x = get_global_id(0);
const int y = get_global_id(1);
const int z = get_global_id(2);
+#if defined(PAD_BTOP) || defined(PAD_NEAR)
+ uint cond = 0;
+#if defined(PAD_BTOP)
+ cond |= batch < PAD_BTOP || batch >= (PAD_BTOP + SRC_BATCH);
+#endif // defined(PAD_BTOP)
+
#if defined(PAD_NEAR)
- if(z < PAD_NEAR || z >= (PAD_NEAR + SRC_DEPTH))
+ cond |= z < PAD_NEAR || z >= (PAD_NEAR + SRC_DEPTH);
+#endif // defined(PAD_NEAR)
+ if(cond)
{
Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst);
- VSTORE(VEC_SIZE)
- ((VEC_TYPE)CONST_VAL, 0, (__global DATA_TYPE *)dst.ptr);
+ VSTORE(VEC_SIZE)((VEC_TYPE)CONST_VAL, 0, (__global DATA_TYPE *)dst.ptr);
}
else
{
-#endif // defined(PAD_NEAR)
+#endif // defined(PAD_BTOP) || defined(PAD_NEAR)
Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src);
Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst);
@@ -115,6 +126,9 @@ __kernel void pad_layer(TENSOR3D_DECLARATION(src),
#if defined(PAD_NEAR)
src.ptr -= PAD_NEAR * src_step_z;
#endif // defined(PAD_NEAR)
+#if defined(PAD_BTOP)
+ src.ptr -= PAD_BTOP * SRC_DEPTH * src_step_z;
+#endif // defined(PAD_BTOP)
VEC_TYPE src_vals = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)src.ptr);
@@ -125,8 +139,8 @@ __kernel void pad_layer(TENSOR3D_DECLARATION(src),
#endif // defined(PAD_TOP)
VSTORE(VEC_SIZE)
(select(src_vals, (VEC_TYPE)CONST_VAL, CONVERT_SELECT(cond)), 0, (__global DATA_TYPE *)dst.ptr);
-#if defined(PAD_NEAR)
+#if defined(PAD_NEAR) || defined(PAD_BTOP)
}
-#endif // defined(PAD_NEAR)
+#endif // defined(PAD_NEAR) || defined(PAD_BTOP)
}
#endif // defined(DATA_TYPE) && defined(VEC_SIZE) && defined(CONST_VAL)
diff --git a/src/core/CL/kernels/CLPadLayerKernel.cpp b/src/core/CL/kernels/CLPadLayerKernel.cpp
index 5270e31bbe..52b65c39b1 100644
--- a/src/core/CL/kernels/CLPadLayerKernel.cpp
+++ b/src/core/CL/kernels/CLPadLayerKernel.cpp
@@ -99,6 +99,12 @@ void CLPadLayerKernel::configure(const ICLTensor *input, ICLTensor *output, cons
{
build_opts.add_option("-DPAD_NEAR=" + support::cpp11::to_string(padding.at(2).first));
build_opts.add_option("-DSRC_DEPTH=" + support::cpp11::to_string(input->info()->dimension(2)));
+
+ if(padding.size() > 3)
+ {
+ build_opts.add_option("-DPAD_BTOP=" + support::cpp11::to_string(padding.at(3).first));
+ build_opts.add_option("-DSRC_BATCH=" + support::cpp11::to_string(input->info()->dimension(3)));
+ }
}
}
@@ -130,11 +136,13 @@ void CLPadLayerKernel::run(const Window &window, cl::CommandQueue &queue)
Window slice_out = window.first_slice_window_3D();
Window slice_in = win_in.first_slice_window_3D();
+ unsigned int batch = 0;
do
{
unsigned int idx = 0;
add_3D_tensor_argument(idx, _input, slice_in);
add_3D_tensor_argument(idx, _output, slice_out);
+ add_argument<unsigned int>(idx, batch++);
enqueue(queue, *this, slice_out, lws_hint());
}
diff --git a/src/runtime/CL/functions/CLPadLayer.cpp b/src/runtime/CL/functions/CLPadLayer.cpp
index dcd0a5b1ad..88b1b77a0d 100644
--- a/src/runtime/CL/functions/CLPadLayer.cpp
+++ b/src/runtime/CL/functions/CLPadLayer.cpp
@@ -183,7 +183,6 @@ void CLPadLayer::configure(ICLTensor *input, ICLTensor *output, const PaddingLis
Status CLPadLayer::validate(const ITensorInfo *input, const ITensorInfo *output, const PaddingList &padding, PixelValue constant_value, PaddingMode mode)
{
ARM_COMPUTE_RETURN_ERROR_ON(padding.size() > input->num_dimensions());
-
TensorShape padded_shape = misc::shape_calculator::compute_padded_shape(input->tensor_shape(), padding);
// Use CLCopyKernel and CLMemsetKernel to validate all padding modes as this includes all of the shape and info validation.
@@ -213,6 +212,7 @@ Status CLPadLayer::validate(const ITensorInfo *input, const ITensorInfo *output,
{
case PaddingMode::CONSTANT:
{
+ ARM_COMPUTE_RETURN_ERROR_ON(padding.size() > 4);
ARM_COMPUTE_RETURN_ON_ERROR(CLPadLayerKernel::validate(input, output, padding, constant_value, mode));
break;
}