From c2a60593436387d20ff142a619f4c3955a5cd41b Mon Sep 17 00:00:00 2001 From: Matthew Jackson Date: Fri, 30 Aug 2019 15:19:42 +0100 Subject: COMPMID-2639: CLPadLayer support for 4D padding Add support for 4D padding to CLPadLayerKernel. Add validation tests with 4D padding. Change-Id: I5579cc441a155c03fa1d14c6e77ba8ec693a806d Signed-off-by: Matthew Jackson Reviewed-on: https://review.mlplatform.org/c/1847 Comments-Addressed: Arm Jenkins Reviewed-by: Giorgio Arena Tested-by: Arm Jenkins --- src/core/CL/cl_kernels/pad_layer.cl | 32 ++++++++++++++------ src/core/CL/kernels/CLPadLayerKernel.cpp | 8 +++++ src/runtime/CL/functions/CLPadLayer.cpp | 2 +- tests/validation/CL/PadLayer.cpp | 52 ++++++++++++++++++++++++-------- 4 files changed, 71 insertions(+), 23 deletions(-) diff --git a/src/core/CL/cl_kernels/pad_layer.cl b/src/core/CL/cl_kernels/pad_layer.cl index ace2f0d3a0..fac97d25d9 100644 --- a/src/core/CL/cl_kernels/pad_layer.cl +++ b/src/core/CL/cl_kernels/pad_layer.cl @@ -53,12 +53,15 @@ * @note Data type to use for the select instruction must be passed using the -DSELECT_DT compile flag, e.g. -DSELECT_DT=float * @note In case pad left is more than the vector size, the number of threads to skip along the X axis must be passed using the * -DTHREADS_TO_SKIP_X compile flag, e.g. -DTHREADS_TO_SKIP_X=1. This is defined as (PAD_LEFT / VEC_SIZE) - * @note In pad also needs to be added to the top of the tensor, the following compile flags must be passed at compile time: + * @note If pad also needs to be added to the top of the tensor, the following compile flags must be passed at compile time: * -# -DPAD_TOP: Pad to add to the top of the input tensor (e.g. -DPAD_TOP=3) * -# -DSRC_HEIGHT: Input tensor's height (e.g. 
-DSRC_HEIGHT=127) - * @note In pad also needs to be added to the depth of the tensor, the following compile flags must be passed at compile time: + * @note If pad also needs to be added to the depth of the tensor, the following compile flags must be passed at compile time: * -# -DPAD_NEAR: Pad to add before the first plane of the input tensor (e.g. -DPAD_NEAR=3) * -# -DSRC_DEPTH: Input tensor's depth (e.g. -DSRC_DEPTH=32) + * @note If pad also needs to be added to the batch of the tensor, the following compile flags must be passed at compile time: + * -# -DPAD_BTOP: Pad to add before the first batch of the input tensor (e.g. -DPAD_BTOP=3) + * -# -DSRC_BATCH: Input tensor's batch size (e.g. -DSRC_BATCH=4) * * @param[in] src_ptr Pointer to the source image. Supported data types: U8, S8, QASYMM8, U16, S16, U32, S32, F16, F32 * @param[in] src_stride_x Stride of the source image in X dimension (in bytes) @@ -78,22 +81,30 @@ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image */ __kernel void pad_layer(TENSOR3D_DECLARATION(src), - TENSOR3D_DECLARATION(dst)) + TENSOR3D_DECLARATION(dst), + uint batch) { const int x = get_global_id(0); const int y = get_global_id(1); const int z = get_global_id(2); +#if defined(PAD_BTOP) || defined(PAD_NEAR) + uint cond = 0; +#if defined(PAD_BTOP) + cond |= batch < PAD_BTOP || batch >= (PAD_BTOP + SRC_BATCH); +#endif // defined(PAD_BTOP) + #if defined(PAD_NEAR) - if(z < PAD_NEAR || z >= (PAD_NEAR + SRC_DEPTH)) + cond |= z < PAD_NEAR || z >= (PAD_NEAR + SRC_DEPTH); +#endif // defined(PAD_NEAR) + if(cond) { Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst); - VSTORE(VEC_SIZE) - ((VEC_TYPE)CONST_VAL, 0, (__global DATA_TYPE *)dst.ptr); + VSTORE(VEC_SIZE)((VEC_TYPE)CONST_VAL, 0, (__global DATA_TYPE *)dst.ptr); } else { -#endif // defined(PAD_NEAR) +#endif // defined(PAD_BTOP) || defined(PAD_NEAR) Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src); Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst); @@ 
-115,6 +126,9 @@ __kernel void pad_layer(TENSOR3D_DECLARATION(src), #if defined(PAD_NEAR) src.ptr -= PAD_NEAR * src_step_z; #endif // defined(PAD_NEAR) +#if defined(PAD_BTOP) + src.ptr -= PAD_BTOP * SRC_DEPTH * src_step_z; +#endif // defined(PAD_BTOP) VEC_TYPE src_vals = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)src.ptr); @@ -125,8 +139,8 @@ __kernel void pad_layer(TENSOR3D_DECLARATION(src), #endif // defined(PAD_TOP) VSTORE(VEC_SIZE) (select(src_vals, (VEC_TYPE)CONST_VAL, CONVERT_SELECT(cond)), 0, (__global DATA_TYPE *)dst.ptr); -#if defined(PAD_NEAR) +#if defined(PAD_NEAR) || defined(PAD_BTOP) } -#endif // defined(PAD_NEAR) +#endif // defined(PAD_NEAR) || defined(PAD_BTOP) } #endif // defined(DATA_TYPE) && defined(VEC_SIZE) && defined(CONST_VAL) diff --git a/src/core/CL/kernels/CLPadLayerKernel.cpp b/src/core/CL/kernels/CLPadLayerKernel.cpp index 5270e31bbe..52b65c39b1 100644 --- a/src/core/CL/kernels/CLPadLayerKernel.cpp +++ b/src/core/CL/kernels/CLPadLayerKernel.cpp @@ -99,6 +99,12 @@ void CLPadLayerKernel::configure(const ICLTensor *input, ICLTensor *output, cons { build_opts.add_option("-DPAD_NEAR=" + support::cpp11::to_string(padding.at(2).first)); build_opts.add_option("-DSRC_DEPTH=" + support::cpp11::to_string(input->info()->dimension(2))); + + if(padding.size() > 3) + { + build_opts.add_option("-DPAD_BTOP=" + support::cpp11::to_string(padding.at(3).first)); + build_opts.add_option("-DSRC_BATCH=" + support::cpp11::to_string(input->info()->dimension(3))); + } } } @@ -130,11 +136,13 @@ void CLPadLayerKernel::run(const Window &window, cl::CommandQueue &queue) Window slice_out = window.first_slice_window_3D(); Window slice_in = win_in.first_slice_window_3D(); + unsigned int batch = 0; do { unsigned int idx = 0; add_3D_tensor_argument(idx, _input, slice_in); add_3D_tensor_argument(idx, _output, slice_out); + add_argument(idx, batch++); enqueue(queue, *this, slice_out, lws_hint()); } diff --git a/src/runtime/CL/functions/CLPadLayer.cpp 
b/src/runtime/CL/functions/CLPadLayer.cpp index dcd0a5b1ad..88b1b77a0d 100644 --- a/src/runtime/CL/functions/CLPadLayer.cpp +++ b/src/runtime/CL/functions/CLPadLayer.cpp @@ -183,7 +183,6 @@ void CLPadLayer::configure(ICLTensor *input, ICLTensor *output, const PaddingLis Status CLPadLayer::validate(const ITensorInfo *input, const ITensorInfo *output, const PaddingList &padding, PixelValue constant_value, PaddingMode mode) { ARM_COMPUTE_RETURN_ERROR_ON(padding.size() > input->num_dimensions()); - TensorShape padded_shape = misc::shape_calculator::compute_padded_shape(input->tensor_shape(), padding); // Use CLCopyKernel and CLMemsetKernel to validate all padding modes as this includes all of the shape and info validation. @@ -213,6 +212,7 @@ Status CLPadLayer::validate(const ITensorInfo *input, const ITensorInfo *output, { case PaddingMode::CONSTANT: { + ARM_COMPUTE_RETURN_ERROR_ON(padding.size() > 4); ARM_COMPUTE_RETURN_ON_ERROR(CLPadLayerKernel::validate(input, output, padding, constant_value, mode)); break; } diff --git a/tests/validation/CL/PadLayer.cpp b/tests/validation/CL/PadLayer.cpp index 518f541b8b..431b7ebd19 100644 --- a/tests/validation/CL/PadLayer.cpp +++ b/tests/validation/CL/PadLayer.cpp @@ -40,7 +40,8 @@ namespace validation { namespace { -const auto PaddingSizesDataset = framework::dataset::make("PaddingSize", { PaddingList{ { 0, 0 } }, +const auto PaddingSizesDataset3D = framework::dataset::make("PaddingSize", +{ PaddingList{ { 0, 0 } }, PaddingList{ { 1, 1 } }, PaddingList{ { 33, 33 } }, PaddingList{ { 1, 1 }, { 5, 5 } }, @@ -48,6 +49,12 @@ const auto PaddingSizesDataset = framework::dataset::make("PaddingSize", { Paddi PaddingList{ { 0, 0 }, { 1, 0 }, { 0, 1 } }, PaddingList{ { 0, 0 }, { 0, 0 }, { 0, 0 } } }); +const auto PaddingSizesDataset4D = framework::dataset::make("PaddingSize", +{ PaddingList{ { 1, 1 }, { 1, 0 }, { 1, 1 }, { 0, 0 } }, + PaddingList{ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 1, 1 } }, + PaddingList{ { 0, 1 }, { 1, 0 }, { 2, 2 }, { 1, 
0 } }, + PaddingList{ { 1, 1 }, { 1, 1 }, { 1, 1 }, { 3, 3 } } +}); } // namespace TEST_SUITE(CL) @@ -57,9 +64,10 @@ TEST_SUITE(PadLayer) // clang-format off DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( - framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching data type input/output + framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching data type input/output TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching shapes with padding TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid number of pad dimensions TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching shapes dimension TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U), 1, DataType::F32) // Invalid padding list @@ -68,29 +76,33 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( TensorInfo(TensorShape(28U, 11U, 2U), 1, DataType::F32), TensorInfo(TensorShape(29U, 17U, 2U), 1, DataType::F32), TensorInfo(TensorShape(29U, 15U, 4U, 3U), 1, DataType::F32), + TensorInfo(TensorShape(29U, 15U, 4U, 3U), 1, DataType::F32), TensorInfo(TensorShape(29U, 17U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U), 1, DataType::F32) })), framework::dataset::make("PaddingSize", { PaddingList{{0, 0}}, PaddingList{{1, 1}}, PaddingList{{1, 1}, {2, 2}}, + PaddingList{{1,1}, {1,1}, {1,1}, {1,1}}, PaddingList{{1,1}, {1,1}, {1,1}}, PaddingList{{1, 1}, {2, 2}}, PaddingList{{0,0}, {0,0}, {1,1}} })), framework::dataset::make("PaddingMode", { PaddingMode::CONSTANT, + PaddingMode::CONSTANT, PaddingMode::CONSTANT, PaddingMode::CONSTANT, PaddingMode::SYMMETRIC, PaddingMode::REFLECT, PaddingMode::REFLECT -})), + })), framework::dataset::make("Expected", { false, - false, - true, - false, - true, - false })), + false, + true, + 
false, + false, + true, + false })), input_info, output_info, padding, mode, expected) { ARM_COMPUTE_EXPECT(bool(CLPadLayer::validate(&input_info.clone()->set_is_resizable(true), &output_info.clone()->set_is_resizable(true), padding, PixelValue(), mode)) == expected, framework::LogLevel::ERRORS); @@ -106,14 +118,21 @@ TEST_SUITE(Float) TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, CLPaddingFixture, framework::DatasetMode::ALL, - combine(combine(combine(datasets::Small3DShapes(), framework::dataset::make("DataType", { DataType::F32 })), PaddingSizesDataset), + combine(combine(combine(datasets::Small3DShapes(), framework::dataset::make("DataType", { DataType::F32 })), PaddingSizesDataset3D), framework::dataset::make("PaddingMode", { PaddingMode::CONSTANT, PaddingMode::REFLECT, PaddingMode::SYMMETRIC }))) { // Validate output validate(CLAccessor(_target), _reference); } +FIXTURE_DATA_TEST_CASE(RunSmall4D, CLPaddingFixture, framework::DatasetMode::ALL, + combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", { DataType::F32 })), PaddingSizesDataset4D), + framework::dataset::make("PaddingMode", { PaddingMode::CONSTANT }))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} FIXTURE_DATA_TEST_CASE(RunLarge, CLPaddingFixture, framework::DatasetMode::NIGHTLY, - combine(combine(combine(datasets::Large3DShapes(), framework::dataset::make("DataType", { DataType::F32 })), PaddingSizesDataset), + combine(combine(combine(datasets::Large3DShapes(), framework::dataset::make("DataType", { DataType::F32 })), PaddingSizesDataset3D), framework::dataset::make("PaddingMode", { PaddingMode::CONSTANT, PaddingMode::REFLECT, PaddingMode::SYMMETRIC }))) { // Validate output @@ -123,7 +142,7 @@ TEST_SUITE_END() // FP32 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunLarge, CLPaddingFixture, framework::DatasetMode::NIGHTLY, - combine(combine(combine(datasets::Large3DShapes(), framework::dataset::make("DataType", { DataType::F16 })), 
PaddingSizesDataset), + combine(combine(combine(datasets::Large3DShapes(), framework::dataset::make("DataType", { DataType::F16 })), PaddingSizesDataset3D), framework::dataset::make("PaddingMode", { PaddingMode::CONSTANT, PaddingMode::REFLECT }))) { // Validate output @@ -135,14 +154,21 @@ TEST_SUITE_END() // Float TEST_SUITE(Quantized) TEST_SUITE(QASYMM8) FIXTURE_DATA_TEST_CASE(RunSmall, CLPaddingFixture, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(datasets::Small3DShapes(), framework::dataset::make("DataType", { DataType::QASYMM8 })), PaddingSizesDataset), + combine(combine(combine(datasets::Small3DShapes(), framework::dataset::make("DataType", { DataType::QASYMM8 })), PaddingSizesDataset3D), framework::dataset::make("PaddingMode", { PaddingMode::CONSTANT, PaddingMode::REFLECT }))) { // Validate output validate(CLAccessor(_target), _reference); } +FIXTURE_DATA_TEST_CASE(RunSmall4D, CLPaddingFixture, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", { DataType::QASYMM8 })), PaddingSizesDataset4D), + framework::dataset::make("PaddingMode", { PaddingMode::CONSTANT }))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} FIXTURE_DATA_TEST_CASE(RunLarge, CLPaddingFixture, framework::DatasetMode::NIGHTLY, - combine(combine(combine(datasets::Large3DShapes(), framework::dataset::make("DataType", { DataType::QASYMM8 })), PaddingSizesDataset), + combine(combine(combine(datasets::Large3DShapes(), framework::dataset::make("DataType", { DataType::QASYMM8 })), PaddingSizesDataset3D), framework::dataset::make("PaddingMode", { PaddingMode::CONSTANT, PaddingMode::REFLECT }))) { // Validate output -- cgit v1.2.1