author     Georgios Pinitas <georgios.pinitas@arm.com>  2017-06-19 16:11:53 +0100
committer  Anthony Barbier <anthony.barbier@arm.com>    2018-09-17 14:14:20 +0100
commit     ce093143ec7b554edefc533c90e45c80946cde51 (patch)
tree       1e4aa13ba3fe10c93ca42e2f5477bd2c4888324e
parent     4c2938ed50a78753bfbdbb2f3cbf43f5fed779f9 (diff)
download   ComputeLibrary-ce093143ec7b554edefc533c90e45c80946cde51.tar.gz
COMPMID-403: Add support for 7x7 pooling on CL.
Change-Id: I3c2c8d7e8e61d7737170cb1568900ce4ac337068
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/78181
Reviewed-by: Michele DiGiorgio <michele.digiorgio@arm.com>
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Moritz Pflanzer <moritz.pflanzer@arm.com>
-rw-r--r--  arm_compute/core/CL/kernels/CLPoolingLayerKernel.h    1
-rw-r--r--  src/core/CL/CLKernelLibrary.cpp                       1
-rw-r--r--  src/core/CL/cl_kernels/pooling_layer.cl               91
-rw-r--r--  tests/Utils.h                                         40
-rw-r--r--  src/core/CL/kernels/CLPoolingLayerKernel.cpp          23
-rw-r--r--  tests/dataset/PoolingTypesDataset.h                   55
-rw-r--r--  tests/dataset/ShapeDatasets.h                         2
-rw-r--r--  tests/dataset/ThresholdDataset.h                      4
-rw-r--r--  tests/validation/CL/PoolingLayer.cpp                  119
-rw-r--r--  tests/validation/Datasets.h                           7
-rw-r--r--  tests/validation/NEON/PoolingLayer.cpp (renamed from tests/validation/NEON/Pooling/PoolingLayer.cpp)  2
-rw-r--r--  tests/validation/TensorOperations.h                   84
12 files changed, 367 insertions, 62 deletions
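
For context, a minimal host-side sketch of how the new 7x7 support would be exercised through CLPoolingLayer; shapes and pooling parameters here are illustrative, and an initialised CL context is assumed:

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLPoolingLayer.h"

using namespace arm_compute;

void run_pool7_example()
{
    CLScheduler::get().default_init();

    // Illustrative shapes: a 7x7 spatial input reduced to 1x1 by a 7x7 average pool
    CLTensor src{}, dst{};
    src.allocator()->init(TensorInfo(TensorShape(7U, 7U, 16U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(1U, 1U, 16U), 1, DataType::F32));

    // Pool size 7 is now accepted alongside 2 and 3
    CLPoolingLayer pool;
    pool.configure(&src, &dst, PoolingLayerInfo(PoolingType::AVG, 7, PadStrideInfo(1, 1, 0, 0)));

    src.allocator()->allocate();
    dst.allocator()->allocate();

    pool.run();
    CLScheduler::get().sync();
}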
diff --git a/arm_compute/core/CL/kernels/CLPoolingLayerKernel.h b/arm_compute/core/CL/kernels/CLPoolingLayerKernel.h
index 546a40b15e..6c5091ff9e 100644
--- a/arm_compute/core/CL/kernels/CLPoolingLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLPoolingLayerKernel.h
@@ -52,6 +52,7 @@ public:
* @param[in] input Source tensor. Data types supported: F16, F32.
* @param[out] output Destination tensor. Data types supported: Same as @p input.
* @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
+ * Supported pooling sizes : 2, 3 and 7
*/
void configure(const ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info);
diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp
index 15a5d90835..3070d4817e 100644
--- a/src/core/CL/CLKernelLibrary.cpp
+++ b/src/core/CL/CLKernelLibrary.cpp
@@ -219,6 +219,7 @@ const std::map<std::string, std::string> CLKernelLibrary::_kernel_program_map =
{ "pixelwise_mul_int", "pixelwise_mul_int.cl" },
{ "pooling_layer_2", "pooling_layer.cl" },
{ "pooling_layer_3", "pooling_layer.cl" },
+ { "pooling_layer_7", "pooling_layer.cl" },
{ "remap_nearest_neighbour", "remap.cl" },
{ "remap_bilinear", "remap.cl" },
{ "reshape_to_columns", "convolution_layer.cl" },
diff --git a/src/core/CL/cl_kernels/pooling_layer.cl b/src/core/CL/cl_kernels/pooling_layer.cl
index 1902df9b7d..6bdb174235 100644
--- a/src/core/CL/cl_kernels/pooling_layer.cl
+++ b/src/core/CL/cl_kernels/pooling_layer.cl
@@ -41,7 +41,6 @@ float calculate_avg_scale(const int pool_size, const int upper_bound_w, const in
/** Performs a pooling function of pool size equal to 2.
*
- * @note Pooling stride must be passed using -DPOOL_STRIDE e.g -DPOOL_STRIDE=2. Supported strides are 1,2,3
* @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are F16, F32;
* @note In case of average pooling -DPOOL_AVG must be provided otherwise max pooling will be performed.
*
@@ -88,7 +87,7 @@ __kernel void pooling_layer_2(
data0 = POOL_OP(data0, data1);
DATA_TYPE res = POOL_OP(data0.s0, data0.s1);
- // Divide by 4 in case of average pooling
+ // Divide by pool region in case of average pooling
#ifdef POOL_AVG
res *= calculate_avg_scale(2, max_dims.x, max_dims.y, paddings.x, paddings.y, strides.x, strides.y);
#endif
@@ -99,7 +98,6 @@ __kernel void pooling_layer_2(
/** Performs a pooling function of pool size equal to 3.
*
- * @note Pooling stride must be passed using -DPOOL_STRIDE e.g -DPOOL_STRIDE=2. Supported strides are 1,2,3
* @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are F16, F32;
* @note In case of average pooling -DPOOL_AVG must be provided otherwise max pooling will be performed.
*
@@ -149,7 +147,7 @@ __kernel void pooling_layer_3(
data0 = POOL_OP(data0, data2);
DATA_TYPE res = POOL_OP(POOL_OP(data0.s0, data0.s1), data0.s2);
- // Divide by 4 in case of average pooling
+ // Divide by pool region in case of average pooling
#ifdef POOL_AVG
res *= calculate_avg_scale(3, max_dims.x, max_dims.y, paddings.x, paddings.y, strides.x, strides.y);
#endif
@@ -157,3 +155,88 @@ __kernel void pooling_layer_3(
// Store result
*(__global DATA_TYPE *)output.ptr = res;
}
+
+/** Performs a pooling function of pool size equal to 7.
+ *
+ * @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are F16, F32;
+ * @note In case of average pooling -DPOOL_AVG must be provided otherwise max pooling will be performed.
+ *
+ * @param[in] input_ptr Pointer to the source image. Supported data types: F16, F32
+ * @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
+ * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
+ * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
+ * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
+ * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image
+ * @param[out] output_ptr Pointer to the destination image. Supported data types: F16, F32
+ * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes)
+ * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes)
+ * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes)
+ * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
+ * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image
+ * @param[in] max_dims The maximum index that can be accessed in x and y dimension (width + pad)
+ * @param[in] strides The pooling operation strides in each dimension
+ * @param[in] paddings The pooling operation paddings in each dimension
+ */
+__kernel void pooling_layer_7(
+ TENSOR3D_DECLARATION(input),
+ TENSOR3D_DECLARATION(output)
+#ifdef POOL_AVG
+ ,
+ int2 max_dims, int2 strides, int2 paddings
+#endif
+)
+{
+ // Get pixels pointer
+ Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
+ Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
+
+ // Load data
+ VEC_DATA_TYPE(DATA_TYPE, 8)
+ data0 = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 0, 0));
+ VEC_DATA_TYPE(DATA_TYPE, 8)
+ data1 = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 1, 0));
+ VEC_DATA_TYPE(DATA_TYPE, 8)
+ data2 = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 2, 0));
+ VEC_DATA_TYPE(DATA_TYPE, 8)
+ data3 = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 3, 0));
+ VEC_DATA_TYPE(DATA_TYPE, 8)
+ data4 = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 4, 0));
+ VEC_DATA_TYPE(DATA_TYPE, 8)
+ data5 = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 5, 0));
+ VEC_DATA_TYPE(DATA_TYPE, 8)
+ data6 = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 6, 0));
+
+ // Pool operation of all rows
+ data0 = POOL_OP(data0, data1);
+ data2 = POOL_OP(data2, data3);
+ data4 = POOL_OP(data4, data5);
+ data0 = POOL_OP(data0, data2);
+ data4 = POOL_OP(data4, data6);
+ data0 = POOL_OP(data0, data4);
+
+ // Set last element
+#ifdef POOL_AVG
+ data0.s7 = 0;
+#else
+ data0.s7 = data0.s6;
+#endif
+
+ // Reduce result
+ VEC_DATA_TYPE(DATA_TYPE, 4)
+ reduce4 = POOL_OP(data0.s0123, data0.s4567);
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ reduce2 = POOL_OP(reduce4.s01, reduce4.s23);
+ DATA_TYPE res = POOL_OP(reduce2.s0, reduce2.s1);
+
+ // Divide by pool region in case of average pooling
+#ifdef POOL_AVG
+ res *= calculate_avg_scale(7, max_dims.x, max_dims.y, paddings.x, paddings.y, strides.x, strides.y);
+#endif
+
+ // Store result
+ *(__global DATA_TYPE *)output.ptr = res;
+}
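
The new kernel loads eight elements per row with vload8 even though only seven belong to the pooling window, so the spare lane is neutralised before the pairwise reduction: zeroed for average pooling, duplicated from lane 6 for max pooling. A scalar sketch of the max-pooling row reduction, purely for illustration:

#include <algorithm>
#include <array>

// Illustrative scalar mimic of the vector reduction in pooling_layer_7 (max case).
float reduce_row_max(std::array<float, 8> lanes)
{
    lanes[7] = lanes[6];                           // neutralise the extra lane loaded by vload8
    const float r0 = std::max(lanes[0], lanes[4]); // POOL_OP(data0.s0123, data0.s4567), lane by lane
    const float r1 = std::max(lanes[1], lanes[5]);
    const float r2 = std::max(lanes[2], lanes[6]);
    const float r3 = std::max(lanes[3], lanes[7]);
    const float s0 = std::max(r0, r2);             // POOL_OP(reduce4.s01, reduce4.s23)
    const float s1 = std::max(r1, r3);
    return std::max(s0, s1);                       // final POOL_OP(reduce2.s0, reduce2.s1)
}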
diff --git a/src/core/CL/kernels/CLPoolingLayerKernel.cpp b/src/core/CL/kernels/CLPoolingLayerKernel.cpp
index dc5ae4ec7a..7648025caa 100644
--- a/src/core/CL/kernels/CLPoolingLayerKernel.cpp
+++ b/src/core/CL/kernels/CLPoolingLayerKernel.cpp
@@ -65,10 +65,13 @@ void CLPoolingLayerKernel::configure(const ICLTensor *input, ICLTensor *output,
std::tie(pool_pad_x, pool_pad_y) = pad_stride_info.pad();
std::tie(pool_stride_x, pool_stride_y) = pad_stride_info.stride();
+ static const std::set<int> supported_pool_sizes = { 2, 3, 7 };
+ ARM_COMPUTE_UNUSED(supported_pool_sizes);
+
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F16, DataType::F32);
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_ERROR_ON(2 != pool_size && 3 != pool_size);
+ ARM_COMPUTE_ERROR_ON(supported_pool_sizes.find(pool_size) == supported_pool_sizes.end());
ARM_COMPUTE_ERROR_ON(pool_pad_x >= pool_size || pool_pad_y >= pool_size);
// Check output dimensions
@@ -82,10 +85,11 @@ void CLPoolingLayerKernel::configure(const ICLTensor *input, ICLTensor *output,
ARM_COMPUTE_UNUSED(pooled_h);
ARM_COMPUTE_ERROR_ON((output->info()->dimension(0) != pooled_w) || (output->info()->dimension(1) != pooled_h));
- const int input_width = input->info()->dimension(0);
- const int input_height = input->info()->dimension(1);
- const int upper_bound_w = ((pooled_w - 1) * pool_stride_x - pool_pad_x + pool_size) - input_width;
- const int upper_bound_h = ((pooled_h - 1) * pool_stride_y - pool_pad_y + pool_size) - input_height;
+ const int num_elements_read_per_iteration = (pool_size == 7) ? 8 : pool_size;
+ const int input_width = input->info()->dimension(0);
+ const int input_height = input->info()->dimension(1);
+ const int upper_bound_w = ((pooled_w - 1) * pool_stride_x - pool_pad_x + num_elements_read_per_iteration) - input_width;
+ const int upper_bound_h = ((pooled_h - 1) * pool_stride_y - pool_pad_y + pool_size) - input_height;
// Set instance variables
_input = input;
@@ -138,17 +142,12 @@ void CLPoolingLayerKernel::configure(const ICLTensor *input, ICLTensor *output,
}
// Configure kernel window
- const unsigned int num_elems_processed_per_iteration = 1;
-
- Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration));
-
+ const unsigned int num_elems_processed_per_iteration = 1;
+ Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration));
AccessWindowStatic input_access(input->info(), -pool_pad_x, -pool_pad_y, input_width + _border_size.right, input_height + _border_size.bottom);
AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
-
update_window_and_padding(win, input_access, output_access);
-
output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
-
ICLKernel::configure(win);
}
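
Because the 7x7 path reads eight contiguous values per vload8, the horizontal read bound in configure() is derived from the number of elements actually read rather than from the pooling window width. A small sketch of that arithmetic (names local to the sketch):

int upper_bound_w_for(int pool_size, int pooled_w, int pool_stride_x, int pool_pad_x, int input_width)
{
    // 8 elements are fetched per iteration for a 7-wide window; otherwise read width == pool size
    const int num_elements_read_per_iteration = (pool_size == 7) ? 8 : pool_size;
    return (pooled_w - 1) * pool_stride_x - pool_pad_x + num_elements_read_per_iteration - input_width;
}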
diff --git a/tests/Utils.h b/tests/Utils.h
index 53f749df48..f3622cafaa 100644
--- a/tests/Utils.h
+++ b/tests/Utils.h
@@ -615,13 +615,47 @@ inline int coord2index(const TensorShape &shape, const Coordinates &coord)
return index;
}
+/** Check if Coordinates dimensionality can match the respective shape one.
+ *
+ * @param coords Coordinates
+ * @param shape Shape to match dimensionality
+ *
+ * @return True if Coordinates can match the dimensionality of the shape else false.
+ */
+inline bool match_shape(Coordinates &coords, const TensorShape &shape)
+{
+ auto check_nz = [](unsigned int i)
+ {
+ return i != 0;
+ };
+
+ unsigned int coords_dims = coords.num_dimensions();
+ unsigned int shape_dims = shape.num_dimensions();
+
+ // Increase coordinates scenario
+ if(coords_dims < shape_dims)
+ {
+ coords.set_num_dimensions(shape_dims);
+ return true;
+ }
+ // Decrease coordinates scenario
+ if(coords_dims > shape_dims && !std::any_of(coords.begin() + shape_dims, coords.end(), check_nz))
+ {
+ coords.set_num_dimensions(shape_dims);
+ return true;
+ }
+
+ return (coords_dims == shape_dims);
+}
+
/** Check if a coordinate is within a valid region */
inline bool is_in_valid_region(const ValidRegion &valid_region, const Coordinates &coord)
{
- ARM_COMPUTE_ERROR_ON_MSG(valid_region.shape.num_dimensions() != coord.num_dimensions(), "Shapes of valid region and coordinates do not agree");
- for(int d = 0; static_cast<size_t>(d) < coord.num_dimensions(); ++d)
+ Coordinates coords(coord);
+ ARM_COMPUTE_ERROR_ON_MSG(!match_shape(coords, valid_region.shape), "Shapes of valid region and coordinates do not agree");
+ for(int d = 0; static_cast<size_t>(d) < coords.num_dimensions(); ++d)
{
- if(coord[d] < valid_region.start(d) || coord[d] >= valid_region.end(d))
+ if(coords[d] < valid_region.start(d) || coords[d] >= valid_region.end(d))
{
return false;
}
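
match_shape() lets is_in_valid_region() accept coordinates whose dimensionality differs from the valid region's shape, as long as the mismatch is only trailing zero dimensions. A hypothetical illustration of its behaviour:

Coordinates coords(3, 4);                   // 2D coordinate
const TensorShape shape(27U, 13U, 2U);      // 3D shape
// coords has fewer dimensions than shape, so it is widened and the check succeeds.
const bool ok = match_shape(coords, shape); // true; coords.num_dimensions() is now 3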
diff --git a/tests/dataset/PoolingTypesDataset.h b/tests/dataset/PoolingTypesDataset.h
new file mode 100644
index 0000000000..c78a20b809
--- /dev/null
+++ b/tests/dataset/PoolingTypesDataset.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_DATASET_POOLING_TYPE_DATASET_H__
+#define __ARM_COMPUTE_TEST_DATASET_POOLING_TYPE_DATASET_H__
+
+#include "arm_compute/core/Types.h"
+#include "dataset/GenericDataset.h"
+
+#ifdef BOOST
+#include "boost_wrapper.h"
+#endif
+
+namespace arm_compute
+{
+namespace test
+{
+/** Data set containing all possible pooling types.
+ *
+ * Can be used as input for Boost data test cases to automatically run a test
+ * case on all pooling types.
+ */
+class PoolingTypes final : public GenericDataset<PoolingType, 2>
+{
+public:
+ PoolingTypes()
+ : GenericDataset{ PoolingType::MAX, PoolingType::AVG }
+ {
+ }
+
+ ~PoolingTypes() = default;
+};
+} // namespace test
+} // namespace arm_compute
+#endif //__ARM_COMPUTE_TEST_DATASET_POOLING_TYPE_DATASET_H__
diff --git a/tests/dataset/ShapeDatasets.h b/tests/dataset/ShapeDatasets.h
index d2b82cae40..ecb478dbf0 100644
--- a/tests/dataset/ShapeDatasets.h
+++ b/tests/dataset/ShapeDatasets.h
@@ -118,7 +118,7 @@ class SmallShapes final : public ShapeDataset<3>
{
public:
SmallShapes()
- : ShapeDataset(TensorShape(5U, 5U),
+ : ShapeDataset(TensorShape(7U, 7U),
TensorShape(27U, 13U, 2U),
TensorShape(128U, 64U, 1U, 3U))
{
diff --git a/tests/dataset/ThresholdDataset.h b/tests/dataset/ThresholdDataset.h
index 956cf3d54d..a2d76e3c48 100644
--- a/tests/dataset/ThresholdDataset.h
+++ b/tests/dataset/ThresholdDataset.h
@@ -58,8 +58,8 @@ public:
std::stringstream ss;
ss << "Threshold";
ss << "_threshold_value" << threshold;
- ss << "_false_value" << false_value;
- ss << "_true_value" << true_value;
+ ss << "_false_value" << std::boolalpha << false_value;
+ ss << "_true_value" << std::boolalpha << true_value;
ss << "_type";
ss << ((type == ThresholdType::BINARY) ? "binary" : "range");
ss << "_upper" << upper;
diff --git a/tests/validation/CL/PoolingLayer.cpp b/tests/validation/CL/PoolingLayer.cpp
new file mode 100644
index 0000000000..1d0e745088
--- /dev/null
+++ b/tests/validation/CL/PoolingLayer.cpp
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "CL/CLAccessor.h"
+#include "CL/Helper.h"
+#include "TypePrinter.h"
+#include "arm_compute/runtime/CL/functions/CLPoolingLayer.h"
+#include "tests/dataset/PoolingLayerDataset.h"
+#include "validation/Datasets.h"
+#include "validation/Reference.h"
+#include "validation/Validation.h"
+
+#include <random>
+
+using namespace arm_compute;
+using namespace arm_compute::test;
+using namespace arm_compute::test::cl;
+using namespace arm_compute::test::validation;
+
+namespace
+{
+const float tolerance_f = 1e-05; /**< Tolerance value for comparing reference's output against implementation's output for float input */
+
+/** Compute CL pooling layer function.
+ *
+ * @param[in] shape Shape of the input and output tensors.
+ * @param[in] dt Data type of input and output tensors.
+ * @param[in] pool_info Pooling Layer information.
+ *
+ * @return Computed output tensor.
+ */
+CLTensor compute_pooling_layer(const TensorShape &shape_in, const TensorShape &shape_out, DataType dt, PoolingLayerInfo pool_info)
+{
+ // Create tensors
+ CLTensor src = create_tensor(shape_in, dt);
+ CLTensor dst = create_tensor(shape_out, dt);
+
+ // Create and configure function
+ CLPoolingLayer pool;
+ pool.configure(&src, &dst, pool_info);
+
+ // Allocate tensors
+ src.allocator()->allocate();
+ dst.allocator()->allocate();
+
+ BOOST_TEST(!src.info()->is_resizable());
+ BOOST_TEST(!dst.info()->is_resizable());
+
+ // Fill tensors
+ std::uniform_real_distribution<> distribution(-1, 1);
+ library->fill(CLAccessor(src), distribution, 0);
+
+ // Compute function
+ pool.run();
+
+ return dst;
+}
+
+TensorShape get_output_shape(TensorShape in_shape, const PoolingLayerInfo &pool_info)
+{
+ TensorShape out_shape(in_shape);
+ const std::pair<unsigned int, unsigned int> scaled_dims = arm_compute::scaled_dimensions(in_shape.x(),
+ in_shape.y(),
+ pool_info.pool_size(),
+ pool_info.pad_stride_info().stride().first, pool_info.pad_stride_info().stride().second,
+ pool_info.pad_stride_info().pad().first, pool_info.pad_stride_info().pad().second,
+ pool_info.pad_stride_info().round());
+ out_shape.set(0, scaled_dims.first);
+ out_shape.set(1, scaled_dims.second);
+ return out_shape;
+}
+} // namespace
+
+#ifndef DOXYGEN_SKIP_THIS
+BOOST_AUTO_TEST_SUITE(CL)
+BOOST_AUTO_TEST_SUITE(PoolingLayer)
+
+BOOST_AUTO_TEST_SUITE(Float)
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
+BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * CNNFloatDataTypes() * PoolingTypes() * boost::unit_test::data::make({ 2, 3, 7 }) * boost::unit_test::data::make({ 1, 2 }) * boost::unit_test::data::make({ 0, 1 }),
+ src_shape, dt, pool_type, pool_size, pool_stride, pool_pad)
+{
+ PoolingLayerInfo pool_info(pool_type, pool_size, PadStrideInfo(pool_stride, pool_stride, pool_pad, pool_pad, DimensionRoundingType::CEIL));
+ TensorShape dst_shape = get_output_shape(src_shape, pool_info);
+
+ // Compute function
+ CLTensor dst = compute_pooling_layer(src_shape, dst_shape, dt, pool_info);
+
+ // Compute reference
+ RawTensor ref_dst = Reference::compute_reference_pooling_layer(src_shape, dst_shape, dt, pool_info);
+
+ // Validate output
+ validate(CLAccessor(dst), ref_dst, tolerance_f);
+}
+BOOST_AUTO_TEST_SUITE_END()
+
+BOOST_AUTO_TEST_SUITE_END()
+BOOST_AUTO_TEST_SUITE_END()
+#endif
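
get_output_shape() delegates to arm_compute::scaled_dimensions(); with the CEIL rounding used in RunSmall, the pooled extents follow the same formula as the reference implementation further down. A hedged scalar sketch:

#include <cmath>
#include <utility>

// Pooled width/height under CEIL rounding, clamped so the last window still
// starts inside the padded input (mirrors the reference pooling_layer below).
std::pair<int, int> pooled_dims(int w_in, int h_in, int pool_size, int stride, int pad)
{
    int w = static_cast<int>(std::ceil(static_cast<float>(w_in + 2 * pad - pool_size) / stride)) + 1;
    int h = static_cast<int>(std::ceil(static_cast<float>(h_in + 2 * pad - pool_size) / stride)) + 1;
    if((w - 1) * stride >= w_in + pad)
    {
        --w;
    }
    if((h - 1) * stride >= h_in + pad)
    {
        --h;
    }
    return { w, h };
}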
diff --git a/tests/validation/Datasets.h b/tests/validation/Datasets.h
index ab21787f45..33776d2e44 100644
--- a/tests/validation/Datasets.h
+++ b/tests/validation/Datasets.h
@@ -36,6 +36,7 @@
#include "dataset/InterpolationPolicyDataset.h"
#include "dataset/NormalizationTypeDataset.h"
#include "dataset/PoolingLayerDataset.h"
+#include "dataset/PoolingTypesDataset.h"
#include "dataset/RoundingPolicyDataset.h"
#include "dataset/ShapeDatasets.h"
#include "dataset/ThresholdDataset.h"
@@ -186,6 +187,12 @@ struct is_dataset<arm_compute::test::RoundingPolicies> : boost::mpl::true_
/// Register the data set with Boost
template <>
+struct is_dataset<arm_compute::test::PoolingTypes> : boost::mpl::true_
+{
+};
+
+/// Register the data set with Boost
+template <>
struct is_dataset<arm_compute::test::AlexNetConvolutionLayerDataset> : boost::mpl::true_
{
};
diff --git a/tests/validation/NEON/Pooling/PoolingLayer.cpp b/tests/validation/NEON/PoolingLayer.cpp
index b15ad1c5e6..10b9a5250e 100644
--- a/tests/validation/NEON/Pooling/PoolingLayer.cpp
+++ b/tests/validation/NEON/PoolingLayer.cpp
@@ -96,7 +96,6 @@ Tensor compute_pooling_layer(const TensorShape &shape_in, const TensorShape &sha
#ifndef DOXYGEN_SKIP_THIS
BOOST_AUTO_TEST_SUITE(NEON)
-BOOST_AUTO_TEST_SUITE(Pooling)
BOOST_AUTO_TEST_SUITE(PoolingLayer)
BOOST_AUTO_TEST_SUITE(Float)
@@ -135,5 +134,4 @@ BOOST_AUTO_TEST_SUITE_END()
BOOST_AUTO_TEST_SUITE_END()
BOOST_AUTO_TEST_SUITE_END()
-BOOST_AUTO_TEST_SUITE_END()
#endif
diff --git a/tests/validation/TensorOperations.h b/tests/validation/TensorOperations.h
index 119be02423..7337924b47 100644
--- a/tests/validation/TensorOperations.h
+++ b/tests/validation/TensorOperations.h
@@ -1154,47 +1154,57 @@ void pooling_layer(const Tensor<T> &in, Tensor<T> &out, PoolingLayerInfo pool_in
std::tie(pool_stride_x, pool_stride_y) = pool_info.pad_stride_info().stride();
std::tie(pad_x, pad_y) = pool_info.pad_stride_info().pad();
- const int cols_in = static_cast<int>(in.shape()[0]);
- const int rows_in = static_cast<int>(in.shape()[1]);
+ const int w_in = static_cast<int>(in.shape()[0]);
+ const int h_in = static_cast<int>(in.shape()[1]);
- const int cols_out = static_cast<int>(out.shape()[0]);
- const int rows_out = static_cast<int>(out.shape()[1]);
+ const int w_out = static_cast<int>(out.shape()[0]);
+ const int h_out = static_cast<int>(out.shape()[1]);
- int upper_dims = in.shape().total_size() / (cols_in * rows_in);
+ int upper_dims = in.shape().total_size() / (w_in * h_in);
- int pooled_height = static_cast<int>(ceil(static_cast<float>(rows_in + 2 * pad_x - pool_size) / pool_stride_x)) + 1;
- int pooled_width = static_cast<int>(ceil(static_cast<float>(cols_in + 2 * pad_y - pool_size) / pool_stride_y)) + 1;
+ int pooled_w = 0;
+ int pooled_h = 0;
+ if(pool_info.pad_stride_info().round() == DimensionRoundingType::CEIL)
+ {
+ pooled_w = static_cast<int>(ceil(static_cast<float>(w_in + 2 * pad_x - pool_size) / pool_stride_x)) + 1;
+ pooled_h = static_cast<int>(ceil(static_cast<float>(h_in + 2 * pad_y - pool_size) / pool_stride_y)) + 1;
+ }
+ else
+ {
+ pooled_w = static_cast<int>(floor(static_cast<float>(w_in + 2 * pad_x - pool_size) / pool_stride_x)) + 1;
+ pooled_h = static_cast<int>(floor(static_cast<float>(h_in + 2 * pad_y - pool_size) / pool_stride_y)) + 1;
+ }
- if((pooled_height - 1) * pool_stride_x >= rows_in + pad_x)
+ if((pooled_w - 1) * pool_stride_x >= w_in + pad_x)
{
- --pooled_height;
+ --pooled_w;
}
- if((pooled_width - 1) * pool_stride_y >= cols_in + pad_y)
+ if((pooled_h - 1) * pool_stride_y >= h_in + pad_y)
{
- --pooled_width;
+ --pooled_h;
}
if(type == PoolingType::MAX)
{
for(int r = 0; r < upper_dims; ++r)
{
- for(int i = 0; i < pooled_height; ++i)
+ for(int h = 0; h < pooled_h; ++h)
{
- for(int k = 0; k < pooled_width; ++k)
+ for(int w = 0; w < pooled_w; ++w)
{
- int hstart = i * pool_stride_x - pad_x;
- int wstart = k * pool_stride_y - pad_y;
- int hend = std::min(hstart + pool_size, rows_in);
- int wend = std::min(wstart + pool_size, cols_in);
- hstart = std::max(hstart, 0);
+ int wstart = w * pool_stride_x - pad_x;
+ int hstart = h * pool_stride_y - pad_y;
+ int wend = std::min(wstart + pool_size, w_in);
+ int hend = std::min(hstart + pool_size, h_in);
wstart = std::max(wstart, 0);
+ hstart = std::max(hstart, 0);
T max_val = std::numeric_limits<T>::lowest();
for(int y = hstart; y < hend; ++y)
{
for(int x = wstart; x < wend; ++x)
{
- T val = in[r * cols_in * rows_in + y * cols_in + x];
+ T val = in[r * h_in * w_in + y * w_in + x];
if(val > max_val)
{
max_val = val;
@@ -1202,7 +1212,7 @@ void pooling_layer(const Tensor<T> &in, Tensor<T> &out, PoolingLayerInfo pool_in
}
}
- out[r * rows_out * cols_out + i * pooled_width + k] = max_val;
+ out[r * h_out * w_out + h * pooled_w + w] = max_val;
}
}
}
@@ -1211,32 +1221,30 @@ void pooling_layer(const Tensor<T> &in, Tensor<T> &out, PoolingLayerInfo pool_in
{
for(int r = 0; r < upper_dims; ++r)
{
- for(int i = 0; i < pooled_height; ++i)
+ for(int h = 0; h < pooled_h; ++h)
{
- for(int k = 0; k < pooled_width; ++k)
+ for(int w = 0; w < pooled_w; ++w)
{
- T avg_val = 0;
-
- int hstart = i * pool_stride_x - pad_x;
- int wstart = k * pool_stride_y - pad_y;
- int hend = std::min(hstart + pool_size, cols_in + pad_x);
- int wend = std::min(wstart + pool_size, rows_in + pad_y);
- int pool = (hend - hstart) * (wend - wstart);
- hstart = std::max(hstart, 0);
- wstart = std::max(wstart, 0);
- hend = std::min(hend, rows_in);
- wend = std::min(wend, cols_in);
-
+ T avg_val = 0;
+ int wstart = w * pool_stride_x - pad_x;
+ int hstart = h * pool_stride_y - pad_y;
+ int wend = std::min(wstart + pool_size, w_in + pad_x);
+ int hend = std::min(hstart + pool_size, h_in + pad_y);
+ int pool = (hend - hstart) * (wend - wstart);
+ wstart = std::max(wstart, 0);
+ hstart = std::max(hstart, 0);
+ wend = std::min(wend, w_in);
+ hend = std::min(hend, h_in);
if(std::is_floating_point<T>::value)
{
for(int y = hstart; y < hend; ++y)
{
for(int x = wstart; x < wend; ++x)
{
- avg_val += in[r * cols_in * rows_in + y * cols_in + x];
+ avg_val += in[r * h_in * w_in + y * w_in + x];
}
}
- out[r * rows_out * cols_out + i * pooled_width + k] = avg_val / pool;
+ out[r * h_out * w_out + h * pooled_w + w] = avg_val / pool;
}
else
{
@@ -1247,10 +1255,10 @@ void pooling_layer(const Tensor<T> &in, Tensor<T> &out, PoolingLayerInfo pool_in
{
for(int x = wstart; x < wend; ++x)
{
- avg_val = sqadd_qs8(avg_val, in[r * cols_in * rows_in + y * cols_in + x]);
+ avg_val = sqadd_qs8(avg_val, in[r * h_in * w_in + y * w_in + x]);
}
}
- out[r * rows_out * cols_out + i * pooled_width + k] = sqmul_qs8(avg_val, (scale_values_q8[pool] >> (7 - fixed_point_position)), fixed_point_position);
+ out[r * h_out * w_out + h * pooled_w + w] = sqmul_qs8(avg_val, (scale_values_q8[pool] >> (7 - fixed_point_position)), fixed_point_position);
}
}
}
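
Note that the reference average pooling computes its divisor from the window extent before clamping to the input, so padded positions still count towards the divisor. A small sketch of that detail (names local to the sketch):

#include <algorithm>

int avg_pool_divisor(int wstart, int hstart, int pool_size, int w_in, int h_in, int pad_x, int pad_y)
{
    const int wend = std::min(wstart + pool_size, w_in + pad_x); // may extend into right/bottom padding
    const int hend = std::min(hstart + pool_size, h_in + pad_y);
    return (hend - hstart) * (wend - wstart);                    // taken before wstart/hstart are clamped to 0
}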