From ce093143ec7b554edefc533c90e45c80946cde51 Mon Sep 17 00:00:00 2001
From: Georgios Pinitas <georgios.pinitas@arm.com>
Date: Mon, 19 Jun 2017 16:11:53 +0100
Subject: COMPMID-403:Add support for 7x7 pooling on CL.

Change-Id: I3c2c8d7e8e61d7737170cb1568900ce4ac337068
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/78181
Reviewed-by: Michele DiGiorgio <michele.digiorgio@arm.com>
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Moritz Pflanzer <moritz.pflanzer@arm.com>
---
 tests/validation/CL/PoolingLayer.cpp           | 119 +++++++++++++++++++++
 tests/validation/Datasets.h                    |   7 ++
 tests/validation/NEON/Pooling/PoolingLayer.cpp | 139 -------------------------
 tests/validation/NEON/PoolingLayer.cpp         | 137 ++++++++++++++++++++++++
 tests/validation/TensorOperations.h            |  84 ++++++++-------
 5 files changed, 309 insertions(+), 177 deletions(-)
 create mode 100644 tests/validation/CL/PoolingLayer.cpp
 delete mode 100644 tests/validation/NEON/Pooling/PoolingLayer.cpp
 create mode 100644 tests/validation/NEON/PoolingLayer.cpp

(limited to 'tests/validation')
diff --git a/tests/validation/CL/PoolingLayer.cpp b/tests/validation/CL/PoolingLayer.cpp
new file mode 100644
index 0000000000..1d0e745088
--- /dev/null
+++ b/tests/validation/CL/PoolingLayer.cpp
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "CL/CLAccessor.h"
+#include "CL/Helper.h"
+#include "TypePrinter.h"
+#include "arm_compute/runtime/CL/functions/CLPoolingLayer.h"
+#include "tests/dataset/PoolingLayerDataset.h"
+#include "validation/Datasets.h"
+#include "validation/Reference.h"
+#include "validation/Validation.h"
+
+#include <random>
+
+using namespace arm_compute;
+using namespace arm_compute::test;
+using namespace arm_compute::test::cl;
+using namespace arm_compute::test::validation;
+
+namespace
+{
+const float tolerance_f = 1e-05; /**< Tolerance value for comparing reference's output against implementation's output for float input */
+
+/** Compute CL pooling layer function.
+ *
+ * @param[in] shape_in,shape_out Shapes of the input and output tensors.
+ * @param[in] dt                 Data type of input and output tensors.
+ * @param[in] pool_info          Pooling Layer information.
+ *
+ * @return Computed output tensor.
+ */
+CLTensor compute_pooling_layer(const TensorShape &shape_in, const TensorShape &shape_out, DataType dt, PoolingLayerInfo pool_info)
+{
+    // Create source and destination tensors of type dt
+    CLTensor src = create_tensor(shape_in, dt);
+    CLTensor dst = create_tensor(shape_out, dt);
+
+    // Create and configure the pooling function (done before the tensors are allocated)
+    CLPoolingLayer pool;
+    pool.configure(&src, &dst, pool_info);
+
+    // Allocate tensors; the is_resizable() checks below verify that neither tensor is resizable afterwards
+    src.allocator()->allocate();
+    dst.allocator()->allocate();
+
+    BOOST_TEST(!src.info()->is_resizable());
+    BOOST_TEST(!dst.info()->is_resizable());
+
+    // Fill the source tensor with values drawn uniformly from [-1, 1)
+    std::uniform_real_distribution<> distribution(-1, 1);
+    library->fill(CLAccessor(src), distribution, 0);
+
+    // Run the pooling function
+    pool.run();
+
+    return dst;
+}
+
+TensorShape get_output_shape(TensorShape in_shape, const PoolingLayerInfo &pool_info)
+{
+    TensorShape out_shape(in_shape); // Output shape = input shape with dimensions 0 and 1 replaced by the scaled dimensions computed below
+    const std::pair<unsigned int, unsigned int> scaled_dims = arm_compute::scaled_dimensions(in_shape.x(),
+                                                                                             in_shape.y(),
+                                                                                             pool_info.pool_size(),
+                                                                                             pool_info.pad_stride_info().stride().first, pool_info.pad_stride_info().stride().second,
+                                                                                             pool_info.pad_stride_info().pad().first, pool_info.pad_stride_info().pad().second,
+                                                                                             pool_info.pad_stride_info().round());
+    out_shape.set(0, scaled_dims.first);  // Dimension 0 takes the first scaled dimension
+    out_shape.set(1, scaled_dims.second); // Dimension 1 takes the second scaled dimension
+    return out_shape;
+}
+} // namespace
+
+#ifndef DOXYGEN_SKIP_THIS
+BOOST_AUTO_TEST_SUITE(CL)
+BOOST_AUTO_TEST_SUITE(PoolingLayer)
+
+BOOST_AUTO_TEST_SUITE(Float)
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
+BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * CNNFloatDataTypes() * PoolingTypes() * boost::unit_test::data::make({ 2, 3, 7 }) * boost::unit_test::data::make({ 1, 2 }) * boost::unit_test::data::make({ 0, 1 }),
+                     src_shape, dt, pool_type, pool_size, pool_stride, pool_pad)
+{
+    PoolingLayerInfo pool_info(pool_type, pool_size, PadStrideInfo(pool_stride, pool_stride, pool_pad, pool_pad, DimensionRoundingType::CEIL));
+    TensorShape      dst_shape = get_output_shape(src_shape, pool_info); // Destination shape is derived from the source shape and pool_info
+
+    // Run the CL implementation
+    CLTensor dst = compute_pooling_layer(src_shape, dst_shape, dt, pool_info);
+
+    // Compute the reference result for the same shapes, data type and pooling info
+    RawTensor ref_dst = Reference::compute_reference_pooling_layer(src_shape, dst_shape, dt, pool_info);
+
+    // Validate the CL output against the reference using the float tolerance
+    validate(CLAccessor(dst), ref_dst, tolerance_f);
+}
+BOOST_AUTO_TEST_SUITE_END()
+
+BOOST_AUTO_TEST_SUITE_END()
+BOOST_AUTO_TEST_SUITE_END()
+#endif
diff --git a/tests/validation/Datasets.h b/tests/validation/Datasets.h
index ab21787f45..33776d2e44 100644
--- a/tests/validation/Datasets.h
+++ b/tests/validation/Datasets.h
@@ -36,6 +36,7 @@
 #include "dataset/InterpolationPolicyDataset.h"
 #include "dataset/NormalizationTypeDataset.h"
 #include "dataset/PoolingLayerDataset.h"
+#include "dataset/PoolingTypesDataset.h"
 #include "dataset/RoundingPolicyDataset.h"
 #include "dataset/ShapeDatasets.h"
 #include "dataset/ThresholdDataset.h"
@@ -184,6 +185,12 @@ struct is_dataset<arm_compute::test::RoundingPolicies> : boost::mpl::true_
 {
 };
 
+/// Register the PoolingTypes data set with Boost
+template <>
+struct is_dataset<arm_compute::test::PoolingTypes> : boost::mpl::true_
+{
+};
+
 /// Register the data set with Boost
 template <>
 struct is_dataset<arm_compute::test::AlexNetConvolutionLayerDataset> : boost::mpl::true_
diff --git a/tests/validation/NEON/Pooling/PoolingLayer.cpp b/tests/validation/NEON/Pooling/PoolingLayer.cpp
deleted file mode 100644
index b15ad1c5e6..0000000000
--- a/tests/validation/NEON/Pooling/PoolingLayer.cpp
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "NEON/Helper.h"
-#include "NEON/NEAccessor.h"
-#include "TypePrinter.h"
-#include "arm_compute/runtime/NEON/functions/NEPoolingLayer.h"
-#include "tests/dataset/PoolingLayerDataset.h"
-#include "validation/Datasets.h"
-#include "validation/Reference.h"
-#include "validation/Validation.h"
-
-#include <iostream>
-#include <random>
-
-using namespace arm_compute;
-using namespace arm_compute::test;
-using namespace arm_compute::test::neon;
-using namespace arm_compute::test::validation;
-
-namespace
-{
-const float tolerance_q = 0;     /**< Tolerance value for comparing reference's output against implementation's output for quantized input */
-const float tolerance_f = 1e-05; /**< Tolerance value for comparing reference's output against implementation's output for float input */
-
-/** Compute Neon pooling layer function.
- *
- * @param[in] shape     Shape of the input and output tensors.
- * @param[in] dt        Data type of input and output tensors.
- * @param[in] pool_info Pooling Layer information.
- *
- * @return Computed output tensor.
- */
-Tensor compute_pooling_layer(const TensorShape &shape_in, const TensorShape &shape_out, DataType dt, PoolingLayerInfo pool_info, int fixed_point_position = 0)
-{
-    // Create tensors
-    Tensor src = create_tensor(shape_in, dt, 1, fixed_point_position);
-    Tensor dst = create_tensor(shape_out, dt, 1, fixed_point_position);
-
-    // Create and configure function
-    NEPoolingLayer pool;
-    pool.configure(&src, &dst, pool_info);
-
-    // Allocate tensors
-    src.allocator()->allocate();
-    dst.allocator()->allocate();
-
-    BOOST_TEST(!src.info()->is_resizable());
-    BOOST_TEST(!dst.info()->is_resizable());
-
-    // Fill tensors
-    int min = 0;
-    int max = 0;
-    switch(dt)
-    {
-        case DataType::F32:
-            min = -1;
-            max = 1;
-            break;
-        case DataType::QS8:
-            min = -(1 << fixed_point_position);
-            max = (1 << fixed_point_position);
-            break;
-        default:
-            ARM_COMPUTE_ERROR("DataType not supported.");
-    }
-    std::uniform_real_distribution<> distribution(min, max);
-    library->fill(NEAccessor(src), distribution, 0);
-
-    // Compute function
-    pool.run();
-
-    return dst;
-}
-} // namespace
-
-#ifndef DOXYGEN_SKIP_THIS
-BOOST_AUTO_TEST_SUITE(NEON)
-BOOST_AUTO_TEST_SUITE(Pooling)
-BOOST_AUTO_TEST_SUITE(PoolingLayer)
-
-BOOST_AUTO_TEST_SUITE(Float)
-BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
-BOOST_DATA_TEST_CASE(RandomDataset,
-                     RandomPoolingLayerDataset() * boost::unit_test::data::make(DataType::F32),
-                     obj, dt)
-{
-    // Compute function
-    Tensor dst = compute_pooling_layer(obj.src_shape, obj.dst_shape, dt, obj.info);
-
-    // Compute reference
-    RawTensor ref_dst = Reference::compute_reference_pooling_layer(obj.src_shape, obj.dst_shape, dt, obj.info);
-
-    // Validate output
-    validate(NEAccessor(dst), ref_dst, tolerance_f, 0);
-}
-BOOST_AUTO_TEST_SUITE_END()
-
-BOOST_AUTO_TEST_SUITE(Quantized)
-BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
-BOOST_DATA_TEST_CASE(RandomDataset,
-                     RandomPoolingLayerDataset() * boost::unit_test::data::make(DataType::QS8) * boost::unit_test::data::xrange(1, 5),
-                     obj, dt, fixed_point_position)
-{
-    // Compute function
-    Tensor dst = compute_pooling_layer(obj.src_shape, obj.dst_shape, dt, obj.info, fixed_point_position);
-
-    // Compute reference
-    RawTensor ref_dst = Reference::compute_reference_pooling_layer(obj.src_shape, obj.dst_shape, dt, obj.info, fixed_point_position);
-
-    // Validate output
-    validate(NEAccessor(dst), ref_dst, tolerance_q, 0);
-}
-BOOST_AUTO_TEST_SUITE_END()
-
-BOOST_AUTO_TEST_SUITE_END()
-BOOST_AUTO_TEST_SUITE_END()
-BOOST_AUTO_TEST_SUITE_END()
-#endif
diff --git a/tests/validation/NEON/PoolingLayer.cpp b/tests/validation/NEON/PoolingLayer.cpp
new file mode 100644
index 0000000000..10b9a5250e
--- /dev/null
+++ b/tests/validation/NEON/PoolingLayer.cpp
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "NEON/Helper.h"
+#include "NEON/NEAccessor.h"
+#include "TypePrinter.h"
+#include "arm_compute/runtime/NEON/functions/NEPoolingLayer.h"
+#include "tests/dataset/PoolingLayerDataset.h"
+#include "validation/Datasets.h"
+#include "validation/Reference.h"
+#include "validation/Validation.h"
+
+#include <iostream>
+#include <random>
+
+using namespace arm_compute;
+using namespace arm_compute::test;
+using namespace arm_compute::test::neon;
+using namespace arm_compute::test::validation;
+
+namespace
+{
+const float tolerance_q = 0;     /**< Tolerance value for comparing reference's output against implementation's output for quantized input */
+const float tolerance_f = 1e-05; /**< Tolerance value for comparing reference's output against implementation's output for float input */
+
+/** Compute Neon pooling layer function.
+ *
+ * @param[in] shape_in,shape_out   Shapes of the input and output tensors.
+ * @param[in] dt                   Data type of input and output tensors. F32 and QS8 are handled; any other type hits ARM_COMPUTE_ERROR.
+ * @param[in] pool_info            Pooling Layer information.
+ * @param[in] fixed_point_position (Optional) Fixed point position passed to tensor creation; for QS8 it also sets the fill range. Defaults to 0.
+ * @return Computed output tensor.
+ */
+Tensor compute_pooling_layer(const TensorShape &shape_in, const TensorShape &shape_out, DataType dt, PoolingLayerInfo pool_info, int fixed_point_position = 0)
+{
+    // Create source and destination tensors of type dt with the requested fixed point position
+    Tensor src = create_tensor(shape_in, dt, 1, fixed_point_position);
+    Tensor dst = create_tensor(shape_out, dt, 1, fixed_point_position);
+
+    // Create and configure the pooling function (done before the tensors are allocated)
+    NEPoolingLayer pool;
+    pool.configure(&src, &dst, pool_info);
+
+    // Allocate tensors; the is_resizable() checks below verify that neither tensor is resizable afterwards
+    src.allocator()->allocate();
+    dst.allocator()->allocate();
+
+    BOOST_TEST(!src.info()->is_resizable());
+    BOOST_TEST(!dst.info()->is_resizable());
+
+    // Pick the fill range from the data type, then fill the source tensor with uniformly distributed values
+    int min = 0;
+    int max = 0;
+    switch(dt)
+    {
+        case DataType::F32:
+            min = -1;
+            max = 1;
+            break;
+        case DataType::QS8:
+            min = -(1 << fixed_point_position);
+            max = (1 << fixed_point_position);
+            break;
+        default:
+            ARM_COMPUTE_ERROR("DataType not supported.");
+    }
+    std::uniform_real_distribution<> distribution(min, max);
+    library->fill(NEAccessor(src), distribution, 0);
+
+    // Run the pooling function
+    pool.run();
+
+    return dst;
+}
+} // namespace
+
+#ifndef DOXYGEN_SKIP_THIS
+BOOST_AUTO_TEST_SUITE(NEON)
+BOOST_AUTO_TEST_SUITE(PoolingLayer)
+
+BOOST_AUTO_TEST_SUITE(Float)
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
+BOOST_DATA_TEST_CASE(RandomDataset,
+                     RandomPoolingLayerDataset() * boost::unit_test::data::make(DataType::F32),
+                     obj, dt)
+{
+    // Run the NEON implementation; shapes and pooling info come from the dataset entry
+    Tensor dst = compute_pooling_layer(obj.src_shape, obj.dst_shape, dt, obj.info);
+
+    // Compute the reference result for the same dataset entry
+    RawTensor ref_dst = Reference::compute_reference_pooling_layer(obj.src_shape, obj.dst_shape, dt, obj.info);
+
+    // Validate the NEON output against the reference using the float tolerance
+    validate(NEAccessor(dst), ref_dst, tolerance_f, 0);
+}
+BOOST_AUTO_TEST_SUITE_END()
+
+BOOST_AUTO_TEST_SUITE(Quantized)
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
+BOOST_DATA_TEST_CASE(RandomDataset,
+                     RandomPoolingLayerDataset() * boost::unit_test::data::make(DataType::QS8) * boost::unit_test::data::xrange(1, 5),
+                     obj, dt, fixed_point_position)
+{
+    // Run the NEON implementation with the fixed point position under test
+    Tensor dst = compute_pooling_layer(obj.src_shape, obj.dst_shape, dt, obj.info, fixed_point_position);
+
+    // Compute the reference result with the same fixed point position
+    RawTensor ref_dst = Reference::compute_reference_pooling_layer(obj.src_shape, obj.dst_shape, dt, obj.info, fixed_point_position);
+
+    // Validate the NEON output against the reference using the quantized tolerance (tolerance_q)
+    validate(NEAccessor(dst), ref_dst, tolerance_q, 0);
+}
+BOOST_AUTO_TEST_SUITE_END()
+
+BOOST_AUTO_TEST_SUITE_END()
+BOOST_AUTO_TEST_SUITE_END()
+#endif
diff --git a/tests/validation/TensorOperations.h b/tests/validation/TensorOperations.h
index 119be02423..7337924b47 100644
--- a/tests/validation/TensorOperations.h
+++ b/tests/validation/TensorOperations.h
@@ -1154,47 +1154,57 @@ void pooling_layer(const Tensor<T> &in, Tensor<T> &out, PoolingLayerInfo pool_in
     std::tie(pool_stride_x, pool_stride_y) = pool_info.pad_stride_info().stride();
     std::tie(pad_x, pad_y)                 = pool_info.pad_stride_info().pad();
 
-    const int cols_in = static_cast<int>(in.shape()[0]);
-    const int rows_in = static_cast<int>(in.shape()[1]);
+    const int w_in = static_cast<int>(in.shape()[0]);
+    const int h_in = static_cast<int>(in.shape()[1]);
 
-    const int cols_out = static_cast<int>(out.shape()[0]);
-    const int rows_out = static_cast<int>(out.shape()[1]);
+    const int w_out = static_cast<int>(out.shape()[0]);
+    const int h_out = static_cast<int>(out.shape()[1]);
 
-    int upper_dims = in.shape().total_size() / (cols_in * rows_in);
+    int upper_dims = in.shape().total_size() / (w_in * h_in);
 
-    int pooled_height = static_cast<int>(ceil(static_cast<float>(rows_in + 2 * pad_x - pool_size) / pool_stride_x)) + 1;
-    int pooled_width  = static_cast<int>(ceil(static_cast<float>(cols_in + 2 * pad_y - pool_size) / pool_stride_y)) + 1;
+    int pooled_w = 0;
+    int pooled_h = 0;
+    if(pool_info.pad_stride_info().round() == DimensionRoundingType::CEIL)
+    {
+        pooled_w = static_cast<int>(ceil(static_cast<float>(w_in + 2 * pad_x - pool_size) / pool_stride_x)) + 1;
+        pooled_h = static_cast<int>(ceil(static_cast<float>(h_in + 2 * pad_y - pool_size) / pool_stride_y)) + 1;
+    }
+    else
+    {
+        pooled_w = static_cast<int>(floor(static_cast<float>(w_in + 2 * pad_x - pool_size) / pool_stride_x)) + 1;
+        pooled_h = static_cast<int>(floor(static_cast<float>(h_in + 2 * pad_y - pool_size) / pool_stride_y)) + 1;
+    }
 
-    if((pooled_height - 1) * pool_stride_x >= rows_in + pad_x)
+    if((pooled_w - 1) * pool_stride_x >= w_in + pad_x)
     {
-        --pooled_height;
+        --pooled_w;
     }
-    if((pooled_width - 1) * pool_stride_y >= cols_in + pad_y)
+    if((pooled_h - 1) * pool_stride_y >= h_in + pad_y)
     {
-        --pooled_width;
+        --pooled_h;
     }
 
     if(type == PoolingType::MAX)
     {
         for(int r = 0; r < upper_dims; ++r)
         {
-            for(int i = 0; i < pooled_height; ++i)
+            for(int h = 0; h < pooled_h; ++h)
             {
-                for(int k = 0; k < pooled_width; ++k)
+                for(int w = 0; w < pooled_w; ++w)
                 {
-                    int hstart = i * pool_stride_x - pad_x;
-                    int wstart = k * pool_stride_y - pad_y;
-                    int hend   = std::min(hstart + pool_size, rows_in);
-                    int wend   = std::min(wstart + pool_size, cols_in);
-                    hstart     = std::max(hstart, 0);
+                    int wstart = w * pool_stride_x - pad_x;
+                    int hstart = h * pool_stride_y - pad_y;
+                    int wend   = std::min(wstart + pool_size, w_in);
+                    int hend   = std::min(hstart + pool_size, h_in);
                     wstart     = std::max(wstart, 0);
+                    hstart     = std::max(hstart, 0);
 
                     T max_val = std::numeric_limits<T>::lowest();
                     for(int y = hstart; y < hend; ++y)
                     {
                         for(int x = wstart; x < wend; ++x)
                         {
-                            T val = in[r * cols_in * rows_in + y * cols_in + x];
+                            T val = in[r * h_in * w_in + y * w_in + x];
                             if(val > max_val)
                             {
                                 max_val = val;
@@ -1202,7 +1212,7 @@ void pooling_layer(const Tensor<T> &in, Tensor<T> &out, PoolingLayerInfo pool_in
                         }
                     }
 
-                    out[r * rows_out * cols_out + i * pooled_width + k] = max_val;
+                    out[r * h_out * w_out + h * pooled_w + w] = max_val;
                 }
             }
         }
@@ -1211,32 +1221,30 @@ void pooling_layer(const Tensor<T> &in, Tensor<T> &out, PoolingLayerInfo pool_in
     {
         for(int r = 0; r < upper_dims; ++r)
         {
-            for(int i = 0; i < pooled_height; ++i)
+            for(int h = 0; h < pooled_h; ++h)
             {
-                for(int k = 0; k < pooled_width; ++k)
+                for(int w = 0; w < pooled_w; ++w)
                 {
-                    T avg_val = 0;
-
-                    int hstart = i * pool_stride_x - pad_x;
-                    int wstart = k * pool_stride_y - pad_y;
-                    int hend   = std::min(hstart + pool_size, cols_in + pad_x);
-                    int wend   = std::min(wstart + pool_size, rows_in + pad_y);
-                    int pool   = (hend - hstart) * (wend - wstart);
-                    hstart     = std::max(hstart, 0);
-                    wstart     = std::max(wstart, 0);
-                    hend       = std::min(hend, rows_in);
-                    wend       = std::min(wend, cols_in);
-
+                    T   avg_val = 0;
+                    int wstart  = w * pool_stride_x - pad_x;
+                    int hstart  = h * pool_stride_y - pad_y;
+                    int wend    = std::min(wstart + pool_size, w_in + pad_x);
+                    int hend    = std::min(hstart + pool_size, h_in + pad_y);
+                    int pool    = (hend - hstart) * (wend - wstart);
+                    wstart      = std::max(wstart, 0);
+                    hstart      = std::max(hstart, 0);
+                    wend        = std::min(wend, w_in);
+                    hend        = std::min(hend, h_in);
                     if(std::is_floating_point<T>::value)
                     {
                         for(int y = hstart; y < hend; ++y)
                         {
                             for(int x = wstart; x < wend; ++x)
                             {
-                                avg_val += in[r * cols_in * rows_in + y * cols_in + x];
+                                avg_val += in[r * h_in * w_in + y * w_in + x];
                             }
                         }
-                        out[r * rows_out * cols_out + i * pooled_width + k] = avg_val / pool;
+                        out[r * h_out * w_out + h * pooled_w + w] = avg_val / pool;
                     }
                     else
                     {
@@ -1247,10 +1255,10 @@ void pooling_layer(const Tensor<T> &in, Tensor<T> &out, PoolingLayerInfo pool_in
                         {
                             for(int x = wstart; x < wend; ++x)
                             {
-                                avg_val = sqadd_qs8(avg_val, in[r * cols_in * rows_in + y * cols_in + x]);
+                                avg_val = sqadd_qs8(avg_val, in[r * h_in * w_in + y * w_in + x]);
                             }
                         }
-                        out[r * rows_out * cols_out + i * pooled_width + k] = sqmul_qs8(avg_val, (scale_values_q8[pool] >> (7 - fixed_point_position)), fixed_point_position);
+                        out[r * h_out * w_out + h * pooled_w + w] = sqmul_qs8(avg_val, (scale_values_q8[pool] >> (7 - fixed_point_position)), fixed_point_position);
                     }
                 }
             }
-- 
cgit v1.2.1