From 4a626a7d52e9c4759bdc16b65401a53779dd975f Mon Sep 17 00:00:00 2001
From: Pablo Tello <pablo.tello@arm.com>
Date: Wed, 4 Apr 2018 10:01:14 +0100
Subject: COMPMID-801: NHWC support in CLIm2Col.

Also extends test coverage by adding kernel shapes 3x1, 1x5 and 7x7.

Change-Id: Ia7c1d4da2368d5f5fbc1a41187f4ac1aca5f150f
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/127727
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
---
 tests/validation/reference/Im2Col.cpp | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

(limited to 'tests/validation/reference')
diff --git a/tests/validation/reference/Im2Col.cpp b/tests/validation/reference/Im2Col.cpp
index 5685b60026..83ef8b40a5 100644
--- a/tests/validation/reference/Im2Col.cpp
+++ b/tests/validation/reference/Im2Col.cpp
@@ -55,11 +55,16 @@ void im2col_nchw(const SimpleTensor<T> &src, SimpleTensor<T> &dst, const Size2D
     const int pad_val       = is_data_type_quantized_asymmetric(src.data_type()) ? src.quantization_info().offset : 0;
 
     int dst_idx = 0;
+    // dst[dst_idx++] would write out of bounds when kernel_height == kernel_width == 1, because lasty would then be the
+    // bottom padding row, which is not present in the dst buffer
+    const int lasty = src_height + (kernel_height > 1 ? pad_y : 0) - kernel_height;
+    const int lastx = src_width + (kernel_width > 1 ? pad_x : 0) - kernel_width;
+
     for(int b = 0; b < batches; ++b)
     {
-        for(int y = -pad_y; y <= (src_height + pad_y - kernel_height); y += stride_y)
+        for(int y = -pad_y; y <= lasty; y += stride_y)
         {
-            for(int x = -pad_x; x <= (src_width + pad_x - kernel_width); x += stride_x)
+            for(int x = -pad_x; x <= lastx; x += stride_x)
             {
                 for(int z = 0; z < src_depth; ++z)
                 {
@@ -97,11 +102,15 @@ void im2col_nhwc(const SimpleTensor<T> &src, SimpleTensor<T> &dst, const Size2D
     const int batches       = src.shape().total_size_upper(3);
     const int pad_val       = is_data_type_quantized_asymmetric(src.data_type()) ? src.quantization_info().offset : 0;
     int       dst_idx       = 0;
+
+    const int lasty = src_height + (kernel_height > 1 ? pad_y : 0) - kernel_height;
+    const int lastx = src_width + (kernel_width > 1 ? pad_x : 0) - kernel_width;
+
     for(int b = 0; b < batches; ++b)
     {
-        for(int y = -pad_y; y <= (src_height + pad_y - kernel_height); y += stride_y)
+        for(int y = -pad_y; y <= lasty; y += stride_y)
         {
-            for(int x = -pad_x; x <= (src_width + pad_x - kernel_width); x += stride_x)
+            for(int x = -pad_x; x <= lastx; x += stride_x)
             {
                 for(int z = 0; z < src_depth; ++z)
                 {
-- 
cgit v1.2.1