From b6f182d3e5b69cc193d7e5ec397c4d61083572d5 Mon Sep 17 00:00:00 2001
From: Georgios Pinitas <georgios.pinitas@arm.com>
Date: Wed, 29 Nov 2017 10:17:56 +0000
Subject: COMPMID-556: Fix CLDepthwiseConvolution3x3 Kernel.

Kernel was not sliding the input window.

Change-Id: Ia5903ceaed1243e86bee773a84102d8a1132dfa5
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/111055
Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
---
 tests/datasets/DepthwiseConvolutionDataset.h  | 10 +++---
 tests/validation/CPP/DepthwiseConvolution.cpp | 52 ++++++++++++++-------------
 2 files changed, 33 insertions(+), 29 deletions(-)

(limited to 'tests')

diff --git a/tests/datasets/DepthwiseConvolutionDataset.h b/tests/datasets/DepthwiseConvolutionDataset.h
index 430d2c9aca..2c8347fc8c 100644
--- a/tests/datasets/DepthwiseConvolutionDataset.h
+++ b/tests/datasets/DepthwiseConvolutionDataset.h
@@ -161,10 +161,10 @@ class SmallDepthwiseConvolutionDataset3x3 final : public DepthwiseConvolutionDat
 public:
     SmallDepthwiseConvolutionDataset3x3()
     {
-        add_config(TensorShape(7U, 7U, 3U), TensorShape(3U, 3U, 3U), TensorShape(3U), TensorShape(5U, 5U, 3U), PadStrideInfo(1, 1, 0, 0));
+        add_config(TensorShape(7U, 7U, 3U, 2U), TensorShape(3U, 3U, 3U), TensorShape(3U), TensorShape(5U, 5U, 3U, 2U), PadStrideInfo(1, 1, 0, 0));
         add_config(TensorShape(33U, 27U, 11U), TensorShape(3U, 3U, 11U), TensorShape(11U), TensorShape(11U, 14U, 11U), PadStrideInfo(3, 2, 1, 1));
-        add_config(TensorShape(21U, 31U, 9U), TensorShape(3U, 3U, 9U), TensorShape(9U), TensorShape(21U, 15U, 9U), PadStrideInfo(1, 2, 1, 0));
-        add_config(TensorShape(33U, 27U, 11U), TensorShape(3U, 3U, 11U), TensorShape(11U), TensorShape(31U, 14U, 11U), PadStrideInfo(1, 2, 0, 1));
+        add_config(TensorShape(21U, 31U, 9U, 4U), TensorShape(3U, 3U, 9U), TensorShape(9U), TensorShape(21U, 15U, 9U, 4U), PadStrideInfo(1, 2, 1, 0));
+        add_config(TensorShape(33U, 27U, 11U, 3U), TensorShape(3U, 3U, 11U), TensorShape(11U), TensorShape(31U, 14U, 11U, 3U), PadStrideInfo(1, 2, 0, 1));
     }
 };
 
@@ -173,11 +173,11 @@ class LargeDepthwiseConvolutionDataset3x3 final : public DepthwiseConvolutionDat
 public:
     LargeDepthwiseConvolutionDataset3x3()
     {
-        add_config(TensorShape(233U, 277U, 55U), TensorShape(3U, 3U, 55U), TensorShape(55U), TensorShape(116U, 275U, 55U), PadStrideInfo(2, 1, 0, 0));
+        add_config(TensorShape(233U, 277U, 55U, 3U), TensorShape(3U, 3U, 55U), TensorShape(55U), TensorShape(116U, 275U, 55U, 3U), PadStrideInfo(2, 1, 0, 0));
         add_config(TensorShape(333U, 277U, 77U), TensorShape(3U, 3U, 77U), TensorShape(77U), TensorShape(111U, 138U, 77U), PadStrideInfo(3, 2, 1, 0));
         add_config(TensorShape(177U, 311U, 22U), TensorShape(3U, 3U, 22U), TensorShape(22U), TensorShape(177U, 156U, 22U), PadStrideInfo(1, 2, 1, 1));
         add_config(TensorShape(233U, 277U, 55U), TensorShape(3U, 3U, 55U), TensorShape(55U), TensorShape(231U, 138U, 55U), PadStrideInfo(1, 2, 0, 0));
-        add_config(TensorShape(333U, 277U, 77U), TensorShape(3U, 3U, 77U), TensorShape(77U), TensorShape(166U, 93U, 77U), PadStrideInfo(2, 3, 0, 1));
+        add_config(TensorShape(333U, 277U, 77U, 5U), TensorShape(3U, 3U, 77U), TensorShape(77U), TensorShape(166U, 93U, 77U, 5U), PadStrideInfo(2, 3, 0, 1));
         add_config(TensorShape(177U, 311U, 22U), TensorShape(3U, 3U, 22U), TensorShape(22U), TensorShape(89U, 311U, 22U), PadStrideInfo(2, 1, 1, 1));
     }
 };
diff --git a/tests/validation/CPP/DepthwiseConvolution.cpp b/tests/validation/CPP/DepthwiseConvolution.cpp
index ad0653846b..229e044783 100644
--- a/tests/validation/CPP/DepthwiseConvolution.cpp
+++ b/tests/validation/CPP/DepthwiseConvolution.cpp
@@ -137,6 +137,7 @@ SimpleTensor<uint8_t> depthwise_convolution(const SimpleTensor<uint8_t> &src, co
     const int input_width   = src.shape().x();
     const int input_height  = src.shape().y();
     const int input_depth   = src.shape().z();
+    const int num_batches   = src.shape().total_size() / (input_width * input_height * input_depth);
 
     const int filter_half_size = filter_width / 2;
     const int pad_x            = std::min(filter_half_size, static_cast<int>(conv_info.pad().first));
@@ -145,37 +146,40 @@ SimpleTensor<uint8_t> depthwise_convolution(const SimpleTensor<uint8_t> &src, co
     const int minimum_y        = -pad_y + filter_half_size;
 
     int out_pos = 0;
-    for(int z = 0; z < input_depth; ++z)
+    for(int r = 0; r < num_batches; ++r)
     {
-        int32_t bias_val = *static_cast<const int32_t *>(biases(Coordinates(z)));
-        for(int y = minimum_y; y < input_height + pad_y - filter_half_size; y += conv_info.stride().second)
+        for(int z = 0; z < input_depth; ++z)
         {
-            for(int x = minimum_x; x < input_width + pad_x - filter_half_size; x += conv_info.stride().first)
+            int32_t bias_val = *static_cast<const int32_t *>(biases(Coordinates(z)));
+            for(int y = minimum_y; y < input_height + pad_y - filter_half_size; y += conv_info.stride().second)
             {
-                Coordinates coords(x, y, z);
-                int         filter_offset = filter_plane * z;
-
-                uint32_t val = 0;
-                for(int j = y - filter_half_size; j <= (y + filter_half_size); ++j)
+                for(int x = minimum_x; x < input_width + pad_x - filter_half_size; x += conv_info.stride().first)
                 {
-                    for(int i = x - filter_half_size; i <= (x + filter_half_size); ++i)
+                    Coordinates coords(x, y, z);
+                    int         filter_offset = filter_plane * z;
+
+                    uint32_t val = 0;
+                    for(int j = y - filter_half_size; j <= (y + filter_half_size); ++j)
                     {
-                        coords.set(0, i);
-                        coords.set(1, j);
-                        auto    in_val = tensor_elem_at<uint8_t>(src, coords, BorderMode::CONSTANT, 0);
-                        uint8_t w_val  = *(weights.data() + filter_offset);
-                        val += (in_val + input_offset) * (w_val + weights_offset);
-                        ++filter_offset;
+                        for(int i = x - filter_half_size; i <= (x + filter_half_size); ++i)
+                        {
+                            coords.set(0, i);
+                            coords.set(1, j);
+                            auto    in_val = tensor_elem_at<uint8_t>(src, coords, BorderMode::CONSTANT, 0);
+                            uint8_t w_val  = *(weights.data() + filter_offset);
+                            val += (in_val + input_offset) * (w_val + weights_offset);
+                            ++filter_offset;
+                        }
                     }
+                    val += bias_val;
+                    val = asymm_rounding_divide_by_pow2(asymm_int_mult(val, output_multiplier), output_shift);
+                    val += output_offset;
+                    val = std::max<int32_t>(val, 0);
+                    val = std::min<int32_t>(val, 255);
+
+                    // Store the result
+                    dst[out_pos++] = val;
                 }
-                val += bias_val;
-                val = asymm_rounding_divide_by_pow2(asymm_int_mult(val, output_multiplier), output_shift);
-                val += output_offset;
-                val = std::max<int32_t>(val, 0);
-                val = std::min<int32_t>(val, 255);
-
-                // Store the result
-                dst[out_pos++] = val;
             }
         }
     }
-- 
cgit v1.2.1