From 1a57ad1edf755bd284c8a387976c292913616081 Mon Sep 17 00:00:00 2001
From: Georgios Pinitas <georgios.pinitas@arm.com>
Date: Wed, 9 Jan 2019 16:11:51 +0000
Subject: COMPMID-1710: Add explicit padding arguments to depthwise convolution

Change-Id: I3011640f4d4d80b7f4e488ec8df47454d3220c5d
Reviewed-on: https://review.mlplatform.org/484
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Giuseppe Rossini <giuseppe.rossini@arm.com>
---
 .../kernels/convolution/depthwise/depthwise.hpp    | 301 +++++++++++++++------
 .../kernels/convolution/depthwise/impl_base.hpp    |  77 +++++-
 2 files changed, 289 insertions(+), 89 deletions(-)

diff --git a/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp b/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp
index 472c44f97a..6d9cb18f44 100644
--- a/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp
+++ b/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -32,6 +32,12 @@ class IDepthwiseConvolution
   public:
     virtual ~IDepthwiseConvolution() = default;
     virtual int output_size(const int dim_size, const bool padding_same) const = 0;
+    virtual int output_size(  // Output rows/columns for explicitly specified padding.
+      int dim_size,                 // Number of elements in the dimension (rows/columns).
+      unsigned int padding_before,  // Padding preceding the dimension (top or left).
+      unsigned int padding_after    // Padding following the dimension (bottom or right).
+    ) const = 0;
+
     virtual unsigned int get_window(void) const = 0;
     virtual void set_offsets(int input_offset, int weights_offset) = 0;
     virtual void run(const unsigned int start, const unsigned int stop) = 0;
@@ -65,18 +71,18 @@ class DepthwiseConvolution : public IDepthwiseConvolution
 
     /** Create a new depthwise convolution engine.
      *
-     * @param[in] n_batches Number of batches tensors.
-     * @param[in] n_input_rows Number of rows in input tensor.
-     * @param[in] n_input_cols Number of columns in input tensor.
-     * @param[in] n_channels Number of channels in input and output tensors.
-     * @param[in] padding_same True if padding is SAME, else VALID.
-     * @param[in] weights Pointer to Height x Width x Channel ordered weights.
-     * @param[in] input Pointer to NHWC ordered input tensor.
-     * @param[output] output Pointer to NHWC ordered output tensor.
+     * @param[in]  n_batches Number of batches in the tensors.
+     * @param[in]  n_input_rows Number of rows in input tensor.
+     * @param[in]  n_input_cols Number of columns in input tensor.
+     * @param[in]  n_channels Number of channels in input and output tensors.
+     * @param[in]  padding_same True if padding is SAME, else VALID.
+     * @param[in]  weights Pointer to Height x Width x Channel ordered weights.
+     * @param[in]  input Pointer to NHWC ordered input tensor.
+     * @param[out] output Pointer to NHWC ordered output tensor.
      */
     DepthwiseConvolution(
-      const int n_batches, const int n_input_rows, const int n_input_cols,
-      const int n_channels, const bool padding_same,
+      int n_batches, int n_input_rows, int n_input_cols,
+      int n_channels, bool padding_same,
       const TIn* const weights,
       const TIn* const input,
       TOut* const output
@@ -87,21 +93,53 @@ class DepthwiseConvolution : public IDepthwiseConvolution
     {
     }
 
+    /** Create a new depthwise convolution engine with explicit per-edge padding.
+     *
+     * @param[in]  n_batches Number of batches in the tensors.
+     * @param[in]  n_input_rows Number of rows in input tensor.
+     * @param[in]  n_input_cols Number of columns in input tensor.
+     * @param[in]  n_channels Number of channels in input and output tensors.
+     * @param[in]  padding_top Padding to apply to top of input.
+     * @param[in]  padding_left Padding to apply to left of input.
+     * @param[in]  padding_bottom Padding to apply to bottom of input.
+     * @param[in]  padding_right Padding to apply to right of input.
+     * @param[in]  weights Pointer to Height x Width x Channel ordered weights.
+     * @param[in]  input Pointer to NHWC ordered input tensor.
+     * @param[out] output Pointer to NHWC ordered output tensor.
+     */
+    DepthwiseConvolution(
+      int n_batches, int n_input_rows, int n_input_cols,
+      int n_channels,
+      unsigned int padding_top,
+      unsigned int padding_left,
+      unsigned int padding_bottom,
+      unsigned int padding_right,
+      const TIn* const weights,
+      const TIn* const input,
+      TOut* const output
+    ) : DepthwiseConvolution(  // Delegates to the explicit-padding overload that takes a column stride.
+      n_batches, n_input_rows, n_input_cols, n_channels,
+      padding_top, padding_left, padding_bottom, padding_right,
+      weights, input, output, 0 /* column stride = default */
+    )
+    {
+    }
+
     /** Create a new depthwise convolution engine with a specified column stride.
      *
-     * @param[in] n_batches Number of batches tensors.
-     * @param[in] n_input_rows Number of rows in input tensor.
-     * @param[in] n_input_cols Number of columns in input tensor.
-     * @param[in] n_channels Number of channels in input and output tensors.
-     * @param[in] padding_same True if padding is SAME, else VALID.
-     * @param[in] weights Pointer to Height x Width x Channel ordered weights.
-     * @param[in] input Pointer to NHWC ordered input tensor.
-     * @param[output] output Pointer to NHWC ordered output tensor.
-     * @param[in] col_stride Stride between columns of the weights, inputs and output tensors.
+     * @param[in]  n_batches Number of batches in the tensors.
+     * @param[in]  n_input_rows Number of rows in input tensor.
+     * @param[in]  n_input_cols Number of columns in input tensor.
+     * @param[in]  n_channels Number of channels in input and output tensors.
+     * @param[in]  padding_same True if padding is SAME, else VALID.
+     * @param[in]  weights Pointer to Height x Width x Channel ordered weights.
+     * @param[in]  input Pointer to NHWC ordered input tensor.
+     * @param[out] output Pointer to NHWC ordered output tensor.
+     * @param[in]  col_stride Stride between columns of the weights, inputs and output tensors.
      */
     DepthwiseConvolution(
-      const int n_batches, const int n_input_rows, const int n_input_cols,
-      const int n_channels, const bool padding_same,
+      int n_batches, int n_input_rows, int n_input_cols,
+      int n_channels, bool padding_same,
       const TIn* const weights,
       const TIn* const input,
       TOut* const output,
@@ -116,39 +154,118 @@ class DepthwiseConvolution : public IDepthwiseConvolution
     {
     }
 
+    /** Create a new depthwise convolution engine with explicit per-edge padding and a specified column stride.
+     *
+     * @param[in]  n_batches Number of batches in the tensors.
+     * @param[in]  n_input_rows Number of rows in input tensor.
+     * @param[in]  n_input_cols Number of columns in input tensor.
+     * @param[in]  n_channels Number of channels in input and output tensors.
+     * @param[in]  padding_top Padding to apply to top of input.
+     * @param[in]  padding_left Padding to apply to left of input.
+     * @param[in]  padding_bottom Padding to apply to bottom of input.
+     * @param[in]  padding_right Padding to apply to right of input.
+     * @param[in]  weights Pointer to Height x Width x Channel ordered weights.
+     * @param[in]  input Pointer to NHWC ordered input tensor.
+     * @param[out] output Pointer to NHWC ordered output tensor.
+     * @param[in]  col_stride Stride between columns of the weights, input and output tensors.
+     */
+    DepthwiseConvolution(
+      int n_batches, int n_input_rows, int n_input_cols,
+      int n_channels,
+      unsigned int padding_top,
+      unsigned int padding_left,
+      unsigned int padding_bottom,
+      unsigned int padding_right,
+      const TIn* const weights,
+      const TIn* const input,
+      TOut* const output,
+      const int col_stride
+    ) : DepthwiseConvolution(  // Delegates to the fully strided explicit-padding overload.
+      n_batches, n_input_rows, n_input_cols, n_channels,
+      padding_top, padding_left, padding_bottom, padding_right,
+      weights, input, output,
+      col_stride, 0,    /* Weight row stride = default */
+      col_stride, 0, 0, /* Input row stride, batch stride = default */
+      col_stride, 0, 0  /* Output row stride, batch stride = default */
+    )
+    {
+    }
+
     /** Create a new depthwise convolution engine.
      *
-     * @param[in] n_batches Number of batches tensors.
-     * @param[in] n_input_rows Number of rows in input tensor.
-     * @param[in] n_input_cols Number of columns in input tensor.
-     * @param[in] n_channels Number of channels in input and output tensors.
-     * @param[in] padding_same True if padding is SAME, else VALID.
-     * @param[in] weights Pointer to Height x Width x Channel ordered weights.
-     * @param[in] input Pointer to NHWC ordered input tensor.
-     * @param[output] output Pointer to NHWC ordered output tensor.
-     * @param[in] weight_col_stride Stride between columns of the weights (if 0, defaults appropriately).
-     * @param[in] weight_row_stride Stride between rows of the weights (if 0, defaults appropriately).
-     * @param[in] input_col_stride Stride between columns of the input tensor (if 0, defaults appropriately).
-     * @param[in] input_row_stride Stride between rows of the input tensor (if 0, defaults appropriately).
-     * @param[in] input_batch_stride Stride between batches of the input tensor (if 0, defaults appropriately).
-     * @param[in] output_col_stride Stride between columns of the output tensor (if 0, defaults appropriately).
-     * @param[in] output_row_stride Stride between rows of the output tensor (if 0, defaults appropriately).
-     * @param[in] output_batch_stride Stride between batches of the output tensor (if 0, defaults appropriately).
+     * @param[in]  n_batches Number of batches in the tensors.
+     * @param[in]  n_input_rows Number of rows in input tensor.
+     * @param[in]  n_input_cols Number of columns in input tensor.
+     * @param[in]  n_channels Number of channels in input and output tensors.
+     * @param[in]  padding_same True if padding is SAME, else VALID.
+     * @param[in]  weights Pointer to Height x Width x Channel ordered weights.
+     * @param[in]  input Pointer to NHWC ordered input tensor.
+     * @param[out] output Pointer to NHWC ordered output tensor.
+     * @param[in]  weight_col_stride Stride between columns of the weights (if 0, defaults appropriately).
+     * @param[in]  weight_row_stride Stride between rows of the weights (if 0, defaults appropriately).
+     * @param[in]  input_col_stride Stride between columns of the input tensor (if 0, defaults appropriately).
+     * @param[in]  input_row_stride Stride between rows of the input tensor (if 0, defaults appropriately).
+     * @param[in]  input_batch_stride Stride between batches of the input tensor (if 0, defaults appropriately).
+     * @param[in]  output_col_stride Stride between columns of the output tensor (if 0, defaults appropriately).
+     * @param[in]  output_row_stride Stride between rows of the output tensor (if 0, defaults appropriately).
+     * @param[in]  output_batch_stride Stride between batches of the output tensor (if 0, defaults appropriately).
      */
     DepthwiseConvolution(
-      const int n_batches, const int n_input_rows, const int n_input_cols,
-      const int n_channels, const bool padding_same,
+      int n_batches, int n_input_rows, int n_input_cols,
+      int n_channels, bool padding_same,
       const TIn* const weights,
       const TIn* const input,
       TOut* const output,
-      const int weight_col_stride,
-      const int weight_row_stride,
-      const int input_col_stride,
-      const int input_row_stride,
-      const int input_batch_stride,
-      const int output_col_stride,
-      const int output_row_stride,
-      const int output_batch_stride
+      int weight_col_stride,
+      int weight_row_stride,
+      int input_col_stride,
+      int input_row_stride,
+      int input_batch_stride,
+      int output_col_stride,
+      int output_row_stride,
+      int output_batch_stride
+    );
+
+    /** Create a new depthwise convolution engine with explicit per-edge padding and explicit strides.
+     *
+     * @param[in]  n_batches Number of batches in the tensors.
+     * @param[in]  n_input_rows Number of rows in input tensor.
+     * @param[in]  n_input_cols Number of columns in input tensor.
+     * @param[in]  n_channels Number of channels in input and output tensors.
+     * @param[in]  padding_top Padding to apply to top of input.
+     * @param[in]  padding_left Padding to apply to left of input.
+     * @param[in]  padding_bottom Padding to apply to bottom of input.
+     * @param[in]  padding_right Padding to apply to right of input.
+     * @param[in]  weights Pointer to Height x Width x Channel ordered weights.
+     * @param[in]  input Pointer to NHWC ordered input tensor.
+     * @param[out] output Pointer to NHWC ordered output tensor.
+     * @param[in]  weight_col_stride Stride between columns of the weights (if 0, defaults appropriately).
+     * @param[in]  weight_row_stride Stride between rows of the weights (if 0, defaults appropriately).
+     * @param[in]  input_col_stride Stride between columns of the input tensor (if 0, defaults appropriately).
+     * @param[in]  input_row_stride Stride between rows of the input tensor (if 0, defaults appropriately).
+     * @param[in]  input_batch_stride Stride between batches of the input tensor (if 0, defaults appropriately).
+     * @param[in]  output_col_stride Stride between columns of the output tensor (if 0, defaults appropriately).
+     * @param[in]  output_row_stride Stride between rows of the output tensor (if 0, defaults appropriately).
+     * @param[in]  output_batch_stride Stride between batches of the output tensor (if 0, defaults appropriately).
+     */
+    DepthwiseConvolution(
+      int n_batches, int n_input_rows, int n_input_cols,
+      int n_channels,
+      unsigned int padding_top,
+      unsigned int padding_left,
+      unsigned int padding_bottom,
+      unsigned int padding_right,
+      const TIn* const weights,
+      const TIn* const input,
+      TOut* const output,
+      int weight_col_stride,
+      int weight_row_stride,
+      int input_col_stride,
+      int input_row_stride,
+      int input_batch_stride,
+      int output_col_stride,
+      int output_row_stride,
+      int output_batch_stride
+    );
 
     // Cannot copy or move a DepthwiseConvolution.
@@ -160,14 +277,19 @@ class DepthwiseConvolution : public IDepthwiseConvolution
      * @param[in] dim_size Number of elements in the dimension (rows/columns)
      * @param[in] same_padding True if the padding is SAME, otherwise false.
      */
-    static int get_output_size(const int dim_size, const bool padding_same);
+    static int get_output_size(int dim_size, bool padding_same);
+    static int get_output_size(  // Explicit-padding overload: output rows/columns for given padding.
+      int dim_size,                 // Number of elements in the dimension (rows/columns).
+      unsigned int padding_before,  // Padding preceding the dimension (top or left).
+      unsigned int padding_after    // Padding following the dimension (bottom or right).
+    );
 
     /** Get the number of output rows/columns.
      *
      * @param[in] dim_size Number of elements in the dimension (rows/columns)
      * @param[in] same_padding True if the padding is SAME, otherwise false.
      */
-    int output_size(const int dim_size, const bool padding_same) const override
+    int output_size(int dim_size, bool padding_same) const override
     {
       return DepthwiseConvolution<
         OutputTileRows,
@@ -180,6 +302,23 @@ class DepthwiseConvolution : public IDepthwiseConvolution
       >::get_output_size(dim_size, padding_same);
     }
 
+    int output_size(  // Explicit-padding overload of output_size().
+        int dim_size,                 // Number of elements in the dimension (rows/columns).
+        unsigned int padding_before,  // Padding preceding the dimension.
+        unsigned int padding_after    // Padding following the dimension.
+    ) const override
+    {
+      return DepthwiseConvolution<  // Forward to the static helper of this same specialisation.
+        OutputTileRows,
+        OutputTileCols,
+        KernelRows,
+        KernelCols,
+        StrideRows,
+        StrideCols,
+        TIn, TOut
+      >::get_output_size(dim_size, padding_before, padding_after);
+    }
+
     /** Sets quantization offsets
      *
      * @param[in] input_offset   Input offset
@@ -198,31 +337,31 @@ class DepthwiseConvolution : public IDepthwiseConvolution
      * @param[in] start Start of the window of work to perform.
      * @param[in] stop End of the work to perform.
      */
-    void run(const unsigned int start, const unsigned int stop) override;
+    void run(unsigned int start, unsigned int stop) override;
 
   protected:
     /** Process a tile-row of the tensors.
      */
     static void process_tile_row(
-      const int n_channels,
+      int n_channels,
       const TIn* const weights,
       const int weight_row_stride,
       const int weight_col_stride,
       const TIn* const inptr,
-      const int in_row_stride,
-      const int in_col_stride,
+      int in_row_stride,
+      int in_col_stride,
       TOut* const outptr,
-      const int out_row_stride,
-      const int out_col_stride,
-      const int row_pad_in_top,
-      const int row_pad_in_left,
-      const int row_pad_in_bottom,
-      const int row_pad_out_bottom,
-      const int n_tiles,
-      const int n_input_cols,
-      const int n_output_cols,
-      const int input_offset,
-      const int weights_offset
+      int out_row_stride,
+      int out_col_stride,
+      int row_pad_in_top,
+      int row_pad_in_left,
+      int row_pad_in_bottom,
+      int row_pad_out_bottom,
+      int n_tiles,
+      int n_input_cols,
+      int n_output_cols,
+      int input_offset,
+      int weights_offset
     );
 
     // Determine the maximum (and minimum) padding values which can be applied
@@ -267,24 +406,24 @@ class DepthwiseConvolution : public IDepthwiseConvolution
      * @param[in] _out_pad_right Null cells at right of output tile.
      */
     typedef void (*TileFn)(
-      const int n_channels,
+      int n_channels,
       const TIn* const weights,
-      const int weight_row_stride,
-      const int weight_col_stride,
+      int weight_row_stride,
+      int weight_col_stride,
       const TIn* const inptr,
-      const int in_row_stride,
-      const int in_col_stride,
+      int in_row_stride,
+      int in_col_stride,
       TOut* const outptr,
-      const int out_row_stride,
-      const int out_col_stride,
-      const int _in_pad_top,
-      const int _in_pad_left,
-      const int _in_pad_bottom,
-      const int _in_pad_right,
-      const int _out_pad_bottom,
-      const int _out_pad_right,
-      const int _input_offset,
-      const int _weights_offset
+      int out_row_stride,
+      int out_col_stride,
+      int _in_pad_top,
+      int _in_pad_left,
+      int _in_pad_bottom,
+      int _in_pad_right,
+      int _out_pad_bottom,
+      int _out_pad_right,
+      int _input_offset,
+      int _weights_offset
     );
 
     /* Arrays of methods to process tensor tiles.
@@ -306,7 +445,7 @@ class DepthwiseConvolution : public IDepthwiseConvolution
     TOut* const _output;
     const int _n_batches, _n_input_rows, _n_input_cols, _n_channels,
               _n_output_rows, _n_output_cols, _n_tile_rows, _n_tile_cols;
-    const bool _padding_same;
+    const unsigned int _padding_top, _padding_left, _padding_bottom, _padding_right;
 
     // Stride information for a convolution instance
     const int _weight_col_stride, _weight_row_stride;
diff --git a/arm_compute/core/NEON/kernels/convolution/depthwise/impl_base.hpp b/arm_compute/core/NEON/kernels/convolution/depthwise/impl_base.hpp
index e262817a3c..b33f2768ad 100644
--- a/arm_compute/core/NEON/kernels/convolution/depthwise/impl_base.hpp
+++ b/arm_compute/core/NEON/kernels/convolution/depthwise/impl_base.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -41,6 +41,24 @@ namespace depthwise
 
 const unsigned int CHANNEL_BLOCK = 16;
 
+namespace  // NOTE(review): unnamed namespace in a header (.hpp); every including TU gets its own copy.
+{
+  inline int pad_along_dim(  // Total padding along one dimension; callers split it between the two edges.
+    const bool padding_same,  // When false, no padding is applied and 0 is returned.
+    const int kernel_dim,     // Kernel extent along this dimension.
+    const int stride_dim,     // Stride along this dimension.
+    const int input_dim       // Input extent along this dimension.
+  )
+  {
+    if (!padding_same)
+      return 0;  // Padding not requested.
+    if (input_dim % stride_dim)  // Input extent is not a whole multiple of the stride.
+      return std::max(kernel_dim - (input_dim % stride_dim), 0);  // Clamped so the result is never negative.
+    else  // Input extent divides evenly by the stride.
+      return std::max(kernel_dim - stride_dim, 0);  // Clamped so the result is never negative.
+  }
+}  // namespace
+
 template <int OTR, int OTC, int KR, int KC, int SR, int SC, typename TIn, typename TOut>
 int DepthwiseConvolution<OTR, OTC, KR, KC, SR, SC, TIn, TOut>::get_output_size(
   const int dim_size, const bool same_padding
@@ -49,6 +67,13 @@ int DepthwiseConvolution<OTR, OTC, KR, KC, SR, SC, TIn, TOut>::get_output_size(
   return iceildiv(dim_size - (same_padding ? 0 : (KC - 1)), SR);
 }
 
+template <int OTR, int OTC, int KR, int KC, int SR, int SC, typename TIn, typename TOut>
+int DepthwiseConvolution<OTR, OTC, KR, KC, SR, SC, TIn, TOut>::get_output_size(
+  const int dim_size, const unsigned int padding_before, const unsigned int padding_after
+)  // NOTE(review): KR and SR are used for whichever dimension is passed, although the
+{  // constructor also calls this for columns (left/right padding); confirm this is intended.
+  return iceildiv(dim_size + padding_before + padding_after - KR + 1, SR);  // int + unsigned int: the sum is evaluated as unsigned.
+}
 
 template <int OTR, int OTC, int KR, int KC, int SR, int SC, typename TIn, typename TOut>
 DepthwiseConvolution<OTR, OTC, KR, KC, SR, SC, TIn, TOut>::DepthwiseConvolution(
@@ -65,16 +90,54 @@ DepthwiseConvolution<OTR, OTC, KR, KC, SR, SC, TIn, TOut>::DepthwiseConvolution(
   const int output_col_stride,
   const int output_row_stride,
   const int output_batch_stride
+) : DepthwiseConvolution<OTR, OTC, KR, KC, SR, SC, TIn, TOut>(
+  n_batches, n_input_rows, n_input_cols,
+  n_channels,
+  pad_along_dim(padding_same, KR, SR, n_input_rows) / 2,  /* top padding */
+  pad_along_dim(padding_same, KC, SC, n_input_cols) / 2,  /* left padding */
+  iceildiv(pad_along_dim(padding_same, KR, SR, n_input_rows), 2),  /* bottom padding */
+  iceildiv(pad_along_dim(padding_same, KC, SC, n_input_cols), 2),  /* right padding */
+  weights, input, output,
+  weight_col_stride, weight_row_stride,
+  input_col_stride, input_row_stride, input_batch_stride,
+  output_col_stride, output_row_stride, output_batch_stride
+)
+{
+}
+
+
+template <int OTR, int OTC, int KR, int KC, int SR, int SC, typename TIn, typename TOut>
+DepthwiseConvolution<OTR, OTC, KR, KC, SR, SC, TIn, TOut>::DepthwiseConvolution(
+  const int n_batches, const int n_input_rows, const int n_input_cols,
+  const int n_channels,
+  const unsigned int padding_top,
+  const unsigned int padding_left,
+  const unsigned int padding_bottom,
+  const unsigned int padding_right,
+  const TIn* const weights,
+  const TIn* const input,
+  TOut* const output,
+  const int weight_col_stride,
+  const int weight_row_stride,
+  const int input_col_stride,
+  const int input_row_stride,
+  const int input_batch_stride,
+  const int output_col_stride,
+  const int output_row_stride,
+  const int output_batch_stride
 ) : _weights(weights), _input(input), _output(output),
     _n_batches(n_batches),
     _n_input_rows(n_input_rows),
     _n_input_cols(n_input_cols),
     _n_channels(n_channels),
-    _n_output_rows(get_output_size(n_input_rows, padding_same)),
-    _n_output_cols(get_output_size(n_input_cols, padding_same)),
+    _n_output_rows(get_output_size(n_input_rows, padding_top, padding_bottom)),
+    _n_output_cols(get_output_size(n_input_cols, padding_left, padding_right)),
     _n_tile_rows(iceildiv(_n_output_rows, output_tile_rows)),
     _n_tile_cols(iceildiv(_n_output_cols, output_tile_cols)),
-    _padding_same(padding_same),
+    _padding_top(padding_top),
+    _padding_left(padding_left),
+    _padding_bottom(padding_bottom),
+    _padding_right(padding_right),
     _weight_col_stride(weight_col_stride ? weight_col_stride : _n_channels),
     _weight_row_stride(weight_row_stride ? weight_row_stride : KC * _weight_col_stride),
     _input_col_stride(input_col_stride ? input_col_stride : _n_channels),
@@ -113,10 +176,8 @@ void DepthwiseConvolution<OTR, OTC, KR, KC, SR, SC, TIn, TOut>::run(
   const auto stop_channel = std::min<unsigned int>(_n_channels, CHANNEL_BLOCK * stop);
 
   // Compute top and bottom padding for input and output
-  const int input_pad_top = _padding_same ?
-    ((_n_output_rows - 1)*stride_rows + kernel_rows - _n_input_rows) / 2 : 0;
-  const int input_pad_left = _padding_same ?
-    ((_n_output_cols - 1)*stride_cols + kernel_cols - _n_input_cols) / 2 : 0;
+  const int input_pad_top = _padding_top;
+  const int input_pad_left = _padding_left;
   constexpr int tile_overlap = kernel_rows - stride_rows;
 
   // Perform the convolution by calling `process_tile_row` for each tile row in
-- 
cgit v1.2.1