From 4074c995d2a88684fd4a9d1aa36d51de56bb8dab Mon Sep 17 00:00:00 2001
From: Georgios Pinitas <georgios.pinitas@arm.com>
Date: Tue, 30 Jan 2018 18:13:46 +0000
Subject: COMPMID-873: Integrate RSH NEON Depthwise Convolution routine

Change-Id: Ida1e9a836bc518bfe5563e16bf7f92bde5fc13f7
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/118472
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Pablo Tello <pablo.tello@arm.com>
---
 .../kernels/convolution/depthwise/depthwise.hpp    | 209 +++++++++++++++++++++
 1 file changed, 209 insertions(+)
 create mode 100644 arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp

(limited to 'arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp')

diff --git a/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp b/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp
new file mode 100644
index 0000000000..80b0614015
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp
@@ -0,0 +1,209 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#pragma once
+
+namespace depthwise
+{
+
+class IDepthwiseConvolution  // Abstract interface implemented by the DepthwiseConvolution template.
+{
+public:
+    virtual ~IDepthwiseConvolution() = default;  // Virtual so implementations can be destroyed via the interface.
+    virtual int output_size(int dim_size, bool padding_same) const = 0;  // Number of output rows/columns for a dimension.
+    virtual unsigned int get_window() const = 0;  // Window of work to be performed by the operator.
+    virtual void run(unsigned int start, unsigned int stop) = 0;  // Perform the window of work [start, stop).
+};
+
+template <
+  int OutputTileRows,
+  int OutputTileCols,
+  int KernelRows,
+  int KernelCols,
+  int StrideRows,
+  int StrideCols,
+  typename TIn,
+  typename TOut
+>
+class DepthwiseConvolution : public IDepthwiseConvolution
+{
+  public:
+    typedef TIn InputType;
+    typedef TOut OutputType;
+
+    // Compile-time description of this convolution instance (mirrors the template arguments).
+    static constexpr int output_tile_rows = OutputTileRows;
+    static constexpr int output_tile_cols = OutputTileCols;
+    static constexpr int kernel_rows = KernelRows;
+    static constexpr int kernel_cols = KernelCols;
+    static constexpr int stride_rows = StrideRows;
+    static constexpr int stride_cols = StrideCols;
+    static constexpr int inner_tile_rows = stride_rows * output_tile_rows + kernel_rows - 1;
+    static constexpr int inner_tile_cols = stride_cols * output_tile_cols + kernel_cols - 1;
+
+    /** Create a new depthwise convolution engine.
+     *
+     * @param[in] n_batches Number of batches in the tensors.
+     * @param[in] n_input_rows Number of rows in input tensor.
+     * @param[in] n_input_cols Number of columns in input tensor.
+     * @param[in] n_channels Number of channels in input and output tensors.
+     * @param[in] padding_same True if padding is SAME, else VALID.
+     * @param[in] weights Pointer to Height x Width x Channel ordered weights.
+     * @param[in] input Pointer to NHWC ordered input tensor.
+     * @param[out] output Pointer to NHWC ordered output tensor.
+     */
+    DepthwiseConvolution(
+      const int n_batches, const int n_input_rows, const int n_input_cols,
+      const int n_channels, const bool padding_same,
+      const TIn* const weights,
+      const TIn* const input,
+      TOut* const output
+    );
+
+    // Cannot copy or move a DepthwiseConvolution (deleted copy operations also suppress the implicit moves).
+    DepthwiseConvolution(const DepthwiseConvolution&) = delete;
+    DepthwiseConvolution& operator=(const DepthwiseConvolution&) = delete;
+
+    /** Get the number of output rows/columns.
+     *
+     * @param[in] dim_size Number of elements in the dimension (rows/columns)
+     * @param[in] padding_same True if the padding is SAME, otherwise false.
+     */
+    static int get_output_size(const int dim_size, const bool padding_same);
+
+    /** Get the number of output rows/columns; forwards to the static get_output_size().
+     *
+     * @param[in] dim_size Number of elements in the dimension (rows/columns)
+     * @param[in] padding_same True if the padding is SAME, otherwise false.
+     */
+    int output_size(const int dim_size, const bool padding_same) const override
+    {
+        return DepthwiseConvolution<OutputTileRows,
+                                    OutputTileCols,
+                                    KernelRows,
+                                    KernelCols,
+                                    StrideRows,
+                                    StrideCols,
+                                    TIn,
+                                    TOut>::get_output_size(dim_size, padding_same);
+    }
+
+    /** Get the window of work to be performed by an instance of the operator.
+     */
+    unsigned int get_window(void) const override;
+
+    /** Perform a portion of the work associated with the operator.
+     *
+     * Will perform the window of work described by the half-open range [start, stop).
+     *
+     * @param[in] start Start of the window of work to perform.
+     * @param[in] stop End of the work to perform.
+     */
+    void run(const unsigned int start, const unsigned int stop) override;
+
+  protected:
+    /** Process a tile-row of the tensors.
+     */
+    static void process_tile_row(
+      const int n_channels,
+      const TIn* const weights,
+      const TIn* const inptr,
+      const int in_row_stride,
+      const int in_col_stride,
+      TOut* const outptr,
+      const int out_row_stride,
+      const int out_col_stride,
+      const int row_pad_in_top,
+      const int row_pad_in_left,
+      const int row_pad_in_bottom,
+      const int row_pad_out_bottom,
+      const int n_tiles,
+      const int n_input_cols,
+      const int n_output_cols
+    );
+
+    /** Process a single tile of the tensors; padding amounts are template arguments.
+     *
+     * @param[in] n_channels Number of channels.
+     * @param[in] weights Pointer to Height x Width x Channels ordered weights.
+     * @param[in] inptr Pointer to the top-left unpadded value of the tile.
+     * @param[in] in_row_stride Stride between rows of the input tensor.
+     * @param[in] in_col_stride Stride between columns of the input tensor.
+     * @param[out] outptr Pointer to the top-left output value for the tile.
+     * @param[in] out_row_stride Stride between rows of the output tensor.
+     * @param[in] out_col_stride Stride between columns of the output tensor.
+     */
+    template <
+      int in_pad_top, int in_pad_left, int in_pad_bottom, int in_pad_right,
+      int out_pad_bottom, int out_pad_right
+    >
+    static void process_tile(
+      const int n_channels,
+      const TIn* const weights,
+      const TIn* const inptr,
+      const int in_row_stride,
+      const int in_col_stride,
+      TOut* const outptr,
+      const int out_row_stride,
+      const int out_col_stride
+    );
+
+    // Type of a pointer to a `process_tile` instance (same parameter list as process_tile)
+    typedef void (*TileFn)(
+      const int,
+      const TIn* const,
+      const TIn* const, const int, const int,
+      TOut* const, const int, const int
+    );
+
+    // Determine the maximum padding values which can be applied to tiles of
+    // the tensors involved in this class of convolution; these are the extents of `tile_fns`.
+    static constexpr int max_in_pad_top = 2;
+    static constexpr int max_in_pad_left = 2;
+    static constexpr int max_in_pad_bottom = inner_tile_rows - 1;
+    static constexpr int max_in_pad_right = inner_tile_cols - 1;
+    static constexpr int max_out_pad_bottom = output_tile_rows;
+    static constexpr int max_out_pad_right = output_tile_cols;
+
+    /** Array of methods to process tensor tiles.
+     *
+     * Allows dynamic dispatch to specialized implementations based on
+     * different padding configurations.
+     */
+    static const TileFn tile_fns[
+      max_in_pad_top][max_in_pad_left][max_in_pad_bottom][max_in_pad_right][
+      max_out_pad_bottom][max_out_pad_right
+    ];
+
+  private:
+    // Member variables of instances of a convolution engine (all const, fixed at construction).
+    const TIn* const _weights;
+    const TIn* const _input;
+    TOut* const _output;
+    const int _n_batches, _n_input_rows, _n_input_cols, _n_channels,
+              _n_output_rows, _n_output_cols, _n_tile_rows, _n_tile_cols;
+    const bool _padding_same;
+};
+
+}  // namespace depthwise
-- 
cgit v1.2.1