From 4074c995d2a88684fd4a9d1aa36d51de56bb8dab Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Tue, 30 Jan 2018 18:13:46 +0000 Subject: COMPMID-873: Integrate RSH NEON Depthwise Convolution routine Change-Id: Ida1e9a836bc518bfe5563e16bf7f92bde5fc13f7 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/118472 Tested-by: Jenkins Reviewed-by: Pablo Tello --- .../kernels/convolution/depthwise/depthwise.hpp | 209 +++++++++++++++++++++ 1 file changed, 209 insertions(+) create mode 100644 arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp (limited to 'arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp') diff --git a/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp b/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp new file mode 100644 index 0000000000..80b0614015 --- /dev/null +++ b/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp @@ -0,0 +1,209 @@ +/* + * Copyright (c) 2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#pragma once + +namespace depthwise +{ + +class IDepthwiseConvolution +{ +public: + virtual ~IDepthwiseConvolution() = default; + virtual int output_size(const int dim_size, const bool padding_same) const = 0; + virtual unsigned int get_window(void) const = 0; + virtual void run(const unsigned int start, const unsigned int stop) = 0; +}; + +template < + int OutputTileRows, + int OutputTileCols, + int KernelRows, + int KernelCols, + int StrideRows, + int StrideCols, + typename TIn, + typename TOut +> +class DepthwiseConvolution : public IDepthwiseConvolution +{ + public: + typedef TIn InputType; + typedef TOut OutputType; + + // Information about the specific convolution instance + static constexpr int output_tile_rows = OutputTileRows; + static constexpr int output_tile_cols = OutputTileCols; + static constexpr int kernel_rows = KernelRows; + static constexpr int kernel_cols = KernelCols; + static constexpr int stride_rows = StrideRows; + static constexpr int stride_cols = StrideCols; + static constexpr int inner_tile_rows = stride_rows * output_tile_rows + kernel_rows - 1; + static constexpr int inner_tile_cols = stride_cols * output_tile_cols + kernel_cols - 1; + + /** Create a new depthwise convolution engine. + * + * @param[in] n_batches Number of batches tensors. + * @param[in] n_input_rows Number of rows in input tensor. + * @param[in] n_input_cols Number of columns in input tensor. + * @param[in] n_channels Number of channels in input and output tensors. + * @param[in] padding_same True if padding is SAME, else VALID. + * @param[in] weights Pointer to Height x Width x Channel ordered weights. + * @param[in] input Pointer to NHWC ordered input tensor. + * @param[output] output Pointer to NHWC ordered output tensor. + */ + DepthwiseConvolution( + const int n_batches, const int n_input_rows, const int n_input_cols, + const int n_channels, const bool padding_same, + const TIn* const weights, + const TIn* const input, + TOut* const output + ); + + // Cannot copy or move a DepthwiseConvolution. + DepthwiseConvolution(DepthwiseConvolution&) = delete; + DepthwiseConvolution operator=(DepthwiseConvolution&) = delete; + + /** Get the number of output rows/columns. + * + * @param[in] dim_size Number of elements in the dimension (rows/columns) + * @param[in] same_padding True if the padding is SAME, otherwise false. + */ + static int get_output_size(const int dim_size, const bool padding_same); + + /** Get the number of output rows/columns. + * + * @param[in] dim_size Number of elements in the dimension (rows/columns) + * @param[in] same_padding True if the padding is SAME, otherwise false. + */ + int output_size(const int dim_size, const bool padding_same) const override + { + return DepthwiseConvolution::get_output_size(dim_size, padding_same); + } + + /** Get the window of work to be performed by an instance of the operator. + */ + unsigned int get_window(void) const override; + + /** Perform a portion of the work associated with the operator. + * + * Will perform the window of work described by $[start, stop)$. + * + * @param[in] start Start of the window of work to perform. + * @param[in] stop End of the work to perform. + */ + void run(const unsigned int start, const unsigned int stop) override; + + protected: + /** Process a tile-row of the tensors. + */ + static void process_tile_row( + const int n_channels, + const TIn* const weights, + const TIn* const inptr, + const int in_row_stride, + const int in_col_stride, + TOut* const outptr, + const int out_row_stride, + const int out_col_stride, + const int row_pad_in_top, + const int row_pad_in_left, + const int row_pad_in_bottom, + const int row_pad_out_bottom, + const int n_tiles, + const int n_input_cols, + const int n_output_cols + ); + + /** Process a single tile of the tensors. + * + * @param[in] n_channels Number of channels. + * @param[in] weights Pointer to Height x Width x Channels ordered weights. + * @param[in] inptr Pointer to the top-left unpadded value of the tile. + * @param[in] in_row_stride Stride between rows of the input tensor. + * @param[in] in_col_stride Stride between columns of the input tensor. + * @param[out] outptr Pointer to the top-left output value for the tile. + * @param[in] out_row_stride Stride between rows of the output tensor. + * @param[in] out_col_stride Stride between columns of the output tensor. + */ + template < + int in_pad_top, int in_pad_left, int in_pad_bottom, int in_pad_right, + int out_pad_bottom, int out_pad_right + > + static void process_tile( + const int n_channels, + const TIn* const weights, + const TIn* const inptr, + const int in_row_stride, + const int in_col_stride, + TOut* const outptr, + const int out_row_stride, + const int out_col_stride + ); + + // Type of a pointer to a `process_tile` instance + typedef void (*TileFn)( + const int, + const TIn* const, + const TIn* const, const int, const int, + TOut* const, const int, const int + ); + + // Determine the maximum padding values which can be applied to tiles of + // the tensors involved in this class of convolution. + static constexpr int max_in_pad_top = 2; + static constexpr int max_in_pad_left = 2; + static constexpr int max_in_pad_bottom = inner_tile_rows - 1; + static constexpr int max_in_pad_right = inner_tile_cols - 1; + static constexpr int max_out_pad_bottom = output_tile_rows; + static constexpr int max_out_pad_right = output_tile_cols; + + /** Array of methods to process tensor tiles. + * + * Allows dynamic dispatch to specialized implementations based on + * different padding configurations. + */ + static const TileFn tile_fns[ + max_in_pad_top][max_in_pad_left][max_in_pad_bottom][max_in_pad_right][ + max_out_pad_bottom][max_out_pad_right + ]; + + private: + // Member variables of instances of a convolution engine. + const TIn* const _weights; + const TIn* const _input; + TOut* const _output; + const int _n_batches, _n_input_rows, _n_input_cols, _n_channels, + _n_output_rows, _n_output_cols, _n_tile_rows, _n_tile_cols; + const bool _padding_same; +}; + +} // namespace depthwise -- cgit v1.2.1