From eb027e933758b1e749f0f6bd2817ee8979ef903c Mon Sep 17 00:00:00 2001
From: Pablo Tello
Date: Tue, 25 Sep 2018 16:01:35 +0100
Subject: COMPMID-1600: Reduce number of tile specialisations.

Change-Id: I4d06eca9404ea6d3df9d0ca52f5d6f5421ab7116
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/150117
Tested-by: bsgcomp
Reviewed-by: Georgios Pinitas
---
 .../convolution/winograd/transforms/input.hpp | 98 ++++++++++++++--
 .../winograd/winograd_input_transform.hpp | 127 +++++++++++++++++----
 2 files changed, 197 insertions(+), 28 deletions(-)

(limited to 'arm_compute/core/NEON')

diff --git a/arm_compute/core/NEON/kernels/convolution/winograd/transforms/input.hpp b/arm_compute/core/NEON/kernels/convolution/winograd/transforms/input.hpp
index 473a13c3b0..b813bbb25c 100644
--- a/arm_compute/core/NEON/kernels/convolution/winograd/transforms/input.hpp
+++ b/arm_compute/core/NEON/kernels/convolution/winograd/transforms/input.hpp
@@ -155,15 +155,15 @@ namespace winograd
         T* const outptr = matrix_base + tile_j*matrix_row_stride;
 
         // Apply the specific tile processing function
-        const int f_pad_top = iceildiv(pad_top, 2);
-        const int f_pad_left = iceildiv(t_pad_left, 2);
-        tile_fns[f_pad_top][f_pad_left][pad_bottom][t_pad_right](
+        const typename Tiles::TileFn tilefn = Tiles::get_tile_specialization(
+          pad_top, t_pad_left, pad_bottom, t_pad_right
+        );
+
+        tilefn(
           n_channels,
-          input_base_col,
-          input_row_stride,
-          input_col_stride,
-          outptr,
-          matrix_stride
+          input_base_col, input_row_stride, input_col_stride,
+          outptr, matrix_stride,
+          pad_top, t_pad_left, pad_bottom, t_pad_right
         );
       }
     }
@@ -264,4 +264,86 @@ namespace winograd
       matrix_stride, matrix_batch_stride, matrix_row_stride
     );
   }
+
+  template <int KernelRows, int KernelCols, int InnerTileRows, int InnerTileCols, typename T>
+  typename InputTransformImplTiles<KernelRows, KernelCols, InnerTileRows, InnerTileCols, T>::TileFn
+  InputTransformImplTiles<KernelRows, KernelCols, InnerTileRows, InnerTileCols, T>::
+  get_tile_specialization(
+    const int pad_top,
+    const int pad_left,
+    const int pad_bottom,
+    const int pad_right
+  )
+  {
+    if (!(pad_top || pad_left || pad_bottom || pad_right))
+    {
+      // No padding, return unpadded specialisation
+      return tilefn_unpadded;
+    }
+    else if (pad_top && !(pad_left || pad_bottom || pad_right))
+    {
+      // Top padding only
+      const int index = (pad_top - min_pad_top) / (InnerTileRows - overlap_rows);
+      return tilefn_top_padded[index];
+    }
+    else if (!(pad_top) && pad_left && !(pad_bottom || pad_right))
+    {
+      // Left padding only
+      const int index = (pad_left - min_pad_left) / (InnerTileCols - overlap_cols);
+      return tilefn_left_padded[index];
+    }
+    else if (!(pad_top || pad_left) && pad_bottom && !(pad_right))
+    {
+      // Bottom padding only
+      return tilefn_bottom_padded[pad_bottom - 1];
+    }
+    else if (!(pad_top || pad_left || pad_bottom) && pad_right)
+    {
+      // Right padding only
+      return tilefn_right_padded[pad_right - 1];
+    }
+    else
+    {
+      // Combination of paddings, return an unspecialised method
+      return tilefn_generic;
+    }
+  }
+
+  template <int KernelCols, int InnerTileCols, typename T>
+  typename InputTransformImplTiles<1, KernelCols, 1, InnerTileCols, T>::TileFn
+  InputTransformImplTiles<1, KernelCols, 1, InnerTileCols, T>::
+  get_tile_specialization(
+    const int pad_top,
+    const int pad_left,
+    const int pad_bottom,
+    const int pad_right
+  )
+  {
+    (void) pad_top;
+    (void) pad_bottom;
+
+    if (!(pad_left || pad_right))
+    {
+      // No padding, return unpadded specialisation
+      return tilefn_unpadded;
+    }
+    else if (pad_left && !pad_right)
+    {
+      // Left padding only
+      const int index = (pad_left - min_pad_left) / (InnerTileCols - overlap_cols);
+      return tilefn_left_padded[index];
+    }
+    else if (!pad_left && pad_right)
+    {
+      // Right padding only
+      return tilefn_right_padded[pad_right - 1];
+    }
+    else
+    {
+      // Combination of paddings, return an unspecialised method
+      return tilefn_generic;
+    }
+  }
 }
+
+
diff --git a/arm_compute/core/NEON/kernels/convolution/winograd/winograd_input_transform.hpp b/arm_compute/core/NEON/kernels/convolution/winograd/winograd_input_transform.hpp
index abcda53534..995554d7f2 100644
--- a/arm_compute/core/NEON/kernels/convolution/winograd/winograd_input_transform.hpp
+++ b/arm_compute/core/NEON/kernels/convolution/winograd/winograd_input_transform.hpp
@@ -30,6 +30,109 @@ namespace winograd
 namespace
 {
 
+template <int KernelRows, int KernelCols, int InnerTileRows, int InnerTileCols, typename T>
+class InputTransformImplTiles
+{
+  public:
+    /** Method to transform a tile of the input tensor into the Winograd domain. */
+    typedef void (*TileFn)(
+      const int n_channels,  /** @param[in] Number of channels in the tensor. */
+      const T* const inptr_base,  /** @param[in] Pointer to the base of the input tile. */
+      const int input_row_stride,  /** @param[in] Stride between rows of the input tensor. */
+      const int input_col_stride,  /** @param[in] Stride between columns of the input tensor. */
+      T* const mptr_base,  /** @param[out] Base pointer to transformed input matrices. */
+      const int matrix_stride,  /** @param[in] Stride between matrices in the input space. */
+      const int _pad_top,  /** @param[in] Top padding for unspecialised tiles. */
+      const int _pad_left,  /** @param[in] Left padding for unspecialised tiles. */
+      const int _pad_bottom,  /** @param[in] Bottom padding for unspecialised tiles. */
+      const int _pad_right  /** @param[in] Right padding for unspecialised tiles. */
+    );
+
+    static TileFn get_tile_specialization(
+      const int pad_top,
+      const int pad_left,
+      const int pad_bottom,
+      const int pad_right
+    );
+
+    // Tile overlaps
+    static constexpr int overlap_rows = KernelRows - 1;
+    static constexpr int overlap_cols = KernelCols - 1;
+
+  private:
+
+    // Maximum padding and number of distinct paddings
+    static constexpr int max_pad_top = KernelRows / 2;
+    static constexpr int min_pad_top = KernelRows % (InnerTileRows - overlap_rows);
+    static constexpr int n_pad_top = iceildiv(max_pad_top, InnerTileRows - overlap_rows);
+
+    static constexpr int max_pad_left = KernelCols / 2;
+    static constexpr int min_pad_left = KernelCols % (InnerTileCols - overlap_cols);
+    static constexpr int n_pad_left = iceildiv(max_pad_left, InnerTileCols - overlap_cols);
+
+    static constexpr int n_pad_bottom = InnerTileRows;
+    static constexpr int n_pad_right = InnerTileCols;
+
+    // Pointers to methods implementing a generically padded tile and a totally unpadded tile.
+    static const TileFn tilefn_generic;  /** Generic tile processing function. */
+    static const TileFn tilefn_unpadded;  /** Tile processor for unpadded tiles. */
+
+    // Arrays of methods covering tiles which are padded only on a single side.
+    static const TileFn tilefn_top_padded[n_pad_top];
+    static const TileFn tilefn_left_padded[n_pad_left];
+    static const TileFn tilefn_bottom_padded[n_pad_bottom];
+    static const TileFn tilefn_right_padded[n_pad_right];
+};
+
+
+template < int KernelCols, int InnerTileCols, typename T>
+class InputTransformImplTiles<1, KernelCols, 1, InnerTileCols, T>
+{
+  public:
+    /** Method to transform a tile of the input tensor into the Winograd domain. */
+    typedef void (*TileFn)(
+      const int n_channels,  /** @param[in] Number of channels in the tensor. */
+      const T* const inptr_base,  /** @param[in] Pointer to the base of the input tile. */
+      const int input_row_stride,  /** @param[in] Stride between rows of the input tensor. */
+      const int input_col_stride,  /** @param[in] Stride between columns of the input tensor. */
+      T* const mptr_base,  /** @param[out] Base pointer to transformed input matrices. */
+      const int matrix_stride,  /** @param[in] Stride between matrices in the input space. */
+      const int _pad_top,  /** @param[in] Top padding for unspecialised tiles. */
+      const int _pad_left,  /** @param[in] Left padding for unspecialised tiles. */
+      const int _pad_bottom,  /** @param[in] Bottom padding for unspecialised tiles. */
+      const int _pad_right  /** @param[in] Right padding for unspecialised tiles. */
+    );
+
+    static TileFn get_tile_specialization(
+      const int pad_top,
+      const int pad_left,
+      const int pad_bottom,
+      const int pad_right
+    );
+
+    // Tile overlaps
+    static constexpr int overlap_rows = 0;
+    static constexpr int overlap_cols = KernelCols - 1;
+
+  private:
+    // Maximum padding and number of distinct paddings
+    static constexpr int max_pad_left = KernelCols / 2;
+    static constexpr int min_pad_left = KernelCols % (InnerTileCols - overlap_cols);
+    static constexpr int n_pad_left = iceildiv(max_pad_left, InnerTileCols - overlap_cols);
+
+    static constexpr int n_pad_right = InnerTileCols;
+
+    // Pointers to methods implementing a generically padded tile and a totally unpadded tile.
+    static const TileFn tilefn_generic;  /** Generic tile processing function. */
+    static const TileFn tilefn_unpadded;  /** Tile processor for unpadded tiles. */
+
+    // Arrays of methods covering tiles which are padded only on a single side.
+    static const TileFn tilefn_left_padded[n_pad_left];
+    static const TileFn tilefn_right_padded[n_pad_right];
+};
+
+
+
 template
 class InputTransformImpl
 {
@@ -69,29 +172,13 @@ class InputTransformImpl
       const int n_cols
     );
 
-    // Tile overlaps
-    static constexpr int overlap_rows = KernelRows - 1;
-    static constexpr int overlap_cols = KernelCols - 1;
-
-    // Maximum padding and number of distinct paddings
-    static constexpr int max_pad_top = KernelRows / 2;
-    static constexpr int n_pad_top = 1 + iceildiv(max_pad_top, InnerTileRows - overlap_rows);
+    using Tiles = InputTransformImplTiles<KernelRows, KernelCols, InnerTileRows, InnerTileCols, T>;
 
-    static constexpr int max_pad_left = KernelCols / 2;
-    static constexpr int n_pad_left = 1 + iceildiv(max_pad_left, InnerTileCols - overlap_cols);
+    static constexpr int overlap_rows = Tiles::overlap_rows;
+    static constexpr int overlap_cols = Tiles::overlap_cols;
 
-    static constexpr int n_pad_bottom = InnerTileRows;
-    static constexpr int n_pad_right = InnerTileCols;
-
-    /** Process a single tile of the input tensor. */
-    template
-    static void process_tile(int, const T*, int, int, T*, int);
-    // Array of methods to transform tiles of the input tensor.
-    typedef void (*TileFn)(int, const T*, int, int, T*, int);
-    static const TileFn
-      tile_fns[n_pad_top][n_pad_left][n_pad_bottom][n_pad_right];
-};
+
+ };
 
 
 template
-- 
cgit v1.2.1