From bda6e4b51bc4045c97100bb9d562164ba7c6c28f Mon Sep 17 00:00:00 2001 From: Pablo Tello Date: Wed, 22 Aug 2018 11:40:33 +0100 Subject: COMPMID-1247:Integrate kernel size 1x3 & 3x1 support in NEWinogradLayer. Change-Id: I6fe198881230e49864c841a3b2366ccf2a9247f9 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/145210 Tested-by: Jenkins Reviewed-by: Georgios Pinitas --- .../kernels/NEWinogradConvolutionLayerKernel.h | 28 ++++++++++++++++++---- .../core/NEON/kernels/convolution/common/utils.hpp | 2 +- .../convolution/winograd/transforms/input.hpp | 22 +++++++++++++++++ .../convolution/winograd/transforms/output.hpp | 22 +++++++++++++++++ .../kernels/convolution/winograd/winograd_gemm.hpp | 24 +++++++++++++++---- 5 files changed, 88 insertions(+), 10 deletions(-) (limited to 'arm_compute/core/NEON') diff --git a/arm_compute/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h b/arm_compute/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h index 9cdd69a70a..c71c105d92 100644 --- a/arm_compute/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h @@ -159,7 +159,7 @@ public: void run(const Window &window, const ThreadInfo &info) override; /** Winograd base kernel */ - using WinogradBase = winograd::WinogradGEMM; + using WinogradBase = winograd::WinogradGEMM; /** Winograd convolution kernel */ using WinogradConv = typename WinogradBase::template Convolution; @@ -360,6 +360,21 @@ template class INEWinogradLayerTransformWeightsKernel : public INEKernel { public: + /** Allow instances of this class to be copied */ + INEWinogradLayerTransformWeightsKernel(const INEWinogradLayerTransformWeightsKernel &) = default; + /** Allow instances of this class to be copied */ + INEWinogradLayerTransformWeightsKernel &operator=(const INEWinogradLayerTransformWeightsKernel &) = default; + /** Allow instances of this class to be moved */ + 
INEWinogradLayerTransformWeightsKernel(INEWinogradLayerTransformWeightsKernel &&) = default; + /** Allow instances of this class to be moved */ + INEWinogradLayerTransformWeightsKernel &operator=(INEWinogradLayerTransformWeightsKernel &&) = default; + + INEWinogradLayerTransformWeightsKernel() + { + } + virtual ~INEWinogradLayerTransformWeightsKernel() + { + } /** Determine how much memory (in units of T) to allocate for the * transformed weights. * @@ -388,9 +403,14 @@ public: virtual void configure(const ITensor *weights_hwio, ITensor *output, const int matrix_stride, const int num_output_channels, const int num_input_channels) = 0; - virtual ~INEWinogradLayerTransformWeightsKernel() - { - } + /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradLayerTransformWeightsKernel + * + * @param[in] input First tensor input info. Data types supported: F32. + * @param[in] weights Weights tensor info. Data types supported: same as @p input. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *weights); }; /** NEON kernel to perform Winograd weights transform. 
*/ diff --git a/arm_compute/core/NEON/kernels/convolution/common/utils.hpp b/arm_compute/core/NEON/kernels/convolution/common/utils.hpp index 5f42719119..25bfa332fb 100644 --- a/arm_compute/core/NEON/kernels/convolution/common/utils.hpp +++ b/arm_compute/core/NEON/kernels/convolution/common/utils.hpp @@ -26,7 +26,7 @@ void PrintMatrix(const float *const m, const int M, const int N, const int row_stride); -inline int iceildiv(const int a, const int b) +constexpr inline int iceildiv(const int a, const int b) { return (a + b - 1) / b; } diff --git a/arm_compute/core/NEON/kernels/convolution/winograd/transforms/input.hpp b/arm_compute/core/NEON/kernels/convolution/winograd/transforms/input.hpp index 13218030d2..369c2ff48f 100644 --- a/arm_compute/core/NEON/kernels/convolution/winograd/transforms/input.hpp +++ b/arm_compute/core/NEON/kernels/convolution/winograd/transforms/input.hpp @@ -50,6 +50,22 @@ namespace winograd const int matrix_row_stride /** Stride within matrices. */ ) { + // If an Nx1 kernel then transpose and redirect to the 1xN implementation + if (kernel_cols == 1) + { + WinogradGEMM:: + template InputTransform::execute( + input, + n_batches, in_batch_stride, + n_cols, in_col_stride, + n_rows, in_row_stride, + n_channels, padding, + tile_N, tile_M, + output, matrix_stride, matrix_batch_stride, matrix_row_stride + ); + return; + } + // Compute the padding required on each edge of the image const int pad_top = (padding == PADDING_SAME) ? (kernel_rows - 1) / 2 : 0; const int pad_left = (padding == PADDING_SAME) ? (kernel_cols - 1) / 2 : 0; @@ -111,6 +127,12 @@ namespace winograd const int n_cols ) { + if (kernel_cols == 1) + { + // If an Nx1 implementation then this should never be reached. 
+ return; + } + constexpr int tile_overlap = kernel_cols - 1; // Loop over columns of tiles diff --git a/arm_compute/core/NEON/kernels/convolution/winograd/transforms/output.hpp b/arm_compute/core/NEON/kernels/convolution/winograd/transforms/output.hpp index 700ca76c68..6ed146bf85 100644 --- a/arm_compute/core/NEON/kernels/convolution/winograd/transforms/output.hpp +++ b/arm_compute/core/NEON/kernels/convolution/winograd/transforms/output.hpp @@ -45,6 +45,22 @@ namespace winograd T* const output ) { + // If an Nx1 kernel then transpose and redirect to the 1xN implementation. + if (kernel_cols == 1) + { + WinogradGEMM:: + template OutputTransform::execute( + n_batches, + output_batch_stride, + n_cols, output_col_stride, + n_rows, output_row_stride, + n_channels, + matrix_base, matrix_stride, matrix_row_stride, + biases, output + ); + return; + } + // Compute the number of tiles and hence the padding required on the bottom // and right of the image. const int tile_M = iceildiv(n_rows, output_tile_rows); @@ -98,6 +114,12 @@ namespace winograd const int row_pad_right ) { + if (kernel_cols == 1) + { + // If an Nx1 implementation then this should never be reached. 
+ return; + } + // Loop over columns of tiles for (int tile_j = 0; tile_j < tile_N; tile_j++) { diff --git a/arm_compute/core/NEON/kernels/convolution/winograd/winograd_gemm.hpp b/arm_compute/core/NEON/kernels/convolution/winograd/winograd_gemm.hpp index bc067fd07a..7098fc48a1 100644 --- a/arm_compute/core/NEON/kernels/convolution/winograd/winograd_gemm.hpp +++ b/arm_compute/core/NEON/kernels/convolution/winograd/winograd_gemm.hpp @@ -49,8 +49,8 @@ class WinogradGEMM static constexpr int output_tile_cols = OutputTileCols; static constexpr int kernel_rows = KernelRows; static constexpr int kernel_cols = KernelCols; - static constexpr int inner_tile_rows = output_tile_rows + kernel_rows - 1; // TODO Check - static constexpr int inner_tile_cols = output_tile_cols + kernel_cols - 1; // TODO Check + static constexpr int inner_tile_rows = output_tile_rows + kernel_rows - 1; + static constexpr int inner_tile_cols = output_tile_cols + kernel_cols - 1; static constexpr int N_GEMMS = inner_tile_rows * inner_tile_cols; /** Transform weights from the spatial to the Winograd domain. */ @@ -196,8 +196,21 @@ class WinogradGEMM const int n_cols ); - static constexpr int max_pad_bottom = inner_tile_rows - 1; - static constexpr int max_pad_right = inner_tile_cols - 1; + // Tile overlaps + static constexpr int overlap_rows = kernel_rows - 1; + static constexpr int overlap_cols = kernel_cols - 1; + + // Maximum padding and number of distinct paddings + static constexpr int max_pad_top = kernel_rows / 2; + static constexpr int n_pad_top = 1 + iceildiv(max_pad_top, inner_tile_rows - overlap_rows); + + static constexpr int max_pad_left = kernel_cols / 2; + static constexpr int n_pad_left = 1 + iceildiv(max_pad_left, inner_tile_cols - overlap_cols); + + static constexpr int n_pad_bottom = inner_tile_rows; + static constexpr int n_pad_right = inner_tile_cols; + + /** Process a single tile of the input tensor. 
*/ template @@ -205,7 +218,8 @@ class WinogradGEMM // Array of methods to transform tiles of the input tensor. typedef void (*TileFn)(int, const T*, int, int, T*, int); - static const TileFn tile_fns[2][2][max_pad_bottom][max_pad_right]; + static const TileFn + tile_fns[n_pad_top][n_pad_left][n_pad_bottom][n_pad_right]; /* Member values for instance-based API. */ const T* const _inptr; -- cgit v1.2.1