diff options
author | Pablo Tello <pablo.tello@arm.com> | 2018-09-25 16:01:35 +0100 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:55:19 +0000 |
commit | eb027e933758b1e749f0f6bd2817ee8979ef903c (patch) | |
tree | 99543b716f42c040d801a01d7e4674c54a1173a7 /src/core/NEON/kernels/convolution/winograd/transforms/input_6x6_fp32.cpp | |
parent | 4284bfab4594d4babb23123001ef63db7bebeccb (diff) | |
download | ComputeLibrary-eb027e933758b1e749f0f6bd2817ee8979ef903c.tar.gz |
COMPMID-1600: Reduce number of tile specialisations.
Change-Id: I4d06eca9404ea6d3df9d0ca52f5d6f5421ab7116
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/150117
Tested-by: bsgcomp <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/convolution/winograd/transforms/input_6x6_fp32.cpp')
-rw-r--r-- | src/core/NEON/kernels/convolution/winograd/transforms/input_6x6_fp32.cpp | 400 |
1 files changed, 84 insertions, 316 deletions
diff --git a/src/core/NEON/kernels/convolution/winograd/transforms/input_6x6_fp32.cpp b/src/core/NEON/kernels/convolution/winograd/transforms/input_6x6_fp32.cpp index 908613068a..893122cc45 100644 --- a/src/core/NEON/kernels/convolution/winograd/transforms/input_6x6_fp32.cpp +++ b/src/core/NEON/kernels/convolution/winograd/transforms/input_6x6_fp32.cpp @@ -29,20 +29,30 @@ namespace { -template <int pad_top, int pad_left, int pad_bottom, int pad_right> +template <bool Specialized, int PadTop=0, int PadLeft=0, int PadBottom=0, int PadRight=0> void winograd_input_transform_6x6_fp32_process_tile( int n_channels, const float* const input_base, const int input_row_stride, const int input_col_stride, float* const matrix_base, - const int matrix_stride +const int matrix_stride, + const int _pad_top, + const int _pad_left, + const int _pad_bottom, + const int _pad_right ) { - constexpr int inner_tile_rows = 6; + const int pad_top = Specialized ? PadTop : _pad_top; + const int pad_left = Specialized ? PadLeft : _pad_left; + const int pad_bottom = Specialized ? PadBottom : _pad_bottom; + const int pad_right = Specialized ? PadRight : _pad_right; + + constexpr int inner_tile_rows = 6; constexpr int inner_tile_cols = 6; - constexpr int cells_i = inner_tile_rows - pad_bottom; - constexpr int cells_j = inner_tile_cols - pad_right; + + const int cells_i = inner_tile_rows - pad_bottom; + const int cells_j = inner_tile_cols - pad_right; float *outptr = matrix_base; @@ -285,322 +295,80 @@ void winograd_input_transform_6x6_fp32_process_tile( namespace winograd { template <int k> -using Transform = InputTransformImpl<k, k, 6, 6, float>; +using Tiles = InputTransformImplTiles<k, k, 6, 6, float>; template <> -const Transform<3>::TileFn - Transform<3>::tile_fns[n_pad_top][n_pad_left][n_pad_bottom][n_pad_right] = -{ - { - { - { - winograd_input_transform_6x6_fp32_process_tile<0, 0, 0, 0>, // No padding - winograd_input_transform_6x6_fp32_process_tile<0, 0, 0, 1>, // Right - winograd_input_transform_6x6_fp32_process_tile<0, 0, 0, 2>, // " " - winograd_input_transform_6x6_fp32_process_tile<0, 0, 0, 3>, // " " - winograd_input_transform_6x6_fp32_process_tile<0, 0, 0, 4>, // " " - }, - { - winograd_input_transform_6x6_fp32_process_tile<0, 0, 1, 0>, // Bottom - winograd_input_transform_6x6_fp32_process_tile<0, 0, 1, 1>, // Bottom right - winograd_input_transform_6x6_fp32_process_tile<0, 0, 1, 2>, // " " - winograd_input_transform_6x6_fp32_process_tile<0, 0, 1, 3>, // " " - winograd_input_transform_6x6_fp32_process_tile<0, 0, 1, 4>, // " " - }, - { - winograd_input_transform_6x6_fp32_process_tile<0, 0, 2, 0>, // Bottom - winograd_input_transform_6x6_fp32_process_tile<0, 0, 2, 1>, // Bottom right - winograd_input_transform_6x6_fp32_process_tile<0, 0, 2, 2>, // " " - winograd_input_transform_6x6_fp32_process_tile<0, 0, 2, 3>, // " " - winograd_input_transform_6x6_fp32_process_tile<0, 0, 2, 4>, // " " - }, - { - winograd_input_transform_6x6_fp32_process_tile<0, 0, 3, 0>, // Bottom - winograd_input_transform_6x6_fp32_process_tile<0, 0, 3, 1>, // Bottom right - winograd_input_transform_6x6_fp32_process_tile<0, 0, 3, 2>, // " " - winograd_input_transform_6x6_fp32_process_tile<0, 0, 3, 3>, // " " - winograd_input_transform_6x6_fp32_process_tile<0, 0, 3, 4>, // " " - }, - { - winograd_input_transform_6x6_fp32_process_tile<0, 0, 4, 0>, // Bottom - winograd_input_transform_6x6_fp32_process_tile<0, 0, 4, 1>, // Bottom right - winograd_input_transform_6x6_fp32_process_tile<0, 0, 4, 2>, // " " - winograd_input_transform_6x6_fp32_process_tile<0, 0, 4, 3>, // " " - winograd_input_transform_6x6_fp32_process_tile<0, 0, 4, 4>, // " " - } - }, - { - { - winograd_input_transform_6x6_fp32_process_tile<0, 1, 0, 0>, // Left - winograd_input_transform_6x6_fp32_process_tile<0, 1, 0, 1>, - winograd_input_transform_6x6_fp32_process_tile<0, 1, 0, 2>, - winograd_input_transform_6x6_fp32_process_tile<0, 1, 0, 3>, - winograd_input_transform_6x6_fp32_process_tile<0, 1, 0, 4>, - }, - { - winograd_input_transform_6x6_fp32_process_tile<0, 1, 1, 0>, // Bottom left - winograd_input_transform_6x6_fp32_process_tile<0, 1, 1, 1>, - winograd_input_transform_6x6_fp32_process_tile<0, 1, 1, 2>, - winograd_input_transform_6x6_fp32_process_tile<0, 1, 1, 3>, - winograd_input_transform_6x6_fp32_process_tile<0, 1, 1, 4>, - }, - { - winograd_input_transform_6x6_fp32_process_tile<0, 1, 2, 0>, // " " - winograd_input_transform_6x6_fp32_process_tile<0, 1, 2, 1>, - winograd_input_transform_6x6_fp32_process_tile<0, 1, 2, 2>, - winograd_input_transform_6x6_fp32_process_tile<0, 1, 2, 3>, - winograd_input_transform_6x6_fp32_process_tile<0, 1, 2, 4>, - }, - { - winograd_input_transform_6x6_fp32_process_tile<0, 1, 3, 0>, // " " - winograd_input_transform_6x6_fp32_process_tile<0, 1, 3, 1>, - winograd_input_transform_6x6_fp32_process_tile<0, 1, 3, 2>, - winograd_input_transform_6x6_fp32_process_tile<0, 1, 3, 3>, - winograd_input_transform_6x6_fp32_process_tile<0, 1, 3, 4>, - }, - { - winograd_input_transform_6x6_fp32_process_tile<0, 1, 4, 0>, // " " - winograd_input_transform_6x6_fp32_process_tile<0, 1, 4, 1>, - winograd_input_transform_6x6_fp32_process_tile<0, 1, 4, 2>, - winograd_input_transform_6x6_fp32_process_tile<0, 1, 4, 3>, - winograd_input_transform_6x6_fp32_process_tile<0, 1, 4, 4>, - } - } - }, - { - { - { - winograd_input_transform_6x6_fp32_process_tile<1, 0, 0, 0>, // Top - winograd_input_transform_6x6_fp32_process_tile<1, 0, 0, 1>, // Top right - winograd_input_transform_6x6_fp32_process_tile<1, 0, 0, 2>, // " " - winograd_input_transform_6x6_fp32_process_tile<1, 0, 0, 3>, // " " - winograd_input_transform_6x6_fp32_process_tile<1, 0, 0, 4>, // " " - }, - { - winograd_input_transform_6x6_fp32_process_tile<1, 0, 1, 0>, - winograd_input_transform_6x6_fp32_process_tile<1, 0, 1, 1>, - winograd_input_transform_6x6_fp32_process_tile<1, 0, 1, 2>, - winograd_input_transform_6x6_fp32_process_tile<1, 0, 1, 3>, - winograd_input_transform_6x6_fp32_process_tile<1, 0, 1, 4>, - }, - { - winograd_input_transform_6x6_fp32_process_tile<1, 0, 2, 0>, - winograd_input_transform_6x6_fp32_process_tile<1, 0, 2, 1>, - winograd_input_transform_6x6_fp32_process_tile<1, 0, 2, 2>, - winograd_input_transform_6x6_fp32_process_tile<1, 0, 2, 3>, - winograd_input_transform_6x6_fp32_process_tile<1, 0, 2, 4>, - }, - { - winograd_input_transform_6x6_fp32_process_tile<1, 0, 3, 0>, - winograd_input_transform_6x6_fp32_process_tile<1, 0, 3, 1>, - winograd_input_transform_6x6_fp32_process_tile<1, 0, 3, 2>, - winograd_input_transform_6x6_fp32_process_tile<1, 0, 3, 3>, - winograd_input_transform_6x6_fp32_process_tile<1, 0, 3, 4>, - }, - { - winograd_input_transform_6x6_fp32_process_tile<1, 0, 4, 0>, - winograd_input_transform_6x6_fp32_process_tile<1, 0, 4, 1>, - winograd_input_transform_6x6_fp32_process_tile<1, 0, 4, 2>, - winograd_input_transform_6x6_fp32_process_tile<1, 0, 4, 3>, - winograd_input_transform_6x6_fp32_process_tile<1, 0, 4, 4>, - }, - }, - { - { - winograd_input_transform_6x6_fp32_process_tile<1, 1, 0, 0>, // Top left - winograd_input_transform_6x6_fp32_process_tile<1, 1, 0, 1>, - winograd_input_transform_6x6_fp32_process_tile<1, 1, 0, 2>, - winograd_input_transform_6x6_fp32_process_tile<1, 1, 0, 3>, - winograd_input_transform_6x6_fp32_process_tile<1, 1, 0, 4>, - }, - { - winograd_input_transform_6x6_fp32_process_tile<1, 1, 1, 0>, - winograd_input_transform_6x6_fp32_process_tile<1, 1, 1, 1>, - winograd_input_transform_6x6_fp32_process_tile<1, 1, 1, 2>, - winograd_input_transform_6x6_fp32_process_tile<1, 1, 1, 3>, - winograd_input_transform_6x6_fp32_process_tile<1, 1, 1, 4>, - }, - { - winograd_input_transform_6x6_fp32_process_tile<1, 1, 2, 0>, - winograd_input_transform_6x6_fp32_process_tile<1, 1, 2, 1>, - winograd_input_transform_6x6_fp32_process_tile<1, 1, 2, 2>, - winograd_input_transform_6x6_fp32_process_tile<1, 1, 2, 3>, - winograd_input_transform_6x6_fp32_process_tile<1, 1, 2, 4>, - }, - { - winograd_input_transform_6x6_fp32_process_tile<1, 1, 3, 0>, - winograd_input_transform_6x6_fp32_process_tile<1, 1, 3, 1>, - winograd_input_transform_6x6_fp32_process_tile<1, 1, 3, 2>, - winograd_input_transform_6x6_fp32_process_tile<1, 1, 3, 3>, - winograd_input_transform_6x6_fp32_process_tile<1, 1, 3, 4>, - }, - { - winograd_input_transform_6x6_fp32_process_tile<1, 1, 4, 0>, - winograd_input_transform_6x6_fp32_process_tile<1, 1, 4, 1>, - winograd_input_transform_6x6_fp32_process_tile<1, 1, 4, 2>, - winograd_input_transform_6x6_fp32_process_tile<1, 1, 4, 3>, - winograd_input_transform_6x6_fp32_process_tile<1, 1, 4, 4>, - } - } - } +const Tiles<3>::TileFn Tiles<3>::tilefn_generic = winograd_input_transform_6x6_fp32_process_tile<false>; + +template <> +const Tiles<3>::TileFn Tiles<3>::tilefn_unpadded = winograd_input_transform_6x6_fp32_process_tile<true>; + +template <> +const Tiles<3>::TileFn Tiles<3>::tilefn_top_padded[n_pad_top] = { + winograd_input_transform_6x6_fp32_process_tile<true, 1, 0, 0, 0>, }; template <> -const Transform<5>::TileFn - Transform<5>::tile_fns[n_pad_top][n_pad_left][n_pad_bottom][n_pad_right] = -{ - { - { - { - winograd_input_transform_6x6_fp32_process_tile<0, 0, 0, 0>, // No padding - winograd_input_transform_6x6_fp32_process_tile<0, 0, 0, 1>, // Right - winograd_input_transform_6x6_fp32_process_tile<0, 0, 0, 2>, // " " - winograd_input_transform_6x6_fp32_process_tile<0, 0, 0, 3>, // " " - winograd_input_transform_6x6_fp32_process_tile<0, 0, 0, 4>, // " " - }, - { - winograd_input_transform_6x6_fp32_process_tile<0, 0, 1, 0>, // Bottom - winograd_input_transform_6x6_fp32_process_tile<0, 0, 1, 1>, // Bottom right - winograd_input_transform_6x6_fp32_process_tile<0, 0, 1, 2>, // " " - winograd_input_transform_6x6_fp32_process_tile<0, 0, 1, 3>, // " " - winograd_input_transform_6x6_fp32_process_tile<0, 0, 1, 4>, // " " - }, - { - winograd_input_transform_6x6_fp32_process_tile<0, 0, 2, 0>, // Bottom - winograd_input_transform_6x6_fp32_process_tile<0, 0, 2, 1>, // Bottom right - winograd_input_transform_6x6_fp32_process_tile<0, 0, 2, 2>, // " " - winograd_input_transform_6x6_fp32_process_tile<0, 0, 2, 3>, // " " - winograd_input_transform_6x6_fp32_process_tile<0, 0, 2, 4>, // " " - }, - { - winograd_input_transform_6x6_fp32_process_tile<0, 0, 3, 0>, // Bottom - winograd_input_transform_6x6_fp32_process_tile<0, 0, 3, 1>, // Bottom right - winograd_input_transform_6x6_fp32_process_tile<0, 0, 3, 2>, // " " - winograd_input_transform_6x6_fp32_process_tile<0, 0, 3, 3>, // " " - winograd_input_transform_6x6_fp32_process_tile<0, 0, 3, 4>, // " " - }, - { - winograd_input_transform_6x6_fp32_process_tile<0, 0, 4, 0>, // Bottom - winograd_input_transform_6x6_fp32_process_tile<0, 0, 4, 1>, // Bottom right - winograd_input_transform_6x6_fp32_process_tile<0, 0, 4, 2>, // " " - winograd_input_transform_6x6_fp32_process_tile<0, 0, 4, 3>, // " " - winograd_input_transform_6x6_fp32_process_tile<0, 0, 4, 4>, // " " - } - }, - { - { - winograd_input_transform_6x6_fp32_process_tile<0, 2, 0, 0>, // Left - winograd_input_transform_6x6_fp32_process_tile<0, 2, 0, 1>, - winograd_input_transform_6x6_fp32_process_tile<0, 2, 0, 2>, - winograd_input_transform_6x6_fp32_process_tile<0, 2, 0, 3>, - winograd_input_transform_6x6_fp32_process_tile<0, 2, 0, 4>, - }, - { - winograd_input_transform_6x6_fp32_process_tile<0, 2, 1, 0>, // Bottom left - winograd_input_transform_6x6_fp32_process_tile<0, 2, 1, 1>, - winograd_input_transform_6x6_fp32_process_tile<0, 2, 1, 2>, - winograd_input_transform_6x6_fp32_process_tile<0, 2, 1, 3>, - winograd_input_transform_6x6_fp32_process_tile<0, 2, 1, 4>, - }, - { - winograd_input_transform_6x6_fp32_process_tile<0, 2, 2, 0>, // " " - winograd_input_transform_6x6_fp32_process_tile<0, 2, 2, 1>, - winograd_input_transform_6x6_fp32_process_tile<0, 2, 2, 2>, - winograd_input_transform_6x6_fp32_process_tile<0, 2, 2, 3>, - winograd_input_transform_6x6_fp32_process_tile<0, 2, 2, 4>, - }, - { - winograd_input_transform_6x6_fp32_process_tile<0, 2, 3, 0>, // " " - winograd_input_transform_6x6_fp32_process_tile<0, 2, 3, 1>, - winograd_input_transform_6x6_fp32_process_tile<0, 2, 3, 2>, - winograd_input_transform_6x6_fp32_process_tile<0, 2, 3, 3>, - winograd_input_transform_6x6_fp32_process_tile<0, 2, 3, 4>, - }, - { - winograd_input_transform_6x6_fp32_process_tile<0, 2, 4, 0>, // " " - winograd_input_transform_6x6_fp32_process_tile<0, 2, 4, 1>, - winograd_input_transform_6x6_fp32_process_tile<0, 2, 4, 2>, - winograd_input_transform_6x6_fp32_process_tile<0, 2, 4, 3>, - winograd_input_transform_6x6_fp32_process_tile<0, 2, 4, 4>, - } - } - }, - { - { - { - winograd_input_transform_6x6_fp32_process_tile<2, 0, 0, 0>, // Top - winograd_input_transform_6x6_fp32_process_tile<2, 0, 0, 1>, // Top right - winograd_input_transform_6x6_fp32_process_tile<2, 0, 0, 2>, // " " - winograd_input_transform_6x6_fp32_process_tile<2, 0, 0, 3>, // " " - winograd_input_transform_6x6_fp32_process_tile<2, 0, 0, 4>, // " " - }, - { - winograd_input_transform_6x6_fp32_process_tile<2, 0, 1, 0>, - winograd_input_transform_6x6_fp32_process_tile<2, 0, 1, 1>, - winograd_input_transform_6x6_fp32_process_tile<2, 0, 1, 2>, - winograd_input_transform_6x6_fp32_process_tile<2, 0, 1, 3>, - winograd_input_transform_6x6_fp32_process_tile<2, 0, 1, 4>, - }, - { - winograd_input_transform_6x6_fp32_process_tile<2, 0, 2, 0>, - winograd_input_transform_6x6_fp32_process_tile<2, 0, 2, 1>, - winograd_input_transform_6x6_fp32_process_tile<2, 0, 2, 2>, - winograd_input_transform_6x6_fp32_process_tile<2, 0, 2, 3>, - winograd_input_transform_6x6_fp32_process_tile<2, 0, 2, 4>, - }, - { - winograd_input_transform_6x6_fp32_process_tile<2, 0, 3, 0>, - winograd_input_transform_6x6_fp32_process_tile<2, 0, 3, 1>, - winograd_input_transform_6x6_fp32_process_tile<2, 0, 3, 2>, - winograd_input_transform_6x6_fp32_process_tile<2, 0, 3, 3>, - winograd_input_transform_6x6_fp32_process_tile<2, 0, 3, 4>, - }, - { - winograd_input_transform_6x6_fp32_process_tile<2, 0, 4, 0>, - winograd_input_transform_6x6_fp32_process_tile<2, 0, 4, 1>, - winograd_input_transform_6x6_fp32_process_tile<2, 0, 4, 2>, - winograd_input_transform_6x6_fp32_process_tile<2, 0, 4, 3>, - winograd_input_transform_6x6_fp32_process_tile<2, 0, 4, 4>, - }, - }, - { - { - winograd_input_transform_6x6_fp32_process_tile<2, 2, 0, 0>, // Top left - winograd_input_transform_6x6_fp32_process_tile<2, 2, 0, 1>, - winograd_input_transform_6x6_fp32_process_tile<2, 2, 0, 2>, - winograd_input_transform_6x6_fp32_process_tile<2, 2, 0, 3>, - winograd_input_transform_6x6_fp32_process_tile<2, 2, 0, 4>, - }, - { - winograd_input_transform_6x6_fp32_process_tile<2, 2, 1, 0>, - winograd_input_transform_6x6_fp32_process_tile<2, 2, 1, 1>, - winograd_input_transform_6x6_fp32_process_tile<2, 2, 1, 2>, - winograd_input_transform_6x6_fp32_process_tile<2, 2, 1, 3>, - winograd_input_transform_6x6_fp32_process_tile<2, 2, 1, 4>, - }, - { - winograd_input_transform_6x6_fp32_process_tile<2, 2, 2, 0>, - winograd_input_transform_6x6_fp32_process_tile<2, 2, 2, 1>, - winograd_input_transform_6x6_fp32_process_tile<2, 2, 2, 2>, - winograd_input_transform_6x6_fp32_process_tile<2, 2, 2, 3>, - winograd_input_transform_6x6_fp32_process_tile<2, 2, 2, 4>, - }, - { - winograd_input_transform_6x6_fp32_process_tile<2, 2, 3, 0>, - winograd_input_transform_6x6_fp32_process_tile<2, 2, 3, 1>, - winograd_input_transform_6x6_fp32_process_tile<2, 2, 3, 2>, - winograd_input_transform_6x6_fp32_process_tile<2, 2, 3, 3>, - winograd_input_transform_6x6_fp32_process_tile<2, 2, 3, 4>, - }, - { - winograd_input_transform_6x6_fp32_process_tile<2, 2, 4, 0>, - winograd_input_transform_6x6_fp32_process_tile<2, 2, 4, 1>, - winograd_input_transform_6x6_fp32_process_tile<2, 2, 4, 2>, - winograd_input_transform_6x6_fp32_process_tile<2, 2, 4, 3>, - winograd_input_transform_6x6_fp32_process_tile<2, 2, 4, 4>, - } - } - } +const Tiles<3>::TileFn Tiles<3>::tilefn_left_padded[n_pad_left] = { + winograd_input_transform_6x6_fp32_process_tile<true, 0, 1, 0, 0>, +}; + +template <> +const Tiles<3>::TileFn Tiles<3>::tilefn_bottom_padded[n_pad_bottom] = { + winograd_input_transform_6x6_fp32_process_tile<true, 0, 0, 1, 0>, + winograd_input_transform_6x6_fp32_process_tile<true, 0, 0, 2, 0>, + winograd_input_transform_6x6_fp32_process_tile<true, 0, 0, 3, 0>, + winograd_input_transform_6x6_fp32_process_tile<true, 0, 0, 4, 0>, + winograd_input_transform_6x6_fp32_process_tile<true, 0, 0, 5, 0>, + winograd_input_transform_6x6_fp32_process_tile<true, 0, 0, 6, 0>, +}; + +template <> +const Tiles<3>::TileFn Tiles<3>::tilefn_right_padded[n_pad_right] = { + winograd_input_transform_6x6_fp32_process_tile<true, 0, 0, 0, 1>, + winograd_input_transform_6x6_fp32_process_tile<true, 0, 0, 0, 2>, + winograd_input_transform_6x6_fp32_process_tile<true, 0, 0, 0, 3>, + winograd_input_transform_6x6_fp32_process_tile<true, 0, 0, 0, 4>, + winograd_input_transform_6x6_fp32_process_tile<true, 0, 0, 0, 5>, + winograd_input_transform_6x6_fp32_process_tile<true, 0, 0, 0, 6>, +}; + +template <> +const Tiles<5>::TileFn Tiles<5>::tilefn_generic = winograd_input_transform_6x6_fp32_process_tile<false>; + + +template <> +const Tiles<5>::TileFn Tiles<5>::tilefn_unpadded = winograd_input_transform_6x6_fp32_process_tile<true>; + + +template <> +const Tiles<5>::TileFn Tiles<5>::tilefn_top_padded[n_pad_top] = { + winograd_input_transform_6x6_fp32_process_tile<true, 2, 0, 0, 0>, +}; + +template <> +const Tiles<5>::TileFn Tiles<5>::tilefn_left_padded[n_pad_left] = { + winograd_input_transform_6x6_fp32_process_tile<true, 0, 2, 0, 0>, +}; + +template <> +const Tiles<5>::TileFn Tiles<5>::tilefn_bottom_padded[n_pad_bottom] = { + winograd_input_transform_6x6_fp32_process_tile<true, 0, 0, 1, 0>, + winograd_input_transform_6x6_fp32_process_tile<true, 0, 0, 2, 0>, + winograd_input_transform_6x6_fp32_process_tile<true, 0, 0, 3, 0>, + winograd_input_transform_6x6_fp32_process_tile<true, 0, 0, 4, 0>, + winograd_input_transform_6x6_fp32_process_tile<true, 0, 0, 5, 0>, + winograd_input_transform_6x6_fp32_process_tile<true, 0, 0, 6, 0>, +}; + +template <> +const Tiles<5>::TileFn Tiles<5>::tilefn_right_padded[n_pad_right] = { + winograd_input_transform_6x6_fp32_process_tile<true, 0, 0, 0, 1>, + winograd_input_transform_6x6_fp32_process_tile<true, 0, 0, 0, 2>, + winograd_input_transform_6x6_fp32_process_tile<true, 0, 0, 0, 3>, + winograd_input_transform_6x6_fp32_process_tile<true, 0, 0, 0, 4>, + winograd_input_transform_6x6_fp32_process_tile<true, 0, 0, 0, 5>, + winograd_input_transform_6x6_fp32_process_tile<true, 0, 0, 0, 6>, }; template class InputTransform<3, 3, 6, 6, float>; |