about | summary | refs | log | tree | commit | diff
path: root/src/core/NEON/kernels/convolution/winograd/transforms/input_1x8_fp32.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/NEON/kernels/convolution/winograd/transforms/input_1x8_fp32.cpp')
-rw-r--r-- src/core/NEON/kernels/convolution/winograd/transforms/input_1x8_fp32.cpp | 179
1 files changed, 83 insertions, 96 deletions
diff --git a/src/core/NEON/kernels/convolution/winograd/transforms/input_1x8_fp32.cpp b/src/core/NEON/kernels/convolution/winograd/transforms/input_1x8_fp32.cpp
index 042d4debbc..e66300d39a 100644
--- a/src/core/NEON/kernels/convolution/winograd/transforms/input_1x8_fp32.cpp
+++ b/src/core/NEON/kernels/convolution/winograd/transforms/input_1x8_fp32.cpp
@@ -29,19 +29,30 @@
namespace
{
-template <int pad_top, int pad_left, int pad_bottom, int pad_right>
+template <bool Specialized, int PadTop=0, int PadLeft=0, int PadBottom=0, int PadRight=0>
void winograd_input_transform_1x8_fp32_process_tile(
int n_channels,
const float* const input_base,
const int input_row_stride,
const int input_col_stride,
float* const matrix_base,
- const int matrix_stride
+ const int matrix_stride,
+ const int _pad_top,
+ const int _pad_left,
+ const int _pad_bottom,
+ const int _pad_right
)
{
(void) input_row_stride; // No rows over which to stride
+ (void) _pad_top; // Never any top padding
+ (void) _pad_bottom; // Never any bottom padding
+
+ // Extract padding arguments
+ const int pad_left = Specialized ? PadLeft : _pad_left;
+ const int pad_right = Specialized ? PadRight : _pad_right;
+
constexpr int inner_tile_cols = 8;
- constexpr int cells_j = inner_tile_cols - pad_right;
+ const int cells_j = inner_tile_cols - pad_right;
float *outptr = matrix_base;
@@ -162,109 +173,85 @@ void winograd_input_transform_1x8_fp32_process_tile(
namespace winograd
{
template <int x>
-using Transform = InputTransformImpl<1, x, 1, 8, float>;
+using Tiles = InputTransformImplTiles<1, x, 1, 8, float>;
+/*****************************************************************************/
+// 1x3 specialisations
template <>
-const Transform<3>::TileFn
- Transform<3>::tile_fns[n_pad_top][n_pad_left][n_pad_bottom][n_pad_right] =
-{
- {
- {
- {
- winograd_input_transform_1x8_fp32_process_tile<0, 0, 0, 0>,
- winograd_input_transform_1x8_fp32_process_tile<0, 0, 0, 1>,
- winograd_input_transform_1x8_fp32_process_tile<0, 0, 0, 2>,
- winograd_input_transform_1x8_fp32_process_tile<0, 0, 0, 3>,
- winograd_input_transform_1x8_fp32_process_tile<0, 0, 0, 4>,
- winograd_input_transform_1x8_fp32_process_tile<0, 0, 0, 5>,
- winograd_input_transform_1x8_fp32_process_tile<0, 0, 0, 6>,
- }
- },
- {
- {
- winograd_input_transform_1x8_fp32_process_tile<0, 1, 0, 0>,
- winograd_input_transform_1x8_fp32_process_tile<0, 1, 0, 1>,
- winograd_input_transform_1x8_fp32_process_tile<0, 1, 0, 2>,
- winograd_input_transform_1x8_fp32_process_tile<0, 1, 0, 3>,
- winograd_input_transform_1x8_fp32_process_tile<0, 1, 0, 4>,
- winograd_input_transform_1x8_fp32_process_tile<0, 1, 0, 5>,
- winograd_input_transform_1x8_fp32_process_tile<0, 1, 0, 6>,
- }
- }
- }
+const Tiles<3>::TileFn Tiles<3>::tilefn_generic = winograd_input_transform_1x8_fp32_process_tile<false>;
+
+template <>
+const Tiles<3>::TileFn Tiles<3>::tilefn_unpadded = winograd_input_transform_1x8_fp32_process_tile<true>;
+
+template <>
+const Tiles<3>::TileFn Tiles<3>::tilefn_left_padded[n_pad_left] = {
+ winograd_input_transform_1x8_fp32_process_tile<true, 0, 1, 0, 0>,
};
template <>
-const Transform<5>::TileFn
- Transform<5>::tile_fns[n_pad_top][n_pad_left][n_pad_bottom][n_pad_right] =
-{
- {
- {
- {
- winograd_input_transform_1x8_fp32_process_tile<0, 0, 0, 0>,
- winograd_input_transform_1x8_fp32_process_tile<0, 0, 0, 1>,
- winograd_input_transform_1x8_fp32_process_tile<0, 0, 0, 2>,
- winograd_input_transform_1x8_fp32_process_tile<0, 0, 0, 3>,
- winograd_input_transform_1x8_fp32_process_tile<0, 0, 0, 4>,
- winograd_input_transform_1x8_fp32_process_tile<0, 0, 0, 5>,
- winograd_input_transform_1x8_fp32_process_tile<0, 0, 0, 6>,
- }
- },
- {
- {
- winograd_input_transform_1x8_fp32_process_tile<0, 2, 0, 0>,
- winograd_input_transform_1x8_fp32_process_tile<0, 2, 0, 1>,
- winograd_input_transform_1x8_fp32_process_tile<0, 2, 0, 2>,
- winograd_input_transform_1x8_fp32_process_tile<0, 2, 0, 3>,
- winograd_input_transform_1x8_fp32_process_tile<0, 2, 0, 4>,
- winograd_input_transform_1x8_fp32_process_tile<0, 2, 0, 5>,
- winograd_input_transform_1x8_fp32_process_tile<0, 2, 0, 6>,
- }
- }
- }
+const Tiles<3>::TileFn Tiles<3>::tilefn_right_padded[n_pad_right] = {
+ winograd_input_transform_1x8_fp32_process_tile<true, 0, 0, 0, 1>,
+ winograd_input_transform_1x8_fp32_process_tile<true, 0, 0, 0, 2>,
+ winograd_input_transform_1x8_fp32_process_tile<true, 0, 0, 0, 3>,
+ winograd_input_transform_1x8_fp32_process_tile<true, 0, 0, 0, 4>,
+ winograd_input_transform_1x8_fp32_process_tile<true, 0, 0, 0, 5>,
+ winograd_input_transform_1x8_fp32_process_tile<true, 0, 0, 0, 6>,
+ winograd_input_transform_1x8_fp32_process_tile<true, 0, 0, 0, 7>,
};
+/*****************************************************************************/
+/*****************************************************************************/
+// 1x5 specialisations
template <>
-const Transform<7>::TileFn
- Transform<7>::tile_fns[n_pad_top][n_pad_left][n_pad_bottom][n_pad_right] =
-{
- {
- {
- {
- winograd_input_transform_1x8_fp32_process_tile<0, 0, 0, 0>,
- winograd_input_transform_1x8_fp32_process_tile<0, 0, 0, 1>,
- winograd_input_transform_1x8_fp32_process_tile<0, 0, 0, 2>,
- winograd_input_transform_1x8_fp32_process_tile<0, 0, 0, 3>,
- winograd_input_transform_1x8_fp32_process_tile<0, 0, 0, 4>,
- winograd_input_transform_1x8_fp32_process_tile<0, 0, 0, 5>,
- winograd_input_transform_1x8_fp32_process_tile<0, 0, 0, 6>,
- }
- },
- {
- {
- winograd_input_transform_1x8_fp32_process_tile<0, 1, 0, 0>,
- winograd_input_transform_1x8_fp32_process_tile<0, 1, 0, 1>,
- winograd_input_transform_1x8_fp32_process_tile<0, 1, 0, 2>,
- winograd_input_transform_1x8_fp32_process_tile<0, 1, 0, 3>,
- winograd_input_transform_1x8_fp32_process_tile<0, 1, 0, 4>,
- winograd_input_transform_1x8_fp32_process_tile<0, 1, 0, 5>,
- winograd_input_transform_1x8_fp32_process_tile<0, 1, 0, 6>,
- }
- },
- {
- {
- winograd_input_transform_1x8_fp32_process_tile<0, 3, 0, 0>,
- winograd_input_transform_1x8_fp32_process_tile<0, 3, 0, 1>,
- winograd_input_transform_1x8_fp32_process_tile<0, 3, 0, 2>,
- winograd_input_transform_1x8_fp32_process_tile<0, 3, 0, 3>,
- winograd_input_transform_1x8_fp32_process_tile<0, 3, 0, 4>,
- winograd_input_transform_1x8_fp32_process_tile<0, 3, 0, 5>,
- winograd_input_transform_1x8_fp32_process_tile<0, 3, 0, 6>,
- }
- }
- }
+const Tiles<5>::TileFn Tiles<5>::tilefn_generic = winograd_input_transform_1x8_fp32_process_tile<false>;
+
+template <>
+const Tiles<5>::TileFn Tiles<5>::tilefn_unpadded = winograd_input_transform_1x8_fp32_process_tile<true>;
+
+template <>
+const Tiles<5>::TileFn Tiles<5>::tilefn_left_padded[n_pad_left] = {
+ winograd_input_transform_1x8_fp32_process_tile<true, 0, 2, 0, 0>,
};
+template <>
+const Tiles<5>::TileFn Tiles<5>::tilefn_right_padded[n_pad_right] = {
+ winograd_input_transform_1x8_fp32_process_tile<true, 0, 0, 0, 1>,
+ winograd_input_transform_1x8_fp32_process_tile<true, 0, 0, 0, 2>,
+ winograd_input_transform_1x8_fp32_process_tile<true, 0, 0, 0, 3>,
+ winograd_input_transform_1x8_fp32_process_tile<true, 0, 0, 0, 4>,
+ winograd_input_transform_1x8_fp32_process_tile<true, 0, 0, 0, 5>,
+ winograd_input_transform_1x8_fp32_process_tile<true, 0, 0, 0, 6>,
+ winograd_input_transform_1x8_fp32_process_tile<true, 0, 0, 0, 7>,
+};
+/*****************************************************************************/
+
+/*****************************************************************************/
+// 1x7 specialisations
+template <>
+const Tiles<7>::TileFn Tiles<7>::tilefn_generic = winograd_input_transform_1x8_fp32_process_tile<false>;
+
+template <>
+const Tiles<7>::TileFn Tiles<7>::tilefn_unpadded = winograd_input_transform_1x8_fp32_process_tile<true>;
+
+template <>
+const Tiles<7>::TileFn Tiles<7>::tilefn_left_padded[n_pad_left] = {
+ winograd_input_transform_1x8_fp32_process_tile<true, 0, 1, 0, 0>,
+ winograd_input_transform_1x8_fp32_process_tile<true, 0, 3, 0, 0>,
+};
+
+template <>
+const Tiles<7>::TileFn Tiles<7>::tilefn_right_padded[n_pad_right] = {
+ winograd_input_transform_1x8_fp32_process_tile<true, 0, 0, 0, 1>,
+ winograd_input_transform_1x8_fp32_process_tile<true, 0, 0, 0, 2>,
+ winograd_input_transform_1x8_fp32_process_tile<true, 0, 0, 0, 3>,
+ winograd_input_transform_1x8_fp32_process_tile<true, 0, 0, 0, 4>,
+ winograd_input_transform_1x8_fp32_process_tile<true, 0, 0, 0, 5>,
+ winograd_input_transform_1x8_fp32_process_tile<true, 0, 0, 0, 6>,
+ winograd_input_transform_1x8_fp32_process_tile<true, 0, 0, 0, 7>,
+};
+/*****************************************************************************/
+
+
template class InputTransform<1, 3, 1, 8, float>;
template class InputTransform<3, 1, 8, 1, float>;
template class InputTransform<1, 5, 1, 8, float>;