2 files changed, 44 insertions, 0 deletions
diff --git a/arm_compute/core/NEON/kernels/convolution/winograd/transforms/input.hpp b/arm_compute/core/NEON/kernels/convolution/winograd/transforms/input.hpp
index 13218030d2..369c2ff48f 100644
--- a/arm_compute/core/NEON/kernels/convolution/winograd/transforms/input.hpp
+++ b/arm_compute/core/NEON/kernels/convolution/winograd/transforms/input.hpp
@@ -50,6 +50,22 @@ namespace winograd
     const int matrix_row_stride  /** Stride within matrices. */
   )
   {
+    // For an Nx1 kernel, transpose the problem and redirect to the 1xN implementation.
+    if (kernel_cols == 1)
+    {
+      WinogradGEMM<output_tile_cols, output_tile_rows, kernel_cols, kernel_rows>::
+        template InputTransform<T>::execute(
+          input,
+          n_batches, in_batch_stride,
+          n_cols, in_col_stride,
+          n_rows, in_row_stride,
+          n_channels, padding,
+          tile_N, tile_M,
+          output, matrix_stride, matrix_batch_stride, matrix_row_stride
+        );
+      return;
+    }
+
     // Compute the padding required on each edge of the image
     const int pad_top = (padding == PADDING_SAME) ? (kernel_rows - 1) / 2 : 0;
     const int pad_left = (padding == PADDING_SAME) ? (kernel_cols - 1) / 2 : 0;
@@ -111,6 +127,12 @@ namespace winograd
     const int n_cols
   )
   {
+    if (kernel_cols == 1)
+    {
+      // Nx1 kernels are handled by the transposed 1xN implementation, so this function should never be reached for them.
+      return;
+    }
+
     constexpr int tile_overlap = kernel_cols - 1;
 
     // Loop over columns of tiles
diff --git a/arm_compute/core/NEON/kernels/convolution/winograd/transforms/output.hpp b/arm_compute/core/NEON/kernels/convolution/winograd/transforms/output.hpp
index 700ca76c68..6ed146bf85 100644
--- a/arm_compute/core/NEON/kernels/convolution/winograd/transforms/output.hpp
+++ b/arm_compute/core/NEON/kernels/convolution/winograd/transforms/output.hpp
@@ -45,6 +45,22 @@ namespace winograd
     T* const output
   )
   {
+    // For an Nx1 kernel, transpose the problem and redirect to the 1xN implementation.
+    if (kernel_cols == 1)
+    {
+      WinogradGEMM<output_tile_cols, output_tile_rows, kernel_cols, kernel_rows>::
+        template OutputTransform<T>::execute(
+          n_batches,
+          output_batch_stride,
+          n_cols, output_col_stride,
+          n_rows, output_row_stride,
+          n_channels,
+          matrix_base, matrix_stride, matrix_row_stride,
+          biases, output
+        );
+      return;
+    }
+
     // Compute the number of tiles and hence the padding required on the bottom
     // and right of the image.
     const int tile_M = iceildiv(n_rows, output_tile_rows);
@@ -98,6 +114,12 @@ namespace winograd
     const int row_pad_right
   )
   {
+    if (kernel_cols == 1)
+    {
+      // Nx1 kernels are handled by the transposed 1xN implementation, so this function should never be reached for them.
+      return;
+    }
+
     // Loop over columns of tiles
     for (int tile_j = 0; tile_j < tile_N; tile_j++)
     {