aboutsummaryrefslogtreecommitdiff
path: root/arm_compute/core/NEON/kernels/convolution/winograd/transforms
diff options
context:
space:
mode:
author Pablo Tello <pablo.tello@arm.com> 2018-08-22 11:40:33 +0100
committer Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:54:54 +0000
commit bda6e4b51bc4045c97100bb9d562164ba7c6c28f (patch)
tree 8924bbae251b34dc35a4ffc9a9ece79d28c4415b /arm_compute/core/NEON/kernels/convolution/winograd/transforms
parent 238c97cd8bfdb6dfce5c4eefed6aac4d9bb59457 (diff)
download ComputeLibrary-bda6e4b51bc4045c97100bb9d562164ba7c6c28f.tar.gz
COMPMID-1247: Integrate kernel size 1x3 & 3x1 support in NEWinogradLayer.

Change-Id: I6fe198881230e49864c841a3b2366ccf2a9247f9
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/145210
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Diffstat (limited to 'arm_compute/core/NEON/kernels/convolution/winograd/transforms')
-rw-r--r-- arm_compute/core/NEON/kernels/convolution/winograd/transforms/input.hpp 22
-rw-r--r-- arm_compute/core/NEON/kernels/convolution/winograd/transforms/output.hpp 22
2 files changed, 44 insertions, 0 deletions
diff --git a/arm_compute/core/NEON/kernels/convolution/winograd/transforms/input.hpp b/arm_compute/core/NEON/kernels/convolution/winograd/transforms/input.hpp
index 13218030d2..369c2ff48f 100644
--- a/arm_compute/core/NEON/kernels/convolution/winograd/transforms/input.hpp
+++ b/arm_compute/core/NEON/kernels/convolution/winograd/transforms/input.hpp
@@ -50,6 +50,22 @@ namespace winograd
const int matrix_row_stride /** Stride within matrices. */
)
{
+ // If an Nx1 kernel then transpose and redirect to the 1xN implementation
+ if (kernel_cols == 1)
+ {
+ WinogradGEMM<output_tile_cols, output_tile_rows, kernel_cols, kernel_rows>::
+ template InputTransform<T>::execute(
+ input,
+ n_batches, in_batch_stride,
+ n_cols, in_col_stride,
+ n_rows, in_row_stride,
+ n_channels, padding,
+ tile_N, tile_M,
+ output, matrix_stride, matrix_batch_stride, matrix_row_stride
+ );
+ return;
+ }
+
// Compute the padding required on each edge of the image
const int pad_top = (padding == PADDING_SAME) ? (kernel_rows - 1) / 2 : 0;
const int pad_left = (padding == PADDING_SAME) ? (kernel_cols - 1) / 2 : 0;
@@ -111,6 +127,12 @@ namespace winograd
const int n_cols
)
{
+ if (kernel_cols == 1)
+ {
+ // If an Nx1 implementation then this should never be reached.
+ return;
+ }
+
constexpr int tile_overlap = kernel_cols - 1;
// Loop over columns of tiles
diff --git a/arm_compute/core/NEON/kernels/convolution/winograd/transforms/output.hpp b/arm_compute/core/NEON/kernels/convolution/winograd/transforms/output.hpp
index 700ca76c68..6ed146bf85 100644
--- a/arm_compute/core/NEON/kernels/convolution/winograd/transforms/output.hpp
+++ b/arm_compute/core/NEON/kernels/convolution/winograd/transforms/output.hpp
@@ -45,6 +45,22 @@ namespace winograd
T* const output
)
{
+ // If an Nx1 kernel then transpose and redirect to the 1xN implementation.
+ if (kernel_cols == 1)
+ {
+ WinogradGEMM<output_tile_cols, output_tile_rows, kernel_cols, kernel_rows>::
+ template OutputTransform<T>::execute(
+ n_batches,
+ output_batch_stride,
+ n_cols, output_col_stride,
+ n_rows, output_row_stride,
+ n_channels,
+ matrix_base, matrix_stride, matrix_row_stride,
+ biases, output
+ );
+ return;
+ }
+
// Compute the number of tiles and hence the padding required on the bottom
// and right of the image.
const int tile_M = iceildiv(n_rows, output_tile_rows);
@@ -98,6 +114,12 @@ namespace winograd
const int row_pad_right
)
{
+ if (kernel_cols == 1)
+ {
+ // If an Nx1 implementation then this should never be reached.
+ return;
+ }
+
// Loop over columns of tiles
for (int tile_j = 0; tile_j < tile_N; tile_j++)
{