diff options
Diffstat (limited to 'tests')
-rw-r--r-- | tests/datasets/LargeConvolutionLayerDataset.h | 44 | ||||
-rw-r--r-- | tests/datasets/ShapeDatasets.h | 64 | ||||
-rw-r--r-- | tests/datasets/SmallConvolutionLayerDataset.h | 30 | ||||
-rw-r--r-- | tests/datasets/WinogradInputTransformDataset.h | 108 | ||||
-rw-r--r-- | tests/datasets/WinogradOutputTransformDataset.h | 85 | ||||
-rw-r--r-- | tests/validation/CL/Winograd.cpp | 353 | ||||
-rw-r--r-- | tests/validation/Helpers.cpp | 31 | ||||
-rw-r--r-- | tests/validation/Helpers.h | 9 | ||||
-rw-r--r-- | tests/validation/fixtures/WinogradConvolutionLayerFixture.h | 13 | ||||
-rw-r--r-- | tests/validation/reference/Winograd.cpp | 130 |
10 files changed, 770 insertions, 97 deletions
diff --git a/tests/datasets/LargeConvolutionLayerDataset.h b/tests/datasets/LargeConvolutionLayerDataset.h index 36b3d60d57..ae25c8cd66 100644 --- a/tests/datasets/LargeConvolutionLayerDataset.h +++ b/tests/datasets/LargeConvolutionLayerDataset.h @@ -59,6 +59,50 @@ public: } }; +class LargeWinogradConvolutionLayer3x1Dataset final : public ConvolutionLayerDataset +{ +public: + LargeWinogradConvolutionLayer3x1Dataset() + { + // Kernel size 3 + // Batch size 1 + add_config(TensorShape(224U, 222U, 64U), TensorShape(3U, 1U, 64U, 64U), TensorShape(64U), TensorShape(224U, 222U, 64U), PadStrideInfo(1, 1, 1, 0)); + add_config(TensorShape(112U, 113U, 64U), TensorShape(3U, 1U, 64U, 128U), TensorShape(128U), TensorShape(112U, 113U, 128U), PadStrideInfo(1, 1, 1, 0)); + add_config(TensorShape(112U, 112U, 128U), TensorShape(3U, 1U, 128U, 129U), TensorShape(129U), TensorShape(112U, 112U, 129U), PadStrideInfo(1, 1, 1, 0)); + add_config(TensorShape(53U, 56U, 125U), TensorShape(3U, 1U, 125U, 256U), TensorShape(256U), TensorShape(51U, 56U, 256U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(56U, 56U, 256U), TensorShape(3U, 1U, 256U, 256U), TensorShape(256U), TensorShape(56U, 56U, 256U), PadStrideInfo(1, 1, 1, 0)); + add_config(TensorShape(28U, 28U, 257U), TensorShape(3U, 1U, 257U, 512U), TensorShape(512U), TensorShape(26U, 28U, 512U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(28U, 28U, 512U), TensorShape(3U, 1U, 512U, 512U), TensorShape(512U), TensorShape(28U, 28U, 512U), PadStrideInfo(1, 1, 1, 0)); + add_config(TensorShape(14U, 14U, 512U), TensorShape(3U, 1U, 512U, 512U), TensorShape(512U), TensorShape(12U, 14U, 512U), PadStrideInfo(1, 1, 0, 0)); + // Batch size 3, 2 and 4 + add_config(TensorShape(224U, 222U, 64U, 3U), TensorShape(3U, 1U, 64U, 64U), TensorShape(64U), TensorShape(224U, 222U, 64U, 3U), PadStrideInfo(1, 1, 1, 0)); + add_config(TensorShape(112U, 113U, 64U, 2U), TensorShape(3U, 1U, 64U, 128U), TensorShape(128U), TensorShape(110U, 113U, 128U, 2U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(111U, 112U, 127U, 4U), TensorShape(3U, 1U, 127U, 128U), TensorShape(128U), TensorShape(111U, 112U, 128U, 4U), PadStrideInfo(1, 1, 1, 0)); + } +}; + +class LargeWinogradConvolutionLayer1x3Dataset final : public ConvolutionLayerDataset +{ +public: + LargeWinogradConvolutionLayer1x3Dataset() + { + // Kernel size 3 + // Batch size 1 + add_config(TensorShape(224U, 222U, 64U), TensorShape(1U, 3U, 64U, 64U), TensorShape(64U), TensorShape(224U, 222U, 64U), PadStrideInfo(1, 1, 0, 1)); + add_config(TensorShape(112U, 113U, 64U), TensorShape(1U, 3U, 64U, 128U), TensorShape(128U), TensorShape(112U, 113U, 128U), PadStrideInfo(1, 1, 0, 1)); + add_config(TensorShape(112U, 112U, 128U), TensorShape(1U, 3U, 128U, 129U), TensorShape(129U), TensorShape(112U, 110U, 129U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(53U, 56U, 125U), TensorShape(1U, 3U, 125U, 256U), TensorShape(256U), TensorShape(53U, 56U, 256U), PadStrideInfo(1, 1, 0, 1)); + add_config(TensorShape(56U, 56U, 256U), TensorShape(1U, 3U, 256U, 256U), TensorShape(256U), TensorShape(56U, 54U, 256U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(28U, 28U, 257U), TensorShape(1U, 3U, 257U, 512U), TensorShape(512U), TensorShape(28U, 28U, 512U), PadStrideInfo(1, 1, 0, 1)); + add_config(TensorShape(28U, 28U, 512U), TensorShape(1U, 3U, 512U, 512U), TensorShape(512U), TensorShape(28U, 28U, 512U), PadStrideInfo(1, 1, 0, 1)); + add_config(TensorShape(14U, 14U, 512U), TensorShape(1U, 3U, 512U, 512U), TensorShape(512U), TensorShape(14U, 12U, 512U), PadStrideInfo(1, 1, 0, 0)); + // Batch size 3, 2 and 4 + add_config(TensorShape(224U, 222U, 64U, 3U), TensorShape(1U, 3U, 64U, 64U), TensorShape(64U), TensorShape(224U, 222U, 64U, 3U), PadStrideInfo(1, 1, 0, 1)); + add_config(TensorShape(112U, 113U, 64U, 2U), TensorShape(1U, 3U, 64U, 128U), TensorShape(128U), TensorShape(112U, 113U, 128U, 2U), PadStrideInfo(1, 1, 0, 1)); + add_config(TensorShape(111U, 112U, 127U, 4U), TensorShape(1U, 3U, 127U, 128U), TensorShape(128U), TensorShape(111U, 112U, 128U, 4U), PadStrideInfo(1, 1, 0, 1)); + } +}; + class LargeWinogradConvolutionLayer5x5Dataset final : public ConvolutionLayerDataset { public: diff --git a/tests/datasets/ShapeDatasets.h b/tests/datasets/ShapeDatasets.h index a5620ff7cf..68263c7793 100644 --- a/tests/datasets/ShapeDatasets.h +++ b/tests/datasets/ShapeDatasets.h @@ -388,6 +388,38 @@ public: } }; +/** Data set containing small 3x1 tensor shapes. */ +class Small3x1Shapes final : public ShapeDataset +{ +public: + Small3x1Shapes() + : ShapeDataset("Shape", + { + TensorShape{ 3U, 1U, 7U, 4U }, + TensorShape{ 3U, 1U, 4U, 13U }, + TensorShape{ 3U, 1U, 9U, 2U }, + TensorShape{ 3U, 1U, 3U, 5U }, + }) + { + } +}; + +/** Data set containing small 1x3 tensor shapes. */ +class Small1x3Shapes final : public ShapeDataset +{ +public: + Small1x3Shapes() + : ShapeDataset("Shape", + { + TensorShape{ 1U, 3U, 7U, 4U }, + TensorShape{ 1U, 3U, 4U, 13U }, + TensorShape{ 1U, 3U, 9U, 2U }, + TensorShape{ 1U, 3U, 3U, 5U }, + }) + { + } +}; + /** Data set containing large 3x3 tensor shapes. */ class Large3x3Shapes final : public ShapeDataset { @@ -404,6 +436,38 @@ public: } }; +/** Data set containing large 3x1 tensor shapes. */ +class Large3x1Shapes final : public ShapeDataset +{ +public: + Large3x1Shapes() + : ShapeDataset("Shape", + { + TensorShape{ 3U, 1U, 32U, 64U }, + TensorShape{ 3U, 1U, 51U, 13U }, + TensorShape{ 3U, 1U, 53U, 47U }, + TensorShape{ 3U, 1U, 128U, 384U }, + }) + { + } +}; + +/** Data set containing large 1x3 tensor shapes. */ +class Large1x3Shapes final : public ShapeDataset +{ +public: + Large1x3Shapes() + : ShapeDataset("Shape", + { + TensorShape{ 1U, 3U, 32U, 64U }, + TensorShape{ 1U, 3U, 51U, 13U }, + TensorShape{ 1U, 3U, 53U, 47U }, + TensorShape{ 1U, 3U, 128U, 384U }, + }) + { + } +}; + /** Data set containing small 5x5 tensor shapes. */ class Small5x5Shapes final : public ShapeDataset { diff --git a/tests/datasets/SmallConvolutionLayerDataset.h b/tests/datasets/SmallConvolutionLayerDataset.h index fed36de3dd..f05cc15c06 100644 --- a/tests/datasets/SmallConvolutionLayerDataset.h +++ b/tests/datasets/SmallConvolutionLayerDataset.h @@ -52,6 +52,36 @@ public: } }; +class SmallWinogradConvolutionLayer3x1Dataset final : public ConvolutionLayerDataset +{ +public: + SmallWinogradConvolutionLayer3x1Dataset() + { + // Channel size big enough to force multithreaded execution of the input transform + add_config(TensorShape(8U, 8U, 32U), TensorShape(3U, 1U, 32U, 1U), TensorShape(1U), TensorShape(6U, 8U, 1U), PadStrideInfo(1, 1, 0, 0)); + // Batch size 1 + add_config(TensorShape(8U, 8U, 2U), TensorShape(3U, 1U, 2U, 1U), TensorShape(1U), TensorShape(6U, 8U, 1U), PadStrideInfo(1, 1, 0, 0)); + // Batch size 4 + add_config(TensorShape(23U, 27U, 5U, 4U), TensorShape(3U, 1U, 5U, 21U), TensorShape(21U), TensorShape(21U, 27U, 21U, 4U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(8U, 8U, 2U), TensorShape(3U, 1U, 2U, 1U), TensorShape(1U), TensorShape(8U, 8U, 1U), PadStrideInfo(1, 1, 1, 0)); + } +}; + +class SmallWinogradConvolutionLayer1x3Dataset final : public ConvolutionLayerDataset +{ +public: + SmallWinogradConvolutionLayer1x3Dataset() + { + // Channel size big enough to force multithreaded execution of the input transform + add_config(TensorShape(8U, 8U, 32U), TensorShape(1U, 3U, 32U, 1U), TensorShape(1U), TensorShape(8U, 6U, 1U), PadStrideInfo(1, 1, 0, 0)); + // Batch size 1 + add_config(TensorShape(8U, 8U, 2U), TensorShape(1U, 3U, 2U, 1U), TensorShape(1U), TensorShape(8U, 6U, 1U), PadStrideInfo(1, 1, 0, 0)); + // Batch size 4 + add_config(TensorShape(23U, 27U, 5U, 4U), TensorShape(1U, 3U, 5U, 21U), TensorShape(21U), TensorShape(23U, 25U, 21U, 4U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(8U, 8U, 2U), TensorShape(1U, 3U, 2U, 1U), TensorShape(1U), TensorShape(8U, 8U, 1U), PadStrideInfo(1, 1, 0, 1)); + } +}; + class SmallWinogradConvolutionLayer5x5Dataset final : public ConvolutionLayerDataset { public: diff --git a/tests/datasets/WinogradInputTransformDataset.h b/tests/datasets/WinogradInputTransformDataset.h index e365f9657f..ca23984a1d 100644 --- a/tests/datasets/WinogradInputTransformDataset.h +++ b/tests/datasets/WinogradInputTransformDataset.h @@ -112,6 +112,36 @@ public: } }; +class SmallWinogradInputTransformDataset2x1_3x1 final : public WinogradInputTransformDataset +{ +public: + SmallWinogradInputTransformDataset2x1_3x1() + { + add_config(TensorShape(9U, 9U), WinogradInfo(Size2D(2U, 1U), Size2D(3U, 1U), Size2D(9U, 9U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW)); + add_config(TensorShape(27U, 13U, 2U), WinogradInfo(Size2D(2U, 1U), Size2D(3U, 1U), Size2D(27U, 13U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); + add_config(TensorShape(128U, 64U, 1U, 3U), WinogradInfo(Size2D(2U, 1U), Size2D(3U, 1U), Size2D(128U, 64U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW)); + add_config(TensorShape(9U, 9U, 3U, 4U), WinogradInfo(Size2D(2U, 1U), Size2D(3U, 1U), Size2D(9U, 9U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); + add_config(TensorShape(27U, 13U, 2U, 4U), WinogradInfo(Size2D(2U, 1U), Size2D(3U, 1U), Size2D(27U, 13U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW)); + add_config(TensorShape(9U, 9U, 3U, 5U), WinogradInfo(Size2D(2U, 1U), Size2D(3U, 1U), Size2D(9U, 9U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); + add_config(TensorShape(14U, 14U, 512U, 2U), WinogradInfo(Size2D(2U, 1U), Size2D(3U, 1U), Size2D(14U, 14U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW)); + } +}; + +class SmallWinogradInputTransformDataset1x2_1x3 final : public WinogradInputTransformDataset +{ +public: + SmallWinogradInputTransformDataset1x2_1x3() + { + add_config(TensorShape(9U, 9U), WinogradInfo(Size2D(1U, 2U), Size2D(1U, 3U), Size2D(9U, 9U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW)); + add_config(TensorShape(27U, 13U, 2U), WinogradInfo(Size2D(1U, 2U), Size2D(1U, 3U), Size2D(27U, 13U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); + add_config(TensorShape(128U, 64U, 1U, 3U), WinogradInfo(Size2D(1U, 2U), Size2D(1U, 3U), Size2D(128U, 64U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW)); + add_config(TensorShape(9U, 9U, 3U, 4U), WinogradInfo(Size2D(1U, 2U), Size2D(1U, 3U), Size2D(9U, 9U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); + add_config(TensorShape(27U, 13U, 2U, 4U), WinogradInfo(Size2D(1U, 2U), Size2D(1U, 3U), Size2D(27U, 13U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW)); + add_config(TensorShape(9U, 9U, 3U, 5U), WinogradInfo(Size2D(1U, 2U), Size2D(1U, 3U), Size2D(9U, 9U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); + add_config(TensorShape(14U, 14U, 512U, 2U), WinogradInfo(Size2D(1U, 2U), Size2D(1U, 3U), Size2D(14U, 14U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW)); + } +}; + class SmallWinogradInputTransformDataset4x4_3x3 final : public WinogradInputTransformDataset { public: @@ -127,6 +157,36 @@ public: } }; +class SmallWinogradInputTransformDataset4x1_3x1 final : public WinogradInputTransformDataset +{ +public: + SmallWinogradInputTransformDataset4x1_3x1() + { + add_config(TensorShape(9U, 9U), WinogradInfo(Size2D(4U, 1U), Size2D(3U, 1U), Size2D(9U, 9U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW)); + add_config(TensorShape(27U, 13U, 2U), WinogradInfo(Size2D(4U, 1U), Size2D(3U, 1U), Size2D(27U, 13U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); + add_config(TensorShape(128U, 64U, 1U, 3U), WinogradInfo(Size2D(4U, 1U), Size2D(3U, 1U), Size2D(128U, 64U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW)); + add_config(TensorShape(9U, 9U, 3U, 4U), WinogradInfo(Size2D(4U, 1U), Size2D(3U, 1U), Size2D(9U, 9U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); + add_config(TensorShape(27U, 13U, 2U, 4U), WinogradInfo(Size2D(4U, 1U), Size2D(3U, 1U), Size2D(27U, 13U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW)); + add_config(TensorShape(9U, 9U, 3U, 5U), WinogradInfo(Size2D(4U, 1U), Size2D(3U, 1U), Size2D(9U, 9U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); + add_config(TensorShape(14U, 14U, 512U, 2U), WinogradInfo(Size2D(4U, 1U), Size2D(3U, 1U), Size2D(14U, 14U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW)); + } +}; + +class SmallWinogradInputTransformDataset1x4_1x3 final : public WinogradInputTransformDataset +{ +public: + SmallWinogradInputTransformDataset1x4_1x3() + { + add_config(TensorShape(9U, 9U), WinogradInfo(Size2D(1U, 4U), Size2D(1U, 3U), Size2D(9U, 9U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW)); + add_config(TensorShape(27U, 13U, 2U), WinogradInfo(Size2D(1U, 4U), Size2D(1U, 3U), Size2D(27U, 13U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); + add_config(TensorShape(128U, 64U, 1U, 3U), WinogradInfo(Size2D(1U, 4U), Size2D(1U, 3U), Size2D(128U, 64U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW)); + add_config(TensorShape(9U, 9U, 3U, 4U), WinogradInfo(Size2D(1U, 4U), Size2D(1U, 3U), Size2D(9U, 9U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); + add_config(TensorShape(27U, 13U, 2U, 4U), WinogradInfo(Size2D(1U, 4U), Size2D(1U, 3U), Size2D(27U, 13U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW)); + add_config(TensorShape(9U, 9U, 3U, 5U), WinogradInfo(Size2D(1U, 4U), Size2D(1U, 3U), Size2D(9U, 9U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); + add_config(TensorShape(14U, 14U, 512U, 2U), WinogradInfo(Size2D(1U, 4U), Size2D(1U, 3U), Size2D(14U, 14U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW)); + } +}; + class SmallWinogradInputTransformDataset4x4_5x5 final : public WinogradInputTransformDataset { public: @@ -154,6 +214,30 @@ public: } }; +class LargeWinogradInputTransformDataset2x1_3x1 final : public WinogradInputTransformDataset +{ +public: + LargeWinogradInputTransformDataset2x1_3x1() + { + add_config(TensorShape(42U, 37U, 8U, 15U), WinogradInfo(Size2D(2U, 1U), Size2D(3U, 1U), Size2D(42U, 37U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW)); + add_config(TensorShape(57U, 60U, 13U, 8U), WinogradInfo(Size2D(2U, 1U), Size2D(3U, 1U), Size2D(57U, 60U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW)); + add_config(TensorShape(128U, 64U, 21U, 13U), WinogradInfo(Size2D(2U, 1U), Size2D(3U, 1U), Size2D(128U, 64U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); + add_config(TensorShape(83U, 72U, 14U, 5U), WinogradInfo(Size2D(2U, 1U), Size2D(3U, 1U), Size2D(83U, 72U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); + } +}; + +class LargeWinogradInputTransformDataset1x2_1x3 final : public WinogradInputTransformDataset +{ +public: + LargeWinogradInputTransformDataset1x2_1x3() + { + add_config(TensorShape(42U, 37U, 8U, 15U), WinogradInfo(Size2D(1U, 2U), Size2D(1U, 3U), Size2D(42U, 37U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW)); + add_config(TensorShape(57U, 60U, 13U, 8U), WinogradInfo(Size2D(1U, 2U), Size2D(1U, 3U), Size2D(57U, 60U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW)); + add_config(TensorShape(128U, 64U, 21U, 13U), WinogradInfo(Size2D(1U, 2U), Size2D(1U, 3U), Size2D(128U, 64U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); + add_config(TensorShape(83U, 72U, 14U, 5U), WinogradInfo(Size2D(1U, 2U), Size2D(1U, 3U), Size2D(83U, 72U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); + } +}; + class LargeWinogradInputTransformDataset4x4_3x3 final : public WinogradInputTransformDataset { public: @@ -166,6 +250,30 @@ public: } }; +class LargeWinogradInputTransformDataset4x1_3x1 final : public WinogradInputTransformDataset +{ +public: + LargeWinogradInputTransformDataset4x1_3x1() + { + add_config(TensorShape(42U, 37U, 8U, 15U), WinogradInfo(Size2D(4U, 1U), Size2D(3U, 1U), Size2D(42U, 37U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW)); + add_config(TensorShape(57U, 60U, 13U, 8U), WinogradInfo(Size2D(4U, 1U), Size2D(3U, 1U), Size2D(57U, 60U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW)); + add_config(TensorShape(128U, 64U, 21U, 13U), WinogradInfo(Size2D(4U, 1U), Size2D(3U, 1U), Size2D(128U, 64U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); + add_config(TensorShape(83U, 72U, 14U, 5U), WinogradInfo(Size2D(4U, 1U), Size2D(3U, 1U), Size2D(83U, 72U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); + } +}; + +class LargeWinogradInputTransformDataset1x4_1x3 final : public WinogradInputTransformDataset +{ +public: + LargeWinogradInputTransformDataset1x4_1x3() + { + add_config(TensorShape(42U, 37U, 8U, 15U), WinogradInfo(Size2D(1U, 4U), Size2D(1U, 3U), Size2D(42U, 37U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW)); + add_config(TensorShape(57U, 60U, 13U, 8U), WinogradInfo(Size2D(1U, 4U), Size2D(1U, 3U), Size2D(57U, 60U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW)); + add_config(TensorShape(128U, 64U, 21U, 13U), WinogradInfo(Size2D(1U, 4U), Size2D(1U, 3U), Size2D(128U, 64U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); + add_config(TensorShape(83U, 72U, 14U, 5U), WinogradInfo(Size2D(1U, 4U), Size2D(1U, 3U), Size2D(83U, 72U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); + } +}; + class LargeWinogradInputTransformDataset4x4_5x5 final : public WinogradInputTransformDataset { public: diff --git a/tests/datasets/WinogradOutputTransformDataset.h b/tests/datasets/WinogradOutputTransformDataset.h index c7ba3b2b7d..a4689c6ef1 100644 --- a/tests/datasets/WinogradOutputTransformDataset.h +++ b/tests/datasets/WinogradOutputTransformDataset.h @@ -99,12 +99,11 @@ private: std::vector<WinogradInfo> _info{}; }; -class SmallWinogradOutputTransformDataset final : public WinogradOutputTransformDataset +class SmallWinogradOutputTransformDatasetNCHW final : public WinogradOutputTransformDataset { public: - SmallWinogradOutputTransformDataset() + SmallWinogradOutputTransformDatasetNCHW() { - // NCHW // (2x2, 3x3) add_config(TensorShape(13U, 6U, 16U), WinogradInfo(Size2D(2U, 2U), Size2D(3U, 3U), Size2D(7U, 6U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); add_config(TensorShape(7U, 20U, 16U), WinogradInfo(Size2D(2U, 2U), Size2D(3U, 3U), Size2D(10U, 11U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); @@ -120,6 +119,34 @@ public: add_config(TensorShape(24U, 16U, 36U, 2U), WinogradInfo(Size2D(4U, 4U), Size2D(3U, 3U), Size2D(14U, 14U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW)); add_config(TensorShape(7U, 12U, 16U, 5U), WinogradInfo(Size2D(2U, 2U), Size2D(3U, 3U), Size2D(8U, 10U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); + // (2x1, 3x1) + add_config(TensorShape(13U, 18U, 4U), WinogradInfo(Size2D(2U, 1U), Size2D(3U, 1U), Size2D(7U, 6U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); + add_config(TensorShape(7U, 44U, 4U), WinogradInfo(Size2D(2U, 1U), Size2D(3U, 1U), Size2D(10U, 11U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); + add_config(TensorShape(1U, 891U, 4U), WinogradInfo(Size2D(2U, 1U), Size2D(3U, 1U), Size2D(53U, 33U), PadStrideInfo(1, 1, 1, 0), DataLayout::NCHW)); + add_config(TensorShape(7U, 30U, 4U, 3U), WinogradInfo(Size2D(2U, 1U), Size2D(3U, 1U), Size2D(8U, 10U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); + add_config(TensorShape(24U, 98U, 4U, 2U), WinogradInfo(Size2D(2U, 1U), Size2D(3U, 1U), Size2D(14U, 14U), PadStrideInfo(1, 1, 1, 0), DataLayout::NCHW)); + + // (1x2, 1x3) + add_config(TensorShape(13U, 14U, 4U), WinogradInfo(Size2D(1U, 2U), Size2D(1U, 3U), Size2D(7U, 6U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); + add_config(TensorShape(7U, 50U, 4U), WinogradInfo(Size2D(1U, 2U), Size2D(1U, 3U), Size2D(10U, 11U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); + add_config(TensorShape(1U, 901U, 4U), WinogradInfo(Size2D(1U, 2U), Size2D(1U, 3U), Size2D(53U, 33U), PadStrideInfo(1, 1, 0, 1), DataLayout::NCHW)); + add_config(TensorShape(7U, 32U, 4U, 3U), WinogradInfo(Size2D(1U, 2U), Size2D(1U, 3U), Size2D(8U, 10U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); + add_config(TensorShape(24U, 98U, 4U, 2U), WinogradInfo(Size2D(1U, 2U), Size2D(1U, 3U), Size2D(14U, 14U), PadStrideInfo(1, 1, 0, 1), DataLayout::NCHW)); + + // (4x1, 3x1) + add_config(TensorShape(13U, 12U, 6U), WinogradInfo(Size2D(4U, 1U), Size2D(3U, 1U), Size2D(7U, 6U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); + add_config(TensorShape(7U, 22U, 6U), WinogradInfo(Size2D(4U, 1U), Size2D(3U, 1U), Size2D(10U, 11U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); + add_config(TensorShape(1U, 462U, 6U), WinogradInfo(Size2D(4U, 1U), Size2D(3U, 1U), Size2D(53U, 33U), PadStrideInfo(1, 1, 1, 0), DataLayout::NCHW)); + add_config(TensorShape(7U, 20U, 6U, 3U), WinogradInfo(Size2D(4U, 1U), Size2D(3U, 1U), Size2D(8U, 10U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); + add_config(TensorShape(24U, 56U, 6U, 2U), WinogradInfo(Size2D(4U, 1U), Size2D(3U, 1U), Size2D(14U, 14U), PadStrideInfo(1, 1, 1, 0), DataLayout::NCHW)); + + // (1x4, 1x3) + add_config(TensorShape(13U, 7U, 6U), WinogradInfo(Size2D(1U, 4U), Size2D(1U, 3U), Size2D(7U, 6U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); + add_config(TensorShape(7U, 30U, 6U), WinogradInfo(Size2D(1U, 4U), Size2D(1U, 3U), Size2D(10U, 11U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); + add_config(TensorShape(1U, 477U, 6U), WinogradInfo(Size2D(1U, 4U), Size2D(1U, 3U), Size2D(53U, 33U), PadStrideInfo(1, 1, 0, 1), DataLayout::NCHW)); + add_config(TensorShape(7U, 16U, 6U, 3U), WinogradInfo(Size2D(1U, 4U), Size2D(1U, 3U), Size2D(8U, 10U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); + add_config(TensorShape(24U, 56U, 6U, 2U), WinogradInfo(Size2D(1U, 4U), Size2D(1U, 3U), Size2D(14U, 14U), PadStrideInfo(1, 1, 0, 1), DataLayout::NCHW)); + // (4x4, 5x5) add_config(TensorShape(13U, 1U, 64U), WinogradInfo(Size2D(4U, 4U), Size2D(5U, 5U), Size2D(7U, 6U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); add_config(TensorShape(7U, 4U, 64U), WinogradInfo(Size2D(4U, 4U), Size2D(5U, 5U), Size2D(10U, 11U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); @@ -127,8 +154,14 @@ public: add_config(TensorShape(7U, 2U, 64U, 3U), WinogradInfo(Size2D(4U, 4U), Size2D(5U, 5U), Size2D(8U, 10U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); add_config(TensorShape(24U, 9U, 64U, 2U), WinogradInfo(Size2D(4U, 4U), Size2D(5U, 5U), Size2D(14U, 14U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW)); add_config(TensorShape(7U, 2U, 64U, 5U), WinogradInfo(Size2D(4U, 4U), Size2D(5U, 5U), Size2D(8U, 10U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); + } +}; - // NHWC +class SmallWinogradOutputTransformDatasetNHWC final : public WinogradOutputTransformDataset +{ +public: + SmallWinogradOutputTransformDatasetNHWC() + { // (4x4, 3x3) add_config(TensorShape(13U, 4U, 36U), WinogradInfo(Size2D(4U, 4U), Size2D(3U, 3U), Size2D(10U, 9U), PadStrideInfo(1, 1, 0, 0), DataLayout::NHWC)); add_config(TensorShape(13U, 6U, 36U), WinogradInfo(Size2D(4U, 4U), Size2D(3U, 3U), Size2D(10U, 11U), PadStrideInfo(1, 1, 0, 0), DataLayout::NHWC)); @@ -146,10 +179,10 @@ public: } }; -class LargeWinogradOutputTransformDataset final : public WinogradOutputTransformDataset +class LargeWinogradOutputTransformDatasetNCHW final : public WinogradOutputTransformDataset { public: - LargeWinogradOutputTransformDataset() + LargeWinogradOutputTransformDatasetNCHW() { // NCHW // (2x2, 3x3) @@ -168,13 +201,51 @@ public: add_config(TensorShape(32U, 784U, 36U, 2U), WinogradInfo(Size2D(4U, 4U), Size2D(3U, 3U), Size2D(112U, 112U), PadStrideInfo(1, 1, 1, 0), DataLayout::NCHW)); add_config(TensorShape(13U, 196U, 36U, 5U), WinogradInfo(Size2D(4U, 4U), Size2D(3U, 3U), Size2D(56U, 56U), PadStrideInfo(1, 1, 0, 1), DataLayout::NCHW)); + // (2x1, 3x1) + add_config(TensorShape(64U, 24976U, 4U), WinogradInfo(Size2D(2U, 1U), Size2D(3U, 1U), Size2D(224U, 223U), PadStrideInfo(1, 1, 1, 0), DataLayout::NCHW)); + add_config(TensorShape(32U, 6160U, 4U), WinogradInfo(Size2D(2U, 1U), Size2D(3U, 1U), Size2D(112U, 110U), PadStrideInfo(1, 1, 1, 0), DataLayout::NCHW)); + add_config(TensorShape(13U, 1568U, 4U), WinogradInfo(Size2D(2U, 1U), Size2D(3U, 1U), Size2D(56U, 56U), PadStrideInfo(1, 1, 1, 0), DataLayout::NCHW)); + add_config(TensorShape(64U, 24753U, 4U, 3U), WinogradInfo(Size2D(2U, 1U), Size2D(3U, 1U), Size2D(224U, 223U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); + add_config(TensorShape(32U, 6050U, 4U, 2U), WinogradInfo(Size2D(2U, 1U), Size2D(3U, 1U), Size2D(112U, 110U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); + add_config(TensorShape(13U, 1512U, 4U, 5U), WinogradInfo(Size2D(2U, 1U), Size2D(3U, 1U), Size2D(56U, 56U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); + + // (1x2, 1x3) + add_config(TensorShape(64U, 25088U, 4U), WinogradInfo(Size2D(1U, 2U), Size2D(1U, 3U), Size2D(224U, 223U), PadStrideInfo(1, 1, 0, 1), DataLayout::NCHW)); + add_config(TensorShape(32U, 6160U, 4U), WinogradInfo(Size2D(1U, 2U), Size2D(1U, 3U), Size2D(112U, 110U), PadStrideInfo(1, 1, 0, 1), DataLayout::NCHW)); + add_config(TensorShape(13U, 1568U, 4U), WinogradInfo(Size2D(1U, 2U), Size2D(1U, 3U), Size2D(56U, 56U), PadStrideInfo(1, 1, 0, 1), DataLayout::NCHW)); + add_config(TensorShape(64U, 24864U, 4U, 3U), WinogradInfo(Size2D(1U, 2U), Size2D(1U, 3U), Size2D(224U, 223U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); + add_config(TensorShape(32U, 6048U, 4U, 2U), WinogradInfo(Size2D(1U, 2U), Size2D(1U, 3U), Size2D(112U, 110U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); + add_config(TensorShape(13U, 1512U, 4U, 5U), WinogradInfo(Size2D(1U, 2U), Size2D(1U, 3U), Size2D(56U, 56U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); + + // (4x1, 3x1) + add_config(TensorShape(64U, 12488U, 6U), WinogradInfo(Size2D(4U, 1U), Size2D(3U, 1U), Size2D(224U, 223U), PadStrideInfo(1, 1, 1, 0), DataLayout::NCHW)); + add_config(TensorShape(32U, 3080U, 6U), WinogradInfo(Size2D(4U, 1U), Size2D(3U, 1U), Size2D(112U, 110U), PadStrideInfo(1, 1, 1, 0), DataLayout::NCHW)); + add_config(TensorShape(13U, 784U, 6U), WinogradInfo(Size2D(4U, 1U), Size2D(3U, 1U), Size2D(56U, 56U), PadStrideInfo(1, 1, 1, 0), DataLayout::NCHW)); + add_config(TensorShape(64U, 12488U, 6U, 3U), WinogradInfo(Size2D(4U, 1U), Size2D(3U, 1U), Size2D(224U, 223U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); + add_config(TensorShape(32U, 3080U, 6U, 2U), WinogradInfo(Size2D(4U, 1U), Size2D(3U, 1U), Size2D(112U, 110U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); + add_config(TensorShape(13U, 784U, 6U, 5U), WinogradInfo(Size2D(4U, 1U), Size2D(3U, 1U), Size2D(56U, 56U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); + + // (1x4, 1x3) + add_config(TensorShape(64U, 12544U, 6U), WinogradInfo(Size2D(1U, 4U), Size2D(1U, 3U), Size2D(224U, 223U), PadStrideInfo(1, 1, 0, 1), DataLayout::NCHW)); + add_config(TensorShape(32U, 3136U, 6U), WinogradInfo(Size2D(1U, 4U), Size2D(1U, 3U), Size2D(112U, 110U), PadStrideInfo(1, 1, 0, 1), DataLayout::NCHW)); + add_config(TensorShape(13U, 784U, 6U), WinogradInfo(Size2D(1U, 4U), Size2D(1U, 3U), Size2D(56U, 56U), PadStrideInfo(1, 1, 0, 1), DataLayout::NCHW)); + add_config(TensorShape(64U, 12544U, 6U, 3U), WinogradInfo(Size2D(1U, 4U), Size2D(1U, 3U), Size2D(224U, 223U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); + add_config(TensorShape(32U, 3024U, 6U, 2U), WinogradInfo(Size2D(1U, 4U), Size2D(1U, 3U), Size2D(112U, 110U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); + add_config(TensorShape(13U, 784U, 6U, 5U), WinogradInfo(Size2D(1U, 4U), Size2D(1U, 3U), Size2D(56U, 56U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW)); + // (4x4, 5x5) add_config(TensorShape(32U, 756U, 64U), WinogradInfo(Size2D(4U, 4U), Size2D(5U, 5U), Size2D(112U, 112U), PadStrideInfo(1, 1, 1, 0), DataLayout::NCHW)); add_config(TensorShape(13U, 182U, 64U), WinogradInfo(Size2D(4U, 4U), Size2D(5U, 5U), Size2D(56U, 56U), PadStrideInfo(1, 1, 0, 1), DataLayout::NCHW)); add_config(TensorShape(32U, 756U, 64U, 2U), WinogradInfo(Size2D(4U, 4U), Size2D(5U, 5U), Size2D(112U, 112U), PadStrideInfo(1, 1, 1, 0), DataLayout::NCHW)); add_config(TensorShape(13U, 182U, 64U, 5U), WinogradInfo(Size2D(4U, 4U), Size2D(5U, 5U), Size2D(56U, 56U), PadStrideInfo(1, 1, 0, 1), DataLayout::NCHW)); + } +}; - // NHWC +class LargeWinogradOutputTransformDatasetNHWC final : public WinogradOutputTransformDataset +{ +public: + LargeWinogradOutputTransformDatasetNHWC() + { // (4x4, 3x3) add_config(TensorShape(64U, 3136U, 36U), WinogradInfo(Size2D(4U, 4U), Size2D(3U, 3U), Size2D(224U, 224U), PadStrideInfo(1, 1, 1, 1), DataLayout::NHWC)); add_config(TensorShape(32U, 784U, 36U), WinogradInfo(Size2D(4U, 4U), Size2D(3U, 3U), Size2D(112U, 112U), PadStrideInfo(1, 1, 1, 0), DataLayout::NHWC)); diff --git a/tests/validation/CL/Winograd.cpp b/tests/validation/CL/Winograd.cpp index b869f4c314..f68ec8c286 100644 --- a/tests/validation/CL/Winograd.cpp +++ b/tests/validation/CL/Winograd.cpp @@ -23,6 +23,7 @@ */ #include "arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h" #include "arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h" +#include "arm_compute/core/Helpers.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/CL/CLTensor.h" @@ -51,12 +52,66 @@ namespace validation { namespace { +// *INDENT-OFF* +// clang-format off constexpr AbsoluteTolerance<float> tolerance_f32(0.001f); constexpr AbsoluteTolerance<float> tolerance_convolution_layer_f32(0.1f); -const auto SmallWinogradInputTransformDataset = framework::dataset::concat(datasets::SmallWinogradInputTransformDataset2x2_3x3(), - framework::dataset::concat(datasets::SmallWinogradInputTransformDataset4x4_3x3(), datasets::SmallWinogradInputTransformDataset4x4_5x5())); -const auto LargeWinogradInputTransformDataset = framework::dataset::concat(datasets::LargeWinogradInputTransformDataset2x2_3x3(), - framework::dataset::concat(datasets::LargeWinogradInputTransformDataset4x4_3x3(), datasets::LargeWinogradInputTransformDataset4x4_5x5())); + +// Input transform +const auto SmallWinogradInputTransformDatasetNCHW = + framework::dataset::concat(datasets::SmallWinogradInputTransformDataset2x2_3x3(), + framework::dataset::concat(datasets::SmallWinogradInputTransformDataset2x1_3x1(), + framework::dataset::concat(datasets::SmallWinogradInputTransformDataset1x2_1x3(), + framework::dataset::concat(datasets::SmallWinogradInputTransformDataset4x4_3x3(), + framework::dataset::concat(datasets::SmallWinogradInputTransformDataset4x1_3x1(), + framework::dataset::concat(datasets::SmallWinogradInputTransformDataset1x4_1x3(), + datasets::SmallWinogradInputTransformDataset4x4_5x5())))))); + +const auto SmallWinogradInputTransformDatasetNHWC = framework::dataset::concat(datasets::SmallWinogradInputTransformDataset4x4_3x3(), + datasets::SmallWinogradInputTransformDataset4x4_5x5()); + +const auto LargeWinogradInputTransformDatasetNCHW = + framework::dataset::concat(datasets::LargeWinogradInputTransformDataset2x2_3x3(), + framework::dataset::concat(datasets::LargeWinogradInputTransformDataset2x1_3x1(), + framework::dataset::concat(datasets::LargeWinogradInputTransformDataset1x2_1x3(), + framework::dataset::concat(datasets::LargeWinogradInputTransformDataset4x4_3x3(), + framework::dataset::concat(datasets::LargeWinogradInputTransformDataset4x1_3x1(), + framework::dataset::concat(datasets::LargeWinogradInputTransformDataset1x4_1x3(), + datasets::LargeWinogradInputTransformDataset4x4_5x5())))))); + +const auto LargeWinogradInputTransformDatasetNHWC = + framework::dataset::concat(datasets::LargeWinogradInputTransformDataset4x4_3x3(), + datasets::LargeWinogradInputTransformDataset4x4_5x5()); + +// Filter transform +const auto SmallWinogradFilterTransformDatasetNCHW = + framework::dataset::concat(combine(datasets::Small3x3Shapes(), framework::dataset::make("OutputTile", { Size2D(2U, 2U), Size2D(4U, 4U) })), + framework::dataset::concat(combine(datasets::Small3x1Shapes(), framework::dataset::make("OutputTile", { Size2D(2U, 1U), Size2D(4U, 1U) })), + framework::dataset::concat(combine(datasets::Small1x3Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 2U), Size2D(1U, 4U) })), + combine(datasets::Small5x5Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 4U) }))))); + +const auto SmallWinogradFilterTransformDatasetNHWC = + framework::dataset::concat(combine(datasets::Small3x3Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 4U) })), + combine(datasets::Small5x5Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 4U) }))); + +const auto LargeWinogradFilterTransformDatasetNCHW = + framework::dataset::concat(combine(datasets::Large3x3Shapes(), framework::dataset::make("OutputTile", { Size2D(2U, 2U), Size2D(4U, 4U) })), + framework::dataset::concat(combine(datasets::Large3x1Shapes(), framework::dataset::make("OutputTile", { Size2D(2U, 1U), Size2D(4U, 1U) })), + framework::dataset::concat(combine(datasets::Large1x3Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 2U), Size2D(1U, 4U) })), + combine(datasets::Large5x5Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 4U) }))))); + +const auto LargeWinogradFilterTransformDatasetNHWC = + framework::dataset::concat(combine(datasets::Large3x3Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 4U) })), + combine(datasets::Large5x5Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 4U) }))); + +// Output transform +const auto SmallWinogradOutputTransformDatasetNCHW = datasets::SmallWinogradOutputTransformDatasetNCHW(); + +const auto SmallWinogradOutputTransformDatasetNHWC = datasets::SmallWinogradOutputTransformDatasetNHWC(); + +const auto LargeWinogradOutputTransformDatasetNCHW = datasets::LargeWinogradOutputTransformDatasetNCHW(); + +const auto LargeWinogradOutputTransformDatasetNHWC = datasets::LargeWinogradOutputTransformDatasetNHWC(); } // namespace using namespace arm_compute::misc::shape_calculator; @@ -65,9 +120,6 @@ TEST_SUITE(CL) TEST_SUITE(Winograd) TEST_SUITE(InputTransform) - -// *INDENT-OFF* -// clang-format off DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( framework::dataset::make("InputInfo",{ TensorInfo(TensorShape(53U, 21U, 5U, 3U), 1, DataType::F16), // F16 not supported @@ -101,17 +153,20 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( { ARM_COMPUTE_EXPECT(bool(CLWinogradInputTransform::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), winograd_info)) == expected, framework::LogLevel::ERRORS); } -// clang-format on -// *INDENT-ON* using CLWinogradInputTransformFixture = WinogradInputTransformValidationFixture<CLTensor, CLAccessor, CLWinogradInputTransform, float>; -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(SmallWinogradInputTransformDataset, LargeWinogradInputTransformDataset), +TEST_SUITE(NCHW) +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(SmallWinogradInputTransformDatasetNCHW, + LargeWinogradInputTransformDatasetNCHW), framework::dataset::make("DataLayout", { DataLayout::NCHW })), - framework::dataset::make("DataType", { DataType::F32 })), + framework::dataset::make("DataType", { DataType::F32 })), shape_in, winograd_info, data_layout, data_type) { - TensorShape shape_out = compute_winograd_input_transform_shape(TensorInfo(shape_in, 1, data_type), winograd_info); + TensorInfo tensor_info_in(shape_in, 1, data_type); + tensor_info_in.set_data_layout(data_layout); + + TensorShape shape_out = compute_winograd_input_transform_shape(tensor_info_in, winograd_info); // Create tensors CLTensor in = create_tensor<CLTensor>(shape_in, data_type, 1, 0, QuantizationInfo(), data_layout); @@ -127,28 +182,70 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(frame winograd_input_transform.configure(&in, &out, winograd_info); } -FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradInputTransformFixture, framework::DatasetMode::PRECOMMIT, combine(framework::dataset::concat(combine(SmallWinogradInputTransformDataset, - framework::dataset::make("DataLayout", { DataLayout::NCHW })), - combine(framework::dataset::concat(datasets::SmallWinogradInputTransformDataset4x4_3x3(), datasets::SmallWinogradInputTransformDataset4x4_5x5()), - framework::dataset::make("DataLayout", { DataLayout::NHWC }))), - framework::dataset::make("DataType", { DataType::F32 }))) +FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradInputTransformFixture, framework::DatasetMode::PRECOMMIT, combine(combine(SmallWinogradInputTransformDatasetNCHW, + framework::dataset::make("DataLayout", { DataLayout::NCHW })), + framework::dataset::make("DataType", { DataType::F32 }))) +{ + validate(CLAccessor(_target), _reference, tolerance_f32); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradInputTransformFixture, framework::DatasetMode::NIGHTLY, combine(combine(LargeWinogradInputTransformDatasetNCHW, + framework::dataset::make("DataLayout", { DataLayout::NCHW })), + framework::dataset::make("DataType", { DataType::F32 }))) +{ + validate(CLAccessor(_target), _reference, tolerance_f32); +} +TEST_SUITE_END() // NCHW + +TEST_SUITE(NHWC) +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(SmallWinogradInputTransformDatasetNHWC, + LargeWinogradInputTransformDatasetNHWC), + framework::dataset::make("DataLayout", { DataLayout::NHWC })), + framework::dataset::make("DataType", { DataType::F32 })), + shape_in, winograd_info, data_layout, data_type) +{ + TensorShape shape_in_nhwc(shape_in); + + // Convert the shape to NHWC + permute(shape_in_nhwc, PermutationVector(2U, 0U, 1U)); + + // TensorInfo + TensorInfo tensor_info_in(shape_in_nhwc, 1, data_type); + tensor_info_in.set_data_layout(data_layout); + + TensorShape shape_out = compute_winograd_input_transform_shape(tensor_info_in, winograd_info); + + // Create tensors + CLTensor in = create_tensor<CLTensor>(shape_in_nhwc, data_type, 1, 0, QuantizationInfo(), data_layout); + CLTensor out = create_tensor<CLTensor>(shape_out, data_type); + + ARM_COMPUTE_EXPECT(in.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(out.info()->is_resizable(), framework::LogLevel::ERRORS); + + // Create and configure function + CLWinogradInputTransform winograd_input_transform; + + // Configure the function + winograd_input_transform.configure(&in, &out, winograd_info); +} + +FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradInputTransformFixture, framework::DatasetMode::PRECOMMIT, combine(combine(SmallWinogradInputTransformDatasetNHWC, + framework::dataset::make("DataLayout", { DataLayout::NHWC })), + framework::dataset::make("DataType", { DataType::F32 }))) { validate(CLAccessor(_target), _reference, tolerance_f32); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradInputTransformFixture, framework::DatasetMode::NIGHTLY, combine(framework::dataset::concat(combine(LargeWinogradInputTransformDataset, - framework::dataset::make("DataLayout", { DataLayout::NCHW })), - combine(framework::dataset::concat(datasets::LargeWinogradInputTransformDataset4x4_3x3(), datasets::LargeWinogradInputTransformDataset4x4_5x5()), - framework::dataset::make("DataLayout", { DataLayout::NHWC }))), - framework::dataset::make("DataType", { DataType::F32 }))) +FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradInputTransformFixture, framework::DatasetMode::NIGHTLY, combine(combine(LargeWinogradInputTransformDatasetNHWC, + framework::dataset::make("DataLayout", { DataLayout::NHWC })), + framework::dataset::make("DataType", { DataType::F32 }))) { validate(CLAccessor(_target), _reference, tolerance_f32); } +TEST_SUITE_END() // NHWC TEST_SUITE_END() // InputTransform TEST_SUITE(FilterTransform) -// *INDENT-OFF* -// clang-format off DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( framework::dataset::make("InputInfo",{ TensorInfo(TensorShape(3U, 3U, 5U, 3U), 1, DataType::F16), // F16 not supported @@ -182,19 +279,19 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( { ARM_COMPUTE_EXPECT(bool(CLWinogradFilterTransformKernel::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), winograd_info)) == expected, framework::LogLevel::ERRORS); } -// clang-format on -// *INDENT-ON* using CLWinogradFilterTransform = CLSynthetizeFunctionWithZeroConstantBorder<CLWinogradFilterTransformKernel, 0>; using CLWinogradFilterTransformFixture = WinogradFilterTransformValidationFixture<CLTensor, CLAccessor, CLWinogradFilterTransform, float>; -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::concat(datasets::Small3x3Shapes(), datasets::Large3x3Shapes()), - framework::dataset::make("OutputTile", { Size2D(2U, 2U), Size2D(4U, 4U) })), - framework::dataset::make("DataLayout", { DataLayout::NCHW })), - framework::dataset::make("DataType", { DataType::F32 })), +TEST_SUITE(NCHW) +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, + combine(combine(framework::dataset::concat(SmallWinogradFilterTransformDatasetNCHW, + LargeWinogradFilterTransformDatasetNCHW), + framework::dataset::make("DataLayout", { DataLayout::NCHW })), + framework::dataset::make("DataType", { DataType::F32 })), shape_a, output_tile, data_layout, data_type) { - WinogradInfo winograd_info(output_tile, Size2D(shape_a[0], shape_a[1]), Size2D() /* Not needed */, PadStrideInfo() /* Not needed */, DataLayout::NCHW /* Not needed */); + WinogradInfo winograd_info(output_tile, Size2D(shape_a[0], shape_a[1]), Size2D() /* Not needed */, PadStrideInfo() /* Not needed */, data_layout /* Not needed */); TensorShape shape_b = compute_winograd_filter_transform_shape(TensorInfo(shape_a, 1, data_type), winograd_info); @@ -210,37 +307,79 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combi winograd_filter_transform.configure(&a, &b, winograd_info); } -FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradFilterTransformFixture, framework::DatasetMode::ALL, - combine(framework::dataset::concat(combine(framework::dataset::concat(framework::dataset::concat(combine(datasets::Small3x3Shapes(), framework::dataset::make("OutputTile", Size2D(2U, 2U))), - combine(datasets::Small3x3Shapes(), - framework::dataset::make("OutputTile", Size2D(4U, 4U)))), - combine(datasets::Small5x5Shapes(), framework::dataset::make("OutputTile", Size2D(4U, 4U)))), - framework::dataset::make("DataLayout", { DataLayout::NCHW })), - combine(combine(framework::dataset::concat(datasets::Small3x3Shapes(), datasets::Small5x5Shapes()), framework::dataset::make("OutputTile", Size2D(4U, 4U))), framework::dataset::make("DataLayout", { DataLayout::NHWC }))), - framework::dataset::make("DataType", { DataType::F32 }))) +FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradFilterTransformFixture, framework::DatasetMode::PRECOMMIT, + combine(combine(SmallWinogradFilterTransformDatasetNCHW, + framework::dataset::make("DataLayout", { DataLayout::NCHW })), + framework::dataset::make("DataType", { DataType::F32 }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f32); } FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradFilterTransformFixture, framework::DatasetMode::NIGHTLY, - combine(framework::dataset::concat(combine(framework::dataset::concat(framework::dataset::concat(combine(datasets::Large3x3Shapes(), framework::dataset::make("OutputTile", Size2D(2U, 2U))), - combine(datasets::Large3x3Shapes(), - framework::dataset::make("OutputTile", Size2D(4U, 4U)))), - combine(datasets::Large5x5Shapes(), framework::dataset::make("OutputTile", Size2D(4U, 4U)))), - framework::dataset::make("DataLayout", { DataLayout::NCHW })), - combine(combine(framework::dataset::concat(datasets::Large3x3Shapes(), datasets::Large5x5Shapes()), framework::dataset::make("OutputTile", Size2D(4U, 4U))), framework::dataset::make("DataLayout", { DataLayout::NHWC }))), - framework::dataset::make("DataType", { DataType::F32 }))) + combine(combine(LargeWinogradFilterTransformDatasetNCHW, + framework::dataset::make("DataLayout", { DataLayout::NCHW })), + framework::dataset::make("DataType", { DataType::F32 }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f32); } +TEST_SUITE_END() // NCHW + +TEST_SUITE(NHWC) +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, + combine(combine(framework::dataset::concat(SmallWinogradFilterTransformDatasetNHWC, + LargeWinogradFilterTransformDatasetNHWC), + framework::dataset::make("DataLayout", { DataLayout::NHWC })), + framework::dataset::make("DataType", { DataType::F32 })), + shape_in, output_tile, data_layout, data_type) +{ + TensorShape shape_in_nhwc(shape_in); + + // Convert the shape to NHWC + permute(shape_in_nhwc, PermutationVector(2U, 0U, 1U)); + + // TensorInfo + TensorInfo tensor_info_in(shape_in_nhwc, 1, data_type); + tensor_info_in.set_data_layout(data_layout); + + WinogradInfo winograd_info(output_tile, Size2D(shape_in[0], shape_in[1]), Size2D() /* Not needed */, PadStrideInfo() /* Not needed */, data_layout /* Not needed */); + + TensorShape shape_b = compute_winograd_filter_transform_shape(tensor_info_in, winograd_info); + + // Create tensors + CLTensor a = create_tensor<CLTensor>(shape_in_nhwc, data_type, 1, 0, QuantizationInfo(), data_layout); + CLTensor b = create_tensor<CLTensor>(shape_b, data_type, 1, 0, QuantizationInfo(), data_layout); + + ARM_COMPUTE_EXPECT(a.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(b.info()->is_resizable(), framework::LogLevel::ERRORS); + // Create and configure function + CLWinogradFilterTransform winograd_filter_transform; + winograd_filter_transform.configure(&a, &b, winograd_info); +} + +FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradFilterTransformFixture, framework::DatasetMode::PRECOMMIT, + combine(combine(SmallWinogradFilterTransformDatasetNHWC, + framework::dataset::make("DataLayout", { DataLayout::NHWC })), + framework::dataset::make("DataType", { DataType::F32 }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradFilterTransformFixture, framework::DatasetMode::NIGHTLY, + combine(combine(LargeWinogradFilterTransformDatasetNHWC, + framework::dataset::make("DataLayout", { DataLayout::NHWC })), + framework::dataset::make("DataType", { DataType::F32 }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} +TEST_SUITE_END() // NHWC TEST_SUITE_END() // FilterTransform TEST_SUITE(OutputTransform) -// *INDENT-OFF* -// clang-format off DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( framework::dataset::make("InputInfo",{ TensorInfo(TensorShape(512U, 49U, 16U, 5U), 1, DataType::F16), // F16 not supported @@ -291,14 +430,14 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( { ARM_COMPUTE_EXPECT(bool(CLWinogradOutputTransformKernel::validate(&input_info.clone()->set_is_resizable(false), &bias_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), winograd_info)) == expected, framework::LogLevel::ERRORS); } -// clang-format on -// *INDENT-ON* using CLWinogradOutputTransform = CLSynthetizeFunctionWithZeroConstantBorder<CLWinogradOutputTransformKernel, 0>; using CLWinogradOutputTransformFixture = WinogradOutputTransformValidationFixture<CLTensor, CLAccessor, CLWinogradOutputTransform, float>; -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallWinogradOutputTransformDataset(), datasets::LargeWinogradOutputTransformDataset()), - framework::dataset::make("DataType", { DataType::F32 })), +TEST_SUITE(NCHW) +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(SmallWinogradOutputTransformDatasetNCHW, + LargeWinogradOutputTransformDatasetNCHW), + framework::dataset::make("DataType", { DataType::F32 })), shape_a, winograd_info, data_type) { TensorShape shape_b = compute_winograd_output_transform_shape(TensorInfo(shape_a, 1, data_type), winograd_info); @@ -315,23 +454,62 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::da winograd_output_transform.configure(&a, nullptr, &b, winograd_info); } -FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradOutputTransformFixture, framework::DatasetMode::ALL, combine(datasets::SmallWinogradOutputTransformDataset(), framework::dataset::make("DataType", { DataType::F32 }))) +FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradOutputTransformFixture, framework::DatasetMode::ALL, + combine(SmallWinogradOutputTransformDatasetNCHW, + framework::dataset::make("DataType", { DataType::F32 }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f32); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradOutputTransformFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeWinogradOutputTransformDataset(), framework::dataset::make("DataType", { DataType::F32 }))) +FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradOutputTransformFixture, framework::DatasetMode::NIGHTLY, + combine(LargeWinogradOutputTransformDatasetNCHW, + framework::dataset::make("DataType", { DataType::F32 }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f32); } +TEST_SUITE_END() // NCHW +TEST_SUITE(NHWC) +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(SmallWinogradOutputTransformDatasetNHWC, + LargeWinogradOutputTransformDatasetNHWC), + framework::dataset::make("DataType", { DataType::F32 })), + shape_a, winograd_info, data_type) +{ + TensorShape shape_b = compute_winograd_output_transform_shape(TensorInfo(shape_a, 1, data_type), winograd_info); + + // Create tensors + CLTensor a = create_tensor<CLTensor>(shape_a, data_type); + CLTensor b = create_tensor<CLTensor>(shape_b, data_type, 1, 0, QuantizationInfo(), winograd_info.output_data_layout); + + ARM_COMPUTE_EXPECT(a.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(b.info()->is_resizable(), framework::LogLevel::ERRORS); + + // Create and configure function + CLWinogradOutputTransform winograd_output_transform; + winograd_output_transform.configure(&a, nullptr, &b, winograd_info); +} + +FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradOutputTransformFixture, framework::DatasetMode::ALL, + combine(SmallWinogradOutputTransformDatasetNHWC, + framework::dataset::make("DataType", { DataType::F32 }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradOutputTransformFixture, framework::DatasetMode::NIGHTLY, + combine(LargeWinogradOutputTransformDatasetNHWC, + framework::dataset::make("DataType", { DataType::F32 }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} +TEST_SUITE_END() // NHWC TEST_SUITE_END() // OutputTransform TEST_SUITE(ConvolutionLayer) -// *INDENT-OFF* -// clang-format off DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip( framework::dataset::make("InputInfo", { TensorInfo(TensorShape(17U, 31U, 2U), 1, DataType::F16), // FP16 not supported @@ -373,16 +551,14 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip( { ARM_COMPUTE_EXPECT(bool(CLWinogradConvolutionLayer::validate(&input_info.clone()->set_is_resizable(false), &weights_info.clone()->set_is_resizable(false), &bias_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), conv_info)) == expected, framework::LogLevel::ERRORS); } -// clang-format on -// *INDENT-ON* using CLWinogradConvolutionLayerFastMathFixture = WinogradConvolutionLayerFastMathValidationFixture<CLTensor, CLAccessor, CLWinogradConvolutionLayer, float>; TEST_SUITE(Conv3x3) FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallWinogradConvolutionLayer3x3Dataset(), framework::dataset::make("DataType", { DataType::F32 })), - framework::dataset::make("ActivationLayerInfo", { ActivationLayerInfo() })), - framework::dataset::make("DataLayout", { DataLayout::NCHW }))) + framework::dataset::make("ActivationLayerInfo", { ActivationLayerInfo() })), + framework::dataset::make("DataLayout", { DataLayout::NCHW }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32); @@ -391,20 +567,64 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, fram FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeWinogradConvolutionLayer3x3Dataset(), framework::dataset::make("DataType", { DataType::F32 })), - framework::dataset::make("ActivationLayerInfo", { ActivationLayerInfo() })), - framework::dataset::make("DataLayout", { DataLayout::NCHW }))) + framework::dataset::make("ActivationLayerInfo", { ActivationLayerInfo() })), + framework::dataset::make("DataLayout", { DataLayout::NCHW }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32); } TEST_SUITE_END() // Conv3x3 +TEST_SUITE(Conv3x1) +FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(datasets::SmallWinogradConvolutionLayer3x1Dataset(), + framework::dataset::make("DataType", { DataType::F32 })), + framework::dataset::make("ActivationLayerInfo", { ActivationLayerInfo() })), + framework::dataset::make("DataLayout", { DataLayout::NCHW }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY, + combine(combine(combine(datasets::LargeWinogradConvolutionLayer3x1Dataset(), + framework::dataset::make("DataType", { DataType::F32 })), + framework::dataset::make("ActivationLayerInfo", { ActivationLayerInfo() })), + framework::dataset::make("DataLayout", { DataLayout::NCHW }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32); +} +TEST_SUITE_END() // Conv3x1 + +TEST_SUITE(Conv1x3) +FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(datasets::SmallWinogradConvolutionLayer1x3Dataset(), + framework::dataset::make("DataType", { DataType::F32 })), + framework::dataset::make("ActivationLayerInfo", { ActivationLayerInfo() })), + framework::dataset::make("DataLayout", { DataLayout::NCHW }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY, + combine(combine(combine(datasets::LargeWinogradConvolutionLayer1x3Dataset(), + framework::dataset::make("DataType", { DataType::F32 })), + framework::dataset::make("ActivationLayerInfo", { ActivationLayerInfo() })), + framework::dataset::make("DataLayout", { DataLayout::NCHW }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32); +} +TEST_SUITE_END() // Conv1x3 + TEST_SUITE(Conv5x5) FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallWinogradConvolutionLayer5x5Dataset(), framework::dataset::make("DataType", { DataType::F32 })), - framework::dataset::make("ActivationLayerInfo", { ActivationLayerInfo() })), - framework::dataset::make("DataLayout", { DataLayout::NCHW }))) + framework::dataset::make("ActivationLayerInfo", { ActivationLayerInfo() })), + framework::dataset::make("DataLayout", { DataLayout::NCHW }))) { // Validate output @@ -414,8 +634,8 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, fram FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeWinogradConvolutionLayer5x5Dataset(), framework::dataset::make("DataType", { DataType::F32 })), - framework::dataset::make("ActivationLayerInfo", { ActivationLayerInfo() })), - framework::dataset::make("DataLayout", { DataLayout::NCHW }))) + framework::dataset::make("ActivationLayerInfo", { ActivationLayerInfo() })), + framework::dataset::make("DataLayout", { DataLayout::NCHW }))) { // Validate output @@ -424,7 +644,6 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture, fram TEST_SUITE_END() // Conv5x5 TEST_SUITE_END() // ConvolutionLayer - TEST_SUITE_END() // Winograd TEST_SUITE_END() // CL } // namespace validation diff --git a/tests/validation/Helpers.cpp b/tests/validation/Helpers.cpp index e2415a203e..ff69b1c4b6 100644 --- a/tests/validation/Helpers.cpp +++ b/tests/validation/Helpers.cpp @@ -215,7 +215,7 @@ void transpose_matrix(const SimpleTensor<float> &in, SimpleTensor<float> &out) template <typename T> void get_tile(const SimpleTensor<T> &in, SimpleTensor<T> &tile, const Coordinates &coord) { - ARM_COMPUTE_ERROR_ON(tile.shape().num_dimensions() != 2); + ARM_COMPUTE_ERROR_ON(tile.shape().num_dimensions() > 2); const int w_tile = tile.shape()[0]; const int h_tile = tile.shape()[1]; @@ -272,7 +272,36 @@ void get_tile(const SimpleTensor<T> &in, SimpleTensor<T> &tile, const Coordinate } } +template <typename T> +void zeros(SimpleTensor<T> &in, const Coordinates &anchor, const TensorShape &shape) +{ + ARM_COMPUTE_ERROR_ON(anchor.num_dimensions() != shape.num_dimensions()); + ARM_COMPUTE_ERROR_ON(in.shape().num_dimensions() > 2); + ARM_COMPUTE_ERROR_ON(shape.num_dimensions() > 2); + + // Check if with the dimensions greater than 2 we could have out-of-bound reads + for(size_t d = 0; d < Coordinates::num_max_dimensions; ++d) + { + if(anchor[d] < 0 || ((anchor[d] + shape[d]) > in.shape()[d])) + { + ARM_COMPUTE_ERROR("anchor[d] < 0 || (anchor[d] + shape[d]) > in.shape()[d]"); + } + } + + // Get input pointer + auto in_ptr = static_cast<T *>(in(anchor[0] + anchor[1] * in.shape()[0])); + + const unsigned int n = in.shape()[0]; + + for(unsigned int y = 0; y < shape[1]; ++y) + { + std::fill(in_ptr, in_ptr + shape[0], 0); + in_ptr += n; + } +} + template void get_tile(const SimpleTensor<float> &in, SimpleTensor<float> &roi, const Coordinates &coord); +template void zeros(SimpleTensor<float> &in, const Coordinates &anchor, const TensorShape &shape); } // namespace validation } // namespace test } // namespace arm_compute diff --git a/tests/validation/Helpers.h b/tests/validation/Helpers.h index 49432d693e..88262d5e66 100644 --- a/tests/validation/Helpers.h +++ b/tests/validation/Helpers.h @@ -259,6 +259,15 @@ void transpose_matrix(const SimpleTensor<float> &in, SimpleTensor<float> &out); */ template <typename T> void get_tile(const SimpleTensor<T> &in, SimpleTensor<T> &tile, const Coordinates &coord); + +/** Fill with zeros the input tensor in the area defined by anchor and shape + * + * @param[in] in Input tensor to fill with zeros + * @param[out] anchor Starting point of the zeros area + * @param[in] shape Ending point of the zeros area + */ +template <typename T> +void zeros(SimpleTensor<T> &in, const Coordinates &anchor, const TensorShape &shape); } // namespace validation } // namespace test } // namespace arm_compute diff --git a/tests/validation/fixtures/WinogradConvolutionLayerFixture.h b/tests/validation/fixtures/WinogradConvolutionLayerFixture.h index aca24f13ae..ac168ebe3c 100644 --- a/tests/validation/fixtures/WinogradConvolutionLayerFixture.h +++ b/tests/validation/fixtures/WinogradConvolutionLayerFixture.h @@ -259,7 +259,18 @@ protected: fill(bias, 2, 0.f, 0.f); } - WinogradInfo winograd_info(Size2D(4U, 4U), + // Set output tile + Size2D output_tile(4U, 4U); + if(weights_shape[0] == 1) + { + output_tile.width = 1; + } + else if(weights_shape[1] == 1) + { + output_tile.height = 1; + } + + WinogradInfo winograd_info(output_tile, Size2D(weights_shape[0], weights_shape[1]), Size2D(input_shape[0], input_shape[1]), info, diff --git a/tests/validation/reference/Winograd.cpp b/tests/validation/reference/Winograd.cpp index 197d218129..5be4fe274b 100644 --- a/tests/validation/reference/Winograd.cpp +++ b/tests/validation/reference/Winograd.cpp @@ -29,6 +29,7 @@ #include "arm_compute/core/Types.h" #include <algorithm> +#include <cmath> namespace arm_compute { @@ -142,12 +143,24 @@ void initialize_matrix_transform(SimpleTensor<T> &src, const Size2D &output_tile { { WinogradKey(std::pair<int, int>(2, 2), std::pair<int, int>(3, 3), WinogradTransformType::INPUT), imatrix2x2_3x3 }, { WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(3, 3), WinogradTransformType::INPUT), imatrix4x4_3x3 }, + { WinogradKey(std::pair<int, int>(2, 1), std::pair<int, int>(3, 1), WinogradTransformType::INPUT), imatrix2x2_3x3 }, + { WinogradKey(std::pair<int, int>(4, 1), std::pair<int, int>(3, 1), WinogradTransformType::INPUT), imatrix4x4_3x3 }, + { WinogradKey(std::pair<int, int>(1, 2), std::pair<int, int>(1, 3), WinogradTransformType::INPUT), imatrix2x2_3x3 }, + { WinogradKey(std::pair<int, int>(1, 4), std::pair<int, int>(1, 3), WinogradTransformType::INPUT), imatrix4x4_3x3 }, { WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(5, 5), WinogradTransformType::INPUT), imatrix4x4_5x5 }, { WinogradKey(std::pair<int, int>(2, 2), std::pair<int, int>(3, 3), WinogradTransformType::FILTER), fmatrix2x2_3x3 }, { WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(3, 3), WinogradTransformType::FILTER), fmatrix4x4_3x3 }, + { WinogradKey(std::pair<int, int>(2, 1), std::pair<int, int>(3, 1), WinogradTransformType::FILTER), fmatrix2x2_3x3 }, + { WinogradKey(std::pair<int, int>(4, 1), std::pair<int, int>(3, 1), WinogradTransformType::FILTER), fmatrix4x4_3x3 }, + { WinogradKey(std::pair<int, int>(1, 2), std::pair<int, int>(1, 3), WinogradTransformType::FILTER), fmatrix2x2_3x3 }, + { WinogradKey(std::pair<int, int>(1, 4), std::pair<int, int>(1, 3), WinogradTransformType::FILTER), fmatrix4x4_3x3 }, { WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(5, 5), WinogradTransformType::FILTER), fmatrix4x4_5x5 }, { WinogradKey(std::pair<int, int>(2, 2), std::pair<int, int>(3, 3), WinogradTransformType::OUTPUT), omatrix2x2_3x3 }, { WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(3, 3), WinogradTransformType::OUTPUT), omatrix4x4_3x3 }, + { WinogradKey(std::pair<int, int>(2, 1), std::pair<int, int>(3, 1), WinogradTransformType::OUTPUT), omatrix2x2_3x3 }, + { WinogradKey(std::pair<int, int>(4, 1), std::pair<int, int>(3, 1), WinogradTransformType::OUTPUT), omatrix4x4_3x3 }, + { WinogradKey(std::pair<int, int>(1, 2), std::pair<int, int>(1, 3), WinogradTransformType::OUTPUT), omatrix2x2_3x3 }, + { WinogradKey(std::pair<int, int>(1, 4), std::pair<int, int>(1, 3), WinogradTransformType::OUTPUT), omatrix4x4_3x3 }, { WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(5, 5), WinogradTransformType::OUTPUT), omatrix4x4_5x5 }, }; @@ -175,6 +188,20 @@ void initialize_matrix_transform(SimpleTensor<T> &src, const Size2D &output_tile } // namespace template <typename T> +void print_tile(SimpleTensor<T> &in) +{ + for(int y = 0; y < in.shape()[1]; y++) + { + for(int x = 0; x < in.shape()[0]; x++) + { + std::cout << in[x + y * in.shape()[0]] << " "; + } + + std::cout << std::endl; + } +} + +template <typename T> SimpleTensor<T> winograd_input_transform(const SimpleTensor<T> &in, const TensorShape &output_shape, const WinogradInfo &winograd_info) { ARM_COMPUTE_ERROR_ON(in.data_layout() != DataLayout::NCHW); @@ -189,7 +216,10 @@ SimpleTensor<T> winograd_input_transform(const SimpleTensor<T> &in, const Tensor const unsigned int tile_w = output_tile_size.width + kernel_size.width - 1; const unsigned int tile_h = output_tile_size.height + kernel_size.height - 1; - TensorShape tile_dims(tile_w, tile_h); + // Get the maximum dimension from the tile size + const unsigned int tile_max_dim = std::max(tile_w, tile_h); + + TensorShape tile_dims(tile_max_dim, tile_max_dim); // Simple tensor for the input tile SimpleTensor<T> src_tile{ tile_dims, in.data_type() }; @@ -217,11 +247,46 @@ SimpleTensor<T> winograd_input_transform(const SimpleTensor<T> &in, const Tensor const int in_d = in.shape().z(); const int out_d = out.shape().z(); const int num_batches = in.shape().total_size() / (in_w * in_h * in_d); - const int num_tiles_x = std::ceil((in_w - (kernel_size.width - 1) + conv_info.pad_left() + conv_info.pad_right()) / static_cast<float>(output_tile_size.width)); - const int num_tiles_y = std::ceil((in_h - (kernel_size.height - 1) + conv_info.pad_top() + conv_info.pad_bottom()) / static_cast<float>(output_tile_size.height)); const int step_x = output_tile_size.width; const int step_y = output_tile_size.height; + // Compute the number of output tiles along the x and y direction of size "output_tile_size" + const Size2D num_tiles = compute_winograd_convolution_tiles(Size2D(in_w, in_h), + kernel_size, + output_tile_size, + conv_info); + + const int num_tiles_x = num_tiles.width; + const int num_tiles_y = num_tiles.height; + + // In case of 1D convolution, the input tile has to be partially filled with zeros + int start_x_zero = 0; + int start_y_zero = 0; + int end_x_zero = 0; + int end_y_zero = 0; + + if(output_tile_size.width == 1) + { + start_x_zero = 1; + start_y_zero = 0; + end_x_zero = tile_max_dim - 1; + end_y_zero = tile_max_dim; + } + else if(output_tile_size.height == 1) + { + start_x_zero = 0; + start_y_zero = 1; + end_x_zero = tile_max_dim; + end_y_zero = tile_max_dim - 1; + } + + // Set the anchor and shape of the zeros area + const Coordinates anchor_zeros(start_x_zero, start_y_zero); + const TensorShape shape_zeros(end_x_zero, end_y_zero); + + // If we have a vertical filter (i.e. 1x3, 1x5,..), we need to take the elements along the y direction (step = width of the output tile) + const int step_y_transf_tile = kernel_size.width == 1 ? tile_max_dim : 1; + ARM_COMPUTE_ERROR_ON((num_tiles_x * num_tiles_y) != static_cast<int>(out.shape().y())); for(int b = 0; b < num_batches; ++b) @@ -238,6 +303,9 @@ SimpleTensor<T> winograd_input_transform(const SimpleTensor<T> &in, const Tensor // Get the tile from the input tensor get_tile(in, src_tile, Coordinates(xi, yi, z, b)); + // Fill partially with zeros in case of 1D convolution + zeros(src_tile, anchor_zeros, shape_zeros); + // Compute the transformation matrix_multiply(matrix, src_tile, tmp_tile); matrix_multiply(tmp_tile, matrix_transposed, dst_tile); @@ -247,7 +315,7 @@ SimpleTensor<T> winograd_input_transform(const SimpleTensor<T> &in, const Tensor { int xo = z; int yo = x + y * num_tiles_x; - out[coords2index(out.shape(), Coordinates(xo, yo, i, b))] = dst_tile[i]; + out[coords2index(out.shape(), Coordinates(xo, yo, i, b))] = dst_tile[i * step_y_transf_tile]; } } } @@ -268,27 +336,31 @@ SimpleTensor<T> winograd_filter_transform(const SimpleTensor<T> &in, const Tenso const Size2D output_tile_size = winograd_info.output_tile_size; const Size2D kernel_size = winograd_info.kernel_size; - TensorShape kernel_tile_dims(kernel_size.width, kernel_size.height); - // Calculate dimensions for the tile const unsigned int input_tile_w = output_tile_size.width + kernel_size.width - 1; const unsigned int input_tile_h = output_tile_size.height + kernel_size.height - 1; const unsigned int input_tile_area = input_tile_w * input_tile_h; + // Get the maximum dimension from the filter size + const unsigned int kernel_max_dim = std::max(kernel_size.width, kernel_size.height); + + // Get the maximum dimension from the input tile + const unsigned int input_tile_max_dim = std::max(input_tile_w, input_tile_h); + // Simple tensor for the input tile - SimpleTensor<T> input_tile{ kernel_tile_dims, in.data_type(), 1 }; + SimpleTensor<T> input_tile{ TensorShape(kernel_max_dim, kernel_max_dim), in.data_type(), 1 }; // Simple tensor for the transformation matrix - SimpleTensor<T> trans_matrix{ TensorShape(kernel_tile_dims[0], input_tile_w), in.data_type(), 1 }; + SimpleTensor<T> trans_matrix{ TensorShape(kernel_max_dim, input_tile_max_dim), in.data_type(), 1 }; // Simple tensor for the transformation matrix transpose - SimpleTensor<T> trans_matrix_transposed{ TensorShape(input_tile_w, kernel_tile_dims[0]), in.data_type(), 1 }; + SimpleTensor<T> trans_matrix_transposed{ TensorShape(input_tile_max_dim, kernel_max_dim), in.data_type(), 1 }; // Simple tensor for the temporary tile - SimpleTensor<T> tmp_tile{ TensorShape(kernel_tile_dims[0], input_tile_w), in.data_type(), 1 }; + SimpleTensor<T> tmp_tile{ TensorShape(kernel_max_dim, input_tile_max_dim), in.data_type(), 1 }; // Simple tensor for the output tile - SimpleTensor<T> transf_tile{ TensorShape(input_tile_w, input_tile_w), in.data_type(), 1 }; + SimpleTensor<T> transf_tile{ TensorShape(input_tile_max_dim, input_tile_max_dim), in.data_type(), 1 }; // Initialize matrix for the filter transform initialize_matrix_transform(trans_matrix, output_tile_size, kernel_size, WinogradTransformType::FILTER); @@ -300,6 +372,9 @@ SimpleTensor<T> winograd_filter_transform(const SimpleTensor<T> &in, const Tenso const int num_filters = in.shape()[3]; const int num_batches = in.shape().total_size() / (kernel_size.area() * num_channels * num_filters); + // If we have a vertical filter (i.e. 1x3, 1x5,..), we need to take the elements along the y direction (step_y_transf_tile = width of the output tile) + const int step_y_transf_tile = kernel_size.width == 1 ? input_tile_max_dim : 1; + for(int n = 0; n < num_batches; ++n) { for(int w = 0; w < num_filters; ++w) @@ -321,7 +396,7 @@ SimpleTensor<T> winograd_filter_transform(const SimpleTensor<T> &in, const Tenso // Store the values across the channels for(unsigned int i = 0; i < input_tile_area; ++i) { - out[output_offset + i * num_filters * num_channels] = transf_tile[i]; + out[output_offset + i * num_filters * num_channels] = transf_tile[i * step_y_transf_tile]; } } } @@ -350,15 +425,19 @@ SimpleTensor<T> winograd_output_transform(const SimpleTensor<T> &in, const Simpl ARM_COMPUTE_ERROR_ON(in.shape()[2] != (in_tile_w * in_tile_h)); ARM_COMPUTE_ERROR_ON(in.shape()[0] != out.shape()[get_data_layout_dimension_index(winograd_info.output_data_layout, DataLayoutDimension::CHANNEL)]); + // Get the maximum dimension from the tile size + const unsigned int in_tile_max_dim = std::max(in_tile_w, in_tile_h); + const unsigned int out_tile_max_dim = std::max(output_tile_size.width, output_tile_size.height); + // Compute tile dimensions // Input tile dimensions - TensorShape in_tile_dims(in_tile_w, in_tile_h); + TensorShape in_tile_dims(in_tile_max_dim, in_tile_max_dim); // Output tile dimensions - TensorShape out_tile_dims(output_tile_size.width, output_tile_size.height); + TensorShape out_tile_dims(out_tile_max_dim, out_tile_max_dim); // Transformation matrix dimensions - TensorShape tr_tile_dims(in_tile_w, output_tile_size.width); + TensorShape tr_tile_dims(in_tile_max_dim, out_tile_max_dim); // Create tensors // Simple tensor for the input tile @@ -400,15 +479,24 @@ SimpleTensor<T> winograd_output_transform(const SimpleTensor<T> &in, const Simpl const int stridez_out = stridey_out * h_out; const int stridew_out = stridez_out * c_out; - // Compute number of elements to process in the X and Y direction - const int num_elements_x = input_dimensions.width - (kernel_size.width - 1) + conv_info.pad_left() + conv_info.pad_right(); - const int num_elements_y = input_dimensions.height - (kernel_size.height - 1) + conv_info.pad_top() + conv_info.pad_bottom(); - const int num_tiles_x = std::ceil(num_elements_x / static_cast<float>(output_tile_size.width)); - const int num_tiles_y = std::ceil(num_elements_y / static_cast<float>(output_tile_size.height)); + // Compute the number of output tiles along the x and y direction of size "output_tile_size" + const Size2D num_tiles = compute_winograd_convolution_tiles(Size2D(input_dimensions.width, input_dimensions.height), + kernel_size, + output_tile_size, + conv_info); + + const int num_tiles_x = num_tiles.width; + const int num_tiles_y = num_tiles.height; ARM_COMPUTE_UNUSED(num_tiles_y); ARM_COMPUTE_ERROR_ON(in.shape()[1] != static_cast<unsigned int>(num_tiles_x * num_tiles_y)); + // If we have a vertical filter (i.e. 1x3, 1x5,..), we still need to take the elements along the x direction (step_y_transf_tile = 1) + const int step_y_transf_tile = kernel_size.width == 1 ? 1 : output_tile.shape()[0]; + + // Initialize with zeros the input tile + zeros(input_tile, Coordinates(0, 0), input_tile.shape()); + for(int n = 0; n < num_batches; ++n) { for(int y = 0; y < h_in; ++y) @@ -441,7 +529,7 @@ SimpleTensor<T> winograd_output_transform(const SimpleTensor<T> &in, const Simpl // Check out-of-bound writes if((xo + xi < w_out) && (yo + yi < h_out)) { - out[output_offset + yi * stridey_out + xi] = output_tile[xi + yi * out_tile_w]; + out[output_offset + yi * stridey_out + xi] = output_tile[xi + yi * step_y_transf_tile]; // Add bias out[output_offset + yi * stridey_out + xi] += b[zo]; |