From f1cf394ae882e6e8fb2e0986f88d2548b82a85bb Mon Sep 17 00:00:00 2001 From: Pablo Tello Date: Thu, 19 Sep 2019 16:39:04 +0100 Subject: COMPMID-2575 Implement Winograd 7x1/1x7 with FP16 Change-Id: I3851418bba75fb0cb8c244c88828af019008067a Signed-off-by: giuros01 Reviewed-on: https://review.mlplatform.org/c/1984 Reviewed-by: Pablo Marquez Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins --- tests/datasets/WinogradOutputTransformDataset.h | 48 ++++++++------- tests/validation/CL/Winograd.cpp | 82 ++++++++++++++++--------- 2 files changed, 78 insertions(+), 52 deletions(-) (limited to 'tests') diff --git a/tests/datasets/WinogradOutputTransformDataset.h b/tests/datasets/WinogradOutputTransformDataset.h index fe0b6e7a09..d15a16e8b7 100644 --- a/tests/datasets/WinogradOutputTransformDataset.h +++ b/tests/datasets/WinogradOutputTransformDataset.h @@ -222,22 +222,6 @@ public: add_config(TensorShape(7U, 16U, 8U, 3U), WinogradInfo(Size2D(1U, 4U), Size2D(1U, 5U), Size2D(8U, 10U), PadStrideInfo(1, 1, 0, 0), DataLayout::NHWC)); add_config(TensorShape(24U, 42U, 8U, 2U), WinogradInfo(Size2D(1U, 4U), Size2D(1U, 5U), Size2D(14U, 14U), PadStrideInfo(1, 1, 0, 1), DataLayout::NHWC)); add_config(TensorShape(7U, 24U, 8U, 5U), WinogradInfo(Size2D(1U, 4U), Size2D(1U, 5U), Size2D(8U, 10U), PadStrideInfo(1, 1, 0, 2), DataLayout::NHWC)); - } -}; - -class SmallWinogradOutputTransformDatasetNHWC_F32 : public SmallWinogradOutputTransformDatasetNHWC_F16 -{ -public: - SmallWinogradOutputTransformDatasetNHWC_F32() - : SmallWinogradOutputTransformDatasetNHWC_F16() - { - // (2x2, 7x7) - add_config(TensorShape(13U, 4U, 64U), WinogradInfo(Size2D(2U, 2U), Size2D(7U, 7U), Size2D(9U, 9U), PadStrideInfo(1, 1, 0, 0), DataLayout::NHWC)); - add_config(TensorShape(7U, 6U, 64U), WinogradInfo(Size2D(2U, 2U), Size2D(7U, 7U), Size2D(10U, 11U), PadStrideInfo(1, 1, 0, 0), DataLayout::NHWC)); - add_config(TensorShape(5U, 360U, 64U), WinogradInfo(Size2D(2U, 2U), Size2D(7U, 7U), Size2D(53U, 33U), PadStrideInfo(1, 1, 0, 1), DataLayout::NHWC)); - add_config(TensorShape(7U, 2U, 64U, 3U), WinogradInfo(Size2D(2U, 2U), Size2D(7U, 7U), Size2D(8U, 10U), PadStrideInfo(1, 1, 0, 0), DataLayout::NHWC)); - add_config(TensorShape(24U, 25U, 64U, 2U), WinogradInfo(Size2D(2U, 2U), Size2D(7U, 7U), Size2D(14U, 14U), PadStrideInfo(1, 1, 1, 1), DataLayout::NHWC)); - add_config(TensorShape(7U, 2U, 64U, 5U), WinogradInfo(Size2D(2U, 2U), Size2D(7U, 7U), Size2D(8U, 10U), PadStrideInfo(1, 1, 0, 0), DataLayout::NHWC)); // (2x1, 7x1) add_config(TensorShape(13U, 18U, 8U), WinogradInfo(Size2D(2U, 1U), Size2D(7U, 1U), Size2D(9U, 9U), PadStrideInfo(1, 1, 0, 0), DataLayout::NHWC)); @@ -257,6 +241,22 @@ public: } }; +class SmallWinogradOutputTransformDatasetNHWC_F32 : public SmallWinogradOutputTransformDatasetNHWC_F16 +{ +public: + SmallWinogradOutputTransformDatasetNHWC_F32() + : SmallWinogradOutputTransformDatasetNHWC_F16() + { + // (2x2, 7x7) + add_config(TensorShape(13U, 4U, 64U), WinogradInfo(Size2D(2U, 2U), Size2D(7U, 7U), Size2D(9U, 9U), PadStrideInfo(1, 1, 0, 0), DataLayout::NHWC)); + add_config(TensorShape(7U, 6U, 64U), WinogradInfo(Size2D(2U, 2U), Size2D(7U, 7U), Size2D(10U, 11U), PadStrideInfo(1, 1, 0, 0), DataLayout::NHWC)); + add_config(TensorShape(5U, 360U, 64U), WinogradInfo(Size2D(2U, 2U), Size2D(7U, 7U), Size2D(53U, 33U), PadStrideInfo(1, 1, 0, 1), DataLayout::NHWC)); + add_config(TensorShape(7U, 2U, 64U, 3U), WinogradInfo(Size2D(2U, 2U), Size2D(7U, 7U), Size2D(8U, 10U), PadStrideInfo(1, 1, 0, 0), DataLayout::NHWC)); + add_config(TensorShape(24U, 25U, 
64U, 2U), WinogradInfo(Size2D(2U, 2U), Size2D(7U, 7U), Size2D(14U, 14U), PadStrideInfo(1, 1, 1, 1), DataLayout::NHWC)); + add_config(TensorShape(7U, 2U, 64U, 5U), WinogradInfo(Size2D(2U, 2U), Size2D(7U, 7U), Size2D(8U, 10U), PadStrideInfo(1, 1, 0, 0), DataLayout::NHWC)); + } +}; + class LargeWinogradOutputTransformDatasetNCHW : public WinogradOutputTransformDataset { public: @@ -376,14 +376,7 @@ public: add_config(TensorShape(13U, 784U, 8U), WinogradInfo(Size2D(1U, 4U), Size2D(1U, 5U), Size2D(56U, 56U), PadStrideInfo(1, 1, 0, 1), DataLayout::NHWC)); add_config(TensorShape(32U, 3024U, 8U, 2U), WinogradInfo(Size2D(1U, 4U), Size2D(1U, 5U), Size2D(112U, 112U), PadStrideInfo(1, 1, 0, 0), DataLayout::NHWC)); add_config(TensorShape(13U, 784U, 8U, 5U), WinogradInfo(Size2D(1U, 4U), Size2D(1U, 5U), Size2D(56U, 56U), PadStrideInfo(1, 1, 0, 1), DataLayout::NHWC)); - } -}; -class LargeWinogradOutputTransformDatasetNHWC_F32 : public LargeWinogradOutputTransformDatasetNHWC_F16 -{ -public: - LargeWinogradOutputTransformDatasetNHWC_F32() - { // (2x1, 7x1) add_config(TensorShape(32U, 6160U, 8U), WinogradInfo(Size2D(2U, 1U), Size2D(7U, 1U), Size2D(112U, 112U), PadStrideInfo(1, 1, 2, 0), DataLayout::NHWC)); add_config(TensorShape(13U, 1456U, 8U), WinogradInfo(Size2D(2U, 1U), Size2D(7U, 1U), Size2D(56U, 56U), PadStrideInfo(1, 1, 1, 0), DataLayout::NHWC)); @@ -397,6 +390,15 @@ public: add_config(TensorShape(13U, 1456U, 8U, 5U), WinogradInfo(Size2D(1U, 2U), Size2D(1U, 7U), Size2D(56U, 56U), PadStrideInfo(1, 1, 0, 1), DataLayout::NHWC)); } }; + +class LargeWinogradOutputTransformDatasetNHWC_F32 : public LargeWinogradOutputTransformDatasetNHWC_F16 +{ +public: + LargeWinogradOutputTransformDatasetNHWC_F32() + : LargeWinogradOutputTransformDatasetNHWC_F16() + { + } +}; } // namespace datasets } // namespace test } // namespace arm_compute diff --git a/tests/validation/CL/Winograd.cpp b/tests/validation/CL/Winograd.cpp index 5894d7fabd..de8b8af5ef 100644 --- a/tests/validation/CL/Winograd.cpp +++ b/tests/validation/CL/Winograd.cpp @@ -61,7 +61,7 @@ const AbsoluteTolerance tolerance_convolution_layer_f16(half(0.4f)); RelativeTolerance rel_tolerance_f16(half(0.2)); /**< Tolerance value for comparing reference's output against implementation's output for FP16 data types */ constexpr float tolerance_num = 0.05f; /**< Tolerance number */ constexpr float abs_tolerance_convolution_layer_f16 = 2.5f; /**< Tolerance number */ -constexpr float tolerance_num_convolution_f16 = 0.15f; /**< Tolerance number */ +constexpr float tolerance_num_f16 = 0.15f; /**< Tolerance number */ // Input transform const auto SmallWinogradInputTransformDatasetNCHW = @@ -80,12 +80,12 @@ const auto SmallWinogradInputTransformDatasetNHWC = framework::dataset::concat(d framework::dataset::concat(datasets::SmallWinogradInputTransformDataset1x4_1x3(), framework::dataset::concat(datasets::SmallWinogradInputTransformDataset4x4_5x5(), framework::dataset::concat(datasets::SmallWinogradInputTransformDataset4x1_5x1(), - datasets::SmallWinogradInputTransformDataset1x4_1x5()))))); + framework::dataset::concat(datasets::SmallWinogradInputTransformDataset1x4_1x5(), + framework::dataset::concat(datasets::SmallWinogradInputTransformDataset2x1_7x1(), + datasets::SmallWinogradInputTransformDataset1x2_1x7()))))))); const auto SmallWinogradInputTransformDatasetNHWC_FP32 = framework::dataset::concat(SmallWinogradInputTransformDatasetNHWC, - framework::dataset::concat(datasets::SmallWinogradInputTransformDataset1x2_1x7(), - 
framework::dataset::concat(datasets::SmallWinogradInputTransformDataset2x1_7x1(), - datasets::SmallWinogradInputTransformDataset2x2_7x7()))); + datasets::SmallWinogradInputTransformDataset2x2_7x7()); const auto LargeWinogradInputTransformDatasetNCHW = framework::dataset::concat(datasets::LargeWinogradInputTransformDataset2x2_3x3(), @@ -96,7 +96,9 @@ const auto LargeWinogradInputTransformDatasetNCHW = framework::dataset::concat(datasets::LargeWinogradInputTransformDataset1x4_1x3(), framework::dataset::concat(datasets::LargeWinogradInputTransformDataset4x4_5x5(), framework::dataset::concat(datasets::LargeWinogradInputTransformDataset4x1_5x1(), - datasets::LargeWinogradInputTransformDataset1x4_1x5())))))))); + framework::dataset::concat(datasets::LargeWinogradInputTransformDataset1x4_1x5(), + framework::dataset::concat(datasets::LargeWinogradInputTransformDataset1x2_1x7(), + datasets::LargeWinogradInputTransformDataset2x1_7x1())))))))))); const auto LargeWinogradInputTransformDatasetNHWC = framework::dataset::concat(datasets::LargeWinogradInputTransformDataset4x4_3x3(), @@ -106,9 +108,7 @@ const auto LargeWinogradInputTransformDatasetNHWC = const auto LargeWinogradInputTransformDatasetNHWC_FP32 = framework::dataset::concat(LargeWinogradInputTransformDatasetNHWC, - framework::dataset::concat(datasets::LargeWinogradInputTransformDataset1x2_1x7(), - framework::dataset::concat(datasets::LargeWinogradInputTransformDataset2x1_7x1(), - (datasets::LargeWinogradInputTransformDataset2x2_7x7())))); + (datasets::LargeWinogradInputTransformDataset2x2_7x7())); // Filter transform const auto SmallWinogradFilterTransformDatasetNCHW = @@ -125,13 +125,13 @@ const auto SmallWinogradFilterTransformDatasetNHWC_F16 = framework::dataset::concat(combine(datasets::Small1x3Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 4U) })), framework::dataset::concat(combine(datasets::Small5x5Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 4U) })), framework::dataset::concat(combine(datasets::Small5x1Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 1U) })), - (combine(datasets::Small1x5Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 4U) })))))))); + framework::dataset::concat(combine(datasets::Small1x5Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 4U) })), + framework::dataset::concat(combine(datasets::Small1x7Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 2U) })), + combine(datasets::Small7x1Shapes(), framework::dataset::make("OutputTile", { Size2D(2U, 1U) }))))))))); const auto SmallWinogradFilterTransformDatasetNHWC_F32 = framework::dataset::concat(SmallWinogradFilterTransformDatasetNHWC_F16, - framework::dataset::concat(combine(datasets::Small7x7Shapes(), framework::dataset::make("OutputTile", { Size2D(2U, 2U) })), - framework::dataset::concat(combine(datasets::Small7x1Shapes(), framework::dataset::make("OutputTile", { Size2D(2U, 1U) })), - combine(datasets::Small1x7Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 2U) }))))); + combine(datasets::Small7x7Shapes(), framework::dataset::make("OutputTile", { Size2D(2U, 2U) }))); const auto LargeWinogradFilterTransformDatasetNCHW = framework::dataset::concat(combine(datasets::Large3x3Shapes(), framework::dataset::make("OutputTile", { Size2D(2U, 2U), Size2D(4U, 4U) })), @@ -147,13 +147,13 @@ const auto LargeWinogradFilterTransformDatasetNHWC_F16 = framework::dataset::concat(combine(datasets::Large1x3Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 4U) })), 
framework::dataset::concat(combine(datasets::Large5x5Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 4U) })), framework::dataset::concat(combine(datasets::Large5x1Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 1U) })), - combine(datasets::Large1x5Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 4U) }))))))); + framework::dataset::concat(combine(datasets::Large1x5Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 4U) })), + framework::dataset::concat(combine(datasets::Large7x1Shapes(), framework::dataset::make("OutputTile", { Size2D(2U, 1U) })), + combine(datasets::Large1x7Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 2U) }))))))))); const auto LargeWinogradFilterTransformDatasetNHWC_F32 = framework::dataset::concat(LargeWinogradFilterTransformDatasetNHWC_F16, - framework::dataset::concat(combine(datasets::Large7x7Shapes(), framework::dataset::make("OutputTile", { Size2D(2U, 2U) })), - framework::dataset::concat(combine(datasets::Large7x1Shapes(), framework::dataset::make("OutputTile", { Size2D(2U, 1U) })), - combine(datasets::Large1x7Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 2U) }))))); + combine(datasets::Large7x7Shapes(), framework::dataset::make("OutputTile", { Size2D(2U, 2U) }))); // Output transform const auto SmallWinogradOutputTransformDatasetNCHW = datasets::SmallWinogradOutputTransformDatasetNCHW(); @@ -269,14 +269,14 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradInputTransformFixtureFP16, framework: framework::dataset::make("DataLayout", { DataLayout::NHWC })), framework::dataset::make("DataType", { DataType::F16 }))) { - validate(CLAccessor(_target), _reference, tolerance_f16); + validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num_f16); } FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradInputTransformFixtureFP16, framework::DatasetMode::NIGHTLY, combine(combine(LargeWinogradInputTransformDatasetNHWC, framework::dataset::make("DataLayout", { DataLayout::NHWC })), framework::dataset::make("DataType", { DataType::F16 }))) { - validate(CLAccessor(_target), _reference, tolerance_f16); + validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num_f16); } TEST_SUITE_END() // FP16 TEST_SUITE(FP32) @@ -385,7 +385,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradFilterTransformFixtureFP16, framework framework::dataset::make("DataType", { DataType::F16 }))) { // Validate output - validate(CLAccessor(_target), _reference, tolerance_f16); + validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num_f16); } FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradFilterTransformFixtureFP16, framework::DatasetMode::NIGHTLY, @@ -394,7 +394,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradFilterTransformFixtureFP16, framework framework::dataset::make("DataType", { DataType::F16 }))) { // Validate output - validate(CLAccessor(_target), _reference, tolerance_f16); + validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num_f16); } TEST_SUITE_END() // FP16 TEST_SUITE(FP32) @@ -524,7 +524,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradOutputTransformFixtureFP16, framework framework::dataset::make("ActivationInfo",{ ActivationLayerInfo() }) )) { // Validate output - validate(CLAccessor(_target), _reference, tolerance_f16); + validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num_f16); } FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradOutputTransformFixtureFP16, framework::DatasetMode::NIGHTLY, @@ -533,7 +533,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, 
CLWinogradOutputTransformFixtureFP16, framework framework::dataset::make("ActivationInfo",{ ActivationLayerInfo() }) )) { // Validate output - validate(CLAccessor(_target), _reference, tolerance_f16); + validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num_f16); } TEST_SUITE_END() // FP16 TEST_SUITE(FP32) @@ -754,7 +754,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, fr framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output - validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_convolution_f16); + validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_f16); } FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY, @@ -776,7 +776,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, fr framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output - validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_convolution_f16); + validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_f16); } FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY, @@ -798,7 +798,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, fr framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output - validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_convolution_f16); + validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_f16); } FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY, @@ -821,7 +821,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, fr { // Validate output - validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_convolution_f16); + validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_f16); } FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY, @@ -845,7 +845,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, fr { // Validate output - validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_convolution_f16); + validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_f16); } FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY, @@ -869,7 +869,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, fr { // Validate output - validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_convolution_f16); + validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_f16); } FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY, @@ -884,6 +884,30 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, fr } TEST_SUITE_END() // Conv1x5 +TEST_SUITE(Conv1x7) +FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::PRECOMMIT, + 
combine(combine(combine(datasets::SmallWinogradConvolutionLayer1x7Dataset(),
+                                               framework::dataset::make("DataType", { DataType::F16 })),
+                                       ActivationFunctionsSmallDataset),
+                               framework::dataset::make("DataLayout", { DataLayout::NHWC })))
+
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_f16);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(datasets::LargeWinogradConvolutionLayer1x7Dataset(),
+                                               framework::dataset::make("DataType", { DataType::F16 })),
+                                       ActivationFunctionsDataset),
+                               framework::dataset::make("DataLayout", { DataLayout::NHWC })))
+
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_convolution_layer_f16);
+}
+TEST_SUITE_END() // Conv1x7
+
 TEST_SUITE_END() // FP16
 TEST_SUITE_END() // ConvolutionLayer