From 916d1bcee42051721a82cfb46b52855c2fe56646 Mon Sep 17 00:00:00 2001 From: Gian Marco Iodice Date: Mon, 13 Aug 2018 11:20:41 +0100 Subject: COMPMID-1498 - Enable grouping in CLGEMMConvolutionLayer Change-Id: I15c7df21773145b03f42b6f78bd7ad2e5b8a5219 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/144126 Tested-by: Jenkins Reviewed-by: Giorgio Arena Reviewed-by: Georgios Pinitas --- tests/datasets/LargeConvolutionLayerDataset.h | 38 +++++-- tests/datasets/SmallConvolutionLayerDataset.h | 35 ++++++ tests/validation/CL/ConvolutionLayer.cpp | 117 +++++++++++++++++++-- tests/validation/CL/WeightsReshape.cpp | 4 +- .../validation/fixtures/ConvolutionLayerFixture.h | 17 ++- tests/validation/reference/ConvolutionLayer.cpp | 49 +++++---- tests/validation/reference/ConvolutionLayer.h | 2 +- 7 files changed, 218 insertions(+), 44 deletions(-) (limited to 'tests') diff --git a/tests/datasets/LargeConvolutionLayerDataset.h b/tests/datasets/LargeConvolutionLayerDataset.h index 3eb98dbeea..170d562f6c 100644 --- a/tests/datasets/LargeConvolutionLayerDataset.h +++ b/tests/datasets/LargeConvolutionLayerDataset.h @@ -166,31 +166,51 @@ public: // Batch size 1 add_config(TensorShape(227U, 227U, 3U), TensorShape(11U, 11U, 3U, 96U), TensorShape(96U), TensorShape(55U, 55U, 96U), PadStrideInfo(4, 4, 0, 0)); add_config(TensorShape(27U, 27U, 96U), TensorShape(5U, 5U, 96U, 256U), TensorShape(256U), TensorShape(27U, 27U, 256U), PadStrideInfo(1, 1, 2, 2)); - add_config(TensorShape(13U, 13U, 256U), TensorShape(3U, 3U, 256U, 384U), TensorShape(384U), TensorShape(13U, 13U, 384U), PadStrideInfo(1, 1, 1, 1)); - add_config(TensorShape(13U, 13U, 384U), TensorShape(3U, 3U, 384U, 384U), TensorShape(384U), TensorShape(13U, 13U, 384U), PadStrideInfo(1, 1, 1, 1)); - add_config(TensorShape(13U, 13U, 384U), TensorShape(3U, 3U, 384U, 256U), TensorShape(256U), TensorShape(13U, 13U, 256U), PadStrideInfo(1, 1, 1, 1)); + add_config(TensorShape(13U, 13U, 256U), TensorShape(1U, 1U, 256U, 384U), TensorShape(384U), TensorShape(13U, 13U, 384U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(13U, 13U, 384U), TensorShape(1U, 1U, 384U, 384U), TensorShape(384U), TensorShape(13U, 13U, 384U), PadStrideInfo(1, 1, 0, 0)); add_config(TensorShape(224U, 224U, 3U), TensorShape(7U, 7U, 3U, 64U), TensorShape(64U), TensorShape(112U, 112U, 64U), PadStrideInfo(2, 2, 3, 3)); add_config(TensorShape(28U, 28U, 256U), TensorShape(1U, 1U, 256U, 64U), TensorShape(64U), TensorShape(28U, 28U, 64U), PadStrideInfo(1, 1, 0, 0)); // Batch size 4 add_config(TensorShape(227U, 227U, 3U, 4U), TensorShape(11U, 11U, 3U, 96U), TensorShape(96U), TensorShape(55U, 55U, 96U, 4U), PadStrideInfo(4, 4, 0, 0)); add_config(TensorShape(27U, 27U, 96U, 4U), TensorShape(5U, 5U, 96U, 256U), TensorShape(256U), TensorShape(27U, 27U, 256U, 4U), PadStrideInfo(1, 1, 2, 2)); - add_config(TensorShape(13U, 13U, 256U, 4U), TensorShape(3U, 3U, 256U, 384U), TensorShape(384U), TensorShape(13U, 13U, 384U, 4U), PadStrideInfo(1, 1, 1, 1)); - add_config(TensorShape(13U, 13U, 384U, 4U), TensorShape(3U, 3U, 384U, 384U), TensorShape(384U), TensorShape(13U, 13U, 384U, 4U), PadStrideInfo(1, 1, 1, 1)); - add_config(TensorShape(13U, 13U, 384U, 4U), TensorShape(3U, 3U, 384U, 256U), TensorShape(256U), TensorShape(13U, 13U, 256U, 4U), PadStrideInfo(1, 1, 1, 1)); + add_config(TensorShape(13U, 13U, 256U, 4U), TensorShape(1U, 1U, 256U, 384U), TensorShape(384U), TensorShape(13U, 13U, 384U, 4U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(13U, 13U, 384U, 4U), TensorShape(1U, 1U, 384U, 384U), TensorShape(384U), TensorShape(13U, 13U, 384U, 4U), PadStrideInfo(1, 1, 0, 0)); add_config(TensorShape(224U, 224U, 3U, 4U), TensorShape(7U, 7U, 3U, 64U), TensorShape(64U), TensorShape(112U, 112U, 64U, 4U), PadStrideInfo(2, 2, 3, 3)); add_config(TensorShape(28U, 28U, 256U, 4U), TensorShape(1U, 1U, 256U, 64U), TensorShape(64U), TensorShape(28U, 28U, 64U, 4U), PadStrideInfo(1, 1, 0, 0)); // Batch size 8 add_config(TensorShape(227U, 227U, 3U, 8U), TensorShape(11U, 11U, 3U, 96U), TensorShape(96U), TensorShape(55U, 55U, 96U, 8U), PadStrideInfo(4, 4, 0, 0)); add_config(TensorShape(27U, 27U, 96U, 8U), TensorShape(5U, 5U, 96U, 256U), TensorShape(256U), TensorShape(27U, 27U, 256U, 8U), PadStrideInfo(1, 1, 2, 2)); - add_config(TensorShape(13U, 13U, 256U, 8U), TensorShape(3U, 3U, 256U, 384U), TensorShape(384U), TensorShape(13U, 13U, 384U, 8U), PadStrideInfo(1, 1, 1, 1)); - add_config(TensorShape(13U, 13U, 384U, 8U), TensorShape(3U, 3U, 384U, 384U), TensorShape(384U), TensorShape(13U, 13U, 384U, 8U), PadStrideInfo(1, 1, 1, 1)); - add_config(TensorShape(13U, 13U, 384U, 8U), TensorShape(3U, 3U, 384U, 256U), TensorShape(256U), TensorShape(13U, 13U, 256U, 8U), PadStrideInfo(1, 1, 1, 1)); + add_config(TensorShape(13U, 13U, 256U, 8U), TensorShape(1U, 1U, 256U, 384U), TensorShape(384U), TensorShape(13U, 13U, 384U, 8U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(13U, 13U, 384U, 8U), TensorShape(1U, 1U, 384U, 384U), TensorShape(384U), TensorShape(13U, 13U, 384U, 8U), PadStrideInfo(1, 1, 0, 0)); add_config(TensorShape(224U, 224U, 3U, 8U), TensorShape(7U, 7U, 3U, 64U), TensorShape(64U), TensorShape(112U, 112U, 64U, 8U), PadStrideInfo(2, 2, 3, 3)); add_config(TensorShape(28U, 28U, 256U, 8U), TensorShape(1U, 1U, 256U, 64U), TensorShape(64U), TensorShape(28U, 28U, 64U, 8U), PadStrideInfo(1, 1, 0, 0)); // Arbitrary batch size add_config(TensorShape(227U, 227U, 3U, 5U), TensorShape(11U, 11U, 3U, 96U), TensorShape(96U), TensorShape(55U, 55U, 96U, 5U), PadStrideInfo(4, 4, 0, 0)); } }; + +class LargeGroupedConvolutionLayerDataset final : public ConvolutionLayerDataset +{ +public: + LargeGroupedConvolutionLayerDataset() + { + // Batch size 1 + add_config(TensorShape(227U, 227U, 4U), TensorShape(11U, 11U, 2U, 96U), TensorShape(96U), TensorShape(55U, 55U, 96U), PadStrideInfo(4, 4, 0, 0)); + add_config(TensorShape(27U, 27U, 96U), TensorShape(5U, 5U, 24U, 256U), TensorShape(256U), TensorShape(27U, 27U, 256U), PadStrideInfo(1, 1, 2, 2)); + add_config(TensorShape(13U, 13U, 256U), TensorShape(1U, 1U, 128U, 384U), TensorShape(384U), TensorShape(13U, 13U, 384U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(13U, 13U, 384U), TensorShape(3U, 3U, 128U, 384U), TensorShape(384U), TensorShape(13U, 13U, 384U), PadStrideInfo(1, 1, 1, 1)); + // Batch size 4 + add_config(TensorShape(227U, 227U, 4U, 4U), TensorShape(11U, 11U, 2U, 96U), TensorShape(96U), TensorShape(55U, 55U, 96U, 4U), PadStrideInfo(4, 4, 0, 0)); + add_config(TensorShape(27U, 27U, 96U, 4U), TensorShape(5U, 5U, 24U, 256U), TensorShape(256U), TensorShape(27U, 27U, 256U, 4U), PadStrideInfo(1, 1, 2, 2)); + add_config(TensorShape(13U, 13U, 256U, 4U), TensorShape(3U, 3U, 128U, 384U), TensorShape(384U), TensorShape(13U, 13U, 384U, 4U), PadStrideInfo(1, 1, 1, 1)); + add_config(TensorShape(13U, 13U, 384U, 4U), TensorShape(3U, 3U, 128U, 384U), TensorShape(384U), TensorShape(13U, 13U, 384U, 4U), PadStrideInfo(1, 1, 1, 1)); + // Batch size 8 + add_config(TensorShape(227U, 227U, 4U, 8U), TensorShape(11U, 11U, 2U, 96U), TensorShape(96U), TensorShape(55U, 55U, 96U, 8U), PadStrideInfo(4, 4, 0, 0)); + add_config(TensorShape(27U, 27U, 96U, 8U), TensorShape(5U, 5U, 24U, 256U), TensorShape(256U), TensorShape(27U, 27U, 256U, 8U), PadStrideInfo(1, 1, 2, 2)); + add_config(TensorShape(13U, 13U, 256U, 8U), TensorShape(3U, 3U, 128U, 384U), TensorShape(384U), TensorShape(13U, 13U, 384U, 8U), PadStrideInfo(1, 1, 1, 1)); + add_config(TensorShape(13U, 13U, 384U, 8U), TensorShape(3U, 3U, 128U, 384U), TensorShape(384U), TensorShape(13U, 13U, 384U, 8U), PadStrideInfo(1, 1, 1, 1)); + } +}; } // namespace datasets } // namespace test } // namespace arm_compute diff --git a/tests/datasets/SmallConvolutionLayerDataset.h b/tests/datasets/SmallConvolutionLayerDataset.h index ae12dd4b16..a288d07902 100644 --- a/tests/datasets/SmallConvolutionLayerDataset.h +++ b/tests/datasets/SmallConvolutionLayerDataset.h @@ -146,6 +146,41 @@ public: add_config(TensorShape(33U, 27U, 7U, 5U), TensorShape(5U, 7U, 7U, 16U), TensorShape(16U), TensorShape(10U, 11U, 16U, 5U), PadStrideInfo(3, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR)); } }; + +class SmallGroupedConvolutionLayerDataset final : public ConvolutionLayerDataset +{ +public: + SmallGroupedConvolutionLayerDataset() + { + // Batch size 1 + // Number of groups = 2 + add_config(TensorShape(23U, 27U, 8U), TensorShape(1U, 1U, 4U, 24U), TensorShape(24U), TensorShape(12U, 27U, 24U), PadStrideInfo(2, 1, 0, 0)); + add_config(TensorShape(33U, 27U, 12U), TensorShape(5U, 5U, 6U, 16U), TensorShape(16U), TensorShape(11U, 12U, 16U), PadStrideInfo(3, 2, 1, 0)); + // Number of groups = 4 + add_config(TensorShape(23U, 27U, 8U), TensorShape(1U, 1U, 2U, 24U), TensorShape(24U), TensorShape(12U, 27U, 24U), PadStrideInfo(2, 1, 0, 0)); + add_config(TensorShape(33U, 27U, 12U), TensorShape(5U, 5U, 4U, 15U), TensorShape(15U), TensorShape(11U, 12U, 15U), PadStrideInfo(3, 2, 1, 0)); + + // Batch size 4 + // Number of groups = 2 + add_config(TensorShape(23U, 27U, 8U, 4U), TensorShape(1U, 1U, 4U, 24U), TensorShape(24U), TensorShape(12U, 27U, 24U, 4U), PadStrideInfo(2, 1, 0, 0)); + add_config(TensorShape(33U, 27U, 12U, 4U), TensorShape(5U, 5U, 6U, 16U), TensorShape(16U), TensorShape(11U, 12U, 16U, 4U), PadStrideInfo(3, 2, 1, 0)); + // Number of groups = 4 + add_config(TensorShape(23U, 27U, 8U, 4U), TensorShape(1U, 1U, 2U, 24U), TensorShape(24U), TensorShape(12U, 27U, 24U, 4U), PadStrideInfo(2, 1, 0, 0)); + add_config(TensorShape(33U, 27U, 12U, 4U), TensorShape(5U, 5U, 4U, 15U), TensorShape(15U), TensorShape(11U, 12U, 15U, 4U), PadStrideInfo(3, 2, 1, 0)); + + // Arbitrary batch size + add_config(TensorShape(23U, 27U, 8U, 5U), TensorShape(1U, 1U, 4U, 24U), TensorShape(24U), TensorShape(12U, 27U, 24U, 5U), PadStrideInfo(2, 1, 0, 0)); + add_config(TensorShape(33U, 27U, 12U, 3U), TensorShape(5U, 5U, 6U, 16U), TensorShape(16U), TensorShape(11U, 12U, 16U, 3U), PadStrideInfo(3, 2, 1, 0)); + // Number of groups = 4 + add_config(TensorShape(23U, 27U, 8U, 2U), TensorShape(1U, 1U, 2U, 24U), TensorShape(24U), TensorShape(12U, 27U, 24U, 2U), PadStrideInfo(2, 1, 0, 0)); + add_config(TensorShape(33U, 27U, 12U, 5U), TensorShape(5U, 5U, 4U, 15U), TensorShape(15U), TensorShape(11U, 12U, 15U, 5U), PadStrideInfo(3, 2, 1, 0)); + + // Asymmetric padding + add_config(TensorShape(33U, 27U, 8U, 5U), TensorShape(5U, 7U, 2U, 16U), TensorShape(16U), TensorShape(11U, 12U, 16U, 5U), PadStrideInfo(3, 2, 1, 1, 2, 0, DimensionRoundingType::FLOOR)); + add_config(TensorShape(33U, 27U, 8U, 5U), TensorShape(5U, 7U, 4U, 16U), TensorShape(16U), TensorShape(11U, 12U, 16U, 5U), PadStrideInfo(3, 2, 1, 1, 0, 2, DimensionRoundingType::FLOOR)); + add_config(TensorShape(33U, 27U, 6U, 5U), TensorShape(5U, 7U, 3U, 16U), TensorShape(16U), TensorShape(11U, 12U, 16U, 5U), PadStrideInfo(3, 2, 2, 1, 2, 0, DimensionRoundingType::FLOOR)); + } +}; } // namespace datasets } // namespace test } // namespace arm_compute diff --git a/tests/validation/CL/ConvolutionLayer.cpp b/tests/validation/CL/ConvolutionLayer.cpp index 54fdc0c386..5c96cd4c59 100644 --- a/tests/validation/CL/ConvolutionLayer.cpp +++ b/tests/validation/CL/ConvolutionLayer.cpp @@ -58,6 +58,14 @@ const auto CNNDataTypes = framework::dataset::make("DataType", DataType::F32, DataType::QASYMM8, }); + +/** Grouped CNN data types */ +const auto GroupedCNNDataTypes = framework::dataset::make("DataType", +{ + DataType::F16, + DataType::F32 +}); + const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo", { ActivationLayerInfo(), @@ -219,7 +227,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMConvolutionLayerFixture, framework: // Validate output validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num); } -TEST_SUITE_END() +TEST_SUITE_END() // FP16 TEST_SUITE(FP32) @@ -244,8 +252,8 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMConvolutionLayerFixture, framework // Validate output validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, absolute_tolerance_float); } -TEST_SUITE_END() -TEST_SUITE_END() +TEST_SUITE_END() // FP32 +TEST_SUITE_END() // Float template using CLGEMMConvolutionLayerQuantizedFixture = ConvolutionValidationQuantizedFixture; @@ -280,11 +288,106 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMConvolutionLayerQuantizedFixture // Validate output validate(CLAccessor(_target), _reference, tolerance_qasymm8); } -TEST_SUITE_END() -TEST_SUITE_END() +TEST_SUITE_END() // QASYMM8 +TEST_SUITE_END() // Quantized -TEST_SUITE_END() -TEST_SUITE_END() +TEST_SUITE_END() // GEMMConvolutionLayer + +template +using CLGEMMGroupedConvolutionLayerFixture = ConvolutionValidationFixture; + +TEST_SUITE(GroupedGEMMConvolutionLayer) + +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallGroupedConvolutionLayerDataset(), datasets::LargeGroupedConvolutionLayerDataset()), + GroupedCNNDataTypes), + ActivationFunctionsDataset), + input_shape, weights_shape, bias_shape, output_shape, info, dilation, data_type, act_info) +{ + ARM_COMPUTE_ERROR_ON((input_shape[2] % weights_shape[2]) != 0); + + // The number of groups is calculated dividing the number of input channels of the input tensor by the number of input channels of the weights shape + const int num_groups = input_shape[2] / weights_shape[2]; + + // Create tensors + CLTensor src = create_tensor(input_shape, data_type); + CLTensor weights = create_tensor(weights_shape, data_type, 1); + CLTensor bias = create_tensor(bias_shape, data_type, 1); + CLTensor dst = create_tensor(output_shape, data_type, 1); + + ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(weights.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + + // Create and configure function + CLGEMMConvolutionLayer conv; + conv.configure(&src, &weights, &bias, &dst, info, WeightsInfo(), dilation, act_info, num_groups); + + // Validate valid region + const ValidRegion src_valid_region = shape_to_valid_region(input_shape); + const ValidRegion weights_valid_region = shape_to_valid_region(weights_shape); + const ValidRegion bias_valid_region = shape_to_valid_region(bias_shape); + const ValidRegion dst_valid_region = shape_to_valid_region(output_shape); + + validate(src.info()->valid_region(), src_valid_region); + validate(weights.info()->valid_region(), weights_valid_region); + validate(bias.info()->valid_region(), bias_valid_region); + validate(dst.info()->valid_region(), dst_valid_region); + + // Validate padding + //TODO(COMPMID-415) Need to validate padding? +} + +TEST_SUITE(Float) +TEST_SUITE(FP32) + +FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMGroupedConvolutionLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallGroupedConvolutionLayerDataset(), + framework::dataset::make("ReshapeWeights", { true })), + framework::dataset::make("DataType", DataType::F32)), + framework::dataset::make("DataLayout", { DataLayout::NCHW })), + ActivationFunctionsDataset)) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32, tolerance_num); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMGroupedConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeGroupedConvolutionLayerDataset(), + framework::dataset::make("ReshapeWeights", { true })), + framework::dataset::make("DataType", DataType::F32)), + framework::dataset::make("DataLayout", { DataLayout::NCHW })), + ActivationFunctionsDataset)) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32, tolerance_num); +} +TEST_SUITE_END() // FP32 + +TEST_SUITE(FP16) + +FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMGroupedConvolutionLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallGroupedConvolutionLayerDataset(), + framework::dataset::make("ReshapeWeights", { true })), + framework::dataset::make("DataType", DataType::F16)), + framework::dataset::make("DataLayout", { DataLayout::NCHW })), + ActivationFunctionsDataset)) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32, tolerance_num); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMGroupedConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeGroupedConvolutionLayerDataset(), + framework::dataset::make("ReshapeWeights", { true })), + framework::dataset::make("DataType", DataType::F16)), + framework::dataset::make("DataLayout", { DataLayout::NCHW })), + ActivationFunctionsDataset)) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32, tolerance_num); +} +TEST_SUITE_END() // FP16 +TEST_SUITE_END() // Float + +TEST_SUITE_END() // GroupedGEMMConvolutionLayer +TEST_SUITE_END() // CL } // namespace validation } // namespace test } // namespace arm_compute diff --git a/tests/validation/CL/WeightsReshape.cpp b/tests/validation/CL/WeightsReshape.cpp index 6dae0c7625..30c231d499 100644 --- a/tests/validation/CL/WeightsReshape.cpp +++ b/tests/validation/CL/WeightsReshape.cpp @@ -79,7 +79,7 @@ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, CLWeightsReshapeFixture, framework::DatasetMode::ALL, combine(combine(combine(datasets::GroupedWeightsSmallShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("HasBias", { true, false })), - framework::dataset::make("NumGroups", { 1, 2, 3 }))) + framework::dataset::make("NumGroups", { 1, 2, 3, 4 }))) { // Validate output validate(CLAccessor(_target), _reference); @@ -87,7 +87,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLWeightsReshapeFixture, framework::Data FIXTURE_DATA_TEST_CASE(RunLarge, CLWeightsReshapeFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::GroupedWeightsLargeShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("HasBias", { true, false })), - framework::dataset::make("NumGroups", { 1, 2, 3 }))) + framework::dataset::make("NumGroups", { 1, 2, 3, 4 }))) { // Validate output validate(CLAccessor(_target), _reference); diff --git a/tests/validation/fixtures/ConvolutionLayerFixture.h b/tests/validation/fixtures/ConvolutionLayerFixture.h index 4a6326480c..3b420eac09 100644 --- a/tests/validation/fixtures/ConvolutionLayerFixture.h +++ b/tests/validation/fixtures/ConvolutionLayerFixture.h @@ -102,6 +102,10 @@ protected: TensorType compute_target(TensorShape input_shape, TensorShape weights_shape, const TensorShape &bias_shape, TensorShape output_shape, const PadStrideInfo &info, bool reshape_weights, const Size2D &dilation, const ActivationLayerInfo act_info) { + ARM_COMPUTE_ERROR_ON((input_shape[2] % weights_shape[2]) != 0); + + const unsigned int num_groups = input_shape[2] / weights_shape[2]; + if(_data_layout == DataLayout::NHWC) { permute(input_shape, PermutationVector(2U, 0U, 1U)); @@ -123,7 +127,7 @@ protected: // Create and configure function FunctionType conv; - conv.configure(&src, &weights, &bias, &dst, info, weights_info, dilation, act_info); + conv.configure(&src, &weights, &bias, &dst, info, weights_info, dilation, act_info, num_groups); ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); ARM_COMPUTE_EXPECT(weights.info()->is_resizable(), framework::LogLevel::ERRORS); @@ -155,6 +159,10 @@ protected: SimpleTensor compute_reference(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, const PadStrideInfo &info, const Size2D &dilation, const ActivationLayerInfo act_info) { + ARM_COMPUTE_ERROR_ON((input_shape[2] % weights_shape[2]) != 0); + + const unsigned int num_groups = input_shape[2] / weights_shape[2]; + // Create reference SimpleTensor src{ input_shape, _data_type, 1, _quantization_info }; SimpleTensor weights{ weights_shape, _data_type, 1, _quantization_info }; @@ -165,9 +173,9 @@ protected: fill(weights, 1); fill(bias, 2); - return (act_info.enabled()) ? reference::activation_layer(reference::convolution_layer(src, weights, bias, output_shape, info, dilation), + return (act_info.enabled()) ? reference::activation_layer(reference::convolution_layer(src, weights, bias, output_shape, info, dilation, num_groups), act_info) : - reference::convolution_layer(src, weights, bias, output_shape, info, dilation); + reference::convolution_layer(src, weights, bias, output_shape, info, dilation, num_groups); } TensorType _target{}; @@ -187,7 +195,8 @@ public: void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation, bool reshape_weights, DataType data_type, DataLayout data_layout, ActivationLayerInfo act_info) { - ConvolutionValidationGenericFixture::setup(input_shape, weights_shape, bias_shape, output_shape, info, dilation, reshape_weights, data_type, data_layout, + ConvolutionValidationGenericFixture::setup(input_shape, weights_shape, bias_shape, output_shape, info, dilation, reshape_weights, + data_type, data_layout, QuantizationInfo(), act_info); } }; diff --git a/tests/validation/reference/ConvolutionLayer.cpp b/tests/validation/reference/ConvolutionLayer.cpp index 2d314059dd..f41a6fc8c4 100644 --- a/tests/validation/reference/ConvolutionLayer.cpp +++ b/tests/validation/reference/ConvolutionLayer.cpp @@ -47,8 +47,10 @@ namespace template SimpleTensor convolution_layer_nchw(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &bias, SimpleTensor &dst, const PadStrideInfo &info, - const Size2D &dilation) + const Size2D &dilation, unsigned int num_groups) { + ARM_COMPUTE_ERROR_ON((src.shape()[2] / num_groups) != weights.shape()[2]); + // Compute reference const int width_in = src.shape().x(); const int height_in = src.shape().y(); @@ -78,23 +80,28 @@ SimpleTensor convolution_layer_nchw(const SimpleTensor &src, const SimpleT { for(int xi = start_xi; xi < start_xi + end_xi; xi += stride_xi) { - for(int ofm = 0; ofm < depth_out; ++ofm) + for(int group = 0; group < static_cast(num_groups); ++group) { - // Compute input and output offsets - const int offset_in = r * width_in * height_in * depth_in; - const int xo = (xi - start_xi) / stride_xi; - const int yo = (yi - start_yi) / stride_yi; - const int offset_out = xo + yo * width_out + ofm * width_out * height_out + r * width_out * height_out * depth_out; + for(int ofm = 0; ofm < static_cast(depth_out / num_groups); ++ofm) + { + // Compute input and output offsets + const int offset_in = r * width_in * height_in * depth_in + (group * (depth_in / num_groups) * width_in * height_in); + const int xo = (xi - start_xi) / stride_xi; + const int yo = (yi - start_yi) / stride_yi; + const int offset_out = xo + yo * width_out + ((ofm + group * (depth_out / num_groups)) * width_out * height_out) + (r * width_out * height_out * depth_out); + const int offset_w = (ofm + group * (depth_out / num_groups)) * width_weights * height_weights * depth_weights; + const int offset_b = (ofm + group * (depth_out / num_groups)); - ARM_COMPUTE_ASSERT(xo < width_out); - ARM_COMPUTE_ASSERT(yo < height_out); + ARM_COMPUTE_ASSERT(xo < width_out); + ARM_COMPUTE_ASSERT(yo < height_out); - // Compute 3D convolution - convolution_3d::detail::convolution3d(src, weights, bias, dst, - offset_in, ofm * width_weights * height_weights * depth_weights, ofm, offset_out, - xi, yi, - width_in, height_in, depth_in, - width_weights, height_weights, dilation.x(), dilation.y()); + // Compute 3D convolution + convolution_3d::detail::convolution3d(src, weights, bias, dst, + offset_in, offset_w, offset_b, offset_out, + xi, yi, + width_in, height_in, (depth_in / num_groups), + width_weights, height_weights, dilation.x(), dilation.y()); + } } } } @@ -104,7 +111,7 @@ SimpleTensor convolution_layer_nchw(const SimpleTensor &src, const SimpleT } template SimpleTensor convolution_layer(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &bias, const TensorShape &output_shape, const PadStrideInfo &info, - const Size2D &dilation) + const Size2D &dilation, unsigned int num_groups) { // Create reference SimpleTensor dst{ output_shape, src.data_type(), 1, src.quantization_info() }; @@ -115,20 +122,20 @@ SimpleTensor convolution_layer(const SimpleTensor &src, const SimpleTensor SimpleTensor weights_nchw = reference::permute(weights, PermutationVector(1U, 2U, 0U)); SimpleTensor dst_nchw = reference::permute(dst, PermutationVector(1U, 2U, 0U)); - return reference::permute(convolution_layer_nchw(src_nchw, weights_nchw, bias, dst_nchw, info, dilation), PermutationVector(2U, 0U, 1U)); + return reference::permute(convolution_layer_nchw(src_nchw, weights_nchw, bias, dst_nchw, info, dilation, num_groups), PermutationVector(2U, 0U, 1U)); } else { - return convolution_layer_nchw(src, weights, bias, dst, info, dilation); + return convolution_layer_nchw(src, weights, bias, dst, info, dilation, num_groups); } } template SimpleTensor convolution_layer(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &bias, const TensorShape &output_shape, - const PadStrideInfo &info, const Size2D &dilation); + const PadStrideInfo &info, const Size2D &dilation, unsigned int num_groups); template SimpleTensor convolution_layer(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &bias, const TensorShape &output_shape, - const PadStrideInfo &info, const Size2D &dilation); + const PadStrideInfo &info, const Size2D &dilation, unsigned int num_groups); template SimpleTensor convolution_layer(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &bias, const TensorShape &output_shape, - const PadStrideInfo &info, const Size2D &dilation); + const PadStrideInfo &info, const Size2D &dilation, unsigned int num_groups); } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/reference/ConvolutionLayer.h b/tests/validation/reference/ConvolutionLayer.h index ff3b1531f4..ccce53a209 100644 --- a/tests/validation/reference/ConvolutionLayer.h +++ b/tests/validation/reference/ConvolutionLayer.h @@ -37,7 +37,7 @@ namespace reference { template SimpleTensor convolution_layer(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &bias, const TensorShape &output_shape, const PadStrideInfo &info, - const Size2D &dilation = Size2D(1U, 1U)); + const Size2D &dilation = Size2D(1U, 1U), unsigned int num_groups = 1); } // namespace reference } // namespace validation } // namespace test -- cgit v1.2.1