From 5e281814c5110724d99fe8ee64bdf42ef2c31bce Mon Sep 17 00:00:00 2001 From: Gian Marco Iodice Date: Tue, 6 Jul 2021 13:19:41 +0100 Subject: Fix manual LOOP_UNROLLING The issue is caused by the number of iterations passed to LOOP_UNROLLING. When we use the manual LOOP_UNROLLING, the number of iterations must be less than or equal to 128. To overcome this problem, we create a utility function to check if any of the critical iterations (kernel dimensions) are beyond that limit. If so, the utility function disables the manual loop unrolling. Resolves COMPMID-4609 Change-Id: I7221c967609e462a5abd1cbb74e2a120f344fcb3 Signed-off-by: Gian Marco Iodice Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5913 Reviewed-by: Georgios Pinitas Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins --- tests/datasets/DepthwiseConvolutionLayerDataset.h | 10 +++++++++ tests/validation/CL/DepthwiseConvolutionLayer.cpp | 13 +++++++++++ .../fixtures/DepthwiseConvolutionLayerFixture.h | 26 ++++++++++++---------- 3 files changed, 37 insertions(+), 12 deletions(-) (limited to 'tests') diff --git a/tests/datasets/DepthwiseConvolutionLayerDataset.h b/tests/datasets/DepthwiseConvolutionLayerDataset.h index 3b17910eac..82ea40ff52 100644 --- a/tests/datasets/DepthwiseConvolutionLayerDataset.h +++ b/tests/datasets/DepthwiseConvolutionLayerDataset.h @@ -155,6 +155,16 @@ public: } }; +/** Dataset containing large kernel size for generic depthwise convolution. */ +class LargeKernelSizeDepthwiseConvolutionLayerNHWCDataset final : public DepthwiseConvolutionLayerDataset +{ +public: + LargeKernelSizeDepthwiseConvolutionLayerNHWCDataset() + { + add_config(TensorShape(6U, 210U, 8U), Size2D(4U, 194U), PadStrideInfo(1, 1, 0, 0)); + } +}; + /** Dataset containing small, 3x3 depthwise convolution shapes. 
*/ class SmallDepthwiseConvolutionLayerDataset3x3 final : public DepthwiseConvolutionLayerDataset { diff --git a/tests/validation/CL/DepthwiseConvolutionLayer.cpp b/tests/validation/CL/DepthwiseConvolutionLayer.cpp index 79a2678b44..b2cff2b792 100644 --- a/tests/validation/CL/DepthwiseConvolutionLayer.cpp +++ b/tests/validation/CL/DepthwiseConvolutionLayer.cpp @@ -382,6 +382,7 @@ FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerFixture, { validate(CLAccessor(_target), _reference, tolerance_f32); } + TEST_SUITE(Dilation) FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::ALL, @@ -418,6 +419,7 @@ FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerFixture, { validate(CLAccessor(_target), _reference, tolerance_f32); } + FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(), large_depth_multipliers), framework::dataset::make("DataType", @@ -428,6 +430,17 @@ FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerFixture, validate(CLAccessor(_target), _reference, tolerance_f32); } +FIXTURE_DATA_TEST_CASE_NEW(RunLargeKernelSize, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::ALL, + combine(combine(combine(combine(datasets::LargeKernelSizeDepthwiseConvolutionLayerNHWCDataset(), + framework::dataset::make("DepthMultiplier", { 1 })), + framework::dataset::make("DataType", + DataType::F32)), + framework::dataset::make("DataLayout", { DataLayout::NHWC })), + ActivationFunctionsDataset)) +{ + validate(CLAccessor(_target), _reference, tolerance_f32); +} + TEST_SUITE(Dilation) FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(), depth_multipliers), diff --git 
a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h index 0e02ae28ca..ddbab7fe13 100644 --- a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h +++ b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h @@ -113,6 +113,13 @@ public: target_to_use = &_target; } + add_padding_x({ &_src, &_biases }, _data_layout); + add_padding_x({ &_weights }, _data_layout, true); + if(!_in_place) + { + add_padding_x({ &_target }, _data_layout); + } + // Create Depthwise Convolution configure function _dwc.configure(&_src, &_weights, &_biases, target_to_use, _pad_stride_info, _depth_multiplier, _act_info, _dilation); @@ -124,12 +131,6 @@ public: void allocate_and_run_target() { - add_padding_x({ &_src, &_weights, &_biases }, _data_layout); - if(!_in_place) - { - add_padding_x({ &_target }, _data_layout); - } - // Allocate tensors _src.allocator()->allocate(); _weights.allocator()->allocate(); @@ -317,6 +318,10 @@ public: _biases = create_tensor(_biases_shape, _data_type, 1, QuantizationInfo(), _data_layout); _target = create_tensor(TensorShape(), _data_type, 1, QuantizationInfo(), _data_layout); + add_padding_x({ &_src, &_biases, &_target }, _data_layout); + add_padding_x({ &_weights }, _data_layout, true); + add_padding_y({ &_src, &_target }, _data_layout); + // Create Depthwise Convolution configure function const ConvolutionInfo info { @@ -332,9 +337,6 @@ public: void allocate_and_run_target() { - add_padding_x({ &_src, &_weights, &_biases, &_target }, _data_layout); - add_padding_y({ &_src, &_target }, _data_layout); - // Allocate tensors _src.allocator()->allocate(); _weights.allocator()->allocate(); @@ -482,6 +484,9 @@ public: _conv_info, _depth_multiplier, _act_info, _dilation }; + add_padding_x({ &_src, &_biases, &_target }, _data_layout); + add_padding_x({ &_weights }, _data_layout, _export_to_cl_image); // Don't add left padding if cl image will be used + // Create Depthwise 
Convolution configure function _dwc.configure(&_src, &_weights, &_biases, target_to_use, dwc_info, conv_kernel_info); @@ -493,9 +498,6 @@ public: void allocate_and_run_target() { - add_padding_x({ &_src, &_biases, &_target }, _data_layout); - add_padding_x({ &_weights }, _data_layout, _export_to_cl_image); // Don't add left padding if cl image will be used - // Allocate tensors _src.allocator()->allocate(); _weights.allocator()->allocate(); -- cgit v1.2.1