From 5e281814c5110724d99fe8ee64bdf42ef2c31bce Mon Sep 17 00:00:00 2001 From: Gian Marco Iodice Date: Tue, 6 Jul 2021 13:19:41 +0100 Subject: Fix manual LOOP_UNROLLING The issue is caused by the number of iterations passed to LOOP_UNROLLING. When we use the manual LOOP_UNROLLING, the number of iterations must be less than or equal to 128. To overcome this problem, we create a utility function to check if any of the critical iterations (kernel dimensions) are beyond that limit. If so, the utility function disables the manual loop unrolling. Resolves COMPMID-4609 Change-Id: I7221c967609e462a5abd1cbb74e2a120f344fcb3 Signed-off-by: Gian Marco Iodice Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5913 Reviewed-by: Georgios Pinitas Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins --- tests/datasets/DepthwiseConvolutionLayerDataset.h | 10 +++++++++ tests/validation/CL/DepthwiseConvolutionLayer.cpp | 13 +++++++++++ .../fixtures/DepthwiseConvolutionLayerFixture.h | 26 ++++++++++++---------- 3 files changed, 37 insertions(+), 12 deletions(-) (limited to 'tests') diff --git a/tests/datasets/DepthwiseConvolutionLayerDataset.h b/tests/datasets/DepthwiseConvolutionLayerDataset.h index 3b17910eac..82ea40ff52 100644 --- a/tests/datasets/DepthwiseConvolutionLayerDataset.h +++ b/tests/datasets/DepthwiseConvolutionLayerDataset.h @@ -155,6 +155,16 @@ public: } }; +/** Dataset containing large kernel size for generic depthwise convolution. */ +class LargeKernelSizeDepthwiseConvolutionLayerNHWCDataset final : public DepthwiseConvolutionLayerDataset +{ +public: + LargeKernelSizeDepthwiseConvolutionLayerNHWCDataset() + { + add_config(TensorShape(6U, 210U, 8U), Size2D(4U, 194U), PadStrideInfo(1, 1, 0, 0)); + } +}; + /** Dataset containing small, 3x3 depthwise convolution shapes. 
*/ class SmallDepthwiseConvolutionLayerDataset3x3 final : public DepthwiseConvolutionLayerDataset { diff --git a/tests/validation/CL/DepthwiseConvolutionLayer.cpp b/tests/validation/CL/DepthwiseConvolutionLayer.cpp index 79a2678b44..b2cff2b792 100644 --- a/tests/validation/CL/DepthwiseConvolutionLayer.cpp +++ b/tests/validation/CL/DepthwiseConvolutionLayer.cpp @@ -382,6 +382,7 @@ FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerFixture, { validate(CLAccessor(_target), _reference, tolerance_f32); } + TEST_SUITE(Dilation) FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::ALL, @@ -418,6 +419,7 @@ FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerFixture, { validate(CLAccessor(_target), _reference, tolerance_f32); } + FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(), large_depth_multipliers), framework::dataset::make("DataType", @@ -428,6 +430,17 @@ FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerFixture, validate(CLAccessor(_target), _reference, tolerance_f32); } +FIXTURE_DATA_TEST_CASE_NEW(RunLargeKernelSize, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::ALL, + combine(combine(combine(combine(datasets::LargeKernelSizeDepthwiseConvolutionLayerNHWCDataset(), + framework::dataset::make("DepthMultiplier", { 1 })), + framework::dataset::make("DataType", + DataType::F32)), + framework::dataset::make("DataLayout", { DataLayout::NHWC })), + ActivationFunctionsDataset)) +{ + validate(CLAccessor(_target), _reference, tolerance_f32); +} + TEST_SUITE(Dilation) FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(), depth_multipliers), diff --git 
a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h index 0e02ae28ca..ddbab7fe13 100644 --- a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h +++ b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h @@ -113,6 +113,13 @@ public: target_to_use = &_target; } + add_padding_x({ &_src, &_biases }, _data_layout); + add_padding_x({ &_weights }, _data_layout, true); + if(!_in_place) + { + add_padding_x({ &_target }, _data_layout); + } + // Create Depthwise Convolution configure function _dwc.configure(&_src, &_weights, &_biases, target_to_use, _pad_stride_info, _depth_multiplier, _act_info, _dilation); @@ -124,12 +131,6 @@ public: void allocate_and_run_target() { - add_padding_x({ &_src, &_weights, &_biases }, _data_layout); - if(!_in_place) - { - add_padding_x({ &_target }, _data_layout); - } - // Allocate tensors _src.allocator()->allocate(); _weights.allocator()->allocate(); @@ -317,6 +318,10 @@ public: _biases = create_tensor(_biases_shape, _data_type, 1, QuantizationInfo(), _data_layout); _target = create_tensor(TensorShape(), _data_type, 1, QuantizationInfo(), _data_layout); + add_padding_x({ &_src, &_biases, &_target }, _data_layout); + add_padding_x({ &_weights }, _data_layout, true); + add_padding_y({ &_src, &_target }, _data_layout); + // Create Depthwise Convolution configure function const ConvolutionInfo info { @@ -332,9 +337,6 @@ public: void allocate_and_run_target() { - add_padding_x({ &_src, &_weights, &_biases, &_target }, _data_layout); - add_padding_y({ &_src, &_target }, _data_layout); - // Allocate tensors _src.allocator()->allocate(); _weights.allocator()->allocate(); @@ -482,6 +484,9 @@ public: _conv_info, _depth_multiplier, _act_info, _dilation }; + add_padding_x({ &_src, &_biases, &_target }, _data_layout); + add_padding_x({ &_weights }, _data_layout, _export_to_cl_image); // Don't add left padding if cl image will be used + // Create Depthwise 
Convolution configure function _dwc.configure(&_src, &_weights, &_biases, target_to_use, dwc_info, conv_kernel_info); @@ -493,9 +498,6 @@ public: void allocate_and_run_target() { - add_padding_x({ &_src, &_biases, &_target }, _data_layout); - add_padding_x({ &_weights }, _data_layout, _export_to_cl_image); // Don't add left padding if cl image will be used - // Allocate tensors _src.allocator()->allocate(); _weights.allocator()->allocate(); -- cgit v1.2.1