From bc4e31113be0af320f44b338969d6972b64ca4de Mon Sep 17 00:00:00 2001
From: SiCongLi <sicong.li@arm.com>
Date: Tue, 29 Jun 2021 13:18:30 +0100
Subject: Implement FP GPU depthwise convolution 1x1 kernel for in-place
 computation

* Implement in-place graph node mutator for 1x1 depthwise convolution

* Add in-place to validation fixture except for
  DepthwiseConvolutionLayerNativeValidationFixture as it would be a
  duplicate test otherwise (DepthwiseConvolutionLayerNative test tests
  the underlying kernel)

Resolves: COMPMID-4432

Change-Id: Id7f10f5ebdce7d49f550c0b62dbaaab7f5b59d29
Signed-off-by: SiCongLi <sicong.li@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5874
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
---
 tests/datasets/DepthwiseConvolutionLayerDataset.h  | 19 +++++
 tests/validation/CL/DepthwiseConvolutionLayer.cpp  | 30 +++++++-
 .../fixtures/DepthwiseConvolutionLayerFixture.h    | 83 +++++++++++++++-------
 3 files changed, 106 insertions(+), 26 deletions(-)

(limited to 'tests')
diff --git a/tests/datasets/DepthwiseConvolutionLayerDataset.h b/tests/datasets/DepthwiseConvolutionLayerDataset.h
index a19e7ee8cf..53b5248374 100644
--- a/tests/datasets/DepthwiseConvolutionLayerDataset.h
+++ b/tests/datasets/DepthwiseConvolutionLayerDataset.h
@@ -262,6 +262,25 @@ public:
         add_config(TensorShape(9U, 9U, 32U), Size2D(5U, 5U), PadStrideInfo(2, 2, 4, 4, 4, 4, DimensionRoundingType::CEIL), Size2D(2U, 2U));
     }
 };
+
+/** Dataset containing in-place 1x1 depthwise convolution shapes.
+ *
+ * For a depthwise convolution op to be in-place:
+ * * Output has the same shape as the input;
+ *      * 1x1 filter
+ *      * stride == 1
+ *      * dilations == 1
+ *      * No paddings
+*/
+class SmallInPlaceDepthwiseConvolutionLayerDataset final : public DepthwiseConvolutionLayerDataset
+{
+public:
+    SmallInPlaceDepthwiseConvolutionLayerDataset()
+    {
+        add_config(TensorShape(7U, 7U, 1U), Size2D(1U, 1U), PadStrideInfo(1, 1, 0, 0));
+        add_config(TensorShape(11U, 13U, 16U), Size2D(1U, 1U), PadStrideInfo(1, 1, 0, 0));
+    }
+};
 } // namespace datasets
 } // namespace test
 } // namespace arm_compute
diff --git a/tests/validation/CL/DepthwiseConvolutionLayer.cpp b/tests/validation/CL/DepthwiseConvolutionLayer.cpp
index c88f7c1624..22922f41a2 100644
--- a/tests/validation/CL/DepthwiseConvolutionLayer.cpp
+++ b/tests/validation/CL/DepthwiseConvolutionLayer.cpp
@@ -156,6 +156,8 @@ template <typename T>
 using CLDepthwiseConvolutionLayerFixture = DepthwiseConvolutionLayerValidationFixture<CLTensor, CLAccessor, CLDepthwiseConvolutionLayer, T>;
 template <typename T>
 using CLDepthwiseConvolutionLayerMixedDataLayoutFixture = DepthwiseConvolutionLayerValidationFixture<CLTensor, CLAccessor, CLDepthwiseConvolutionLayer, T, true>;
+template <typename T>
+using CLDepthwiseConvolutionLayerInPlaceFixture = DepthwiseConvolutionLayerValidationFixture<CLTensor, CLAccessor, CLDepthwiseConvolutionLayer, T, false, true>;
 
 TEST_SUITE(Float)
 TEST_SUITE(FP16)
@@ -290,6 +292,19 @@ FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerFixture<half>, f
 }
 TEST_SUITE_END() // Dilation
 TEST_SUITE_END() // Generic
+
+TEST_SUITE(InPlace)
+FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerInPlaceFixture<half>, framework::DatasetMode::ALL,
+                           combine(combine(combine(combine(datasets::SmallInPlaceDepthwiseConvolutionLayerDataset(),
+                                                           framework::dataset::make("DepthMultiplier", { 1 })),
+                                                   framework::dataset::make("DataType",
+                                                                            DataType::F16)),
+                                           framework::dataset::make("DataLayout", { DataLayout::NHWC })),
+                                   ActivationFunctionsDataset))
+{
+    validate(CLAccessor(_src), _reference, tolerance_f16, tolerance_num);
+}
+TEST_SUITE_END() // InPlace
 TEST_SUITE_END() // FP16
 
 TEST_SUITE(FP32)
@@ -355,7 +370,7 @@ FIXTURE_DATA_TEST_CASE_NEW(RunMixedDataLayout, CLDepthwiseConvolutionLayerMixedD
                                                    framework::dataset::make("DataType",
                                                                             DataType::F32)),
                                            framework::dataset::make("DataLayout", DataLayout::NHWC)),
-                                    framework::dataset::make("ActivationInfo", ActivationLayerInfo())))
+                                   framework::dataset::make("ActivationInfo", ActivationLayerInfo())))
 {
     validate(CLAccessor(_target), _reference, tolerance_f32);
 }
@@ -436,6 +451,19 @@ FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerFixture<float>,
 }
 TEST_SUITE_END() // Dilation
 TEST_SUITE_END() // Generic
+
+TEST_SUITE(InPlace)
+FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerInPlaceFixture<float>, framework::DatasetMode::ALL,
+                           combine(combine(combine(combine(datasets::SmallInPlaceDepthwiseConvolutionLayerDataset(),
+                                                           framework::dataset::make("DepthMultiplier", { 1 })),
+                                                   framework::dataset::make("DataType",
+                                                                            DataType::F32)),
+                                           framework::dataset::make("DataLayout", { DataLayout::NHWC })),
+                                   ActivationFunctionsDataset))
+{
+    validate(CLAccessor(_src), _reference, tolerance_f32);
+}
+TEST_SUITE_END() // InPlace
 TEST_SUITE_END() // FP32
 TEST_SUITE_END() // Float
 
diff --git a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h
index 19ec6b2560..c255cc5c13 100644
--- a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h
+++ b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h
@@ -59,8 +59,9 @@ public:
     void setup(TensorShape in_shape, Size2D kernel_size, PadStrideInfo pad_stride_info, Size2D dilation,
                unsigned int depth_multiplier, DataType input_data_type, DataType weights_data_type,
                QuantizationInfo input_quantization_info, QuantizationInfo weights_quantization_info, QuantizationInfo output_quantization_info,
-               DataLayout data_layout, ActivationLayerInfo act_info, bool mixed_layout = false)
+               DataLayout data_layout, ActivationLayerInfo act_info, bool mixed_layout = false, bool in_place = false)
     {
+        ARM_COMPUTE_ERROR_ON(mixed_layout && in_place);
         _mixed_layout              = mixed_layout;
         _input_shape               = in_shape;
         _input_data_type           = input_data_type;
@@ -73,6 +74,7 @@ public:
         _act_info                  = act_info;
         _depth_multiplier          = depth_multiplier;
         _dilation                  = dilation;
+        _in_place                  = in_place;
 
         _bias_data_type = is_data_type_quantized(_input_data_type) ? DataType::S32 : _input_data_type;
 
@@ -101,13 +103,18 @@ public:
         }
 
         // Create tensors
-        _src     = create_tensor<TensorType>(input_shape, _input_data_type, 1, _input_quantization_info, _data_layout);
-        _weights = create_tensor<TensorType>(weights_shape, _weights_data_type, 1, _weights_quantization_info, _data_layout);
-        _biases  = create_tensor<TensorType>(_biases_shape, _bias_data_type, 1, _input_quantization_info, _data_layout);
-        _target  = create_tensor<TensorType>(output_shape, _input_data_type, 1, _output_quantization_info, _data_layout);
+        _src                      = create_tensor<TensorType>(input_shape, _input_data_type, 1, _input_quantization_info, _data_layout);
+        _weights                  = create_tensor<TensorType>(weights_shape, _weights_data_type, 1, _weights_quantization_info, _data_layout);
+        _biases                   = create_tensor<TensorType>(_biases_shape, _bias_data_type, 1, _input_quantization_info, _data_layout);
+        TensorType *target_to_use = nullptr;
+        if(!_in_place)
+        {
+            _target       = create_tensor<TensorType>(output_shape, _input_data_type, 1, _output_quantization_info, _data_layout);
+            target_to_use = &_target;
+        }
 
         // Create Depthwise Convolution configure function
-        _dwc.configure(&_src, &_weights, &_biases, &_target, _pad_stride_info, _depth_multiplier, _act_info, _dilation);
+        _dwc.configure(&_src, &_weights, &_biases, target_to_use, _pad_stride_info, _depth_multiplier, _act_info, _dilation);
 
         ARM_COMPUTE_ASSERT(_src.info()->is_resizable());
         ARM_COMPUTE_ASSERT(_weights.info()->is_resizable());
@@ -117,18 +124,26 @@ public:
 
     void allocate_and_run_target()
     {
-        add_padding_x({ &_src, &_weights, &_biases, &_target }, _data_layout);
+        add_padding_x({ &_src, &_weights, &_biases }, _data_layout);
+        if(!_in_place)
+        {
+            add_padding_x({ &_target }, _data_layout);
+        }
 
         // Allocate tensors
         _src.allocator()->allocate();
         _weights.allocator()->allocate();
         _biases.allocator()->allocate();
-        _target.allocator()->allocate();
 
         ARM_COMPUTE_ASSERT(!_src.info()->is_resizable());
         ARM_COMPUTE_ASSERT(!_weights.info()->is_resizable());
         ARM_COMPUTE_ASSERT(!_biases.info()->is_resizable());
-        ARM_COMPUTE_ASSERT(!_target.info()->is_resizable());
+
+        if(!_in_place)
+        {
+            _target.allocator()->allocate();
+            ARM_COMPUTE_ASSERT(!_target.info()->is_resizable());
+        }
 
         // Fill tensors
         fill(AccessorType(_src), 0);
@@ -163,6 +178,7 @@ public:
 protected:
     void mix_layout(FunctionType &layer, TensorType &src, TensorType &dst)
     {
+        ARM_COMPUTE_ERROR_ON(_in_place);
         // Test Multi DataLayout graph cases, when the data layout changes after configure
         src.info()->set_data_layout(_data_layout == DataLayout::NCHW ? DataLayout::NHWC : DataLayout::NCHW);
         dst.info()->set_data_layout(_data_layout == DataLayout::NCHW ? DataLayout::NHWC : DataLayout::NCHW);
@@ -240,9 +256,10 @@ protected:
     unsigned int        _depth_multiplier{};
     Size2D              _dilation{};
     bool                _mixed_layout{ false };
+    bool                _in_place{ false };
 };
 
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool mixed_layout = false>
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool mixed_layout = false, bool in_place = false>
 class DepthwiseConvolutionLayerValidationFixture : public DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T, T>
 {
 public:
@@ -252,7 +269,7 @@ public:
     {
         DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T, T>::setup(in_shape, kernel_size, pad_stride_info, dilation, depth_multiplier,
                                                                                                                data_type, data_type, QuantizationInfo(), QuantizationInfo(), QuantizationInfo(),
-                                                                                                               data_layout, act_info, mixed_layout);
+                                                                                                               data_layout, act_info, mixed_layout, in_place);
     }
 };
 
@@ -393,7 +410,7 @@ protected:
     unsigned int  _depth_multiplier{};
 };
 
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool in_place = false>
 class DepthwiseConvolutionLayerNativeConfigurableValidationFixture : public DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T, T>
 {
 public:
@@ -407,6 +424,7 @@ public:
         _data_layout      = data_layout;
         _act_info         = act_info;
         _n0               = n0;
+        _in_place         = in_place;
 
         _input_shape   = TensorShape(width, height, channel, batch);
         _weights_shape = TensorShape(kernel_size.width, kernel_size.height, channel * _depth_multiplier);
@@ -434,10 +452,15 @@ public:
         }
 
         // Create tensors
-        _src     = create_tensor<TensorType>(input_shape, _data_type, 1, QuantizationInfo(), _data_layout);
-        _weights = create_tensor<TensorType>(weights_shape, _data_type, 1, QuantizationInfo(), _data_layout);
-        _biases  = create_tensor<TensorType>(_biases_shape, _data_type, 1, QuantizationInfo(), _data_layout);
-        _target  = create_tensor<TensorType>(TensorShape(), _data_type, 1, QuantizationInfo(), _data_layout);
+        _src                      = create_tensor<TensorType>(input_shape, _data_type, 1, QuantizationInfo(), _data_layout);
+        _weights                  = create_tensor<TensorType>(weights_shape, _data_type, 1, QuantizationInfo(), _data_layout);
+        _biases                   = create_tensor<TensorType>(_biases_shape, _data_type, 1, QuantizationInfo(), _data_layout);
+        TensorType *target_to_use = nullptr;
+        if(!_in_place)
+        {
+            _target       = create_tensor<TensorType>(TensorShape(), _data_type, 1, QuantizationInfo(), _data_layout);
+            target_to_use = &_target;
+        }
 
         DWCWeightsKernelInfo dwc_weights_info;
         dwc_weights_info.n0 = _n0;
@@ -446,7 +469,7 @@ public:
         dwc_info.activation_info = _act_info;
 
         // Create Depthwise Convolution configure function
-        _dwc.configure(&_src, &_weights, &_biases, &_target, dwc_weights_info, dwc_info, _conv_info, _depth_multiplier, _dilation);
+        _dwc.configure(&_src, &_weights, &_biases, target_to_use, dwc_weights_info, dwc_info, _conv_info, _depth_multiplier, _dilation);
 
         ARM_COMPUTE_ASSERT(_src.info()->is_resizable());
         ARM_COMPUTE_ASSERT(_weights.info()->is_resizable());
@@ -462,12 +485,15 @@ public:
         _src.allocator()->allocate();
         _weights.allocator()->allocate();
         _biases.allocator()->allocate();
-        _target.allocator()->allocate();
 
         ARM_COMPUTE_ASSERT(!_src.info()->is_resizable());
         ARM_COMPUTE_ASSERT(!_weights.info()->is_resizable());
         ARM_COMPUTE_ASSERT(!_biases.info()->is_resizable());
-        ARM_COMPUTE_ASSERT(!_target.info()->is_resizable());
+        if(!_in_place)
+        {
+            _target.allocator()->allocate();
+            ARM_COMPUTE_ASSERT(!_target.info()->is_resizable());
+        }
 
         // Fill tensors
         fill(AccessorType(_src), 0);
@@ -476,13 +502,19 @@ public:
 
         // Test Multi DataLayout graph cases, when the data layout changes after configure
         _src.info()->set_data_layout(_data_layout == DataLayout::NCHW ? DataLayout::NHWC : DataLayout::NCHW);
-        _target.info()->set_data_layout(_data_layout == DataLayout::NCHW ? DataLayout::NHWC : DataLayout::NCHW);
+        if(!_in_place)
+        {
+            _target.info()->set_data_layout(_data_layout == DataLayout::NCHW ? DataLayout::NHWC : DataLayout::NCHW);
+        }
 
         // Compute function
         _dwc.run();
 
         // Reinstating original data layout for the test suite to properly check the values
-        _target.info()->set_data_layout(_data_layout);
+        if(!_in_place)
+        {
+            _target.info()->set_data_layout(_data_layout);
+        }
     }
 
     void compute_reference()
@@ -541,9 +573,10 @@ protected:
     Size2D              _dilation{};
     unsigned int        _depth_multiplier{};
     unsigned int        _n0{};
+    bool                _in_place{ false };
 };
 
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool mixed_layout = false>
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool mixed_layout = false, bool in_place = false>
 class DepthwiseConvolutionLayerValidationQuantizedFixture : public DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T, T>
 {
 public:
@@ -553,11 +586,11 @@ public:
     {
         DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T, T>::setup(in_shape, kernel_size, pad_stride_info, dilation, depth_multiplier, data_type,
                                                                                                                data_type, input_quantization_info, input_quantization_info, output_quantization_info,
-                                                                                                               data_layout, act_info, mixed_layout);
+                                                                                                               data_layout, act_info, mixed_layout, in_place);
     }
 };
 
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T, typename TW>
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T, typename TW, bool in_place = false>
 class DepthwiseConvolutionLayerValidationQuantizedPerChannelFixture : public DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T, TW>
 {
 public:
@@ -579,7 +612,7 @@ public:
         DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T, TW>::setup(in_shape, kernel_size, pad_stride_info, dilation, depth_multiplier,
                                                                                                                 input_data_type, weights_data_type,
                                                                                                                 input_quantization_info, QuantizationInfo(weights_scales), output_quantization_info,
-                                                                                                                data_layout, act_info);
+                                                                                                                data_layout, act_info, false, in_place);
     }
 };
 } // namespace validation
-- 
cgit v1.2.1