From bc4e31113be0af320f44b338969d6972b64ca4de Mon Sep 17 00:00:00 2001
From: SiCongLi
Date: Tue, 29 Jun 2021 13:18:30 +0100
Subject: Implement FP GPU depthwise convolution 1x1 kernel for in-place
 computation

* Implement in-place graph node mutator for 1x1 depthwise convolution
* Add in-place to validation fixture except for
  DepthwiseConvolutionLayerNativeValidationFixture as it would be a duplicate
  test otherwise (DepthwiseConvolutionLayerNative test tests the underlying
  kernel)

Resolves: COMPMID-4432
Change-Id: Id7f10f5ebdce7d49f550c0b62dbaaab7f5b59d29
Signed-off-by: SiCongLi
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5874
Comments-Addressed: Arm Jenkins
Tested-by: Arm Jenkins
Reviewed-by: Michele Di Giorgio
Reviewed-by: Georgios Pinitas
---
 .../fixtures/DepthwiseConvolutionLayerFixture.h | 83 +++++++++++++++-------
 1 file changed, 58 insertions(+), 25 deletions(-)

(limited to 'tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h')

diff --git a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h
index 19ec6b2560..c255cc5c13 100644
--- a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h
+++ b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h
@@ -59,8 +59,9 @@ public:
     void setup(TensorShape in_shape, Size2D kernel_size, PadStrideInfo pad_stride_info, Size2D dilation, unsigned int depth_multiplier, DataType input_data_type, DataType weights_data_type,
                QuantizationInfo input_quantization_info, QuantizationInfo weights_quantization_info, QuantizationInfo output_quantization_info,
-               DataLayout data_layout, ActivationLayerInfo act_info, bool mixed_layout = false)
+               DataLayout data_layout, ActivationLayerInfo act_info, bool mixed_layout = false, bool in_place = false)
     {
+        ARM_COMPUTE_ERROR_ON(mixed_layout && in_place);
         _mixed_layout    = mixed_layout;
         _input_shape     = in_shape;
         _input_data_type = input_data_type;
@@ -73,6 +74,7 @@ public:
         _act_info         = act_info;
         _depth_multiplier = depth_multiplier;
         _dilation         = dilation;
+        _in_place         = in_place;
 
         _bias_data_type = is_data_type_quantized(_input_data_type) ? DataType::S32 : _input_data_type;
@@ -101,13 +103,18 @@ public:
         }
 
         // Create tensors
-        _src     = create_tensor(input_shape, _input_data_type, 1, _input_quantization_info, _data_layout);
-        _weights = create_tensor(weights_shape, _weights_data_type, 1, _weights_quantization_info, _data_layout);
-        _biases  = create_tensor(_biases_shape, _bias_data_type, 1, _input_quantization_info, _data_layout);
-        _target  = create_tensor(output_shape, _input_data_type, 1, _output_quantization_info, _data_layout);
+        _src                      = create_tensor(input_shape, _input_data_type, 1, _input_quantization_info, _data_layout);
+        _weights                  = create_tensor(weights_shape, _weights_data_type, 1, _weights_quantization_info, _data_layout);
+        _biases                   = create_tensor(_biases_shape, _bias_data_type, 1, _input_quantization_info, _data_layout);
+        TensorType *target_to_use = nullptr;
+        if(!_in_place)
+        {
+            _target       = create_tensor(output_shape, _input_data_type, 1, _output_quantization_info, _data_layout);
+            target_to_use = &_target;
+        }
 
         // Create Depthwise Convolution configure function
-        _dwc.configure(&_src, &_weights, &_biases, &_target, _pad_stride_info, _depth_multiplier, _act_info, _dilation);
+        _dwc.configure(&_src, &_weights, &_biases, target_to_use, _pad_stride_info, _depth_multiplier, _act_info, _dilation);
 
         ARM_COMPUTE_ASSERT(_src.info()->is_resizable());
         ARM_COMPUTE_ASSERT(_weights.info()->is_resizable());
@@ -117,18 +124,26 @@ public:
 
     void allocate_and_run_target()
     {
-        add_padding_x({ &_src, &_weights, &_biases, &_target }, _data_layout);
+        add_padding_x({ &_src, &_weights, &_biases }, _data_layout);
+        if(!_in_place)
+        {
+            add_padding_x({ &_target }, _data_layout);
+        }
 
         // Allocate tensors
         _src.allocator()->allocate();
         _weights.allocator()->allocate();
         _biases.allocator()->allocate();
-        _target.allocator()->allocate();
 
         ARM_COMPUTE_ASSERT(!_src.info()->is_resizable());
         ARM_COMPUTE_ASSERT(!_weights.info()->is_resizable());
         ARM_COMPUTE_ASSERT(!_biases.info()->is_resizable());
-        ARM_COMPUTE_ASSERT(!_target.info()->is_resizable());
+
+        if(!_in_place)
+        {
+            _target.allocator()->allocate();
+            ARM_COMPUTE_ASSERT(!_target.info()->is_resizable());
+        }
 
         // Fill tensors
         fill(AccessorType(_src), 0);
@@ -163,6 +178,7 @@ public:
 protected:
     void mix_layout(FunctionType &layer, TensorType &src, TensorType &dst)
     {
+        ARM_COMPUTE_ERROR_ON(_in_place);
         // Test Multi DataLayout graph cases, when the data layout changes after configure
         src.info()->set_data_layout(_data_layout == DataLayout::NCHW ? DataLayout::NHWC : DataLayout::NCHW);
         dst.info()->set_data_layout(_data_layout == DataLayout::NCHW ? DataLayout::NHWC : DataLayout::NCHW);
@@ -240,9 +256,10 @@ protected:
     unsigned int _depth_multiplier{};
     Size2D       _dilation{};
     bool         _mixed_layout{ false };
+    bool         _in_place{ false };
 };
 
-template
+template
 class DepthwiseConvolutionLayerValidationFixture : public DepthwiseConvolutionLayerValidationGenericFixture
 {
 public:
@@ -252,7 +269,7 @@ public:
     {
         DepthwiseConvolutionLayerValidationGenericFixture::setup(in_shape, kernel_size, pad_stride_info, dilation, depth_multiplier, data_type, data_type,
                                                                  QuantizationInfo(), QuantizationInfo(), QuantizationInfo(),
-                                                                 data_layout, act_info, mixed_layout);
+                                                                 data_layout, act_info, mixed_layout, in_place);
     }
 };
 
@@ -393,7 +410,7 @@ protected:
     unsigned int _depth_multiplier{};
 };
 
-template
+template
 class DepthwiseConvolutionLayerNativeConfigurableValidationFixture : public DepthwiseConvolutionLayerValidationGenericFixture
 {
 public:
@@ -407,6 +424,7 @@ public:
         _data_layout      = data_layout;
         _act_info         = act_info;
         _n0               = n0;
+        _in_place         = in_place;
 
         _input_shape   = TensorShape(width, height, channel, batch);
         _weights_shape = TensorShape(kernel_size.width, kernel_size.height, channel * _depth_multiplier);
@@ -434,10 +452,15 @@ public:
         }
 
         // Create tensors
-        _src     = create_tensor(input_shape, _data_type, 1, QuantizationInfo(), _data_layout);
-        _weights = create_tensor(weights_shape, _data_type, 1, QuantizationInfo(), _data_layout);
-        _biases  = create_tensor(_biases_shape, _data_type, 1, QuantizationInfo(), _data_layout);
-        _target  = create_tensor(TensorShape(), _data_type, 1, QuantizationInfo(), _data_layout);
+        _src                      = create_tensor(input_shape, _data_type, 1, QuantizationInfo(), _data_layout);
+        _weights                  = create_tensor(weights_shape, _data_type, 1, QuantizationInfo(), _data_layout);
+        _biases                   = create_tensor(_biases_shape, _data_type, 1, QuantizationInfo(), _data_layout);
+        TensorType *target_to_use = nullptr;
+        if(!_in_place)
+        {
+            _target       = create_tensor(TensorShape(), _data_type, 1, QuantizationInfo(), _data_layout);
+            target_to_use = &_target;
+        }
 
         DWCWeightsKernelInfo dwc_weights_info;
         dwc_weights_info.n0 = _n0;
@@ -446,7 +469,7 @@ public:
         dwc_info.activation_info = _act_info;
 
         // Create Depthwise Convolution configure function
-        _dwc.configure(&_src, &_weights, &_biases, &_target, dwc_weights_info, dwc_info, _conv_info, _depth_multiplier, _dilation);
+        _dwc.configure(&_src, &_weights, &_biases, target_to_use, dwc_weights_info, dwc_info, _conv_info, _depth_multiplier, _dilation);
 
         ARM_COMPUTE_ASSERT(_src.info()->is_resizable());
         ARM_COMPUTE_ASSERT(_weights.info()->is_resizable());
@@ -462,12 +485,15 @@ public:
         _src.allocator()->allocate();
         _weights.allocator()->allocate();
         _biases.allocator()->allocate();
-        _target.allocator()->allocate();
 
         ARM_COMPUTE_ASSERT(!_src.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!_weights.info()->is_resizable());
         ARM_COMPUTE_ASSERT(!_biases.info()->is_resizable());
-        ARM_COMPUTE_ASSERT(!_target.info()->is_resizable());
+        if(!_in_place)
+        {
+            _target.allocator()->allocate();
+            ARM_COMPUTE_ASSERT(!_target.info()->is_resizable());
+        }
 
         // Fill tensors
         fill(AccessorType(_src), 0);
@@ -476,13 +502,19 @@ public:
 
         // Test Multi DataLayout graph cases, when the data layout changes after configure
         _src.info()->set_data_layout(_data_layout == DataLayout::NCHW ? DataLayout::NHWC : DataLayout::NCHW);
-        _target.info()->set_data_layout(_data_layout == DataLayout::NCHW ? DataLayout::NHWC : DataLayout::NCHW);
+        if(!_in_place)
+        {
+            _target.info()->set_data_layout(_data_layout == DataLayout::NCHW ? DataLayout::NHWC : DataLayout::NCHW);
+        }
 
         // Compute function
         _dwc.run();
 
         // Reinstating original data layout for the test suite to properly check the values
-        _target.info()->set_data_layout(_data_layout);
+        if(!_in_place)
+        {
+            _target.info()->set_data_layout(_data_layout);
+        }
     }
 
     void compute_reference()
@@ -541,9 +573,10 @@ protected:
     Size2D       _dilation{};
     unsigned int _depth_multiplier{};
     unsigned int _n0{};
+    bool         _in_place{ false };
 };
 
-template
+template
 class DepthwiseConvolutionLayerValidationQuantizedFixture : public DepthwiseConvolutionLayerValidationGenericFixture
 {
 public:
@@ -553,11 +586,11 @@ public:
     {
         DepthwiseConvolutionLayerValidationGenericFixture::setup(in_shape, kernel_size, pad_stride_info, dilation, depth_multiplier, data_type, data_type,
                                                                  input_quantization_info, input_quantization_info, output_quantization_info,
-                                                                 data_layout, act_info, mixed_layout);
+                                                                 data_layout, act_info, mixed_layout, in_place);
     }
 };
 
-template
+template
 class DepthwiseConvolutionLayerValidationQuantizedPerChannelFixture : public DepthwiseConvolutionLayerValidationGenericFixture
 {
 public:
@@ -579,7 +612,7 @@ public:
         DepthwiseConvolutionLayerValidationGenericFixture::setup(in_shape, kernel_size, pad_stride_info, dilation, depth_multiplier, input_data_type, weights_data_type,
                                                                  input_quantization_info, QuantizationInfo(weights_scales), output_quantization_info,
-                                                                 data_layout, act_info);
+                                                                 data_layout, act_info, false, in_place);
     }
 };
 } // namespace validation
--
cgit v1.2.1
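
Usage sketch (not taken from the patch): the fixture above requests in-place computation by passing a null output tensor to configure(), so the function under test writes its result back into the source tensor. The snippet below mirrors that convention from plain user code, under the assumption that the function being exercised is CLDepthwiseConvolutionLayer and that it accepts a null output for the 1x1 FP depthwise case; the shapes, the main() harness and the fill step are illustrative only.

// Hypothetical stand-alone example: run a 1x1 FP32 depthwise convolution
// in place by passing a null output, mirroring `target_to_use = nullptr`
// in the fixture above.
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h"

using namespace arm_compute;

int main()
{
    CLScheduler::get().default_init();

    // NHWC FP32 tensors; with a 1x1 kernel, unit stride and depth multiplier 1
    // the output shape equals the input shape, which is what makes the
    // in-place computation well defined.
    TensorInfo src_info(TensorShape(64U, 56U, 56U, 1U), 1, DataType::F32); // C, W, H, N
    TensorInfo wei_info(TensorShape(64U, 1U, 1U), 1, DataType::F32);       // C, 1, 1
    TensorInfo bia_info(TensorShape(64U), 1, DataType::F32);
    src_info.set_data_layout(DataLayout::NHWC);
    wei_info.set_data_layout(DataLayout::NHWC);

    CLTensor src, weights, biases;
    src.allocator()->init(src_info);
    weights.allocator()->init(wei_info);
    biases.allocator()->init(bia_info);

    CLDepthwiseConvolutionLayer dwc;
    // Assumption: a null output requests in-place computation (result written
    // back into `src`), as in the fixture's target_to_use handling.
    dwc.configure(&src, &weights, &biases, nullptr, PadStrideInfo(1, 1, 0, 0),
                  1 /* depth_multiplier */, ActivationLayerInfo(), Size2D(1U, 1U));

    src.allocator()->allocate();
    weights.allocator()->allocate();
    biases.allocator()->allocate();

    // ... fill src, weights and biases here ...

    dwc.run(); // result now lives in `src`
    CLScheduler::get().sync();
    return 0;
}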