From 8155c0253c00aa9e26651361460c66feb39829a6 Mon Sep 17 00:00:00 2001
From: Gian Marco Iodice
Date: Fri, 16 Apr 2021 15:08:59 +0100
Subject: Rework OpenCL Depthwise Convolution

- Remove dedicated kernels for NCHW. Now we only use NHWC with permute
- Remove specialized kernels for 3x3 NHWC
- Simplify CLDepthwiseConvolutionLayer.cpp to call just the native
  implementation for both floating-point and quantized data types
- Develop two parametric opencl kernels for depthwise convolution layer
  NHWC (floating-point and quantized)
- Add support to export the weights to cl_image
- Extend test for depthwise convolution on opencl

Resolves COMPMID-4417

Change-Id: Ibe533f79c2860f9cac8e921895d5a8f947753a5c
Signed-off-by: Gian Marco Iodice
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5893
Reviewed-by: Giorgio Arena
Tested-by: Arm Jenkins
Comments-Addressed: Arm Jenkins
---
 .../fixtures/DepthwiseConvolutionLayerFixture.h | 63 +++++++++++++++-------
 1 file changed, 43 insertions(+), 20 deletions(-)

(limited to 'tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h')

diff --git a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h
index c255cc5c13..0e02ae28ca 100644
--- a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h
+++ b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h
@@ -198,7 +198,7 @@ protected:
         {
             case DataType::QASYMM8:
             {
-                std::uniform_int_distribution distribution(0, 10);
+                std::uniform_int_distribution distribution(0, 15);
                 library->fill(tensor, distribution, i);
                 break;
             }
@@ -292,7 +292,7 @@ public:
 
         if(padding_valid)
         {
-            _conv_info = PadStrideInfo();
+            _conv_info = PadStrideInfo(stride.width, stride.height);
         }
         else
         {
@@ -333,6 +333,7 @@ public:
     void allocate_and_run_target()
     {
         add_padding_x({ &_src, &_weights, &_biases, &_target }, _data_layout);
+        add_padding_y({ &_src, &_target }, _data_layout);
 
         // Allocate tensors
         _src.allocator()->allocate();
@@ -416,15 +417,16 @@ class DepthwiseConvolutionLayerNativeConfigurableValidationFixture : public Dept
 public:
     template
     void setup(size_t width, size_t height, size_t channel, size_t batch, Size2D kernel_size, size_t depth_multiplier, Size2D dilation, Size2D stride, bool padding_valid, DataType data_type,
-               DataLayout data_layout, const ActivationLayerInfo &act_info, unsigned int n0)
+               DataLayout data_layout, const ActivationLayerInfo &act_info, unsigned int n0, bool export_to_cl_image)
     {
-        _dilation         = dilation;
-        _depth_multiplier = depth_multiplier;
-        _data_type        = data_type;
-        _data_layout      = data_layout;
-        _act_info         = act_info;
-        _n0               = n0;
-        _in_place         = in_place;
+        _dilation           = dilation;
+        _depth_multiplier   = depth_multiplier;
+        _data_type          = data_type;
+        _data_layout        = data_layout;
+        _act_info           = act_info;
+        _n0                 = n0;
+        _export_to_cl_image = export_to_cl_image;
+        _in_place           = in_place;
 
         _input_shape   = TensorShape(width, height, channel, batch);
         _weights_shape = TensorShape(kernel_size.width, kernel_size.height, channel * _depth_multiplier);
@@ -432,11 +434,11 @@ public:
 
         if(padding_valid)
         {
-            _conv_info = PadStrideInfo();
+            _conv_info = calculate_same_pad(_input_shape, _weights_shape, PadStrideInfo(stride.width, stride.height), DataLayout::NCHW, _dilation);
         }
         else
        {
-            _conv_info = calculate_same_pad(_input_shape, _weights_shape, PadStrideInfo(stride.width, stride.height), DataLayout::NCHW, _dilation);
+            _conv_info = PadStrideInfo(stride.width, stride.height);
         }
     }
 
@@ -462,14 +464,26 @@ public:
             target_to_use = &_target;
         }
 
-        DWCWeightsKernelInfo dwc_weights_info;
-        dwc_weights_info.n0 = _n0;
+        DWCComputeKernelInfo dwc_info;
+        dwc_info.n0                         = _n0;
+        dwc_info.m0                         = _conv_info.stride().first == 1 && _dilation.x() == 1 ? 8 : 1;
+        dwc_info.export_weights_to_cl_image = _export_to_cl_image;
 
-        DWCKernelInfo dwc_info;
-        dwc_info.activation_info = _act_info;
+#if defined(ARM_COMPUTE_OPENCL_ENABLED)
+        if(_export_to_cl_image)
+        {
+            _validate_output |= image2d_from_buffer_supported(CLKernelLibrary::get().get_device());
+            _validate_output |= (get_cl_image_pitch_alignment(CLKernelLibrary::get().get_device()) != 0);
+        }
+#endif // ARM_COMPUTE_OPENCL_ENABLED
+
+        const ConvolutionInfo conv_kernel_info
+        {
+            _conv_info, _depth_multiplier, _act_info, _dilation
+        };
 
         // Create Depthwise Convolution configure function
-        _dwc.configure(&_src, &_weights, &_biases, target_to_use, dwc_weights_info, dwc_info, _conv_info, _depth_multiplier, _dilation);
+        _dwc.configure(&_src, &_weights, &_biases, target_to_use, dwc_info, conv_kernel_info);
 
         ARM_COMPUTE_ASSERT(_src.info()->is_resizable());
         ARM_COMPUTE_ASSERT(_weights.info()->is_resizable());
@@ -479,7 +493,8 @@ public:
 
     void allocate_and_run_target()
     {
-        add_padding_x({ &_src, &_weights, &_biases, &_target }, _data_layout);
+        add_padding_x({ &_src, &_biases, &_target }, _data_layout);
+        add_padding_x({ &_weights }, _data_layout, _export_to_cl_image); // Don't add left padding if cl image will be used
 
         // Allocate tensors
         _src.allocator()->allocate();
@@ -508,7 +523,10 @@ public:
         }
 
         // Compute function
-        _dwc.run();
+        if(_validate_output)
+        {
+            _dwc.run();
+        }
 
         // Reinstating original data layout for the test suite to properly check the values
         if(!_in_place)
@@ -529,7 +547,10 @@ public:
         const ConvolutionInfo info{ _conv_info, _depth_multiplier, _act_info, _dilation };
         const TensorShape dst_shape = compute_depthwise_convolution_shape(TensorInfo(_input_shape, 1, _data_type), TensorInfo(_weights_shape, 1, _data_type), info);
 
-        _reference = reference::activation_layer(reference::depthwise_convolution(src, weights, biases, dst_shape, _conv_info, _depth_multiplier, _dilation), _act_info);
+        if(_validate_output)
+        {
+            _reference = reference::activation_layer(reference::depthwise_convolution(src, weights, biases, dst_shape, _conv_info, _depth_multiplier, _dilation), _act_info);
+        }
     }
 
 protected:
@@ -573,6 +594,8 @@ protected:
     Size2D       _dilation{};
    unsigned int _depth_multiplier{};
     unsigned int _n0{};
+    bool         _export_to_cl_image{};
+    bool         _validate_output{ true };
     bool         _in_place{ false };
 };
 
--
cgit v1.2.1
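
Note on the configure path exercised above: after this rework the per-kernel compute options (the N0/M0 tile sizes and the cl_image weight-export flag) travel in a DWCComputeKernelInfo descriptor, while the convolution parameters are grouped in a ConvolutionInfo descriptor, so the function is configured with exactly two descriptors instead of the previous loose argument list. The sketch below only illustrates that call shape and is not part of the patch: the helper name configure_native_dwc and its template parameters are hypothetical, and the include paths are an assumption about where the two descriptors are declared; every field and call it uses follows identifiers visible in the diff.

    // Hypothetical helper mirroring the fixture's configure step (sketch, not library code).
    // Header locations are assumed; adjust to wherever DWCComputeKernelInfo and
    // ConvolutionInfo are declared in your tree.
    #include "arm_compute/core/KernelDescriptors.h"
    #include "arm_compute/core/Types.h"

    template <typename TensorT, typename DepthwiseFunctionT>
    void configure_native_dwc(DepthwiseFunctionT &dwc,
                              TensorT &src, TensorT &weights, TensorT &biases, TensorT &dst,
                              const arm_compute::PadStrideInfo       &conv_info,
                              unsigned int                            depth_multiplier,
                              const arm_compute::ActivationLayerInfo &act_info,
                              const arm_compute::Size2D              &dilation,
                              unsigned int                            n0,
                              bool                                    export_to_cl_image)
    {
        // Per-kernel compute options: N0 (columns), M0 (rows, kept at 1 unless the
        // stride is unit and there is no dilation) and the cl_image export flag.
        arm_compute::DWCComputeKernelInfo dwc_info{};
        dwc_info.n0                         = n0;
        dwc_info.m0                         = (conv_info.stride().first == 1 && dilation.x() == 1) ? 8 : 1;
        dwc_info.export_weights_to_cl_image = export_to_cl_image;

        // Convolution parameters grouped in a single descriptor, as in the fixture.
        const arm_compute::ConvolutionInfo conv_kernel_info{ conv_info, depth_multiplier, act_info, dilation };

        // Two-descriptor configure call, matching the new signature used by the test.
        dwc.configure(&src, &weights, &biases, &dst, dwc_info, conv_kernel_info);
    }

In the fixture itself, m0 is derived from the stride and dilation rather than exposed as a test parameter, so the test suite only varies n0 and the cl_image export flag.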