Rework OpenCL Depthwise Convolution

- Remove dedicated kernels for NCHW. Now we only use NHWC with permute
- Remove specialized kernels for 3x3 NHWC
- Simplify CLDepthwiseConvolutionLayer.cpp to call just the native implementation for both floating-point and quantized data types
- Develop two parametric OpenCL kernels for depthwise convolution layer NHWC (floating-point and quantized)
- Add support to export the weights to cl_image
- Extend test for depthwise convolution on OpenCL

Resolves COMPMID-4417

Change-Id: Ibe533f79c2860f9cac8e921895d5a8f947753a5c
Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5893
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
author: Gian Marco Iodice <gianmarco.iodice@arm.com> 2021-04-16 15:08:59 +0100
committer: Gian Marco Iodice <gianmarco.iodice@arm.com> 2021-07-02 15:56:45 +0000
commit: 8155c0253c00aa9e26651361460c66feb39829a6 (patch)
tree: 41dacc432d4d1f1daa32d20d15e5120c11b9fa56 /tests/validation
parent: 2eb5d16b839cbc28c6cb7f0de7a0bf15290b425a (diff)
download: ComputeLibrary-8155c0253c00aa9e26651361460c66feb39829a6.tar.gz
3 files changed, 207 insertions, 42 deletions
diff --git a/tests/validation/CL/DepthwiseConvolutionLayer.cpp b/tests/validation/CL/DepthwiseConvolutionLayer.cpp
index 22922f41a2..79a2678b44 100644
--- a/tests/validation/CL/DepthwiseConvolutionLayer.cpp
+++ b/tests/validation/CL/DepthwiseConvolutionLayer.cpp
@@ -48,14 +48,13 @@ constexpr RelativeTolerance<float>   tolerance_f32(0.01f);                  /**<
 constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(0);                  /**< Tolerance value for comparing reference's output against implementation's output for DataType::QASYMM8 */
 constexpr float                      tolerance_num = 0.05f;                 /**< Tolerance number */
 
-const auto depth_multipliers       = framework::dataset::make("DepthMultiplier", { 1, 2, 5 });
+const auto depth_multipliers       = framework::dataset::make("DepthMultiplier", { 1, 5 });
 const auto large_depth_multipliers = framework::dataset::make("DepthMultiplier", { 1, 2, 5, 8 });
 
 //Activation Functions
 const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
 {
     ActivationLayerInfo(),
-    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
     ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f, 0.f)
 });
 } // namespace
@@ -481,8 +480,8 @@ FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerQuantizedFixture
                            combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
                                                                            depth_multipliers),
                                                                    framework::dataset::make("DataType", DataType::QASYMM8)),
-                                                           framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.3f, 10), QuantizationInfo(2.2f, 10) })),
-                                                   framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) })),
+                                                           framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 128), QuantizationInfo(2.2f, 10) })),
+                                                   framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(1.f, 128) })),
                                            framework::dataset::make("DataLayout", { DataLayout::NHWC })), // NCHW is tested with int8
                                    ActivationFunctionsDataset))
 {
diff --git a/tests/validation/CL/DepthwiseConvolutionLayerNative.cpp b/tests/validation/CL/DepthwiseConvolutionLayerNative.cpp
index f640ee2b18..f565255719 100644
--- a/tests/validation/CL/DepthwiseConvolutionLayerNative.cpp
+++ b/tests/validation/CL/DepthwiseConvolutionLayerNative.cpp
@@ -62,7 +62,7 @@ RelativeTolerance<half_float::half>  rel_tolerance_f16(half_float::half(0.01f));
 constexpr float                      abs_tolerance_f16(0.03f);
 
 /** Width values to test - Precommit */
-const auto width_values_precommit = framework::dataset::make("width", { 1U, 17U, 32U } );
+const auto width_values_precommit = framework::dataset::make("width", { 1U, 33U } );
 
 /** Width values to test - Nightly */
 const auto width_values_nightly = framework::dataset::make("width", { 53U, 47U } );
@@ -79,6 +79,12 @@ const auto channel_values_precommit = framework::dataset::make("channels", { 15U
 /** Channel values to test - Nightly */
 const auto channel_values_nightly = framework::dataset::make("channels", { 33U, 19U });
 
+/** Channel values to test with cl_image support - Precommit */
+const auto channel_values_export_to_cl_image_precommit = framework::dataset::make("channels", { 16U });
+
+/** Channel values to test with cl_image support - Nightly */
+const auto channel_values_export_to_cl_image_nightly = framework::dataset::make("channels", { 32U });
+
 /** Batch values to test - Precommit */
 const auto batch_values_precommit = framework::dataset::make("batch", { 1U, 2U });
 
@@ -115,11 +121,17 @@ const auto n0_values_precommit = framework::dataset::make("N0", {2, 4});
 /** N0 values to test - Nightly */
 const auto n0_values_nightly = framework::dataset::make("N0", {3, 8});
 
+/** N0 values to test with cl_image support - Precommit */
+const auto n0_values_export_to_cl_image_precommit = framework::dataset::make("N0", {4});
+
+/** N0 values to test with cl_image support - Nightly */
+const auto n0_values_export_to_cl_image_nightly = framework::dataset::make("N0", {8});
+
 /** Activation values to test */
 const auto act_values = framework::dataset::make("Activation",
 {
     ActivationLayerInfo(),
-    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 8.f, 2.f),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.0f, 0.5f),
 });
 
 } // namespace
@@ -129,7 +141,7 @@ TEST_SUITE(DepthwiseConvolutionLayerNative)
 TEST_SUITE(Float)
 TEST_SUITE(FP32)
 FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerNativeFixture<float>, framework::DatasetMode::ALL,
-                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                                                                                                 width_values_precommit,
                                                                                                 height_values_precommit),
                                                                                                 channel_values_precommit),
@@ -142,14 +154,15 @@ FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerNativeFixture<fl
                                                                                                 framework::dataset::make("DataType", DataType::F32)),
                                                                                                 data_layout_values),
                                                                                                 act_values),
-                                                                                                n0_values_precommit))
+                                                                                                n0_values_precommit),
+                                                                                                framework::dataset::make("ExportToCLImage", false)))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
 }
 
 FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerNativeFixture<float>, framework::DatasetMode::NIGHTLY,
-                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                                                                                                 width_values_nightly,
                                                                                                 height_values_nightly),
                                                                                                 channel_values_nightly),
@@ -162,16 +175,79 @@ FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerNativeFixture<fl
                                                                                                 framework::dataset::make("DataType", DataType::F32)),
                                                                                                 data_layout_values),
                                                                                                 act_values),
-                                                                                                n0_values_nightly))
+                                                                                                n0_values_nightly),
+                                                                                                framework::dataset::make("ExportToCLImage", false)))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
 }
+
+TEST_SUITE(ExportWeightsToCLImage)
+FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerNativeFixture<float>, framework::DatasetMode::ALL,
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                                                                                                width_values_precommit,
+                                                                                                height_values_precommit),
+                                                                                                channel_values_export_to_cl_image_precommit),
+                                                                                                batch_values_precommit),
+                                                                                                kernel_sz_values_precommit),
+                                                                                                framework::dataset::make("depth_multiplier", 1)),
+                                                                                                dilation_values),
+                                                                                                stride_values),
+                                                                                                padding_valid_values),
+                                                                                                framework::dataset::make("DataType", DataType::F32)),
+                                                                                                data_layout_values),
+                                                                                                act_values),
+                                                                                                n0_values_export_to_cl_image_precommit),
+                                                                                                framework::dataset::make("ExportToCLImage", true)))
+{
+   // Precommit F32 run with weights exported to cl_image: compare only if the fixture left _validate_output set
+    if(_validate_output)
+    {
+        // Check the target against the reference using the F32 relative and absolute tolerances
+        validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+    }
+    else
+    {
+        ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
+        framework::ARM_COMPUTE_PRINT_INFO();
+    }
+}
+
+FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerNativeFixture<float>, framework::DatasetMode::NIGHTLY,
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                                                                                                width_values_nightly,
+                                                                                                height_values_nightly),
+                                                                                                channel_values_export_to_cl_image_nightly),
+                                                                                                batch_values_nightly),
+                                                                                                kernel_sz_values_nightly),
+                                                                                                framework::dataset::make("depth_multiplier", 1)),
+                                                                                                dilation_values),
+                                                                                                stride_values),
+                                                                                                padding_valid_values),
+                                                                                                framework::dataset::make("DataType", DataType::F32)),
+                                                                                                data_layout_values),
+                                                                                                act_values),
+                                                                                                n0_values_export_to_cl_image_nightly),
+                                                                                                framework::dataset::make("ExportToCLImage", true)))
+{
+   // Nightly F32 run with weights exported to cl_image: compare only if the fixture left _validate_output set
+    if(_validate_output)
+    {
+        // Check the target against the reference using the F32 relative and absolute tolerances
+        validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+    }
+    else
+    {
+        ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
+        framework::ARM_COMPUTE_PRINT_INFO();
+    }
+}
+TEST_SUITE_END() // ExportWeightsToCLImage
 TEST_SUITE_END() // FP32
 
 TEST_SUITE(FP16)
 FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerNativeFixture<half>, framework::DatasetMode::ALL,
-                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                                                                                                 width_values_precommit,
                                                                                                 height_values_precommit),
                                                                                                 channel_values_precommit),
@@ -184,14 +260,15 @@ FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerNativeFixture<ha
                                                                                                 framework::dataset::make("DataType", DataType::F16)),
                                                                                                 data_layout_values),
                                                                                                 act_values),
-                                                                                                n0_values_precommit))
+                                                                                                n0_values_precommit),
+                                                                                                framework::dataset::make("ExportToCLImage", false)))
 {
     // Validate output
         validate(CLAccessor(_target), _reference, rel_tolerance_f16);
 }
 
 FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerNativeFixture<half>, framework::DatasetMode::NIGHTLY,
-                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                                                                                                 width_values_nightly,
                                                                                                 height_values_nightly),
                                                                                                 channel_values_nightly),
@@ -204,18 +281,80 @@ FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerNativeFixture<ha
                                                                                                 framework::dataset::make("DataType", DataType::F16)),
                                                                                                 data_layout_values),
                                                                                                 act_values),
-                                                                                                n0_values_nightly))
+                                                                                                n0_values_nightly),
+                                                                                                framework::dataset::make("ExportToCLImage", false)))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
 }
+TEST_SUITE(ExportWeightsToCLImage)
+FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerNativeFixture<half>, framework::DatasetMode::ALL,
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                                                                                                width_values_precommit,
+                                                                                                height_values_precommit),
+                                                                                                channel_values_export_to_cl_image_precommit),
+                                                                                                batch_values_precommit),
+                                                                                                kernel_sz_values_precommit),
+                                                                                                framework::dataset::make("depth_multiplier", 1)),
+                                                                                                dilation_values),
+                                                                                                stride_values),
+                                                                                                padding_valid_values),
+                                                                                                framework::dataset::make("DataType", DataType::F16)),
+                                                                                                data_layout_values),
+                                                                                                act_values),
+                                                                                                n0_values_export_to_cl_image_precommit),
+                                                                                                framework::dataset::make("ExportToCLImage", true)))
+{
+   // Precommit F16 run with weights exported to cl_image: compare only if the fixture left _validate_output set
+    if(_validate_output)
+    {
+        // Check the target against the reference using the F16 relative and absolute tolerances
+        validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
+    }
+    else
+    {
+        ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
+        framework::ARM_COMPUTE_PRINT_INFO();
+    }
+}
+
+FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerNativeFixture<half>, framework::DatasetMode::NIGHTLY,
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                                                                                                width_values_nightly,
+                                                                                                height_values_nightly),
+                                                                                                channel_values_export_to_cl_image_nightly),
+                                                                                                batch_values_nightly),
+                                                                                                kernel_sz_values_nightly),
+                                                                                                framework::dataset::make("depth_multiplier", 1)),
+                                                                                                dilation_values),
+                                                                                                stride_values),
+                                                                                                padding_valid_values),
+                                                                                                framework::dataset::make("DataType", DataType::F16)),
+                                                                                                data_layout_values),
+                                                                                                act_values),
+                                                                                                n0_values_export_to_cl_image_nightly),
+                                                                                                framework::dataset::make("ExportToCLImage", true)))
+{
+   // Nightly F16 run with weights exported to cl_image: compare only if the fixture left _validate_output set
+    if(_validate_output)
+    {
+        // Check the target against the reference using the F16 relative and absolute tolerances
+        validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
+    }
+    else
+    {
+        ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
+        framework::ARM_COMPUTE_PRINT_INFO();
+    }
+}
+TEST_SUITE_END() // ExportWeightsToCLImage
 TEST_SUITE_END() // FP16
 TEST_SUITE_END() // Float
 TEST_SUITE(DepthMultiplier)
 TEST_SUITE(Float)
 TEST_SUITE(FP32)
 FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerNativeFixture<float>, framework::DatasetMode::ALL,
-                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                                                                                                 width_values_precommit,
                                                                                                 height_values_precommit),
                                                                                                 channel_values_precommit),
@@ -228,14 +367,15 @@ FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerNativeFixture<fl
                                                                                                 framework::dataset::make("DataType", DataType::F32)),
                                                                                                 data_layout_values),
                                                                                                 act_values),
-                                                                                                framework::dataset::make("N0", 1)))
+                                                                                                framework::dataset::make("N0", 1)),
+                                                                                                framework::dataset::make("ExportToCLImage", false)))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
 }
 
 FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerNativeFixture<float>, framework::DatasetMode::NIGHTLY,
-                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                                                                                                 width_values_nightly,
                                                                                                 height_values_nightly),
                                                                                                 channel_values_nightly),
@@ -248,7 +388,8 @@ FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerNativeFixture<fl
                                                                                                 framework::dataset::make("DataType", DataType::F32)),
                                                                                                 data_layout_values),
                                                                                                 act_values),
-                                                                                                framework::dataset::make("N0", 1)))
+                                                                                                framework::dataset::make("N0", 1)),
+                                                                                                framework::dataset::make("ExportToCLImage", false)))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
@@ -257,7 +398,7 @@ TEST_SUITE_END() // FP32
 
 TEST_SUITE(FP16)
 FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerNativeFixture<half>, framework::DatasetMode::ALL,
-                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                                                                                                 width_values_precommit,
                                                                                                 height_values_precommit),
                                                                                                 channel_values_precommit),
@@ -270,14 +411,15 @@ FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerNativeFixture<ha
                                                                                                 framework::dataset::make("DataType", DataType::F16)),
                                                                                                 data_layout_values),
                                                                                                 act_values),
-                                                                                                framework::dataset::make("N0", 1)))
+                                                                                                framework::dataset::make("N0", 1)),
+                                                                                                framework::dataset::make("ExportToCLImage", false)))
 {
     // Validate output
         validate(CLAccessor(_target), _reference, rel_tolerance_f16);
 }
 
 FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerNativeFixture<half>, framework::DatasetMode::NIGHTLY,
-                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                                                                                                 width_values_nightly,
                                                                                                 height_values_nightly),
                                                                                                 channel_values_nightly),
@@ -290,7 +432,8 @@ FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerNativeFixture<ha
                                                                                                 framework::dataset::make("DataType", DataType::F16)),
                                                                                                 data_layout_values),
                                                                                                 act_values),
-                                                                                                framework::dataset::make("N0", 1)))
+                                                                                                framework::dataset::make("N0", 1)),
+                                                                                                framework::dataset::make("ExportToCLImage", false)))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
diff --git a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h
index c255cc5c13..0e02ae28ca 100644
--- a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h
+++ b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h
@@ -198,7 +198,7 @@ protected:
         {
             case DataType::QASYMM8:
             {
-                std::uniform_int_distribution<uint8_t> distribution(0, 10);
+                std::uniform_int_distribution<uint8_t> distribution(0, 15);
                 library->fill(tensor, distribution, i);
                 break;
             }
@@ -292,7 +292,7 @@ public:
 
         if(padding_valid)
         {
-            _conv_info = PadStrideInfo();
+            _conv_info = PadStrideInfo(stride.width, stride.height);
         }
         else
         {
@@ -333,6 +333,7 @@ public:
     void allocate_and_run_target()
     {
         add_padding_x({ &_src, &_weights, &_biases, &_target }, _data_layout);
+        add_padding_y({ &_src, &_target }, _data_layout);
 
         // Allocate tensors
         _src.allocator()->allocate();
@@ -416,15 +417,16 @@ class DepthwiseConvolutionLayerNativeConfigurableValidationFixture : public Dept
 public:
     template <typename...>
     void setup(size_t width, size_t height, size_t channel, size_t batch, Size2D kernel_size, size_t depth_multiplier, Size2D dilation, Size2D stride, bool padding_valid, DataType data_type,
-               DataLayout data_layout, const ActivationLayerInfo &act_info, unsigned int n0)
+               DataLayout data_layout, const ActivationLayerInfo &act_info, unsigned int n0, bool export_to_cl_image)
     {
-        _dilation         = dilation;
-        _depth_multiplier = depth_multiplier;
-        _data_type        = data_type;
-        _data_layout      = data_layout;
-        _act_info         = act_info;
-        _n0               = n0;
-        _in_place         = in_place;
+        _dilation           = dilation;
+        _depth_multiplier   = depth_multiplier;
+        _data_type          = data_type;
+        _data_layout        = data_layout;
+        _act_info           = act_info;
+        _n0                 = n0;
+        _export_to_cl_image = export_to_cl_image;
+        _in_place           = in_place;
 
         _input_shape   = TensorShape(width, height, channel, batch);
         _weights_shape = TensorShape(kernel_size.width, kernel_size.height, channel * _depth_multiplier);
@@ -432,11 +434,11 @@ public:
 
         if(padding_valid)
         {
-            _conv_info = PadStrideInfo();
+            _conv_info = calculate_same_pad(_input_shape, _weights_shape, PadStrideInfo(stride.width, stride.height), DataLayout::NCHW, _dilation);
         }
         else
         {
-            _conv_info = calculate_same_pad(_input_shape, _weights_shape, PadStrideInfo(stride.width, stride.height), DataLayout::NCHW, _dilation);
+            _conv_info = PadStrideInfo(stride.width, stride.height);
         }
     }
 
@@ -462,14 +464,26 @@ public:
             target_to_use = &_target;
         }
 
-        DWCWeightsKernelInfo dwc_weights_info;
-        dwc_weights_info.n0 = _n0;
+        DWCComputeKernelInfo dwc_info;
+        dwc_info.n0                         = _n0;
+        dwc_info.m0                         = _conv_info.stride().first == 1 && _dilation.x() == 1 ? 8 : 1;
+        dwc_info.export_weights_to_cl_image = _export_to_cl_image;
 
-        DWCKernelInfo dwc_info;
-        dwc_info.activation_info = _act_info;
+#if defined(ARM_COMPUTE_OPENCL_ENABLED)
+        if(_export_to_cl_image) // Exporting the weights to cl_image was requested: keep validation on only if both device checks below pass
+        {
+            _validate_output &= image2d_from_buffer_supported(CLKernelLibrary::get().get_device()); // AND, not OR: the flag starts as true, so |= could never clear it
+            _validate_output &= (get_cl_image_pitch_alignment(CLKernelLibrary::get().get_device()) != 0); // A reported pitch alignment of 0 also switches validation off
+        }
+#endif // ARM_COMPUTE_OPENCL_ENABLED
+
+        const ConvolutionInfo conv_kernel_info
+        {
+            _conv_info, _depth_multiplier, _act_info, _dilation
+        };
 
         // Create Depthwise Convolution configure function
-        _dwc.configure(&_src, &_weights, &_biases, target_to_use, dwc_weights_info, dwc_info, _conv_info, _depth_multiplier, _dilation);
+        _dwc.configure(&_src, &_weights, &_biases, target_to_use, dwc_info, conv_kernel_info);
 
         ARM_COMPUTE_ASSERT(_src.info()->is_resizable());
         ARM_COMPUTE_ASSERT(_weights.info()->is_resizable());
@@ -479,7 +493,8 @@ public:
 
     void allocate_and_run_target()
     {
-        add_padding_x({ &_src, &_weights, &_biases, &_target }, _data_layout);
+        add_padding_x({ &_src, &_biases, &_target }, _data_layout);
+        add_padding_x({ &_weights }, _data_layout, _export_to_cl_image); // Don't add left padding if cl image will be used
 
         // Allocate tensors
         _src.allocator()->allocate();
@@ -508,7 +523,10 @@ public:
         }
 
         // Compute function
-        _dwc.run();
+        if(_validate_output)
+        {
+            _dwc.run();
+        }
 
         // Reinstating original data layout for the test suite to properly check the values
         if(!_in_place)
@@ -529,7 +547,10 @@ public:
 
         const ConvolutionInfo info{ _conv_info, _depth_multiplier, _act_info, _dilation };
         const TensorShape     dst_shape = compute_depthwise_convolution_shape(TensorInfo(_input_shape, 1, _data_type), TensorInfo(_weights_shape, 1, _data_type), info);
-        _reference                      = reference::activation_layer(reference::depthwise_convolution(src, weights, biases, dst_shape, _conv_info, _depth_multiplier, _dilation), _act_info);
+        if(_validate_output)
+        {
+            _reference = reference::activation_layer(reference::depthwise_convolution(src, weights, biases, dst_shape, _conv_info, _depth_multiplier, _dilation), _act_info);
+        }
     }
 
 protected:
@@ -573,6 +594,8 @@ protected:
     Size2D              _dilation{};
     unsigned int        _depth_multiplier{};
     unsigned int        _n0{};
+    bool                _export_to_cl_image{};
+    bool                _validate_output{ true };
     bool                _in_place{ false };
 };
author	Gian Marco Iodice <gianmarco.iodice@arm.com>	2021-04-16 15:08:59 +0100
committer	Gian Marco Iodice <gianmarco.iodice@arm.com>	2021-07-02 15:56:45 +0000
commit	8155c0253c00aa9e26651361460c66feb39829a6 (patch)
tree	41dacc432d4d1f1daa32d20d15e5120c11b9fa56 /tests/validation
parent	2eb5d16b839cbc28c6cb7f0de7a0bf15290b425a (diff)
download	ComputeLibrary-8155c0253c00aa9e26651361460c66feb39829a6.tar.gz