From dfca60b8e8805966624c7c941f289e090e3d73bb Mon Sep 17 00:00:00 2001
From: Giorgio Arena <giorgio.arena@arm.com>
Date: Wed, 31 Jan 2018 10:30:59 +0000
Subject: COMPMID-811 Add NHWC data format support for CL depthwise convolution
 QASYMM8

Change-Id: I89de432f3fbcba7abf9e1d4f8396a4334b4fa2c2
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/118324
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
---
 tests/datasets/DepthwiseConvolutionLayerDataset.h  | 12 +++++-
 tests/validation/CL/DepthwiseConvolutionLayer.cpp  | 43 +++++++++++++---------
 .../validation/NEON/DepthwiseConvolutionLayer.cpp  | 18 +++++----
 .../fixtures/DepthwiseConvolutionLayerFixture.h    |  4 +-
 .../reference/DepthwiseConvolutionLayer.cpp        | 30 +++++++++++----
 5 files changed, 72 insertions(+), 35 deletions(-)

(limited to 'tests')
diff --git a/tests/datasets/DepthwiseConvolutionLayerDataset.h b/tests/datasets/DepthwiseConvolutionLayerDataset.h
index 0413cd0c94..1e77a0c8dd 100644
--- a/tests/datasets/DepthwiseConvolutionLayerDataset.h
+++ b/tests/datasets/DepthwiseConvolutionLayerDataset.h
@@ -160,11 +160,20 @@ public:
     {
         add_config(TensorShape(3U, 3U, 2U), TensorShape(3U, 3U, 2U), TensorShape(1U, 1U, 2U), PadStrideInfo(1, 1, 0, 0));
         add_config(TensorShape(7U, 7U, 3U, 2U), TensorShape(3U, 3U, 3U), TensorShape(5U, 5U, 3U, 2U), PadStrideInfo(1, 1, 0, 0));
-        add_config(TensorShape(33U, 27U, 11U), TensorShape(3U, 3U, 11U), TensorShape(11U, 14U, 11U), PadStrideInfo(3, 2, 1, 1));
         add_config(TensorShape(21U, 31U, 9U, 4U), TensorShape(3U, 3U, 9U), TensorShape(21U, 15U, 9U, 4U), PadStrideInfo(1, 2, 1, 0));
         add_config(TensorShape(33U, 27U, 11U, 3U), TensorShape(3U, 3U, 11U), TensorShape(31U, 14U, 11U, 3U), PadStrideInfo(1, 2, 0, 1));
         // Asymmetric padding
         add_config(TensorShape(33U, 27U, 11U), TensorShape(3U, 3U, 11U), TensorShape(16U, 13U, 11U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR));
+    }
+};
+
+class SmallDepthwiseConvolutionLayerDataset3x3NCHW final : public DepthwiseConvolutionLayerDataset
+{
+public:
+    SmallDepthwiseConvolutionLayerDataset3x3NCHW()
+    {
+        add_config(TensorShape(33U, 27U, 11U), TensorShape(3U, 3U, 11U), TensorShape(11U, 14U, 11U), PadStrideInfo(3, 2, 1, 1));
+        // Asymmetric padding
         add_config(TensorShape(33U, 27U, 11U), TensorShape(3U, 3U, 11U), TensorShape(18U, 14U, 11U), PadStrideInfo(2, 2, 3, 1, 2, 1, DimensionRoundingType::FLOOR));
     }
 };
@@ -176,7 +185,6 @@ public:
     LargeDepthwiseConvolutionLayerDataset3x3()
     {
         add_config(TensorShape(233U, 277U, 55U, 3U), TensorShape(3U, 3U, 55U), TensorShape(116U, 275U, 55U, 3U), PadStrideInfo(2, 1, 0, 0));
-        add_config(TensorShape(333U, 277U, 77U), TensorShape(3U, 3U, 77U), TensorShape(111U, 138U, 77U), PadStrideInfo(3, 2, 1, 0));
         add_config(TensorShape(177U, 311U, 22U), TensorShape(3U, 3U, 22U), TensorShape(177U, 156U, 22U), PadStrideInfo(1, 2, 1, 1));
         add_config(TensorShape(233U, 277U, 55U), TensorShape(3U, 3U, 55U), TensorShape(231U, 138U, 55U), PadStrideInfo(1, 2, 0, 0));
         add_config(TensorShape(333U, 277U, 77U, 5U), TensorShape(3U, 3U, 77U), TensorShape(166U, 93U, 77U, 5U), PadStrideInfo(2, 3, 0, 1));
diff --git a/tests/validation/CL/DepthwiseConvolutionLayer.cpp b/tests/validation/CL/DepthwiseConvolutionLayer.cpp
index 1779ff1aee..450bb21e77 100644
--- a/tests/validation/CL/DepthwiseConvolutionLayer.cpp
+++ b/tests/validation/CL/DepthwiseConvolutionLayer.cpp
@@ -76,10 +76,12 @@ using CLDepthwiseConvolutionLayerFixture3x3 = DepthwiseConvolutionLayerValidatio
 TEST_SUITE(Float)
 TEST_SUITE(F16)
 TEST_SUITE(W3x3)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture3x3<half>, framework::DatasetMode::ALL, combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
-                                                                                                                   framework::dataset::make("DataType",
-                                                                                                                           DataType::F16)),
-                                                                                                                   framework::dataset::make("DataLayout", DataLayout::NCHW)))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture3x3<half>, framework::DatasetMode::ALL,
+                       combine(combine(framework::dataset::concat(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
+                                                                  datasets::SmallDepthwiseConvolutionLayerDataset3x3NCHW()),
+                                       framework::dataset::make("DataType",
+                                                                DataType::F16)),
+                               framework::dataset::make("DataLayout", DataLayout::NCHW)))
 {
     validate(CLAccessor(_target), _reference, tolerance_f16);
 }
@@ -95,10 +97,12 @@ TEST_SUITE_END()
 
 TEST_SUITE(FP32)
 TEST_SUITE(W3x3)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture3x3<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
-                                                                                                                    framework::dataset::make("DataType",
-                                                                                                                            DataType::F32)),
-                                                                                                                    framework::dataset::make("DataLayout", DataLayout::NCHW)))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture3x3<float>, framework::DatasetMode::ALL,
+                       combine(combine(framework::dataset::concat(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
+                                                                  datasets::SmallDepthwiseConvolutionLayerDataset3x3NCHW()),
+                                       framework::dataset::make("DataType",
+                                                                DataType::F32)),
+                               framework::dataset::make("DataLayout", DataLayout::NCHW)))
 {
     validate(CLAccessor(_target), _reference, tolerance_f32);
 }
@@ -121,29 +125,34 @@ using CLDepthwiseConvolutionLayerQuantizedFixture3x3 = DepthwiseConvolutionLayer
 TEST_SUITE(Quantized)
 TEST_SUITE(QASYMM8)
 TEST_SUITE(Generic)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
                        framework::dataset::make("DataType", DataType::QASYMM8)),
-                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })))
+                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })),
+                       framework::dataset::make("DataLayout", DataLayout::NCHW)))
 {
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(),
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(),
                        framework::dataset::make("DataType", DataType::QASYMM8)),
-                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })))
+                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })),
+                       framework::dataset::make("DataLayout", DataLayout::NCHW)))
 {
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
 TEST_SUITE_END()
 TEST_SUITE(W3x3)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerQuantizedFixture3x3<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
-                       framework::dataset::make("DataType", DataType::QASYMM8)),
-                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerQuantizedFixture3x3<uint8_t>, framework::DatasetMode::PRECOMMIT,
+                       combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
+                                               framework::dataset::make("DataType", DataType::QASYMM8)),
+                                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })),
+                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
 {
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerQuantizedFixture3x3<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerQuantizedFixture3x3<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
                        framework::dataset::make("DataType", DataType::QASYMM8)),
-                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })))
+                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })),
+                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
 {
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
diff --git a/tests/validation/NEON/DepthwiseConvolutionLayer.cpp b/tests/validation/NEON/DepthwiseConvolutionLayer.cpp
index 49e146c084..236d4bd653 100644
--- a/tests/validation/NEON/DepthwiseConvolutionLayer.cpp
+++ b/tests/validation/NEON/DepthwiseConvolutionLayer.cpp
@@ -148,23 +148,27 @@ using NEDepthwiseConvolutionLayerQuantizedFixture = DepthwiseConvolutionLayerVal
 TEST_SUITE(Quantized)
 TEST_SUITE(QASYMM8)
 TEST_SUITE(Generic)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
                        framework::dataset::make("DataType", DataType::QASYMM8)),
-                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })))
+                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })),
+                       framework::dataset::make("DataLayout", DataLayout::NCHW)))
 {
     validate(Accessor(_target), _reference, tolerance_qasymm8);
 }
 TEST_SUITE_END()
 TEST_SUITE(W3x3)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerQuantizedFixture3x3<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
-                       framework::dataset::make("DataType", DataType::QASYMM8)),
-                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerQuantizedFixture3x3<uint8_t>, framework::DatasetMode::PRECOMMIT,
+                       combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
+                                               framework::dataset::make("DataType", DataType::QASYMM8)),
+                                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })),
+                               framework::dataset::make("DataLayout", DataLayout::NCHW)))
 {
     validate(Accessor(_target), _reference, tolerance_qasymm8);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerQuantizedFixture3x3<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerQuantizedFixture3x3<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
                        framework::dataset::make("DataType", DataType::QASYMM8)),
-                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })))
+                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })),
+                       framework::dataset::make("DataLayout", DataLayout::NCHW)))
 {
     validate(Accessor(_target), _reference, tolerance_qasymm8);
 }
diff --git a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h
index ccdd443999..bb756f806e 100644
--- a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h
+++ b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h
@@ -177,10 +177,10 @@ class DepthwiseConvolutionLayerValidationQuantizedFixture : public DepthwiseConv
 {
 public:
     template <typename...>
-    void setup(TensorShape in_shape, TensorShape weights_shape, TensorShape out_shape, PadStrideInfo pad_stride_info, DataType data_type, QuantizationInfo quantization_info)
+    void setup(TensorShape in_shape, TensorShape weights_shape, TensorShape out_shape, PadStrideInfo pad_stride_info, DataType data_type, QuantizationInfo quantization_info, DataLayout data_layout)
     {
         DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(in_shape, weights_shape, out_shape, pad_stride_info,
-                                                                                                            data_type, quantization_info, DataLayout::NCHW);
+                                                                                                            data_type, quantization_info, data_layout);
     }
 };
 } // namespace validation
diff --git a/tests/validation/reference/DepthwiseConvolutionLayer.cpp b/tests/validation/reference/DepthwiseConvolutionLayer.cpp
index ab61b7dd65..d05da9140b 100644
--- a/tests/validation/reference/DepthwiseConvolutionLayer.cpp
+++ b/tests/validation/reference/DepthwiseConvolutionLayer.cpp
@@ -108,13 +108,9 @@ void depthwise_convolution_nchw(const SimpleTensor<T> &src, const SimpleTensor<T
     }
 }
 
-template <>
-SimpleTensor<uint8_t> depthwise_convolution(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint8_t> &weights, const SimpleTensor<int32_t> &biases, const TensorShape &dst_shape,
-                                            const PadStrideInfo &conv_info)
+void depthwise_convolution_nchw(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint8_t> &weights, const SimpleTensor<int32_t> &biases, SimpleTensor<uint8_t> &dst, const PadStrideInfo &conv_info)
 {
     // Create reference
-    SimpleTensor<uint8_t> dst{ dst_shape, src.data_type(), 1, src.fixed_point_position(), src.quantization_info() };
-
     const int   input_offset   = -src.quantization_info().offset;
     const float input_scale    = src.quantization_info().scale;
     const int   weights_offset = -weights.quantization_info().offset;
@@ -169,8 +165,8 @@ SimpleTensor<uint8_t> depthwise_convolution(const SimpleTensor<uint8_t> &src, co
                         {
                             coords.set(0, i);
                             coords.set(1, j);
-                            auto    in_val = tensor_elem_at<uint8_t>(src, coords, BorderMode::CONSTANT, -input_offset);
-                            uint8_t w_val  = *(weights.data() + filter_offset);
+                            const auto    in_val = tensor_elem_at<uint8_t>(src, coords, BorderMode::CONSTANT, -input_offset);
+                            const uint8_t w_val  = *(weights.data() + filter_offset);
                             val += (in_val + input_offset) * (w_val + weights_offset);
                             ++filter_offset;
                         }
@@ -187,6 +183,26 @@ SimpleTensor<uint8_t> depthwise_convolution(const SimpleTensor<uint8_t> &src, co
             }
         }
     }
+}
+
+template <>
+SimpleTensor<uint8_t> depthwise_convolution(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint8_t> &weights, const SimpleTensor<int32_t> &biases, const TensorShape &dst_shape,
+                                            const PadStrideInfo &conv_info)
+{
+    SimpleTensor<uint8_t> dst{ dst_shape, src.data_type(), 1, src.fixed_point_position(), src.quantization_info() };
+
+    if(src.data_layout() == DataLayout::NHWC)
+    {
+        SimpleTensor<uint8_t> src_nchw     = reference::permute<uint8_t>(src, PermutationVector(1U, 2U, 0U));
+        SimpleTensor<uint8_t> weights_nchw = reference::permute<uint8_t>(weights, PermutationVector(1U, 2U, 0U));
+        SimpleTensor<uint8_t> dst_nchw     = reference::permute<uint8_t>(dst, PermutationVector(1U, 2U, 0U));
+
+        depthwise_convolution_nchw(src_nchw, weights_nchw, biases, dst_nchw, conv_info);
+
+        return reference::permute<uint8_t>(dst_nchw, PermutationVector(2U, 0U, 1U));
+    }
+
+    depthwise_convolution_nchw(src, weights, biases, dst, conv_info);
 
     return dst;
 }
-- 
cgit v1.2.1