COMPMID-926 Add depth multiplier support to NEON/CL/GLES depthwise convolution

Change-Id: I03f32c62350e5ea43e77bb15fc5a832d83719e3b
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/126657
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele DiGiorgio <michele.digiorgio@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
author: Giorgio Arena <giorgio.arena@arm.com> 2018-04-04 17:44:26 +0100
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:50:48 +0000
commit: 7657224de2b697a8a92cccf26d98e53ccd7c1a03 (patch)
tree: 1dcfa4541dbaf753854a628c93991652158d373e /tests/validation
parent: e74b201ca1abca040ca9f30837fdf19aa610e7c4 (diff)
download: ComputeLibrary-7657224de2b697a8a92cccf26d98e53ccd7c1a03.tar.gz
7 files changed, 179 insertions, 128 deletions
diff --git a/tests/validation/CL/DepthwiseConvolutionLayer.cpp b/tests/validation/CL/DepthwiseConvolutionLayer.cpp
index 450bb21e77..ad7a5d819b 100644
--- a/tests/validation/CL/DepthwiseConvolutionLayer.cpp
+++ b/tests/validation/CL/DepthwiseConvolutionLayer.cpp
@@ -45,6 +45,8 @@ namespace
 RelativeTolerance<half_float::half>  tolerance_f16(half_float::half(0.001)); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */
 constexpr RelativeTolerance<float>   tolerance_f32(0.01f);                   /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */
 constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1);                   /**< Tolerance value for comparing reference's output against implementation's output for DataType::QASYMM8 */
+
+const auto depth_multipliers = framework::dataset::make("DepthMultiplier", { 1, 2, 3 });
 } // namespace
 
 TEST_SUITE(CL)
@@ -54,14 +56,15 @@ template <typename T>
 using CLDepthwiseConvolutionLayerFixture = DepthwiseConvolutionLayerValidationFixture<CLTensor, CLAccessor, CLDepthwiseConvolutionLayer, T>;
 
 TEST_SUITE(Generic)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(), depth_multipliers),
                                                                                                                  framework::dataset::make("DataType",
                                                                                                                          DataType::F32)),
                                                                                                                  framework::dataset::make("DataLayout", DataLayout::NCHW)))
 {
     validate(CLAccessor(_target), _reference, tolerance_f32);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(),
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(),
+                                                                                                                     depth_multipliers),
                                                                                                                      framework::dataset::make("DataType",
                                                                                                                              DataType::F32)),
                                                                                                                      framework::dataset::make("DataLayout", DataLayout::NCHW)))
@@ -77,15 +80,17 @@ TEST_SUITE(Float)
 TEST_SUITE(F16)
 TEST_SUITE(W3x3)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture3x3<half>, framework::DatasetMode::ALL,
-                       combine(combine(framework::dataset::concat(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
-                                                                  datasets::SmallDepthwiseConvolutionLayerDataset3x3NCHW()),
+                       combine(combine(combine(framework::dataset::concat(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
+                                                                          datasets::SmallDepthwiseConvolutionLayerDataset3x3NCHW()),
+                                               depth_multipliers),
                                        framework::dataset::make("DataType",
                                                                 DataType::F16)),
                                framework::dataset::make("DataLayout", DataLayout::NCHW)))
 {
     validate(CLAccessor(_target), _reference, tolerance_f16);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture3x3<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture3x3<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
+                                                                                                                       depth_multipliers),
                                                                                                                        framework::dataset::make("DataType",
                                                                                                                                DataType::F16)),
                                                                                                                        framework::dataset::make("DataLayout", DataLayout::NCHW)))
@@ -98,15 +103,17 @@ TEST_SUITE_END()
 TEST_SUITE(FP32)
 TEST_SUITE(W3x3)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture3x3<float>, framework::DatasetMode::ALL,
-                       combine(combine(framework::dataset::concat(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
-                                                                  datasets::SmallDepthwiseConvolutionLayerDataset3x3NCHW()),
+                       combine(combine(combine(framework::dataset::concat(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
+                                                                          datasets::SmallDepthwiseConvolutionLayerDataset3x3NCHW()),
+                                               depth_multipliers),
                                        framework::dataset::make("DataType",
                                                                 DataType::F32)),
                                framework::dataset::make("DataLayout", DataLayout::NCHW)))
 {
     validate(CLAccessor(_target), _reference, tolerance_f32);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture3x3<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture3x3<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
+                                                                                                                        depth_multipliers),
                                                                                                                         framework::dataset::make("DataType",
                                                                                                                                 DataType::F32)),
                                                                                                                         framework::dataset::make("DataLayout", DataLayout::NCHW)))
@@ -125,34 +132,41 @@ using CLDepthwiseConvolutionLayerQuantizedFixture3x3 = DepthwiseConvolutionLayer
 TEST_SUITE(Quantized)
 TEST_SUITE(QASYMM8)
 TEST_SUITE(Generic)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
-                       framework::dataset::make("DataType", DataType::QASYMM8)),
-                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })),
-                       framework::dataset::make("DataLayout", DataLayout::NCHW)))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+                       combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
+                                                       depth_multipliers),
+                                               framework::dataset::make("DataType", DataType::QASYMM8)),
+                                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })),
+                               framework::dataset::make("DataLayout", DataLayout::NCHW)))
 {
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(),
-                       framework::dataset::make("DataType", DataType::QASYMM8)),
-                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })),
-                       framework::dataset::make("DataLayout", DataLayout::NCHW)))
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(),
+                                                       depth_multipliers),
+                                               framework::dataset::make("DataType", DataType::QASYMM8)),
+                                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })),
+                               framework::dataset::make("DataLayout", DataLayout::NCHW)))
 {
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
 TEST_SUITE_END()
 TEST_SUITE(W3x3)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerQuantizedFixture3x3<uint8_t>, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
+                       combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
+                                                       framework::dataset::make("DepthMultiplier", 1)), // COMPMID-1071 Add depth multiplier support for NHWC
                                                framework::dataset::make("DataType", DataType::QASYMM8)),
                                        framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })),
                                framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
 {
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerQuantizedFixture3x3<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
-                       framework::dataset::make("DataType", DataType::QASYMM8)),
-                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })),
-                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerQuantizedFixture3x3<uint8_t>, framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
+                                                       framework::dataset::make("DepthMultiplier", 1)), // COMPMID-1071 Add depth multiplier support for NHWC
+                                               framework::dataset::make("DataType", DataType::QASYMM8)),
+                                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })),
+                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
 {
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
diff --git a/tests/validation/GLES_COMPUTE/DepthwiseConvolutionLayer.cpp b/tests/validation/GLES_COMPUTE/DepthwiseConvolutionLayer.cpp
index 2baa93e413..22b1e08d5b 100644
--- a/tests/validation/GLES_COMPUTE/DepthwiseConvolutionLayer.cpp
+++ b/tests/validation/GLES_COMPUTE/DepthwiseConvolutionLayer.cpp
@@ -44,6 +44,8 @@ namespace
 {
 RelativeTolerance<half> tolerance_fp16(half(0.2)); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */
 constexpr float         tolerance_num = 0.07f;     /**< Tolerance number */
+
+const auto depth_multipliers = framework::dataset::make("DepthMultiplier", { 1, 2, 3 });
 } // namespace
 
 TEST_SUITE(GC)
@@ -55,14 +57,16 @@ using GCDepthwiseConvolutionLayerFixture3x3 = DepthwiseConvolutionLayerValidatio
 TEST_SUITE(Float)
 TEST_SUITE(FP16)
 TEST_SUITE(W3x3)
-FIXTURE_DATA_TEST_CASE(RunSmall, GCDepthwiseConvolutionLayerFixture3x3<half>, framework::DatasetMode::ALL, combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
+FIXTURE_DATA_TEST_CASE(RunSmall, GCDepthwiseConvolutionLayerFixture3x3<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
+                                                                                                                   depth_multipliers),
                                                                                                                    framework::dataset::make("DataType",
                                                                                                                            DataType::F16)),
                                                                                                                    framework::dataset::make("DataLayout", DataLayout::NCHW)))
 {
     validate(GCAccessor(_target), _reference, tolerance_fp16, tolerance_num);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, GCDepthwiseConvolutionLayerFixture3x3<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
+FIXTURE_DATA_TEST_CASE(RunLarge, GCDepthwiseConvolutionLayerFixture3x3<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
+                                                                                                                       depth_multipliers),
                                                                                                                        framework::dataset::make("DataType",
                                                                                                                                DataType::F16)),
                                                                                                                        framework::dataset::make("DataLayout", DataLayout::NCHW)))
diff --git a/tests/validation/NEON/DepthwiseConvolutionLayer.cpp b/tests/validation/NEON/DepthwiseConvolutionLayer.cpp
index 236d4bd653..b1cc491ac8 100644
--- a/tests/validation/NEON/DepthwiseConvolutionLayer.cpp
+++ b/tests/validation/NEON/DepthwiseConvolutionLayer.cpp
@@ -22,6 +22,7 @@
  * SOFTWARE.
  */
 #include "arm_compute/core/Types.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
 #include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h"
 #include "arm_compute/runtime/Tensor.h"
 #include "arm_compute/runtime/TensorAllocator.h"
@@ -40,20 +41,34 @@ namespace test
 {
 namespace validation
 {
+using namespace arm_compute::misc::shape_calculator;
+
 namespace
 {
 constexpr RelativeTolerance<float>   tolerance_f32(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */
 constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); /**< Tolerance value for comparing reference's output against implementation's output for DataType::QASYMM8 */
+
+const auto depth_multipliers = framework::dataset::make("DepthMultiplier", { 1, 2, 3 });
 } // namespace
 
 TEST_SUITE(NEON)
 TEST_SUITE(DepthwiseConvLayer)
 
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
-                                                                                              datasets::LargeDepthwiseConvolutionLayerDataset3x3()),
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
+                                                                                                      datasets::LargeDepthwiseConvolutionLayerDataset3x3()),
+                                                                           depth_multipliers),
                                                                    framework::dataset::make("DataType", DataType::F32)),
-               input_shape, weights_shape, output_shape, info, data_type)
+               input_shape, kernel_size, info, depth_multiplier, data_type)
 {
+    // Get shapes
+    TensorShape weights_shape(kernel_size.width, kernel_size.height);
+
+    const TensorInfo  in_info(input_shape, 1, data_type);
+    const TensorInfo  we_info(weights_shape, 1, data_type);
+    const TensorShape output_shape = compute_depthwise_convolution_shape(in_info, we_info, info, depth_multiplier);
+
+    weights_shape.set(2, output_shape.z());
+
     // Create tensors
     Tensor            src     = create_tensor<Tensor>(input_shape, data_type);
     Tensor            dst     = create_tensor<Tensor>(output_shape, data_type);
@@ -68,7 +83,7 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::da
 
     // Create and configure function
     NEDepthwiseConvolutionLayer3x3 depthwise_layer;
-    depthwise_layer.configure(&src, &weights, &bias, &dst, info);
+    depthwise_layer.configure(&src, &weights, &bias, &dst, info, depth_multiplier);
 
     // Validate valid region
     const ValidRegion input_valid_region   = shape_to_valid_region(input_shape);
@@ -82,7 +97,7 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::da
     validate(bias.info()->valid_region(), bias_valid_region);
 
     // Validate padding
-    bool              is_optimized_run = NEDepthwiseConvolutionLayer3x3Kernel::is_optimized_execution_possible(input_shape, info, data_type, DataLayout::NCHW);
+    bool              is_optimized_run = NEDepthwiseConvolutionLayer3x3Kernel::is_optimized_execution_possible(input_shape, info, data_type, depth_multiplier, DataLayout::NCHW);
     const int         step_non_opt_dwc = 16 >> info.stride().first;
     const int         step_bias_add    = 16 / src.info()->element_size();
     const int         step             = is_optimized_run ? step_bias_add : std::max(step_non_opt_dwc, step_bias_add);
@@ -95,14 +110,16 @@ TEST_SUITE(F32)
 TEST_SUITE(Generic)
 template <typename T>
 using NEDepthwiseConvolutionLayerFixture = DepthwiseConvolutionLayerValidationFixture<Tensor, Accessor, NEDepthwiseConvolutionLayer, T>;
-FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
+                                                                                                                       depth_multipliers),
                                                                                                                        framework::dataset::make("DataType",
                                                                                                                                DataType::F32)),
                                                                                                                        framework::dataset::make("DataLayout", DataLayout::NCHW)))
 {
     validate(Accessor(_target), _reference, tolerance_f32);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(),
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(),
+                                                                                                                     depth_multipliers),
                                                                                                                      framework::dataset::make("DataType",
                                                                                                                              DataType::F32)),
                                                                                                                      framework::dataset::make("DataLayout", DataLayout::NCHW)))
@@ -114,21 +131,24 @@ TEST_SUITE_END()
 TEST_SUITE(W3x3)
 template <typename T>
 using NEDepthwiseConvolutionLayerFixture3x3 = DepthwiseConvolutionLayerValidationFixture<Tensor, Accessor, NEDepthwiseConvolutionLayer3x3, T>;
-FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture3x3<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture3x3<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
+                                                                                                                    depth_multipliers),
                                                                                                                     framework::dataset::make("DataType",
                                                                                                                             DataType::F32)),
                                                                                                                     framework::dataset::make("DataLayout", DataLayout::NCHW)))
 {
     validate(Accessor(_target), _reference, tolerance_f32);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture3x3<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture3x3<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
+                                                                                                                        depth_multipliers),
                                                                                                                         framework::dataset::make("DataType",
                                                                                                                                 DataType::F32)),
                                                                                                                         framework::dataset::make("DataLayout", DataLayout::NCHW)))
 {
     validate(Accessor(_target), _reference, tolerance_f32);
 }
-FIXTURE_DATA_TEST_CASE(RunOptimized, NEDepthwiseConvolutionLayerFixture3x3<float>, framework::DatasetMode::ALL, combine(combine(datasets::OptimizedDepthwiseConvolutionLayerDataset3x3(),
+FIXTURE_DATA_TEST_CASE(RunOptimized, NEDepthwiseConvolutionLayerFixture3x3<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::OptimizedDepthwiseConvolutionLayerDataset3x3(),
+                                                                                                                        framework::dataset::make("DepthMultiplier", 1)),
                                                                                                                         framework::dataset::make("DataType",
                                                                                                                                 DataType::F32)),
                                                                                                                         framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
@@ -148,27 +168,31 @@ using NEDepthwiseConvolutionLayerQuantizedFixture = DepthwiseConvolutionLayerVal
 TEST_SUITE(Quantized)
 TEST_SUITE(QASYMM8)
 TEST_SUITE(Generic)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
-                       framework::dataset::make("DataType", DataType::QASYMM8)),
-                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })),
-                       framework::dataset::make("DataLayout", DataLayout::NCHW)))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+                       combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
+                                                       depth_multipliers),
+                                               framework::dataset::make("DataType", DataType::QASYMM8)),
+                                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })),
+                               framework::dataset::make("DataLayout", DataLayout::NCHW)))
 {
     validate(Accessor(_target), _reference, tolerance_qasymm8);
 }
 TEST_SUITE_END()
 TEST_SUITE(W3x3)
 FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerQuantizedFixture3x3<uint8_t>, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
+                       combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), depth_multipliers),
                                                framework::dataset::make("DataType", DataType::QASYMM8)),
                                        framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })),
                                framework::dataset::make("DataLayout", DataLayout::NCHW)))
 {
     validate(Accessor(_target), _reference, tolerance_qasymm8);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerQuantizedFixture3x3<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
-                       framework::dataset::make("DataType", DataType::QASYMM8)),
-                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })),
-                       framework::dataset::make("DataLayout", DataLayout::NCHW)))
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerQuantizedFixture3x3<uint8_t>, framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
+                                                       depth_multipliers),
+                                               framework::dataset::make("DataType", DataType::QASYMM8)),
+                                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })),
+                               framework::dataset::make("DataLayout", DataLayout::NCHW)))
 {
     validate(Accessor(_target), _reference, tolerance_qasymm8);
 }
diff --git a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h
index bb756f806e..b7bca8dbf3 100644
--- a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h
+++ b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h
@@ -26,6 +26,7 @@
 
 #include "arm_compute/core/TensorShape.h"
 #include "arm_compute/core/Types.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
 #include "tests/AssetsLibrary.h"
 #include "tests/Globals.h"
 #include "tests/IAccessor.h"
@@ -44,6 +45,8 @@ namespace test
 {
 namespace validation
 {
+using namespace arm_compute::misc::shape_calculator;
+
 template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
 class DepthwiseConvolutionLayerValidationGenericFixture : public framework::Fixture
 {
@@ -52,12 +55,20 @@ public:
 
 public:
     template <typename...>
-    void setup(TensorShape in_shape, TensorShape weights_shape, TensorShape out_shape, PadStrideInfo pad_stride_info, DataType data_type, QuantizationInfo quantization_info, DataLayout data_layout)
+    void setup(TensorShape in_shape, Size2D kernel_size, PadStrideInfo pad_stride_info, unsigned int depth_multiplier, DataType data_type, QuantizationInfo quantization_info, DataLayout data_layout)
     {
-        _quantization_info = quantization_info;
-        _data_type         = data_type;
+        _quantization_info            = quantization_info;
+        _data_type                    = data_type;
+        const DataType bias_data_type = is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type;
+
+        TensorShape weights_shape(kernel_size.width, kernel_size.height);
+
+        const TensorInfo in_info(in_shape, 1, data_type);
+        const TensorInfo we_info(weights_shape, 1, data_type);
+        TensorShape      out_shape = compute_depthwise_convolution_shape(in_info, we_info, pad_stride_info, depth_multiplier);
+
+        weights_shape.set(2, out_shape.z());
         const TensorShape biases_shape(weights_shape[2]);
-        const DataType    bias_data_type = is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type;
 
         if(data_layout == DataLayout::NHWC)
         {
@@ -66,8 +77,8 @@ public:
             permute(out_shape, PermutationVector(2U, 0U, 1U));
         }
 
-        _target    = compute_target(in_shape, weights_shape, biases_shape, out_shape, pad_stride_info, data_type, bias_data_type, quantization_info, data_layout);
-        _reference = compute_reference(in_shape, weights_shape, biases_shape, out_shape, pad_stride_info, data_type, bias_data_type, quantization_info, data_layout);
+        _target    = compute_target(in_shape, weights_shape, biases_shape, out_shape, pad_stride_info, depth_multiplier, data_type, bias_data_type, quantization_info, data_layout);
+        _reference = compute_reference(in_shape, weights_shape, biases_shape, out_shape, pad_stride_info, depth_multiplier, data_type, bias_data_type, quantization_info, data_layout);
     }
 
 protected:
@@ -101,6 +112,7 @@ protected:
     }
 
     TensorType compute_target(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &biases_shape, const TensorShape &output_shape, PadStrideInfo &pad_stride_info,
+                              unsigned int   depth_multiplier,
                               const DataType data_type, const DataType bias_data_type, const QuantizationInfo quantization_info, const DataLayout data_layout)
     {
         // Create tensors
@@ -111,7 +123,7 @@ protected:
 
         // Create Depthwise Convolution configure function
         FunctionType dwc;
-        dwc.configure(&src, &weights, &biases, &dst, pad_stride_info);
+        dwc.configure(&src, &weights, &biases, &dst, pad_stride_info, depth_multiplier);
 
         ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
         ARM_COMPUTE_EXPECT(weights.info()->is_resizable(), framework::LogLevel::ERRORS);
@@ -141,6 +153,7 @@ protected:
     }
 
     SimpleTensor<T> compute_reference(const TensorShape &in_shape, const TensorShape &weights_shape, const TensorShape &biases_shape, const TensorShape &out_shape, const PadStrideInfo &pad_stride_info,
+                                      unsigned int   depth_multiplier,
                                       const DataType data_type, const DataType bias_data_type, const QuantizationInfo quantization_info, const DataLayout data_layout)
     {
         SimpleTensor<T>     src{ in_shape, data_type, 1, 0, quantization_info, data_layout };
@@ -151,7 +164,7 @@ protected:
         fill(weights, 1);
         fill(biases, 2);
 
-        return reference::depthwise_convolution(src, weights, biases, out_shape, pad_stride_info);
+        return reference::depthwise_convolution(src, weights, biases, out_shape, pad_stride_info, depth_multiplier);
     }
 
     TensorType       _target{};
@@ -165,9 +178,9 @@ class DepthwiseConvolutionLayerValidationFixture : public DepthwiseConvolutionLa
 {
 public:
     template <typename...>
-    void setup(TensorShape in_shape, TensorShape weights_shape, TensorShape out_shape, PadStrideInfo pad_stride_info, DataType data_type, DataLayout data_layout)
+    void setup(TensorShape in_shape, Size2D kernel_size, PadStrideInfo pad_stride_info, unsigned int depth_multiplier, DataType data_type, DataLayout data_layout)
     {
-        DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(in_shape, weights_shape, out_shape, pad_stride_info,
+        DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(in_shape, kernel_size, pad_stride_info, depth_multiplier,
                                                                                                             data_type, QuantizationInfo(), data_layout);
     }
 };
@@ -177,9 +190,9 @@ class DepthwiseConvolutionLayerValidationQuantizedFixture : public DepthwiseConv
 {
 public:
     template <typename...>
-    void setup(TensorShape in_shape, TensorShape weights_shape, TensorShape out_shape, PadStrideInfo pad_stride_info, DataType data_type, QuantizationInfo quantization_info, DataLayout data_layout)
+    void setup(TensorShape in_shape, Size2D kernel_size, PadStrideInfo pad_stride_info, unsigned int depth_multiplier, DataType data_type, QuantizationInfo quantization_info, DataLayout data_layout)
     {
-        DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(in_shape, weights_shape, out_shape, pad_stride_info,
+        DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(in_shape, kernel_size, pad_stride_info, depth_multiplier,
                                                                                                             data_type, quantization_info, data_layout);
     }
 };
diff --git a/tests/validation/reference/DepthwiseConvolutionLayer.cpp b/tests/validation/reference/DepthwiseConvolutionLayer.cpp
index d05da9140b..207e5fc45c 100644
--- a/tests/validation/reference/DepthwiseConvolutionLayer.cpp
+++ b/tests/validation/reference/DepthwiseConvolutionLayer.cpp
@@ -51,7 +51,8 @@ namespace reference
  *
  */
 template <typename T, typename TB>
-void depthwise_convolution_nchw(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &biases, SimpleTensor<T> &dst, const PadStrideInfo &conv_info)
+void depthwise_convolution_nchw(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &biases, SimpleTensor<T> &dst, const PadStrideInfo &conv_info,
+                                unsigned int depth_multiplier)
 {
     // Compute reference
     const int filter_width  = weights.shape().x();
@@ -75,40 +76,47 @@ void depthwise_convolution_nchw(const SimpleTensor<T> &src, const SimpleTensor<T
     const int maximum_x = input_width + pad_left - filter_half_width + pad_right - filter_half_width;
     const int maximum_y = input_height + pad_top - filter_half_height + pad_bottom - filter_half_height;
 
+    const T border_value(0);
+
     int out_pos = 0;
     for(int r = 0; r < num_batches; ++r)
     {
         for(int z = 0; z < input_depth; ++z)
         {
-            for(int y = minimum_y; y < minimum_y + maximum_y; y += conv_info.stride().second)
+            for(unsigned int m = 0; m < depth_multiplier; ++m)
             {
-                for(int x = minimum_x; x < minimum_x + maximum_x; x += conv_info.stride().first)
-                {
-                    Coordinates coords(static_cast<int>(x), static_cast<int>(y), static_cast<int>(z), static_cast<int>(r));
-                    size_t      filter_offset = filter_plane * z;
+                const int out_z = z * depth_multiplier + m;
 
-                    T val(0);
-                    for(int j = y - filter_half_height; j <= static_cast<int>(y + filter_half_height); ++j)
+                for(int y = minimum_y; y < minimum_y + maximum_y; y += conv_info.stride().second)
+                {
+                    for(int x = minimum_x; x < minimum_x + maximum_x; x += conv_info.stride().first)
                     {
-                        for(int i = x - filter_half_width; i <= static_cast<int>(x + filter_half_width); ++i)
+                        Coordinates coords(static_cast<int>(x), static_cast<int>(y), static_cast<int>(z), static_cast<int>(r));
+                        size_t      filter_offset = filter_plane * out_z;
+
+                        T val(0);
+                        for(int j = y - filter_half_height; j <= static_cast<int>(y + filter_half_height); ++j)
                         {
-                            coords.set(0, i);
-                            coords.set(1, j);
-                            T border_value(0);
-                            val += *(weights.data() + filter_offset) * tensor_elem_at(src, coords, BorderMode::CONSTANT, border_value);
-                            ++filter_offset;
+                            for(int i = x - filter_half_width; i <= static_cast<int>(x + filter_half_width); ++i)
+                            {
+                                coords.set(0, i);
+                                coords.set(1, j);
+
+                                val += *(weights.data() + filter_offset) * tensor_elem_at(src, coords, BorderMode::CONSTANT, border_value);
+                                ++filter_offset;
+                            }
                         }
+
+                        dst[out_pos++] = saturate_cast<T>(val + *static_cast<const TB *>(biases(Coordinates(out_z))));
                     }
-                    coords.set(0, x);
-                    coords.set(1, y);
-                    dst[out_pos++] = saturate_cast<T>(val + *static_cast<const TB *>(biases(Coordinates(z))));
                 }
             }
         }
     }
 }
 
-void depthwise_convolution_nchw(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint8_t> &weights, const SimpleTensor<int32_t> &biases, SimpleTensor<uint8_t> &dst, const PadStrideInfo &conv_info)
+void depthwise_convolution_nchw(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint8_t> &weights, const SimpleTensor<int32_t> &biases, SimpleTensor<uint8_t> &dst, const PadStrideInfo &conv_info,
+                                unsigned int depth_multiplier)
 {
     // Create reference
     const int   input_offset   = -src.quantization_info().offset;
@@ -150,89 +158,76 @@ void depthwise_convolution_nchw(const SimpleTensor<uint8_t> &src, const SimpleTe
     {
         for(int z = 0; z < input_depth; ++z)
         {
-            int32_t bias_val = *static_cast<const int32_t *>(biases(Coordinates(z)));
-            for(int y = minimum_y; y < minimum_y + maximum_y; y += conv_info.stride().second)
+            for(unsigned int m = 0; m < depth_multiplier; ++m)
             {
-                for(int x = minimum_x; x < minimum_x + maximum_x; x += conv_info.stride().first)
-                {
-                    Coordinates coords(x, y, z, r);
-                    int         filter_offset = filter_plane * z;
+                const int     out_z    = z * depth_multiplier + m;
+                const int32_t bias_val = *static_cast<const int32_t *>(biases(Coordinates(out_z)));
 
-                    int32_t val = 0;
-                    for(int j = y - filter_half_height; j <= (y + filter_half_height); ++j)
+                for(int y = minimum_y; y < minimum_y + maximum_y; y += conv_info.stride().second)
+                {
+                    for(int x = minimum_x; x < minimum_x + maximum_x; x += conv_info.stride().first)
                     {
-                        for(int i = x - filter_half_width; i <= (x + filter_half_width); ++i)
+                        Coordinates coords(x, y, z, r);
+                        int         filter_offset = filter_plane * out_z;
+
+                        int32_t val = 0;
+                        for(int j = y - filter_half_height; j <= (y + filter_half_height); ++j)
                         {
-                            coords.set(0, i);
-                            coords.set(1, j);
-                            const auto    in_val = tensor_elem_at<uint8_t>(src, coords, BorderMode::CONSTANT, -input_offset);
-                            const uint8_t w_val  = *(weights.data() + filter_offset);
-                            val += (in_val + input_offset) * (w_val + weights_offset);
-                            ++filter_offset;
+                            for(int i = x - filter_half_width; i <= (x + filter_half_width); ++i)
+                            {
+                                coords.set(0, i);
+                                coords.set(1, j);
+                                const auto    in_val = tensor_elem_at<uint8_t>(src, coords, BorderMode::CONSTANT, -input_offset);
+                                const uint8_t w_val  = *(weights.data() + filter_offset);
+                                val += (in_val + input_offset) * (w_val + weights_offset);
+                                ++filter_offset;
+                            }
                         }
+                        val += bias_val;
+                        val = asymm_rounding_divide_by_pow2(asymm_int_mult(val, output_multiplier), output_shift);
+                        val += output_offset;
+                        val = std::max<int32_t>(val, 0);
+                        val = std::min<int32_t>(val, 255);
+
+                        // Store the result
+                        dst[out_pos++] = val;
                     }
-                    val += bias_val;
-                    val = asymm_rounding_divide_by_pow2(asymm_int_mult(val, output_multiplier), output_shift);
-                    val += output_offset;
-                    val = std::max<int32_t>(val, 0);
-                    val = std::min<int32_t>(val, 255);
-
-                    // Store the result
-                    dst[out_pos++] = val;
                 }
             }
         }
     }
 }
 
-template <>
-SimpleTensor<uint8_t> depthwise_convolution(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint8_t> &weights, const SimpleTensor<int32_t> &biases, const TensorShape &dst_shape,
-                                            const PadStrideInfo &conv_info)
-{
-    SimpleTensor<uint8_t> dst{ dst_shape, src.data_type(), 1, src.fixed_point_position(), src.quantization_info() };
-
-    if(src.data_layout() == DataLayout::NHWC)
-    {
-        SimpleTensor<uint8_t> src_nchw     = reference::permute<uint8_t>(src, PermutationVector(1U, 2U, 0U));
-        SimpleTensor<uint8_t> weights_nchw = reference::permute<uint8_t>(weights, PermutationVector(1U, 2U, 0U));
-        SimpleTensor<uint8_t> dst_nchw     = reference::permute<uint8_t>(dst, PermutationVector(1U, 2U, 0U));
-
-        depthwise_convolution_nchw(src_nchw, weights_nchw, biases, dst_nchw, conv_info);
-
-        return reference::permute<uint8_t>(dst_nchw, PermutationVector(2U, 0U, 1U));
-    }
-
-    depthwise_convolution_nchw(src, weights, biases, dst, conv_info);
-
-    return dst;
-}
-
 template <typename T, typename TB>
-SimpleTensor<T> depthwise_convolution(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &biases, const TensorShape &dst_shape, const PadStrideInfo &conv_info)
+SimpleTensor<T> depthwise_convolution(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &biases, const TensorShape &dst_shape, const PadStrideInfo &conv_info,
+                                      unsigned int depth_multiplier)
 {
-    SimpleTensor<T> dst{ dst_shape, src.data_type(), 1, src.fixed_point_position() };
+    SimpleTensor<T> dst{ dst_shape, src.data_type(), 1, src.fixed_point_position(), src.quantization_info() };
 
-    if(src.data_layout() == DataLayout::NHWC && src.data_type() == DataType::F32)
+    if(src.data_layout() == DataLayout::NHWC)
     {
         SimpleTensor<T> src_nchw     = reference::permute<T>(src, PermutationVector(1U, 2U, 0U));
         SimpleTensor<T> weights_nchw = reference::permute<T>(weights, PermutationVector(1U, 2U, 0U));
         SimpleTensor<T> dst_nchw     = reference::permute<T>(dst, PermutationVector(1U, 2U, 0U));
 
-        depthwise_convolution_nchw<T, TB>(src_nchw, weights_nchw, biases, dst_nchw, conv_info);
+        depthwise_convolution_nchw(src_nchw, weights_nchw, biases, dst_nchw, conv_info, depth_multiplier);
 
         return reference::permute<T>(dst_nchw, PermutationVector(2U, 0U, 1U));
     }
 
-    depthwise_convolution_nchw<T, TB>(src, weights, biases, dst, conv_info);
+    depthwise_convolution_nchw(src, weights, biases, dst, conv_info, depth_multiplier);
 
     return dst;
 }
 
+template SimpleTensor<uint8_t> depthwise_convolution(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint8_t> &weights, const SimpleTensor<int32_t> &biases, const TensorShape &dst_shape,
+                                                     const PadStrideInfo &conv_info, unsigned int depth_multiplier);
+
 template SimpleTensor<float> depthwise_convolution(const SimpleTensor<float> &src, const SimpleTensor<float> &weights, const SimpleTensor<float> &biases, const TensorShape &dst_shape,
-                                                   const PadStrideInfo &conv_info);
+                                                   const PadStrideInfo &conv_info, unsigned int depth_multiplier);
 
 template SimpleTensor<half> depthwise_convolution(const SimpleTensor<half> &src, const SimpleTensor<half> &weights, const SimpleTensor<half> &biases, const TensorShape &dst_shape,
-                                                  const PadStrideInfo &conv_info);
+                                                  const PadStrideInfo &conv_info, unsigned int depth_multiplier);
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/reference/DepthwiseConvolutionLayer.h b/tests/validation/reference/DepthwiseConvolutionLayer.h
index df743a5b8e..bab338723d 100644
--- a/tests/validation/reference/DepthwiseConvolutionLayer.h
+++ b/tests/validation/reference/DepthwiseConvolutionLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -36,7 +36,8 @@ namespace validation
 namespace reference
 {
 template <typename T, typename TB>
-SimpleTensor<T> depthwise_convolution(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &biases, const TensorShape &dst_shape, const PadStrideInfo &conv_info);
+SimpleTensor<T> depthwise_convolution(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &biases, const TensorShape &dst_shape, const PadStrideInfo &conv_info,
+                                      unsigned int depth_multiplier);
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/reference/DepthwiseSeparableConvolutionLayer.cpp b/tests/validation/reference/DepthwiseSeparableConvolutionLayer.cpp
index ca6c168114..8bc6ddb696 100644
--- a/tests/validation/reference/DepthwiseSeparableConvolutionLayer.cpp
+++ b/tests/validation/reference/DepthwiseSeparableConvolutionLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -46,7 +46,7 @@ SimpleTensor<T> depthwise_separable_convolution_layer(const SimpleTensor<T> &src
                                                       const SimpleTensor<T> &pointwise_biases, const TensorShape &dst_shape, const PadStrideInfo &depthwise_conv_info, const PadStrideInfo &pointwise_conv_info)
 {
     // Compute reference
-    SimpleTensor<T> depthwise_out = depthwise_convolution(src, depthwise_weights, depthwise_biases, depthwise_out_shape, depthwise_conv_info);
+    SimpleTensor<T> depthwise_out = depthwise_convolution(src, depthwise_weights, depthwise_biases, depthwise_out_shape, depthwise_conv_info, 1);
     SimpleTensor<T> dst           = convolution_layer(depthwise_out, pointwise_weights, pointwise_biases, dst_shape, pointwise_conv_info);
 
     return dst;
author	Giorgio Arena <giorgio.arena@arm.com>	2018-04-04 17:44:26 +0100
committer	Anthony Barbier <anthony.barbier@arm.com>	2018-11-02 16:50:48 +0000
commit	7657224de2b697a8a92cccf26d98e53ccd7c1a03 (patch)
tree	1dcfa4541dbaf753854a628c93991652158d373e /tests/validation
parent	e74b201ca1abca040ca9f30837fdf19aa610e7c4 (diff)
download	ComputeLibrary-7657224de2b697a8a92cccf26d98e53ccd7c1a03.tar.gz