From 6e10aa395e81b83edb3437191acd7abe1639c7dc Mon Sep 17 00:00:00 2001
From: Manuel Bottini <manuel.bottini@arm.com>
Date: Thu, 30 Apr 2020 13:28:23 +0100
Subject: COMPMID-3316: NEDeconvolutionLayer failing for a big input

- Using NEDirectConvolution for big shapes since the memory
  required explodes for 9x9 kernel
- Adding test cases
- Fix enables only the NEON Deconvolution for NHWC

Change-Id: I8a541346428e5686818f8ecb7f69e2a9106cbceb
Signed-off-by: Manuel Bottini <manuel.bottini@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3135
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Sang-Hoon Park <sang-hoon.park@arm.com>
---
 tests/validation/CL/DeconvolutionLayer.cpp        | 24 +++++++++++++++++-
 tests/validation/NEON/DeconvolutionLayer.cpp      | 31 +++++++++++++++++++++++
 tests/validation/reference/DeconvolutionLayer.cpp |  2 +-
 3 files changed, 55 insertions(+), 2 deletions(-)

(limited to 'tests')
diff --git a/tests/validation/CL/DeconvolutionLayer.cpp b/tests/validation/CL/DeconvolutionLayer.cpp
index e7ba930ebe..dd92887ae8 100644
--- a/tests/validation/CL/DeconvolutionLayer.cpp
+++ b/tests/validation/CL/DeconvolutionLayer.cpp
@@ -49,6 +49,16 @@ RelativeTolerance<half_float::half> tolerance_f16(half_float::half(0.2)); /**< T
 constexpr AbsoluteTolerance<float>  tolerance_qasymm8(1.0);               /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */
 constexpr float                     tolerance_num = 0.07f;                /**< Tolerance number */
 
+const auto data9x9_small_asymm = framework::dataset::make("InputShape", TensorShape{ 10U, 10U, 1U, 1U }) *framework::dataset::make("StrideX", 2) *framework::dataset::make("StrideY",
+                                 2)
+                                 *framework::dataset::make("PadLeft", 3)
+                                 *framework::dataset::make("PadRight", 4) *framework::dataset::make("PadTop", 3) *framework::dataset::make("PadBottom", 4) *framework::dataset::make("NumKernels", { 1 });
+
+const auto data9x9_large_asymm = framework::dataset::make("InputShape", TensorShape{ 640U, 360U, 56U, 1U }) *framework::dataset::make("StrideX", 2) *framework::dataset::make("StrideY",
+                                 2)
+                                 *framework::dataset::make("PadLeft", 3)
+                                 *framework::dataset::make("PadRight", 4) *framework::dataset::make("PadTop", 3) *framework::dataset::make("PadBottom", 4) *framework::dataset::make("NumKernels", { 1 });
+
 const auto data4x4 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 3)
                      * framework::dataset::make("PadY", 0, 3) * framework::dataset::make("NumKernels", { 3 });
 
@@ -137,6 +147,9 @@ using CLDeconvolutionLayerFixture2x2 = DeconvolutionValidationFixture<CLTensor,
 template <typename T>
 using CLDeconvolutionLayerFixture1x1 = DeconvolutionValidationFixture<CLTensor, CLAccessor, CLDeconvolutionLayer, T, 1, 1>;
 
+template <typename T>
+using CLDeconvolutionLayerAsymmFixture9x9 = DeconvolutionValidationAsymmFixture<CLTensor, CLAccessor, CLDeconvolutionLayer, T, 9, 9>;
+
 TEST_SUITE(Float)
 TEST_SUITE(FP32)
 
@@ -196,7 +209,16 @@ FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture1x1<float>, framework::Da
     validate(CLAccessor(_target), _reference, tolerance_fp32);
 }
 TEST_SUITE_END() // W1x1
-
+TEST_SUITE(W9x9)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDeconvolutionLayerAsymmFixture9x9<float>, framework::DatasetMode::ALL, combine(combine(combine(data9x9_small_asymm, framework::dataset::make("DataType",
+                                                                                                                  DataType::F32)),
+                                                                                                                  framework::dataset::make("DataLayout", { DataLayout::NHWC })),
+                                                                                                                  framework::dataset::make("AddBias", { false })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_fp32);
+}
+TEST_SUITE_END() // W9x9
 TEST_SUITE_END() // FP32
 
 TEST_SUITE(FP16)
diff --git a/tests/validation/NEON/DeconvolutionLayer.cpp b/tests/validation/NEON/DeconvolutionLayer.cpp
index 38256eb2ad..d888d7b838 100644
--- a/tests/validation/NEON/DeconvolutionLayer.cpp
+++ b/tests/validation/NEON/DeconvolutionLayer.cpp
@@ -59,6 +59,16 @@ const auto data3x3 = datasets::SmallDeconvolutionShapes() * framework::dataset::
 const auto data3x3_asymm = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 2) * framework::dataset::make("StrideY", 1, 2) * framework::dataset::make("PadLeft", 0, 1)
                            * framework::dataset::make("PadRight", 0, 1) * framework::dataset::make("PadTop", 0, 1) * framework::dataset::make("PadBottom", 0, 1) * framework::dataset::make("NumKernels", { 3 });
 
+const auto data9x9_small_asymm = framework::dataset::make("InputShape", TensorShape{ 10U, 10U, 1U, 1U }) *framework::dataset::make("StrideX", 2) *framework::dataset::make("StrideY",
+                                 2)
+                                 *framework::dataset::make("PadLeft", 3)
+                                 *framework::dataset::make("PadRight", 4) *framework::dataset::make("PadTop", 3) *framework::dataset::make("PadBottom", 4) *framework::dataset::make("NumKernels", { 1 });
+
+const auto data9x9_large_asymm = framework::dataset::make("InputShape", TensorShape{ 640U, 360U, 56U, 1U }) *framework::dataset::make("StrideX", 2) *framework::dataset::make("StrideY",
+                                 2)
+                                 *framework::dataset::make("PadLeft", 3)
+                                 *framework::dataset::make("PadRight", 4) *framework::dataset::make("PadTop", 3) *framework::dataset::make("PadBottom", 4) *framework::dataset::make("NumKernels", { 1 });
+
 const auto data3x3_precommit = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 2) * framework::dataset::make("StrideY", 1, 2) * framework::dataset::make("PadX", 0, 2)
                                * framework::dataset::make("PadY", 0, 2) * framework::dataset::make("NumKernels", { 3 });
 
@@ -141,6 +151,9 @@ using NEDeconvolutionLayerFixture3x3 = DeconvolutionValidationFixture<Tensor, Ac
 template <typename T>
 using NEDeconvolutionLayerAsymmFixture3x3 = DeconvolutionValidationAsymmFixture<Tensor, Accessor, NEDeconvolutionLayer, T, 3, 3>;
 
+template <typename T>
+using NEDeconvolutionLayerAsymmFixture9x9 = DeconvolutionValidationAsymmFixture<Tensor, Accessor, NEDeconvolutionLayer, T, 9, 9>;
+
 template <typename T>
 using NEDeconvolutionLayerFixture1x1 = DeconvolutionValidationFixture<Tensor, Accessor, NEDeconvolutionLayer, T, 1, 1>;
 
@@ -189,6 +202,24 @@ FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerFixture1x1<float>, framework::Da
     validate(Accessor(_target), _reference, tolerance_fp32);
 }
 TEST_SUITE_END() // W1x1
+TEST_SUITE(W9x9)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDeconvolutionLayerAsymmFixture9x9<float>, framework::DatasetMode::ALL, combine(combine(combine(data9x9_small_asymm, framework::dataset::make("DataType",
+                                                                                                                  DataType::F32)),
+                                                                                                                  framework::dataset::make("DataLayout", { DataLayout::NHWC })),
+                                                                                                                  framework::dataset::make("AddBias", { false })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_fp32);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDeconvolutionLayerAsymmFixture9x9<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data9x9_large_asymm, framework::dataset::make("DataType",
+                                                                                                                      DataType::F32)),
+                                                                                                                      framework::dataset::make("DataLayout", { DataLayout::NHWC })),
+                                                                                                                      framework::dataset::make("AddBias", { false })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_fp32);
+}
+TEST_SUITE_END() // W9x9
 TEST_SUITE_END() // FP32
 
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
diff --git a/tests/validation/reference/DeconvolutionLayer.cpp b/tests/validation/reference/DeconvolutionLayer.cpp
index 01b9c1c403..3cfbfae163 100644
--- a/tests/validation/reference/DeconvolutionLayer.cpp
+++ b/tests/validation/reference/DeconvolutionLayer.cpp
@@ -152,4 +152,4 @@ template SimpleTensor<half> deconvolution_layer(const SimpleTensor<half> &src, c
 } // namespace reference
 } // namespace validation
 } // namespace test
-} // namespace arm_compute
+} // namespace arm_compute
\ No newline at end of file
-- 
cgit v1.2.1