From f391fff0336ae84387dd3ebc683ef85649de9eb5 Mon Sep 17 00:00:00 2001
From: Manuel Bottini <manuel.bottini@arm.com>
Date: Wed, 15 May 2019 13:01:26 +0100
Subject: COMPMID-2302 NEDeconvolution: support for FP16

Change-Id: I9fef05abdcafbc97607613a88f7997dd012e0d80
Signed-off-by: Manuel Bottini <manuel.bottini@arm.com>
Reviewed-on: https://review.mlplatform.org/c/1142
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Giuseppe Rossini <giuseppe.rossini@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
---
 arm_compute/core/CPP/kernels/CPPUpsampleKernel.h   |  2 +-
 arm_compute/runtime/CPP/functions/CPPUpsample.h    |  4 +-
 .../runtime/NEON/functions/NEDeconvolutionLayer.h  | 16 ++++----
 .../NEON/functions/NEDeconvolutionLayer.cpp        |  8 ++--
 tests/validation/NEON/DeconvolutionLayer.cpp       | 48 ++++++++++++++++++----
 5 files changed, 57 insertions(+), 21 deletions(-)

diff --git a/arm_compute/core/CPP/kernels/CPPUpsampleKernel.h b/arm_compute/core/CPP/kernels/CPPUpsampleKernel.h
index 4e61356760..fedbb54d35 100644
--- a/arm_compute/core/CPP/kernels/CPPUpsampleKernel.h
+++ b/arm_compute/core/CPP/kernels/CPPUpsampleKernel.h
@@ -55,7 +55,7 @@ public:
 
     /** Set the input and output of the kernel.
      *
-     * @param[in]  input              The input tensor to upsample. Data types supported: F32/QASYMM8
+     * @param[in]  input              The input tensor to upsample. Data types supported: F32/F16/QASYMM8
      * @param[out] output             The output tensor. Data types supported: Same as @p input
      * @param[in]  info               Padding info.
      * @param[in]  inner_border_right The number of zeros added to right edge of the input.
diff --git a/arm_compute/runtime/CPP/functions/CPPUpsample.h b/arm_compute/runtime/CPP/functions/CPPUpsample.h
index 06df866349..fd7d9c24bf 100644
--- a/arm_compute/runtime/CPP/functions/CPPUpsample.h
+++ b/arm_compute/runtime/CPP/functions/CPPUpsample.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -38,7 +38,7 @@ class CPPUpsample : public ICPPSimpleFunction
 public:
     /** Configure the upsample CPP kernel
      *
-     * @param[in]  input              The input tensor to upsample. Data types supported: F32
+     * @param[in]  input              The input tensor to upsample. Data types supported: F32/F16/QASYMM8
      * @param[out] output             The output tensor. Data types supported: Same as @p input
      * @param[in]  info               Padding information
      * @param[in]  inner_border_right The number of zeros added to right edge of the input.
diff --git a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
index 25512fa147..4eb684b9aa 100644
--- a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
@@ -90,9 +90,9 @@ public:
      *
      * @note This method will be deprecated in the next release.
      *
-     * @param[in,out] input              Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32/QASYMM8.
+     * @param[in,out] input              Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8.
      * @param[in]     weights            The 4d weights with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input.
-     * @param[in]     bias               Optional, ignored if NULL. The biases have one dimension. Data type supported: Data types supported: S32 for QASYMM8 input, F32 for F32 input.
+     * @param[in]     bias               Optional, ignored if NULL. The biases have one dimension. Data types supported: S32 for QASYMM8 input, F32 for F32 input, F16 for F16 input.
      * @param[out]    output             Output tensor. The output has the same number of dimensions as the @p input.
      * @param[in]     info               Contains padding and policies to be used in the deconvolution, this is decribed in @ref PadStrideInfo.
      * @param[in]     inner_border_right The number of zeros added to right edge of the input.
@@ -105,9 +105,9 @@ public:
      *
      * @note This method will be deprecated in the next release.
      *
-     * @param[in] input              Input tensor info. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32/QASYMM8.
+     * @param[in] input              Input tensor info. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8.
      * @param[in] weights            The 4d weights info with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input.
-     * @param[in] bias               (Optional) The biases have one dimension. Data type supported: Data types supported: S32 for QASYMM8 input, F32 for F32 input.
+     * @param[in] bias               (Optional) The biases have one dimension. Data types supported: S32 for QASYMM8 input, F32 for F32 input, F16 for F16 input.
      * @param[in] output             Output tensor info. The output has the same number of dimensions as the @p input.
      * @param[in] info               Contains padding and policies to be used in the deconvolution, this is decribed in @ref PadStrideInfo.
      * @param[in] inner_border_right The number of zeros added to right edge of the input.
@@ -120,9 +120,9 @@ public:
 
     /** Set the input, weights, biases and output tensors.
      *
-     * @param[in,out] input   Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32/QASYMM8.
+     * @param[in,out] input   Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8.
      * @param[in]     weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input.
-     * @param[in]     bias    Optional, ignored if NULL. The biases have one dimension. Data type supported: Data types supported: S32 for QASYMM8 input, F32 for F32 input.
+     * @param[in]     bias    Optional, ignored if NULL. The biases have one dimension. Data types supported: S32 for QASYMM8 input, F32 for F32 input, F16 for F16 input.
      * @param[out]    output  Output tensor. The output has the same number of dimensions as the @p input.
      * @param[in]     info    Contains padding and policies to be used in the deconvolution, this is decribed in @ref PadStrideInfo.
      *
@@ -130,9 +130,9 @@ public:
     void configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output, const PadStrideInfo &info);
     /** Static function to check if given info will lead to a valid configuration of @ref NEDeconvolutionLayer
      *
-     * @param[in] input   Input tensor info. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32/QASYMM8.
+     * @param[in] input   Input tensor info. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8.
      * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input.
-     * @param[in] bias    (Optional) The biases have one dimension. Data type supported: Data types supported: S32 for QASYMM8 input, F32 for F32 input.
+     * @param[in] bias    (Optional) The biases have one dimension. Data types supported: S32 for QASYMM8 input, F32 for F32 input, F16 for F16 input.
      * @param[in] output  Output tensor info. The output has the same number of dimensions as the @p input.
      * @param[in] info    Contains padding and policies to be used in the deconvolution, this is decribed in @ref PadStrideInfo.
      *
diff --git a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
index aff335e5e3..c3d6b94d8d 100644
--- a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
@@ -29,9 +29,10 @@
 #include "arm_compute/core/utils/misc/ShapeCalculator.h"
 #include "arm_compute/runtime/NEON/NEScheduler.h"
 
-using namespace arm_compute;
 using namespace arm_compute::misc::shape_calculator;
 
+namespace arm_compute
+{
 NEDeconvolutionLayer::NEDeconvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
     : _memory_group(std::move(memory_manager)),
       _conv_f(),
@@ -51,8 +52,8 @@ Status NEDeconvolutionLayer::validate(const ITensorInfo *input, const ITensorInf
                                       unsigned int inner_border_right, unsigned int inner_border_top)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::QASYMM8);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::F32, DataType::QASYMM8);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::F16, DataType::QASYMM8);
+    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(weights, input);
     ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(0) != weights->dimension(1));
     ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(0) < 1);
     ARM_COMPUTE_RETURN_ERROR_ON(!info.padding_is_symmetric());
@@ -192,3 +193,4 @@ void NEDeconvolutionLayer::prepare()
         _is_prepared = true;
     }
 }
+} // namespace arm_compute
\ No newline at end of file
diff --git a/tests/validation/NEON/DeconvolutionLayer.cpp b/tests/validation/NEON/DeconvolutionLayer.cpp
index fc37c02279..8860a9f974 100644
--- a/tests/validation/NEON/DeconvolutionLayer.cpp
+++ b/tests/validation/NEON/DeconvolutionLayer.cpp
@@ -45,7 +45,10 @@ namespace
 {
 constexpr AbsoluteTolerance<float> tolerance_fp32(0.001f); /**< Tolerance for floating point tests */
 constexpr AbsoluteTolerance<float> tolerance_qasymm8(0.0); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */
-constexpr float                    tolerance_num = 0.07f;  /**< Tolerance number */
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+const RelativeTolerance<half_float::half> tolerance_fp16(half_float::half(0.2f));    /**< Relative tolerance value for comparing reference's output against implementation's output for DataType::F16 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+constexpr float tolerance_num = 0.07f;  /**< Tolerance number */
 
 const auto data4x4 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 3)
                      * framework::dataset::make("PadY", 0, 3) * framework::dataset::make("NumKernels", { 3 });
@@ -175,10 +178,8 @@ template <typename T>
 using NEDeconvolutionLayerFixture1x1 = DeconvolutionValidationFixture<Tensor, Accessor, NEDeconvolutionLayer, T, 1, 1>;
 
 TEST_SUITE(Float)
-
 TEST_SUITE(FP32)
 TEST_SUITE(W4x4)
-
 FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerFixture4x4<float>, framework::DatasetMode::NIGHTLY, combine(combine(data4x4, framework::dataset::make("DataType", DataType::F32)),
                                                                                                             data_layouts_dataset))
 {
@@ -186,9 +187,7 @@ FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerFixture4x4<float>, framework::Da
     validate(Accessor(_target), _reference, tolerance_fp32);
 }
 TEST_SUITE_END() // W4x4
-
 TEST_SUITE(W3x3)
-
 FIXTURE_DATA_TEST_CASE(RunSmall, NEDeconvolutionLayerFixture3x3<float>, framework::DatasetMode::PRECOMMIT, combine(combine(data3x3_precommit, framework::dataset::make("DataType", DataType::F32)),
                                                                                                                    data_layouts_dataset))
 {
@@ -202,7 +201,6 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDeconvolutionLayerFixture3x3<float>, framewor
     validate(Accessor(_target), _reference, tolerance_fp32);
 }
 TEST_SUITE_END() // W3x3
-
 TEST_SUITE(W1x1)
 FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerFixture1x1<float>, framework::DatasetMode::NIGHTLY, combine(combine(data1x1, framework::dataset::make("DataType", DataType::F32)),
                                                                                                             data_layouts_dataset))
@@ -211,8 +209,44 @@ FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerFixture1x1<float>, framework::Da
     validate(Accessor(_target), _reference, tolerance_fp32);
 }
 TEST_SUITE_END() // W1x1
-
 TEST_SUITE_END() // FP32
+
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+TEST_SUITE(FP16)
+TEST_SUITE(W4x4)
+FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerFixture4x4<half>, framework::DatasetMode::NIGHTLY, combine(combine(data4x4, framework::dataset::make("DataType", DataType::F16)),
+                                                                                                            data_layouts_dataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_fp16);
+}
+TEST_SUITE_END() // W4x4
+TEST_SUITE(W3x3)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDeconvolutionLayerFixture3x3<half>, framework::DatasetMode::PRECOMMIT, combine(combine(data3x3_precommit, framework::dataset::make("DataType", DataType::F16)),
+                                                                                                                   data_layouts_dataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_fp16);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDeconvolutionLayerFixture3x3<half>, framework::DatasetMode::NIGHTLY, combine(combine(data3x3, framework::dataset::make("DataType", DataType::F16)),
+                                                                                                                 data_layouts_dataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_fp16);
+}
+TEST_SUITE_END() // W3x3
+TEST_SUITE(W1x1)
+FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerFixture1x1<half>, framework::DatasetMode::NIGHTLY, combine(combine(data1x1, framework::dataset::make("DataType", DataType::F16)),
+                                                                                                            data_layouts_dataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_fp16);
+}
+TEST_SUITE_END() // W1x1
+TEST_SUITE_END() // FP16
+#endif           /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+
+
 TEST_SUITE_END() // Float
 
 template <typename T>
-- 
cgit v1.2.1