From 2899e00a6fa57242a9bcae1d08a9a7e1e80f14e7 Mon Sep 17 00:00:00 2001
From: Usama Arif
Date: Tue, 16 Apr 2019 14:32:25 +0100
Subject: COMPMID-2049: Add support for deconvolution for qasymm8 on NEON

Change-Id: I02890c7542f6036edad9cbba9fdcf2312c70070a
Signed-off-by: Usama Arif
Reviewed-on: https://review.mlplatform.org/c/1000
Comments-Addressed: Arm Jenkins
Tested-by: Arm Jenkins
Reviewed-by: Georgios Pinitas
---
 arm_compute/core/CPP/kernels/CPPUpsampleKernel.h   |  4 +-
 .../runtime/NEON/functions/NEDeconvolutionLayer.h  | 16 +++---
 src/core/CPP/kernels/CPPUpsampleKernel.cpp         | 21 ++++----
 .../NEON/functions/NEDeconvolutionLayer.cpp        | 17 ++++---
 tests/validation/NEON/DeconvolutionLayer.cpp       | 58 ++++++++++++++++++++++
 5 files changed, 90 insertions(+), 26 deletions(-)

diff --git a/arm_compute/core/CPP/kernels/CPPUpsampleKernel.h b/arm_compute/core/CPP/kernels/CPPUpsampleKernel.h
index e814c76c7d..4e61356760 100644
--- a/arm_compute/core/CPP/kernels/CPPUpsampleKernel.h
+++ b/arm_compute/core/CPP/kernels/CPPUpsampleKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -55,7 +55,7 @@ public:
 
     /** Set the input and output of the kernel.
      *
-     * @param[in]  input              The input tensor to upsample. Data types supported: F32
+     * @param[in]  input              The input tensor to upsample. Data types supported: F32/QASYMM8
      * @param[out] output             The output tensor. Data types supported: Same as @p input
      * @param[in]  info               Padding info.
      * @param[in]  inner_border_right The number of zeros added to right edge of the input.
diff --git a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
index dad5d81b14..25512fa147 100644
--- a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
@@ -90,9 +90,9 @@ public:
      *
      * @note This method will be deprecated in the next release.
      *
-     * @param[in,out] input              Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32.
+     * @param[in,out] input              Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32/QASYMM8.
      * @param[in]     weights            The 4d weights with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input.
-     * @param[in]     bias               Optional, ignored if NULL. The biases have one dimension. Data type supported: Same as @p input.
+     * @param[in]     bias               Optional, ignored if NULL. The biases have one dimension. Data type supported: S32 for QASYMM8 input, F32 for F32 input.
      * @param[out]    output             Output tensor. The output has the same number of dimensions as the @p input.
      * @param[in]     info               Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo.
      * @param[in]     inner_border_right The number of zeros added to right edge of the input.
@@ -105,9 +105,9 @@ public:
      *
      * @note This method will be deprecated in the next release.
      *
-     * @param[in] input              Input tensor info. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32.
+     * @param[in] input              Input tensor info. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32/QASYMM8.
      * @param[in] weights            The 4d weights info with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input.
-     * @param[in] bias               (Optional) The biases have one dimension. Data type supported: Same as @p input.
+     * @param[in] bias               (Optional) The biases have one dimension. Data type supported: S32 for QASYMM8 input, F32 for F32 input.
      * @param[in] output             Output tensor info. The output has the same number of dimensions as the @p input.
      * @param[in] info               Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo.
      * @param[in] inner_border_right The number of zeros added to right edge of the input.
@@ -120,9 +120,9 @@ public:
 
     /** Set the input, weights, biases and output tensors.
      *
-     * @param[in,out] input   Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32.
+     * @param[in,out] input   Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32/QASYMM8.
      * @param[in]     weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input.
-     * @param[in]     bias    Optional, ignored if NULL. The biases have one dimension. Data type supported: Same as @p input.
+     * @param[in]     bias    Optional, ignored if NULL. The biases have one dimension. Data type supported: S32 for QASYMM8 input, F32 for F32 input.
      * @param[out]    output  Output tensor. The output has the same number of dimensions as the @p input.
      * @param[in]     info    Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo.
      *
@@ -130,9 +130,9 @@ public:
     void configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output, const PadStrideInfo &info);
     /** Static function to check if given info will lead to a valid configuration of @ref NEDeconvolutionLayer
      *
-     * @param[in] input   Input tensor info. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32.
+     * @param[in] input   Input tensor info. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32/QASYMM8.
      * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input.
-     * @param[in] bias    (Optional) The biases have one dimension. Data type supported: Same as @p input.
+     * @param[in] bias    (Optional) The biases have one dimension. Data type supported: S32 for QASYMM8 input, F32 for F32 input.
      * @param[in] output  Output tensor info. The output has the same number of dimensions as the @p input.
      * @param[in] info    Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo.
      *
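Taken together, the interface changes above define the quantized contract: QASYMM8 input and weights, an S32 bias, and a QASYMM8 output. A minimal usage sketch of that contract follows; it is illustrative only and not part of the patch, and every shape, quantization parameter and padding value in it is an assumption:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void qasymm8_deconvolution_example()
    {
        // QASYMM8 input/weights/output with an S32 bias, per the documented contract.
        Tensor input, weights, bias, output;
        input.allocator()->init(TensorInfo(TensorShape(4U, 4U, 3U), 1, DataType::QASYMM8, QuantizationInfo(1.f / 255.f, 10)));
        weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 3U, 16U), 1, DataType::QASYMM8, QuantizationInfo(2.f / 255.f, 5)));
        bias.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::S32));
        // Stride 1 and no padding give (4 - 1) * 1 + 3 = 6 per spatial dimension.
        output.allocator()->init(TensorInfo(TensorShape(6U, 6U, 16U), 1, DataType::QASYMM8, QuantizationInfo(4.f / 255.f, 0)));

        NEDeconvolutionLayer deconv;
        deconv.configure(&input, &weights, &bias, &output, PadStrideInfo(1, 1, 0, 0));

        input.allocator()->allocate();
        weights.allocator()->allocate();
        bias.allocator()->allocate();
        output.allocator()->allocate();
        // ... fill input, weights and bias, then:
        deconv.run();
    }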
diff --git a/src/core/CPP/kernels/CPPUpsampleKernel.cpp b/src/core/CPP/kernels/CPPUpsampleKernel.cpp
index d77d9c118f..f04728d30d 100644
--- a/src/core/CPP/kernels/CPPUpsampleKernel.cpp
+++ b/src/core/CPP/kernels/CPPUpsampleKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -73,14 +73,15 @@ void CPPUpsampleKernel::run(const Window &window, const ThreadInfo &info)
     ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICPPKernel::window(), window);
 
     // Initialize _scaled_output buffer
-    const int width_scaled  = _output->info()->dimension(0);
-    const int height_scaled = _output->info()->dimension(1);
-    const int stride_x      = _info.stride().first;
-    const int stride_y      = _info.stride().second;
-    const int start_x       = _info.pad().first;
-    const int start_y       = _inner_border.second + _info.pad().second;
-    const int end_y         = height_scaled - _info.pad().second;
-    const int end_x         = width_scaled - _inner_border.first - _info.pad().first;
+    const int    width_scaled  = _output->info()->dimension(0);
+    const int    height_scaled = _output->info()->dimension(1);
+    const int    stride_x      = _info.stride().first;
+    const int    stride_y      = _info.stride().second;
+    const int    start_x       = _info.pad().first;
+    const int    start_y       = _inner_border.second + _info.pad().second;
+    const int    end_y         = height_scaled - _info.pad().second;
+    const int    end_x         = width_scaled - _inner_border.first - _info.pad().first;
+    const size_t element_size  = _input->info()->element_size();
 
     std::fill_n(_output->buffer(), _output->info()->total_size(), 0);
 
@@ -95,7 +96,7 @@ void CPPUpsampleKernel::run(const Window &window, const ThreadInfo &info)
 
     execute_window_loop(window, [&](const Coordinates & id)
     {
-        *(reinterpret_cast<float *>(out.ptr())) = *(reinterpret_cast<const float *>(in.ptr()));
+        memcpy(out.ptr(), in.ptr(), element_size);
    },
     in, out);
 }
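The memcpy change above is what makes the upsample scatter element-type agnostic: instead of dereferencing through float pointers, the kernel copies element_size bytes, so the same loop serves F32 (4-byte) and QASYMM8 (1-byte) tensors alike. Below is a standalone sketch of the same idea, stripped of the Window/Iterator machinery and ignoring padding and inner-border offsets; the function and its parameters are hypothetical:

    #include <cstdint>
    #include <cstring>

    // Scatter each input element into a zero-initialised output buffer whose
    // spatial dimensions are stretched by (stride_x, stride_y); byte-wise
    // copies keep the loop independent of the element type.
    void upsample_scatter(const uint8_t *in, uint8_t *out,
                          int in_w, int in_h, int out_w,
                          int stride_x, int stride_y, std::size_t element_size)
    {
        for(int y = 0; y < in_h; ++y)
        {
            for(int x = 0; x < in_w; ++x)
            {
                const uint8_t *src = in + (y * in_w + x) * element_size;
                uint8_t       *dst = out + (y * stride_y * out_w + x * stride_x) * element_size;
                std::memcpy(dst, src, element_size);
            }
        }
    }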
diff --git a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
index fdc959c4a9..aff335e5e3 100644
--- a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
@@ -51,8 +51,8 @@ Status NEDeconvolutionLayer::validate(const ITensorInfo *input, const ITensorInf
                                       unsigned int inner_border_right, unsigned int inner_border_top)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::QASYMM8);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::F32, DataType::QASYMM8);
     ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(0) != weights->dimension(1));
     ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(0) < 1);
     ARM_COMPUTE_RETURN_ERROR_ON(!info.padding_is_symmetric());
@@ -68,7 +68,11 @@ Status NEDeconvolutionLayer::validate(const ITensorInfo *input, const ITensorInf
 
     ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
 
-    if(bias != nullptr)
+    if(is_data_type_quantized_asymmetric(input->data_type()))
+    {
+        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(bias, 1, DataType::S32);
+    }
+    else
     {
         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, bias);
     }
@@ -111,10 +115,11 @@ void NEDeconvolutionLayer::configure(ITensor *input, const ITensor *weights, con
     _inner_border = std::make_pair(inner_border_right, inner_border_top);
     _is_prepared  = false;
 
-    const unsigned int stride_x = info.stride().first;
-    const unsigned int stride_y = info.stride().second;
+    const DataLayout   data_layout = input->info()->data_layout();
+    const unsigned int stride_x    = info.stride().first;
+    const unsigned int stride_y    = info.stride().second;
 
-    _weights_flipped.allocator()->init(TensorInfo(weights->info()->tensor_shape(), 1, weights->info()->data_type()));
+    _weights_flipped.allocator()->init(weights->info()->clone()->set_data_layout(data_layout));
     _flip_weights.configure(weights, &_weights_flipped);
 
     auto out_dims = deconvolution_output_dimensions(input->info()->dimension(0), input->info()->dimension(1), weights->info()->dimension(0), weights->info()->dimension(1),
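The new validate() branch above requires an S32 bias whenever the input is QASYMM8. This follows the usual asymmetric quantization scheme: accumulation happens in the int32 domain, whose effective scale is the product of the input and weight scales, so a real-valued bias has to be pre-quantized into that domain rather than kept in F32. A small helper sketch of that relationship (not part of the patch; the helper name and signature are hypothetical):

    #include <cmath>
    #include <cstdint>

    // A real-valued bias b is added in the int32 accumulator domain, whose
    // scale is s_input * s_weights, so it is stored as
    // round(b / (s_input * s_weights)).
    int32_t quantize_bias(float b, float s_input, float s_weights)
    {
        return static_cast<int32_t>(std::lround(b / (s_input * s_weights)));
    }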
diff --git a/tests/validation/NEON/DeconvolutionLayer.cpp b/tests/validation/NEON/DeconvolutionLayer.cpp
index 4a05535e09..fc37c02279 100644
--- a/tests/validation/NEON/DeconvolutionLayer.cpp
+++ b/tests/validation/NEON/DeconvolutionLayer.cpp
@@ -44,6 +44,8 @@ namespace validation
 namespace
 {
 constexpr AbsoluteTolerance<float> tolerance_fp32(0.001f); /**< Tolerance for floating point tests */
+constexpr AbsoluteTolerance<float> tolerance_qasymm8(0.0); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */
+constexpr float                    tolerance_num = 0.07f;  /**< Tolerance number */
 
 const auto data4x4 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 3)
                      * framework::dataset::make("PadY", 0, 3) * framework::dataset::make("NumKernels", { 3 });
@@ -213,6 +215,62 @@ TEST_SUITE_END() // W1x1
 TEST_SUITE_END() // FP32
 TEST_SUITE_END() // Float
 
+template <typename T>
+using NEDeconvolutionLayerQuantizedFixture4x4 = DeconvolutionValidationQuantizedFixture<Tensor, Accessor, NEDeconvolutionLayer, T, 4, 4>;
+
+template <typename T>
+using NEDeconvolutionLayerQuantizedFixture3x3 = DeconvolutionValidationQuantizedFixture<Tensor, Accessor, NEDeconvolutionLayer, T, 3, 3>;
+
+template <typename T>
+using NEDeconvolutionLayerQuantizedFixture1x1 = DeconvolutionValidationQuantizedFixture<Tensor, Accessor, NEDeconvolutionLayer, T, 1, 1>;
+
+TEST_SUITE(Quantized)
+TEST_SUITE(QASYMM8)
+
+TEST_SUITE(W4x4)
+FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerQuantizedFixture4x4<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data4x4, framework::dataset::make("DataType",
+                       DataType::QASYMM8)),
+                       data_layouts_dataset),
+                       framework::dataset::make("QuantizationInfo", QuantizationInfo(2.f / 255.f, 0))))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_qasymm8, tolerance_num);
+}
+TEST_SUITE_END() // W4x4
+
+TEST_SUITE(W3x3)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDeconvolutionLayerQuantizedFixture3x3<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(data3x3_precommit, framework::dataset::make("DataType",
+                       DataType::QASYMM8)),
+                       data_layouts_dataset),
+                       framework::dataset::make("QuantizationInfo", QuantizationInfo(2.f / 255.f, 0))))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_qasymm8, tolerance_num);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDeconvolutionLayerQuantizedFixture3x3<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data3x3, framework::dataset::make("DataType",
+                       DataType::QASYMM8)),
+                       data_layouts_dataset),
+                       framework::dataset::make("QuantizationInfo", QuantizationInfo(2.f / 255.f, 0))))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_qasymm8, tolerance_num);
+}
+TEST_SUITE_END() // W3x3
+
+TEST_SUITE(W1x1)
+FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerQuantizedFixture1x1<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data1x1, framework::dataset::make("DataType",
+                       DataType::QASYMM8)),
+                       data_layouts_dataset),
+                       framework::dataset::make("QuantizationInfo", QuantizationInfo(2.f / 255.f, 0))))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_qasymm8, tolerance_num);
+}
+TEST_SUITE_END() // W1x1
+
+TEST_SUITE_END() // QASYMM8
+TEST_SUITE_END() // Quantized
+
 TEST_SUITE_END() // DeconvolutionLayer
 TEST_SUITE_END() // NEON
 } // namespace validation
-- 
cgit v1.2.1
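The tests above exercise the quantized path end to end; for callers, the static validate() documented in the header offers the same checks as a pre-flight query. A hedged sketch of such a check follows (all tensor infos are assumptions); it should return true for this S32-bias configuration and false if the bias were F32:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h"

    using namespace arm_compute;

    bool qasymm8_deconv_config_is_valid()
    {
        const TensorInfo input_info(TensorShape(4U, 4U, 3U), 1, DataType::QASYMM8, QuantizationInfo(1.f / 255.f, 10));
        const TensorInfo weights_info(TensorShape(3U, 3U, 3U, 16U), 1, DataType::QASYMM8, QuantizationInfo(2.f / 255.f, 5));
        const TensorInfo bias_info(TensorShape(16U), 1, DataType::S32);
        const TensorInfo output_info(TensorShape(6U, 6U, 16U), 1, DataType::QASYMM8, QuantizationInfo(4.f / 255.f, 0));

        // validate() returns an error status instead of asserting, so a
        // configuration can be vetted before any tensor is allocated.
        const Status status = NEDeconvolutionLayer::validate(&input_info, &weights_info, &bias_info, &output_info,
                                                             PadStrideInfo(1, 1, 0, 0));
        return bool(status);
    }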