From 4370cffc7fb0da7fb486b9d06d24e16169521876 Mon Sep 17 00:00:00 2001
From: Manuel Bottini <manuel.bottini@arm.com>
Date: Fri, 7 Feb 2020 16:31:59 +0000
Subject: COMPMID-3034: Add NERequantizationLayerKernel

Change-Id: I3f098c3c2c2031d8cbe7326eab88a4e78bda867f
Signed-off-by: Manuel Bottini <manuel.bottini@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/2704
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Sang-Hoon Park <sang-hoon.park@arm.com>
---
 arm_compute/core/NEON/NEMath.h                     | 11 ++-
 arm_compute/core/NEON/NEMath.inl                   | 14 +++-
 .../core/NEON/kernels/NEQuantizationLayerKernel.h  | 12 ++--
 arm_compute/core/QuantizationInfo.h                | 44 ++++++++++++
 .../runtime/NEON/functions/NEQuantizationLayer.h   | 10 +--
 .../NEON/kernels/NEQuantizationLayerKernel.cpp     | 78 ++++++++++++++--------
 tests/validation/NEON/QuantizationLayer.cpp        | 72 ++++++++++++++++++--
 7 files changed, 194 insertions(+), 47 deletions(-)
diff --git a/arm_compute/core/NEON/NEMath.h b/arm_compute/core/NEON/NEMath.h
index 54f8252250..3905f67e29 100644
--- a/arm_compute/core/NEON/NEMath.h
+++ b/arm_compute/core/NEON/NEMath.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -173,6 +173,15 @@ float32x4x4_t convert_uint8x16_to_float32x4x4(const uint8x16_t &in);
  */
 float32x4x4_t convert_int8x16_to_float32x4x4(const int8x16_t &in);
 
+/** Converts a 16-element vector of the templated type to float32x4x4_t
+ *
+ * @param[in] in Vector of 16 elements to be converted (specialized for uint8x16_t and int8x16_t)
+ *
+ * @return Converted vector of float
+ */
+template <typename T>
+float32x4x4_t convert_to_float32x4x4(const T &in);
+
 /** Converts from two float32x4x3_t to just one uint8x8x3_t
  *
  * @param[in]  in1 First input vector of float to be converted
diff --git a/arm_compute/core/NEON/NEMath.inl b/arm_compute/core/NEON/NEMath.inl
index 5d8b82c281..49870d06a8 100644
--- a/arm_compute/core/NEON/NEMath.inl
+++ b/arm_compute/core/NEON/NEMath.inl
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -345,6 +345,18 @@ inline float32x4x4_t convert_int8x16_to_float32x4x4(const int8x16_t &in)
     return out;
 }
 
+template <> // uint8x16_t specialization: forwards to convert_uint8x16_to_float32x4x4
+inline float32x4x4_t convert_to_float32x4x4(const uint8x16_t &in)
+{
+    return convert_uint8x16_to_float32x4x4(in);
+}
+
+template <> // int8x16_t specialization: forwards to convert_int8x16_to_float32x4x4
+inline float32x4x4_t convert_to_float32x4x4(const int8x16_t &in)
+{
+    return convert_int8x16_to_float32x4x4(in);
+}
+
 inline void convert_float32x4x3_to_uint8x8x3(const float32x4x3_t &in1, const float32x4x3_t &in2, uint8x8x3_t &out)
 {
     out.val[0] = vqmovn_u16(vcombine_u16(vqmovn_u32(vcvtq_u32_f32(in1.val[0])),
diff --git a/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h
index 1a9b533640..087e767b73 100644
--- a/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -56,16 +56,16 @@ public:
     ~NEQuantizationLayerKernel() = default;
     /** Set the input, output.
      *
-     * @param[in]  input  Source tensor. The dimensions over the third will be interpreted as batches. Data types supported: F32/F16.
-     * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM16.
+     * @param[in]  input  Source tensor. The dimensions over the third will be interpreted as batches. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16.
+     * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16.
      *
      * @note Output auto initialization is not supported by this kernel
      */
     void configure(const ITensor *input, ITensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref NEQuantizationLayerKernel
      *
-     * @param[in] input  Input tensor info. Data types supported: F32/F16.
-     * @param[in] output Output tensor info. Data types supported: QASYMM8/QASYMM16.
+     * @param[in] input  Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16.
+     * @param[in] output Output tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16.
      *
      * @return a status
      */
@@ -80,7 +80,7 @@ private:
      * @param[in] window Region on which to execute the kernel.
      */
     using QuantizationFunctionExecutorPtr = void (NEQuantizationLayerKernel::*)(const Window &window);
-    /** Function to apply QASYMM8 quantization on a tensor.
+    /** Function to apply QASYMM8 or QASYMM8_SIGNED quantization on a tensor.
      *
      * @param[in] window Region on which to execute the kernel.
      */
diff --git a/arm_compute/core/QuantizationInfo.h b/arm_compute/core/QuantizationInfo.h
index 06ba665c6b..f859beb87a 100644
--- a/arm_compute/core/QuantizationInfo.h
+++ b/arm_compute/core/QuantizationInfo.h
@@ -516,5 +516,49 @@ inline float dequantize_qasymm16(uint16_t value, const QuantizationInfo &qinfo)
 {
     return dequantize_qasymm16(value, qinfo.uniform());
 }
+
+/*
+ * When requantizing a quantized input tensor to an output tensor with a different quantization,
+ * instead of applying a dequantization followed by a quantization, we just compute a new scale and
+ * a new offset that are applied to the input values directly.
+ *
+ * Assuming:
+ *   - q_i as input quantized value
+ *   - q_o as output quantized value
+ *   - z_i as input quantization offset value
+ *   - z_o as output quantization offset value
+ *   - s_i as input quantization scale value
+ *   - s_o as output quantization scale value
+ *   - z_n as new quantization offset value
+ *   - s_n as new quantization scale value
+ *
+ * q_o = ( q_i - z_i ) * s_i / s_o + z_o
+ *
+ * We can rewrite the formula as:
+ *
+ * q_o = ( q_i * s_i / s_o ) - z_i * s_i / s_o + z_o
+ *
+ * q_o = q_i / s_n + z_n
+ *
+ * Where:
+ *
+ * s_n = s_o / s_i
+ *
+ * z_n = - z_i * s_i / s_o + z_o
+ * The function returns a UniformQuantizationInfo holding s_n as scale and z_n as offset.
+ */
+inline UniformQuantizationInfo compute_requantization_scale_offset(const UniformQuantizationInfo &uqinfo_in, const UniformQuantizationInfo &uqinfo_out)
+{
+    float   scale_to_apply  = uqinfo_out.scale;
+    int32_t offset_to_apply = uqinfo_out.offset;
+
+    scale_to_apply /= uqinfo_in.scale;
+    // The offset correction z_i * s_i / s_o is evaluated in the float domain so that
+    // a single float-to-integer conversion is performed at the end.
+    // Note: static_cast<int32_t> truncates toward zero, it does not round to nearest.
+    offset_to_apply -= static_cast<int32_t>(static_cast<float>(uqinfo_in.offset) * uqinfo_in.scale / uqinfo_out.scale);
+    return UniformQuantizationInfo(scale_to_apply, offset_to_apply);
+}
+
 } // namespace arm_compute
 #endif /* ARM_COMPUTE_QUANTIZATION_INFO_H */
diff --git a/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h b/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h
index 1cf83e87ad..fc317be81e 100644
--- a/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -48,14 +48,14 @@ public:
     NEQuantizationLayer() = default;
     /** Set the input and output tensors.
      *
-     * @param[in]  input  Source tensor. The dimensions over the third will be interpreted as batches. Data types supported: F32/F16.
-     * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM16
+     * @param[in]  input  Source tensor. The dimensions over the third will be interpreted as batches. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16.
+     * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16
      */
     void configure(const ITensor *input, ITensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref NEQuantizationLayer
      *
-     * @param[in] input  Input tensor info. The dimensions over the third will be interpreted as batches. Data types supported: F32/F16.
-     * @param[in] output Output tensor info. Data types supported: QASYMM8/QASYMM16
+     * @param[in] input  Input tensor info. The dimensions over the third will be interpreted as batches. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16.
+     * @param[in] output Output tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16
      *
      * @return a status
      */
diff --git a/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp b/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp
index 2beb730448..113abad6b6 100644
--- a/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -26,6 +26,7 @@
 #include "arm_compute/core/Error.h"
 #include "arm_compute/core/Helpers.h"
 #include "arm_compute/core/NEON/NEAsymm.h"
+#include "arm_compute/core/NEON/NEMath.h"
 #include "arm_compute/core/NEON/wrapper/wrapper.h"
 #include "arm_compute/core/Utils.h"
 #include "arm_compute/core/Validate.h"
@@ -46,7 +47,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
     ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
     ARM_COMPUTE_RETURN_ERROR_ON(output->tensor_shape().total_size() == 0);
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QASYMM16);
     ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
@@ -54,6 +55,14 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
     return Status{};
 }
 
+template <typename T>
+inline float32x4x4_t load_value(const T *input_ptr) // Generic path: the vector obtained from wrapper::vloadq is converted to float32x4x4_t
+{
+    using Tx16_t = typename wrapper::traits::neon_vector<T, 16>::type;
+    return arm_compute::convert_to_float32x4x4<Tx16_t>(wrapper::vloadq(input_ptr));
+}
+
+template <>
 inline float32x4x4_t load_value(const float *input_ptr)
 {
     return { wrapper::vloadq(input_ptr),
@@ -62,7 +71,8 @@ inline float32x4x4_t load_value(const float *input_ptr)
              wrapper::vloadq(input_ptr + 12) };
 }
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-inline const float32x4x4_t load_value(const float16_t *input_ptr)
+template <>
+inline float32x4x4_t load_value(const float16_t *input_ptr)
 {
     return { vcvt_f32_f16(wrapper::vload(input_ptr)),
              vcvt_f32_f16(wrapper::vload(input_ptr + 4)),
@@ -105,34 +115,38 @@ void NEQuantizationLayerKernel::configure(const ITensor *input, ITensor *output)
     _input  = input;
     _output = output;
 
-    static std::map<DataType, QuantizationFunctionExecutorPtr> quant_map_f32 =
+    static const std::map<std::string, QuantizationFunctionExecutorPtr> quant_map =
     {
-        { DataType::QASYMM8, &NEQuantizationLayerKernel::run_quantize_qasymm8<float, uint8_t> },
-        { DataType::QASYMM8_SIGNED, &NEQuantizationLayerKernel::run_quantize_qasymm8<float, int8_t> },
-        { DataType::QASYMM16, &NEQuantizationLayerKernel::run_quantize_qasymm16<float> },
-    };
+        { "op_QASYMM8_QASYMM8", &NEQuantizationLayerKernel::run_quantize_qasymm8<uint8_t, uint8_t> },
+        { "op_QASYMM8_QASYMM8_SIGNED", &NEQuantizationLayerKernel::run_quantize_qasymm8<uint8_t, int8_t> },
+        { "op_QASYMM8_QASYMM16", &NEQuantizationLayerKernel::run_quantize_qasymm16<uint8_t> },
+
+        { "op_QASYMM8_SIGNED_QASYMM8", &NEQuantizationLayerKernel::run_quantize_qasymm8<int8_t, uint8_t> },
+        { "op_QASYMM8_SIGNED_QASYMM8_SIGNED", &NEQuantizationLayerKernel::run_quantize_qasymm8<int8_t, int8_t> },
+        { "op_QASYMM8_SIGNED_QASYMM16", &NEQuantizationLayerKernel::run_quantize_qasymm16<int8_t> },
+
+        { "op_F32_QASYMM8", &NEQuantizationLayerKernel::run_quantize_qasymm8<float, uint8_t> },
+        { "op_F32_QASYMM8_SIGNED", &NEQuantizationLayerKernel::run_quantize_qasymm8<float, int8_t> },
+        { "op_F32_QASYMM16", &NEQuantizationLayerKernel::run_quantize_qasymm16<float> },
+
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-    static std::map<DataType, QuantizationFunctionExecutorPtr> quant_map_f16 =
-    {
-        { DataType::QASYMM8, &NEQuantizationLayerKernel::run_quantize_qasymm8<float16_t, uint8_t> },
-        { DataType::QASYMM8_SIGNED, &NEQuantizationLayerKernel::run_quantize_qasymm8<float16_t, int8_t> },
-        { DataType::QASYMM16, &NEQuantizationLayerKernel::run_quantize_qasymm16<float16_t> },
-    };
+        { "op_F16_QASYMM8", &NEQuantizationLayerKernel::run_quantize_qasymm8<float16_t, uint8_t> },
+        { "op_F16_QASYMM8_SIGNED", &NEQuantizationLayerKernel::run_quantize_qasymm8<float16_t, int8_t> },
+        { "op_F16_QASYMM16", &NEQuantizationLayerKernel::run_quantize_qasymm16<float16_t> },
 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC*/
+    };
+
+    std::string function_to_call("op_");
+    function_to_call += string_from_data_type(_input->info()->data_type()) + "_";
+    function_to_call += string_from_data_type(_output->info()->data_type());
+
+    auto it = quant_map.find(function_to_call);
 
-    switch(input->info()->data_type())
+    if(it == quant_map.end())
     {
-        case DataType::F32:
-            _func = quant_map_f32[output->info()->data_type()];
-            break;
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-        case DataType::F16:
-            _func = quant_map_f16[output->info()->data_type()];
-            break;
-#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
-        default:
-            ARM_COMPUTE_ERROR("Unsupported input data type.");
+        ARM_COMPUTE_ERROR("Unsupported combination of input and output data types");
     }
+    _func = it->second;
 
     // Configure kernel window
     Window win_config = calculate_max_window(*input->info(), Steps());
@@ -156,7 +170,12 @@ void NEQuantizationLayerKernel::run_quantize_qasymm8(const Window &window)
     const auto window_start_x = static_cast<int>(window.x().start());
     const auto window_end_x   = static_cast<int>(window.x().end());
 
-    const UniformQuantizationInfo uqinfo = _output->info()->quantization_info().uniform();
+    const UniformQuantizationInfo uqinfo_in = _input->info()->quantization_info().uniform();
+    UniformQuantizationInfo       uqinfo    = _output->info()->quantization_info().uniform();
+    if(is_data_type_quantized_asymmetric(_input->info()->data_type()))
+    {
+        uqinfo = compute_requantization_scale_offset(uqinfo_in, uqinfo);
+    }
 #ifdef __aarch64__
     constexpr RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_EVEN;
 #else  //__aarch64__
@@ -194,7 +213,12 @@ void NEQuantizationLayerKernel::run_quantize_qasymm16(const Window &window)
     const auto window_start_x = static_cast<int>(window.x().start());
     const auto window_end_x   = static_cast<int>(window.x().end());
 
-    const UniformQuantizationInfo uqinfo = _output->info()->quantization_info().uniform();
+    const UniformQuantizationInfo uqinfo_in = _input->info()->quantization_info().uniform();
+    UniformQuantizationInfo       uqinfo    = _output->info()->quantization_info().uniform();
+    if(is_data_type_quantized_asymmetric(_input->info()->data_type()))
+    {
+        uqinfo = compute_requantization_scale_offset(uqinfo_in, uqinfo);
+    }
 #ifdef __aarch64__
     constexpr RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_EVEN;
 #else  //__aarch64__
diff --git a/tests/validation/NEON/QuantizationLayer.cpp b/tests/validation/NEON/QuantizationLayer.cpp
index a4af2a2886..a5372b897c 100644
--- a/tests/validation/NEON/QuantizationLayer.cpp
+++ b/tests/validation/NEON/QuantizationLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -43,11 +43,11 @@ namespace validation
 namespace
 {
 /** Tolerance for quantization */
-constexpr AbsoluteTolerance<uint8_t>  tolerance_u8(1);
-constexpr AbsoluteTolerance<uint16_t> tolerance_u16(1);
-
-const auto QuantizationSmallShapes = concat(datasets::Small3DShapes(), datasets::Small4DShapes());
-const auto QuantizationLargeShapes = concat(datasets::Large3DShapes(), datasets::Large4DShapes());
+constexpr AbsoluteTolerance<uint8_t>  tolerance_u8(1);  /**< Tolerance value for comparing reference's output against implementation's output for QASYMM8 data types */
+constexpr AbsoluteTolerance<int8_t>   tolerance_s8(1);  /**< Tolerance value for comparing reference's output against implementation's output for QASYMM8_SIGNED data types */
+constexpr AbsoluteTolerance<uint16_t> tolerance_u16(1); /**< Tolerance value for comparing reference's output against implementation's output for QASYMM16 data types */
+const auto                            QuantizationSmallShapes = concat(datasets::Small3DShapes(), datasets::Small4DShapes());
+const auto                            QuantizationLargeShapes = concat(datasets::Large3DShapes(), datasets::Large4DShapes());
 } // namespace
 
 TEST_SUITE(NEON)
@@ -56,7 +56,7 @@ TEST_SUITE(QuantizationLayer)
 // *INDENT-OFF*
 // clang-format off
 DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(
-               framework::dataset::make("InputInfo", { TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::QASYMM8),  // Wrong input data type
+               framework::dataset::make("InputInfo", { TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::QASYMM8),  // Wrong output data type
                                                        TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::F32),  // Wrong output data type
                                                        TensorInfo(TensorShape(16U, 16U, 2U, 5U), 1, DataType::F32),  // Missmatching shapes
                                                        TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::F32),  // Valid
@@ -193,6 +193,64 @@ TEST_SUITE_END() // FP16
 #endif           //  __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 TEST_SUITE_END() // Float
 
+TEST_SUITE(Quantized) // Requantization: quantized input tensor -> quantized output tensor
+template <typename T>
+using NEQuantizationLayerQASYMM8GenFixture = QuantizationValidationGenericFixture<Tensor, Accessor, NEQuantizationLayer, T, uint8_t>;
+template <typename T>
+using NEQuantizationLayerQASYMM8_SIGNEDGenFixture = QuantizationValidationGenericFixture<Tensor, Accessor, NEQuantizationLayer, T, int8_t>;
+template <typename T>
+using NEQuantizationLayerQASYMM16GenFixture = QuantizationValidationGenericFixture<Tensor, Accessor, NEQuantizationLayer, T, uint16_t>;
+TEST_SUITE(QASYMM8)
+FIXTURE_DATA_TEST_CASE(RunSmallQASYMM8, NEQuantizationLayerQASYMM8GenFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(QuantizationSmallShapes,
+                       framework::dataset::make("DataTypeIn", DataType::QASYMM8)),
+                       framework::dataset::make("DataTypeOut", { DataType::QASYMM8 })),
+                       framework::dataset::make("QuantizationInfoOutput", { QuantizationInfo(0.5f, 10) })),
+                       framework::dataset::make("QuantizationInfoInput", { QuantizationInfo(2.0f, 15) })))
+{
+    // QASYMM8 -> QASYMM8: compare against the reference with an absolute tolerance of 1
+    validate(Accessor(_target), _reference, tolerance_u8);
+}
+FIXTURE_DATA_TEST_CASE(RunSmallQASYMM8_SIGNED, NEQuantizationLayerQASYMM8_SIGNEDGenFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(QuantizationSmallShapes,
+                       framework::dataset::make("DataTypeIn", DataType::QASYMM8)),
+                       framework::dataset::make("DataTypeOut", { DataType::QASYMM8_SIGNED })),
+                       framework::dataset::make("QuantizationInfoOutput", { QuantizationInfo(1.0f, 10), QuantizationInfo(2.0f, -25) })),
+                       framework::dataset::make("QuantizationInfoInput", { QuantizationInfo(1.0f, 15) })))
+{
+    // QASYMM8 -> QASYMM8_SIGNED: compare against the reference with an absolute tolerance of 1
+    validate(Accessor(_target), _reference, tolerance_s8);
+}
+FIXTURE_DATA_TEST_CASE(RunSmallQASYMM16, NEQuantizationLayerQASYMM16GenFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(QuantizationSmallShapes,
+                       framework::dataset::make("DataTypeIn", DataType::QASYMM8)),
+                       framework::dataset::make("DataTypeOut", { DataType::QASYMM16 })),
+                       framework::dataset::make("QuantizationInfoOutput", { QuantizationInfo(1.0f, 10) })),
+                       framework::dataset::make("QuantizationInfoInput", { QuantizationInfo(4.0f, 23) })))
+{
+    // QASYMM8 -> QASYMM16: compare against the reference with an absolute tolerance of 1
+    validate(Accessor(_target), _reference, tolerance_u16);
+}
+TEST_SUITE_END() // QASYMM8
+TEST_SUITE(QASYMM8_SIGNED)
+FIXTURE_DATA_TEST_CASE(RunSmallQASYMM8_SIGNED, NEQuantizationLayerQASYMM8_SIGNEDGenFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(QuantizationSmallShapes,
+                       framework::dataset::make("DataTypeIn", DataType::QASYMM8_SIGNED)),
+                       framework::dataset::make("DataTypeOut", { DataType::QASYMM8_SIGNED })),
+                       framework::dataset::make("QuantizationInfoOutput", { QuantizationInfo(1.0f, 10) })),
+                       framework::dataset::make("QuantizationInfoInput", { QuantizationInfo(2.0f, -5) })))
+{
+    // QASYMM8_SIGNED -> QASYMM8_SIGNED: compare against the reference with an absolute tolerance of 1
+    validate(Accessor(_target), _reference, tolerance_s8);
+}
+FIXTURE_DATA_TEST_CASE(RunSmallQASYMM8, NEQuantizationLayerQASYMM8GenFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(QuantizationSmallShapes,
+                       framework::dataset::make("DataTypeIn", DataType::QASYMM8_SIGNED)),
+                       framework::dataset::make("DataTypeOut", { DataType::QASYMM8 })),
+                       framework::dataset::make("QuantizationInfoOutput", { QuantizationInfo(2.0f, 10), QuantizationInfo(2.0f, -25) })),
+                       framework::dataset::make("QuantizationInfoInput", { QuantizationInfo(1.0f, 30) })))
+{
+    // QASYMM8_SIGNED -> QASYMM8: compare against the reference with an absolute tolerance of 1
+    validate(Accessor(_target), _reference, tolerance_u8);
+}
+TEST_SUITE_END() // QASYMM8_SIGNED
+TEST_SUITE_END() // Quantized
+
+
 TEST_SUITE_END() // QuantizationLayer
 TEST_SUITE_END() // NEON
 } // namespace validation
-- 
cgit v1.2.1