From d87a7b297a5bfc2bad3ba78ea97754d7894e82ef Mon Sep 17 00:00:00 2001
From: Michele Di Giorgio <michele.digiorgio@arm.com>
Date: Tue, 10 Sep 2019 10:42:27 +0100
Subject: COMPMID-2650: Add support for QASYMM16 in CLQuantizationLayer

Change-Id: I51dda621975f522a65d770304bed0ff0f30d1235
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-on: https://review.mlplatform.org/c/1902
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
---
 src/core/CL/kernels/CLQuantizationLayerKernel.cpp | 54 ++++++++++++++---------
 1 file changed, 32 insertions(+), 22 deletions(-)

(limited to 'src/core/CL/kernels/CLQuantizationLayerKernel.cpp')

diff --git a/src/core/CL/kernels/CLQuantizationLayerKernel.cpp b/src/core/CL/kernels/CLQuantizationLayerKernel.cpp
index 493255f1cd..19aa51b5b5 100644
--- a/src/core/CL/kernels/CLQuantizationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLQuantizationLayerKernel.cpp
@@ -32,9 +32,10 @@
 #include "arm_compute/core/Utils.h"
 #include "arm_compute/core/Validate.h"
 #include "arm_compute/core/Window.h"
+#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
 
-using namespace arm_compute;
-
+namespace arm_compute
+{
 namespace
 {
 Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
@@ -42,28 +43,34 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::F16);
     ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
-    if((output != nullptr) && (output->total_size() != 0))
-    {
-        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QASYMM8);
-        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
-    }
+
+    // Output must always be initialized
+    ARM_COMPUTE_RETURN_ERROR_ON(output->tensor_shape().total_size() == 0);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QASYMM8, DataType::QASYMM16);
+    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
 
     return Status{};
 }
 
-std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output)
+std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output)
 {
     // Configure kernel window
     Window win = calculate_max_window(*input, Steps());
 
-    // Output tensor auto initialization if not yet initialized
-    auto_init_if_empty(*output, input->tensor_shape(), 1, DataType::QASYMM8);
+    const int  vec_size_x     = 16 / input->element_size();
+    const int  input_width_x  = input->tensor_shape().x();
+    const bool multi_access_x = (input_width_x / vec_size_x > 0);
+    if(multi_access_x)
+    {
+        win.set(Window::DimX,
+                Window::Dimension(win.x().start(), ceil_to_multiple(win.x().end(), vec_size_x), vec_size_x));
+    }
 
     Coordinates coord;
     coord.set_num_dimensions(output->num_dimensions());
     output->set_valid_region(ValidRegion(coord, output->tensor_shape()));
 
-    return std::make_tuple(Status{}, win);
+    return std::make_pair(Status{}, win);
 }
 } // namespace
 
@@ -84,31 +91,33 @@ void CLQuantizationLayerKernel::configure(const ICLTensor *input, ICLTensor *out
     const int  input_width_x  = input->info()->tensor_shape().x();
     const bool multi_access_x = (input_width_x / vec_size_x > 0);
 
-    // Create and update the window (if needed)
-    Window win = calculate_max_window(*input->info());
-    if(multi_access_x)
-    {
-        win.set(Window::DimX,
-                Window::Dimension(win.x().start(), ceil_to_multiple(win.x().end(), vec_size_x), vec_size_x));
-    }
-    ICLKernel::configure_internal(win);
+    // Configure kernel window
+    auto win_config = validate_and_configure_window(input->info(), output->info());
+    ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
+    ICLKernel::configure_internal(win_config.second);
 
-    const UniformQuantizationInfo qinfo = output->info()->quantization_info().uniform();
+    const UniformQuantizationInfo qinfo            = output->info()->quantization_info().uniform();
+    const DataType                output_data_type = output->info()->data_type();
 
     // Create kernel
     CLBuildOptions build_opts;
     build_opts.add_option("-DSCALE=" + float_to_string_with_full_precision(qinfo.scale));
     build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(qinfo.offset));
     build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(vec_size_x));
-    build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
+    build_opts.add_option("-DDATA_TYPE_IN=" + get_cl_type_from_data_type(input->info()->data_type()));
+    build_opts.add_option("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(output_data_type));
     build_opts.add_option_if(multi_access_x, "-DLAST_ACCESSED_X=" + support::cpp11::to_string(std::max<int>(input_width_x - vec_size_x, 0)));
+    std::pair<int, int> min_max_quant_values = quantization::get_min_max_values_from_quantized_data_type(output_data_type);
+    build_opts.add_option("-DMIN_QUANT_VAL=" + support::cpp11::to_string(min_max_quant_values.first));
+    build_opts.add_option("-DMAX_QUANT_VAL=" + support::cpp11::to_string(min_max_quant_values.second));
+
     _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("quantization_layer", build_opts.options()));
 }
 
 Status CLQuantizationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output)
 {
     ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output));
-    ARM_COMPUTE_RETURN_ON_ERROR(std::get<0>(validate_and_configure_window(input->clone().get(), output->clone().get())));
+    ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), output->clone().get()).first);
 
     return Status{};
 }
@@ -130,3 +139,4 @@ void CLQuantizationLayerKernel::run(const Window &window, cl::CommandQueue &queu
     }
     while(window_collapsed.slide_window_slice_3D(slice));
 }
+} // namespace arm_compute
-- 
cgit v1.2.1