From 7281834f8554670c9d8ae59069a4d80adec21fa7 Mon Sep 17 00:00:00 2001
From: Michele Di Giorgio <michele.digiorgio@arm.com>
Date: Thu, 13 Jul 2017 11:03:35 +0100
Subject: COMPMID-446: Add support for QS8/QS16 CL Arithmetic Add/Sub

Change-Id: I84fc457a9c28856a11322944822d2fabaf92e8e4
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/80528
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Moritz Pflanzer <moritz.pflanzer@arm.com>
---
 src/core/CL/cl_kernels/arithmetic_op.cl               | 10 +++++++---
 src/core/CL/kernels/CLArithmeticAdditionKernel.cpp    | 15 ++++++++++++---
 src/core/CL/kernels/CLArithmeticSubtractionKernel.cpp | 15 ++++++++++++---
 3 files changed, 31 insertions(+), 9 deletions(-)

(limited to 'src/core')

diff --git a/src/core/CL/cl_kernels/arithmetic_op.cl b/src/core/CL/cl_kernels/arithmetic_op.cl
index 5102d34552..03414105e6 100644
--- a/src/core/CL/cl_kernels/arithmetic_op.cl
+++ b/src/core/CL/cl_kernels/arithmetic_op.cl
@@ -23,6 +23,10 @@
  */
 #include "helpers.h"
 
+#if defined(FIXED_POINT_POSITION)
+#include "fixed_point.h"
+#endif /* FIXED_POINT_POSITION */
+
 #ifdef SATURATE
 #define ADD(x, y) add_sat((x), (y))
 #define SUB(x, y) sub_sat((x), (y))
@@ -37,19 +41,19 @@
  * e.g. -DDATA_TYPE_IN1=uchar -DDATA_TYPE_IN2=uchar -DDATA_TYPE_OUT=short
  * @attention To perform saturating operation -DSATURATE has to be passed to the compiler otherwise wrapping policy will be used.
  *
- * @param[in]  in1_ptr                           Pointer to the source image. Supported data types: U8, S16
+ * @param[in]  in1_ptr                           Pointer to the source image. Supported data types: U8/QS8/QS16/S16/F16/F32
  * @param[in]  in1_stride_x                      Stride of the source image in X dimension (in bytes)
  * @param[in]  in1_step_x                        in1_stride_x * number of elements along X processed per workitem(in bytes)
  * @param[in]  in1_stride_y                      Stride of the source image in Y dimension (in bytes)
  * @param[in]  in1_step_y                        in1_stride_y * number of elements along Y processed per workitem(in bytes)
  * @param[in]  in1_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[in]  in2_ptr                           Pointer to the source image. Supported data types: U8, S16
+ * @param[in]  in2_ptr                           Pointer to the source image. Supported data types: U8/QS8 (only if @p in1_ptr is QS8), QS16 (only if @p in1_ptr is QS16), S16/F16/F32
  * @param[in]  in2_stride_x                      Stride of the source image in X dimension (in bytes)
  * @param[in]  in2_step_x                        in2_stride_x * number of elements along X processed per workitem(in bytes)
  * @param[in]  in2_stride_y                      Stride of the source image in Y dimension (in bytes)
  * @param[in]  in2_step_y                        in2_stride_y * number of elements along Y processed per workitem(in bytes)
  * @param[in]  in2_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] out_ptr                           Pointer to the destination image. Supported data types: U8, S16
+ * @param[out] out_ptr                           Pointer to the destination image. Supported data types: U8 (only if both inputs are U8), QS8 (only if both inputs are QS8), QS16 (only if both inputs are QS16), S16/F16/F32
  * @param[in]  out_stride_x                      Stride of the destination image in X dimension (in bytes)
  * @param[in]  out_step_x                        out_stride_x * number of elements along X processed per workitem(in bytes)
  * @param[in]  out_stride_y                      Stride of the destination image in Y dimension (in bytes)
diff --git a/src/core/CL/kernels/CLArithmeticAdditionKernel.cpp b/src/core/CL/kernels/CLArithmeticAdditionKernel.cpp
index 0cb0847784..65422c2bbf 100644
--- a/src/core/CL/kernels/CLArithmeticAdditionKernel.cpp
+++ b/src/core/CL/kernels/CLArithmeticAdditionKernel.cpp
@@ -69,11 +69,16 @@ void CLArithmeticAdditionKernel::configure(const ICLTensor *input1, const ICLTen
     }
 
     ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input1, input2, output);
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32);
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32);
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::QS8, DataType::QS16, DataType::S16, DataType::F16, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8, DataType::QS8, DataType::QS16, DataType::S16, DataType::F16, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::QS8, DataType::QS16, DataType::S16, DataType::F16, DataType::F32);
     ARM_COMPUTE_ERROR_ON_MSG(output->info()->data_type() == DataType::U8 && (input1->info()->data_type() != DataType::U8 || input2->info()->data_type() != DataType::U8),
                              "Output can only be U8 if both inputs are U8");
+    if(is_data_type_fixed_point(input1->info()->data_type()) || is_data_type_fixed_point(input2->info()->data_type()) || is_data_type_fixed_point(output->info()->data_type()))
+    {
+        // Check that all data types are the same and all fixed-point positions are the same
+        ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input1, input2, output);
+    }
 
     _input1 = input1;
     _input2 = input2;
@@ -87,6 +92,10 @@ void CLArithmeticAdditionKernel::configure(const ICLTensor *input1, const ICLTen
     build_opts.emplace("-DDATA_TYPE_IN1=" + get_cl_type_from_data_type(input1->info()->data_type()));
     build_opts.emplace("-DDATA_TYPE_IN2=" + get_cl_type_from_data_type(input2->info()->data_type()));
     build_opts.emplace("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(output->info()->data_type()));
+    if(is_data_type_fixed_point(input1->info()->data_type()))
+    {
+        build_opts.emplace("-DFIXED_POINT_POSITION=" + support::cpp11::to_string(input1->info()->fixed_point_position()));
+    }
 
     // Create kernel
     _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("arithmetic_add", build_opts));
diff --git a/src/core/CL/kernels/CLArithmeticSubtractionKernel.cpp b/src/core/CL/kernels/CLArithmeticSubtractionKernel.cpp
index 69f9ff17d3..c5183af7d7 100644
--- a/src/core/CL/kernels/CLArithmeticSubtractionKernel.cpp
+++ b/src/core/CL/kernels/CLArithmeticSubtractionKernel.cpp
@@ -62,11 +62,16 @@ void CLArithmeticSubtractionKernel::configure(const ICLTensor *input1, const ICL
     }
 
     ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input1, input2, output);
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32);
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32);
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::QS8, DataType::QS16, DataType::S16, DataType::F16, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8, DataType::QS8, DataType::QS16, DataType::S16, DataType::F16, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::QS8, DataType::QS16, DataType::S16, DataType::F16, DataType::F32);
     ARM_COMPUTE_ERROR_ON_MSG(output->info()->data_type() == DataType::U8 && (input1->info()->data_type() != DataType::U8 || input2->info()->data_type() != DataType::U8),
                              "Output can only be U8 if both inputs are U8");
+    if(is_data_type_fixed_point(input1->info()->data_type()) || is_data_type_fixed_point(input2->info()->data_type()) || is_data_type_fixed_point(output->info()->data_type()))
+    {
+        // Check that all data types are the same and all fixed-point positions are the same
+        ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input1, input2, output);
+    }
 
     _input1 = input1;
     _input2 = input2;
@@ -80,6 +85,10 @@ void CLArithmeticSubtractionKernel::configure(const ICLTensor *input1, const ICL
     build_opts.emplace("-DDATA_TYPE_IN1=" + get_cl_type_from_data_type(input1->info()->data_type()));
     build_opts.emplace("-DDATA_TYPE_IN2=" + get_cl_type_from_data_type(input2->info()->data_type()));
     build_opts.emplace("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(output->info()->data_type()));
+    if(is_data_type_fixed_point(input1->info()->data_type()))
+    {
+        build_opts.emplace("-DFIXED_POINT_POSITION=" + support::cpp11::to_string(input1->info()->fixed_point_position()));
+    }
 
     // Create kernel
     _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("arithmetic_sub", build_opts));
-- 
cgit v1.2.1