COMPMID-444: Add support for QS8/QS16 NEON Arithmetic Add/Sub/Mul.

Change-Id: Ia482498688ca1884272b5062e3415e736e03d36f Reviewed-on: http://mpd-gerrit.cambridge.arm.com/80448 Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com> Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
author: Michele Di Giorgio <michele.digiorgio@arm.com> 2017-07-11 15:00:52 +0100
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-09-17 14:16:42 +0100
commit: 81f0d15d6840a0ae8ef571114555a26da74c4a43 (patch)
tree: a9eeda0a2b69961cd6a51d74e039bbed26a9b436 /src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp
parent: f70256bd46f03090281581c152bd17b4a50febcd (diff)
download: ComputeLibrary-81f0d15d6840a0ae8ef571114555a26da74c4a43.tar.gz
1 files changed, 45 insertions, 3 deletions
diff --git a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp
index f51b6b9f0b..cac2a6bd05 100644
--- a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp
+++ b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp
@@ -26,6 +26,7 @@
 #include "arm_compute/core/Error.h"
 #include "arm_compute/core/Helpers.h"
 #include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/NEON/NEFixedPoint.h"
 #include "arm_compute/core/TensorInfo.h"
 #include "arm_compute/core/Validate.h"
 
@@ -44,6 +45,38 @@ class Coordinates;
 
 namespace
 {
+void sub_wrap_QS8_QS8_QS8(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
+{
+    Iterator input1(in1, window);
+    Iterator input2(in2, window);
+    Iterator output(out, window);
+
+    execute_window_loop(window, [&](const Coordinates & id)
+    {
+        const qint8x16_t a = vld1q_qs8(reinterpret_cast<const qint8_t *>(input1.ptr()));
+        const qint8x16_t b = vld1q_qs8(reinterpret_cast<const qint8_t *>(input2.ptr()));
+
+        vst1q_qs8(reinterpret_cast<qint8_t *>(output.ptr()), vsubq_qs8(a, b));
+    },
+    input1, input2, output);
+}
+
+void sub_saturate_QS8_QS8_QS8(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
+{
+    Iterator input1(in1, window);
+    Iterator input2(in2, window);
+    Iterator output(out, window);
+
+    execute_window_loop(window, [&](const Coordinates & id)
+    {
+        const qint8x16_t a = vld1q_qs8(reinterpret_cast<const qint8_t *>(input1.ptr()));
+        const qint8x16_t b = vld1q_qs8(reinterpret_cast<const qint8_t *>(input2.ptr()));
+
+        vst1q_qs8(reinterpret_cast<qint8_t *>(output.ptr()), vqsubq_qs8(a, b));
+    },
+    input1, input2, output);
+}
+
 void sub_wrap_U8_U8_U8(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
 {
     Iterator input1(in1, window);
@@ -302,14 +335,21 @@ void NEArithmeticSubtractionKernel::configure(const ITensor *input1, const ITens
     }
 
     ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input1, input2, output);
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::S16, DataType::F32);
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8, DataType::S16, DataType::F32);
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::QS8, DataType::U8, DataType::QS16, DataType::S16, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::QS8, DataType::U8, DataType::QS16, DataType::S16, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QS8, DataType::U8, DataType::QS16, DataType::S16, DataType::F32);
     ARM_COMPUTE_ERROR_ON_MSG(output->info()->data_type() == DataType::U8 && (input1->info()->data_type() != DataType::U8 || input2->info()->data_type() != DataType::U8),
                              "Output can only be U8 if both inputs are U8");
+    if(is_data_type_fixed_point(input1->info()->data_type()) || is_data_type_fixed_point(input2->info()->data_type()) || is_data_type_fixed_point(output->info()->data_type()))
+    {
+        // Check that all data types are the same and all fixed-point positions are the same
+        ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input1, input2, output);
+    }
 
     static std::map<std::string, SubFunction *> map_function =
     {
+        { "sub_wrap_QS8_QS8_QS8", &sub_wrap_QS8_QS8_QS8 },
+        { "sub_saturate_QS8_QS8_QS8", &sub_saturate_QS8_QS8_QS8 },
         { "sub_wrap_U8_U8_U8", &sub_wrap_U8_U8_U8 },
         { "sub_wrap_U8_U8_S16", &sub_wrap_U8_U8_S16 },
         { "sub_saturate_U8_U8_U8", &sub_saturate_U8_U8_U8 },
@@ -318,6 +358,8 @@ void NEArithmeticSubtractionKernel::configure(const ITensor *input1, const ITens
         { "sub_wrap_S16_U8_S16", &sub_wrap_S16_U8_S16 },
         { "sub_saturate_U8_S16_S16", &sub_saturate_U8_S16_S16 },
         { "sub_saturate_S16_U8_S16", &sub_saturate_S16_U8_S16 },
+        { "sub_wrap_QS16_QS16_QS16", &sub_wrap_S16_S16_S16 },
+        { "sub_saturate_QS16_QS16_QS16", &sub_saturate_S16_S16_S16 },
         { "sub_wrap_S16_S16_S16", &sub_wrap_S16_S16_S16 },
         { "sub_saturate_S16_S16_S16", &sub_saturate_S16_S16_S16 },
         { "sub_wrap_F32_F32_F32", &sub_F32_F32_F32 },
author	Michele Di Giorgio <michele.digiorgio@arm.com>	2017-07-11 15:00:52 +0100
committer	Anthony Barbier <anthony.barbier@arm.com>	2018-09-17 14:16:42 +0100
commit	81f0d15d6840a0ae8ef571114555a26da74c4a43 (patch)
tree	a9eeda0a2b69961cd6a51d74e039bbed26a9b436 /src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp
parent	f70256bd46f03090281581c152bd17b4a50febcd (diff)
download	ComputeLibrary-81f0d15d6840a0ae8ef571114555a26da74c4a43.tar.gz