diff options
author | Michele Di Giorgio <michele.digiorgio@arm.com> | 2017-07-11 15:00:52 +0100 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-09-17 14:16:42 +0100 |
commit | 81f0d15d6840a0ae8ef571114555a26da74c4a43 (patch) | |
tree | a9eeda0a2b69961cd6a51d74e039bbed26a9b436 /src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp | |
parent | f70256bd46f03090281581c152bd17b4a50febcd (diff) | |
download | ComputeLibrary-81f0d15d6840a0ae8ef571114555a26da74c4a43.tar.gz |
COMPMID-444: Add support for QS8/QS16 NEON Arithmetic Add/Sub/Mul.
Change-Id: Ia482498688ca1884272b5062e3415e736e03d36f
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/80448
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp')
-rw-r--r-- | src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp | 48 |
1 files changed, 45 insertions, 3 deletions
diff --git a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp index c0809eb9fa..7f7e45a940 100644 --- a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp +++ b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp @@ -27,6 +27,7 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/IAccessWindow.h" #include "arm_compute/core/ITensor.h" +#include "arm_compute/core/NEON/NEFixedPoint.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" @@ -45,6 +46,38 @@ class Coordinates; namespace { +void add_wrap_QS8_QS8_QS8(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window) +{ + Iterator input1(in1, window); + Iterator input2(in2, window); + Iterator output(out, window); + + execute_window_loop(window, [&](const Coordinates & id) + { + const qint8x16_t a = vld1q_qs8(reinterpret_cast<const qint8_t *>(input1.ptr())); + const qint8x16_t b = vld1q_qs8(reinterpret_cast<const qint8_t *>(input2.ptr())); + + vst1q_qs8(reinterpret_cast<qint8_t *>(output.ptr()), vaddq_qs8(a, b)); + }, + input1, input2, output); +} + +void add_saturate_QS8_QS8_QS8(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window) +{ + Iterator input1(in1, window); + Iterator input2(in2, window); + Iterator output(out, window); + + execute_window_loop(window, [&](const Coordinates & id) + { + const qint8x16_t a = vld1q_qs8(reinterpret_cast<const qint8_t *>(input1.ptr())); + const qint8x16_t b = vld1q_qs8(reinterpret_cast<const qint8_t *>(input2.ptr())); + + vst1q_qs8(reinterpret_cast<qint8_t *>(output.ptr()), vqaddq_qs8(a, b)); + }, + input1, input2, output); +} + void add_wrap_U8_U8_U8(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window) { Iterator input1(in1, window); @@ -352,14 +385,21 @@ void NEArithmeticAdditionKernel::configure(const ITensor *input1, const ITensor } ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input1, input2, output); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::QS8, DataType::U8, DataType::QS16, DataType::S16, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::QS8, DataType::U8, DataType::QS16, DataType::S16, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QS8, DataType::U8, DataType::QS16, DataType::S16, DataType::F16, DataType::F32); ARM_COMPUTE_ERROR_ON_MSG(output->info()->data_type() == DataType::U8 && (input1->info()->data_type() != DataType::U8 || input2->info()->data_type() != DataType::U8), "Output can only be U8 if both inputs are U8"); + if(is_data_type_fixed_point(input1->info()->data_type()) || is_data_type_fixed_point(input2->info()->data_type()) || is_data_type_fixed_point(output->info()->data_type())) + { + // Check that all data types are the same and all fixed-point positions are the same + ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input1, input2, output); + } static std::map<std::string, AddFunction *> map_function = { + { "add_wrap_QS8_QS8_QS8", &add_wrap_QS8_QS8_QS8 }, + { "add_saturate_QS8_QS8_QS8", &add_saturate_QS8_QS8_QS8 }, { "add_wrap_U8_U8_U8", &add_wrap_U8_U8_U8 }, { "add_saturate_U8_U8_U8", &add_saturate_U8_U8_U8 }, { "add_wrap_S16_U8_S16", &add_wrap_S16_U8_S16 }, @@ -368,6 +408,8 @@ void NEArithmeticAdditionKernel::configure(const ITensor *input1, const ITensor { "add_saturate_U8_S16_S16", &add_saturate_U8_S16_S16 }, { "add_wrap_U8_U8_S16", &add_wrap_U8_U8_S16 }, { "add_saturate_U8_U8_S16", &add_saturate_U8_U8_S16 }, + { "add_wrap_QS16_QS16_QS16", &add_wrap_S16_S16_S16 }, + { "add_saturate_QS16_QS16_QS16", &add_saturate_S16_S16_S16 }, { "add_wrap_S16_S16_S16", &add_wrap_S16_S16_S16 }, { "add_saturate_S16_S16_S16", &add_saturate_S16_S16_S16 }, { "add_wrap_F32_F32_F32", &add_F32_F32_F32 }, |