diff options
author | Gian Marco Iodice <gianmarco.iodice@arm.com> | 2017-06-30 12:21:00 +0100 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-09-17 14:15:39 +0100 |
commit | bdb6b0bb156588dc39fd5084d4c91d05b5148610 (patch) | |
tree | bb3c3645dd9abbf20462dace7828bb7ec459dc4d /src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp | |
parent | ac69aa137e360340fe9f148f019d93af6c3d8336 (diff) | |
download | ComputeLibrary-bdb6b0bb156588dc39fd5084d4c91d05b5148610.tar.gz |
COMPMID-433 - Port NEGEMM to support 16 bit fixed point
Change-Id: I82de74d7027bbc8a00a4d6671e968785280d5f6c
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/79498
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Moritz Pflanzer <moritz.pflanzer@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp')
-rw-r--r-- | src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp | 32 |
1 file changed, 30 insertions, 2 deletions
diff --git a/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp
index 57d2807b8a..91fbe6f962 100644
--- a/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp
@@ -114,6 +114,31 @@ void matrix_addition_qs8(const ITensor *input, ITensor *output, const Window &wi
     },
     in, out);
 }
+
+void matrix_addition_qs16(const ITensor *input, ITensor *output, const Window &window, float beta)
+{
+    const int fixed_point_position = input->info()->fixed_point_position();
+    const qint16x8_t beta_qs16 = vdupq_n_qs16(scvt_qs16_f32(beta, fixed_point_position));
+
+    Iterator in(input, window);
+    Iterator out(output, window);
+
+    execute_window_loop(window, [&](const Coordinates & id)
+    {
+        const auto in_ptr = reinterpret_cast<const qint16_t *>(in.ptr());
+        const auto out_ptr = reinterpret_cast<qint16_t *>(out.ptr());
+
+        qint16x8x2_t alpha_ab = vld2q_s16(out_ptr);
+        const qint16x8x2_t c = vld2q_s16(in_ptr);
+
+        // Multiply matrix C by its weight and accumulate
+        alpha_ab.val[0] = vqmlaq_qs16(alpha_ab.val[0], c.val[0], beta_qs16, fixed_point_position);
+        alpha_ab.val[1] = vqmlaq_qs16(alpha_ab.val[1], c.val[1], beta_qs16, fixed_point_position);
+
+        vst2q_s16(out_ptr, alpha_ab);
+    },
+    in, out);
+}
 } // namespace
 
 NEGEMMMatrixAdditionKernel::NEGEMMMatrixAdditionKernel()
@@ -123,8 +148,8 @@ NEGEMMMatrixAdditionKernel::NEGEMMMatrixAdditionKernel()
 
 void NEGEMMMatrixAdditionKernel::configure(const ITensor *input, ITensor *output, float beta)
 {
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::F16, DataType::F32);
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QS8, DataType::F16, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
     ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
     ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
     ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != output->info()->dimension(0));
@@ -138,6 +163,9 @@ void NEGEMMMatrixAdditionKernel::configure(const ITensor *input, ITensor *output
         case DataType::QS8:
             _func = &matrix_addition_qs8;
             break;
+        case DataType::QS16:
+            _func = &matrix_addition_qs16;
+            break;
         case DataType::F16:
 #ifdef ARM_COMPUTE_ENABLE_FP16
             _func = &matrix_addition_f16;