From bdb6b0bb156588dc39fd5084d4c91d05b5148610 Mon Sep 17 00:00:00 2001 From: Gian Marco Iodice Date: Fri, 30 Jun 2017 12:21:00 +0100 Subject: COMPMID-433 - Port NEGEMM to support 16 bit fixed point Change-Id: I82de74d7027bbc8a00a4d6671e968785280d5f6c Reviewed-on: http://mpd-gerrit.cambridge.arm.com/79498 Reviewed-by: Georgios Pinitas Tested-by: Kaizen Reviewed-by: Moritz Pflanzer Reviewed-by: Anthony Barbier --- .../NEON/kernels/NEGEMMMatrixAdditionKernel.cpp | 32 ++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) (limited to 'src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp') diff --git a/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp index 57d2807b8a..91fbe6f962 100644 --- a/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp @@ -114,6 +114,31 @@ void matrix_addition_qs8(const ITensor *input, ITensor *output, const Window &wi }, in, out); } + +void matrix_addition_qs16(const ITensor *input, ITensor *output, const Window &window, float beta) +{ + const int fixed_point_position = input->info()->fixed_point_position(); + const qint16x8_t beta_qs16 = vdupq_n_qs16(scvt_qs16_f32(beta, fixed_point_position)); + + Iterator in(input, window); + Iterator out(output, window); + + execute_window_loop(window, [&](const Coordinates & id) + { + const auto in_ptr = reinterpret_cast<const qint16_t *>(in.ptr()); + const auto out_ptr = reinterpret_cast<qint16_t *>(out.ptr()); + + qint16x8x2_t alpha_ab = vld2q_s16(out_ptr); + const qint16x8x2_t c = vld2q_s16(in_ptr); + + // Multiply matrix C by its weight and accumulate + alpha_ab.val[0] = vqmlaq_qs16(alpha_ab.val[0], c.val[0], beta_qs16, fixed_point_position); + alpha_ab.val[1] = vqmlaq_qs16(alpha_ab.val[1], c.val[1], beta_qs16, fixed_point_position); + + vst2q_s16(out_ptr, alpha_ab); + }, + in, out); +} } // namespace NEGEMMMatrixAdditionKernel::NEGEMMMatrixAdditionKernel() @@ -123,8 +148,8 @@ 
NEGEMMMatrixAdditionKernel::NEGEMMMatrixAdditionKernel() void NEGEMMMatrixAdditionKernel::configure(const ITensor *input, ITensor *output, float beta) { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QS8, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, output); ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != output->info()->dimension(0)); @@ -138,6 +163,9 @@ void NEGEMMMatrixAdditionKernel::configure(const ITensor *input, ITensor *output case DataType::QS8: _func = &matrix_addition_qs8; break; + case DataType::QS16: + _func = &matrix_addition_qs16; + break; case DataType::F16: #ifdef ARM_COMPUTE_ENABLE_FP16 _func = &matrix_addition_f16; -- cgit v1.2.1