From ccc65d44a53eaa61c718cbc4d826c811e2ccebda Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Tue, 27 Jun 2017 17:39:11 +0100 Subject: COMPMID-427: Port NEActivationLayer in 16bit fixed point. Change-Id: Iebd61807f7b597c6bd990673bc7655c68ee16f4b Reviewed-on: http://mpd-gerrit.cambridge.arm.com/79085 Reviewed-by: Moritz Pflanzer Tested-by: Kaizen Reviewed-by: Gian Marco Iodice --- src/core/NEON/kernels/NEActivationLayerKernel.cpp | 149 ++++++++++++++++++++-- src/core/Utils.cpp | 2 + 2 files changed, 140 insertions(+), 11 deletions(-) (limited to 'src/core') diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.cpp b/src/core/NEON/kernels/NEActivationLayerKernel.cpp index 492d197925..f530413453 100644 --- a/src/core/NEON/kernels/NEActivationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEActivationLayerKernel.cpp @@ -47,7 +47,7 @@ NEActivationLayerKernel::NEActivationLayerKernel() void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, ActivationLayerInfo activation_info) { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::QS8); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F32); _input = input; _act_info = activation_info; @@ -78,7 +78,6 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat { ActivationFunction::SQUARE, &NEActivationLayerKernel::activation }, { ActivationFunction::TANH, &NEActivationLayerKernel::activation }, }; - // Activation functions : QS8 static std::map act_map_qs8 = { @@ -92,15 +91,31 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat { ActivationFunction::SQUARE, &NEActivationLayerKernel::activation }, { ActivationFunction::TANH, &NEActivationLayerKernel::activation }, }; + // Activation functions : QS16 + static std::map act_map_qs16 = + { + { ActivationFunction::ABS, &NEActivationLayerKernel::activation }, + { ActivationFunction::LINEAR, &NEActivationLayerKernel::activation }, + { ActivationFunction::LOGISTIC, &NEActivationLayerKernel::activation }, + { ActivationFunction::RELU, &NEActivationLayerKernel::activation }, + { ActivationFunction::BOUNDED_RELU, &NEActivationLayerKernel::activation }, + { ActivationFunction::SOFT_RELU, &NEActivationLayerKernel::activation }, + { ActivationFunction::SQRT, &NEActivationLayerKernel::activation }, + { ActivationFunction::SQUARE, &NEActivationLayerKernel::activation }, + { ActivationFunction::TANH, &NEActivationLayerKernel::activation }, + }; switch(input->info()->data_type()) { - case DataType::F32: - _func = act_map_f32[activation_info.activation()]; - break; case DataType::QS8: _func = act_map_qs8[activation_info.activation()]; break; + case DataType::QS16: + _func = act_map_qs16[activation_info.activation()]; + break; + case DataType::F32: + _func = act_map_f32[activation_info.activation()]; + break; default: ARM_COMPUTE_ERROR("Unsupported data type."); } @@ -262,9 +277,9 @@ typename std::enable_if::value, void>::type NEActivationL template typename std::enable_if::value, void>::type NEActivationLayerKernel::activation(const Window &window) { - Iterator input(_input, window); - Iterator output(_output, window); - int fixed_point_position = _input->info()->fixed_point_position(); + Iterator input(_input, window); + Iterator output(_output, window); + const int fixed_point_position = _input->info()->fixed_point_position(); static const qint8x16_t CONST_0 = vdupq_n_qs8(0); const qint8x16_t CONST_1 = vdupq_n_qs8(sqcvt_qs8_f32(1.f, fixed_point_position)); @@ -291,7 +306,7 @@ typename std::enable_if::value, void>::type NEActivation tmp = vqmlaq_qs8(b, a, in, fixed_point_position); break; case ActivationFunction::LOGISTIC: - tmp = vrecipq_qs8(vqaddq_qs8(CONST_1, vqexpq_qs8(vnegq_s8(in), fixed_point_position)), fixed_point_position); + tmp = vqrecipq_qs8(vqaddq_qs8(CONST_1, vqexpq_qs8(vnegq_s8(in), fixed_point_position)), fixed_point_position); break; case ActivationFunction::RELU: tmp = vmaxq_qs8(CONST_0, in); @@ -300,13 +315,13 @@ typename std::enable_if::value, void>::type NEActivation tmp = vlogq_qs8(vqaddq_qs8(CONST_1, vqexpq_qs8(in, fixed_point_position)), fixed_point_position); break; case ActivationFunction::SQRT: - tmp = vrecipq_qs8(vinvsqrtq_qs8(in, fixed_point_position), fixed_point_position); + tmp = vqrecipq_qs8(vqinvsqrtq_qs8(in, fixed_point_position), fixed_point_position); break; case ActivationFunction::SQUARE: tmp = vqmulq_qs8(in, in, fixed_point_position); break; case ActivationFunction::TANH: - tmp = vtanhq_qs8(in, fixed_point_position); + tmp = vqmulq_qs8(a, vqtanhq_qs8(vqmulq_qs8(b, in, fixed_point_position), fixed_point_position), fixed_point_position); break; default: break; @@ -317,6 +332,118 @@ typename std::enable_if::value, void>::type NEActivation input, output); } +template +typename std::enable_if::value, void>::type NEActivationLayerKernel::activation(const Window &window) +{ + Iterator input(_input, window); + Iterator output(_output, window); + const int fixed_point_position = _input->info()->fixed_point_position(); + + static const qint16x8_t CONST_0 = vdupq_n_qs16(0); + const qint16x8_t CONST_1 = vdupq_n_qs16(sqcvt_qs16_f32(1.f, fixed_point_position)); + const qint16x8_t a = vdupq_n_qs16(sqcvt_qs16_f32(_act_info.a(), fixed_point_position)); + const qint16x8_t b = vdupq_n_qs16(sqcvt_qs16_f32(_act_info.b(), fixed_point_position)); + + execute_window_loop(window, [&](const Coordinates & id) + { + const auto input_ptr = reinterpret_cast(input.ptr()); + const auto output_ptr = reinterpret_cast(output.ptr()); + + const qint16x8x2_t in = vld2q_s16(input_ptr); + qint16x8x2_t tmp = { {} }; + + switch(F) + { + case ActivationFunction::ABS: + tmp = + { + { + vqabsq_qs16(in.val[0]), + vqabsq_qs16(in.val[1]), + } + }; + break; + case ActivationFunction::BOUNDED_RELU: + tmp = + { + { + vminq_qs16(a, vmaxq_qs16(CONST_0, in.val[0])), + vminq_qs16(a, vmaxq_qs16(CONST_0, in.val[1])), + } + }; + break; + case ActivationFunction::LINEAR: + tmp = + { + { + vqmlaq_qs16(b, a, in.val[0], fixed_point_position), + vqmlaq_qs16(b, a, in.val[1], fixed_point_position), + } + }; + break; + case ActivationFunction::LOGISTIC: + tmp = + { + { + vqrecipq_qs16(vqaddq_qs16(CONST_1, vqexpq_qs16(vnegq_s16(in.val[0]), fixed_point_position)), fixed_point_position), + vqrecipq_qs16(vqaddq_qs16(CONST_1, vqexpq_qs16(vnegq_s16(in.val[1]), fixed_point_position)), fixed_point_position), + } + }; + break; + case ActivationFunction::RELU: + tmp = + { + { + vmaxq_qs16(CONST_0, in.val[0]), + vmaxq_qs16(CONST_0, in.val[1]), + } + }; + break; + case ActivationFunction::SOFT_RELU: + tmp = + { + { + vlogq_qs16(vqaddq_qs16(CONST_1, vqexpq_qs16(in.val[0], fixed_point_position)), fixed_point_position), + vlogq_qs16(vqaddq_qs16(CONST_1, vqexpq_qs16(in.val[1], fixed_point_position)), fixed_point_position), + } + }; + break; + case ActivationFunction::SQRT: + tmp = + { + { + vqrecipq_qs16(vqinvsqrtq_qs16(in.val[0], fixed_point_position), fixed_point_position), + vqrecipq_qs16(vqinvsqrtq_qs16(in.val[1], fixed_point_position), fixed_point_position), + } + }; + break; + case ActivationFunction::SQUARE: + tmp = + { + { + vqmulq_qs16(in.val[0], in.val[0], fixed_point_position), + vqmulq_qs16(in.val[1], in.val[1], fixed_point_position), + } + }; + break; + case ActivationFunction::TANH: + tmp = + { + { + vqmulq_qs16(a, vqtanhq_qs16(vqmulq_qs16(b, in.val[0], fixed_point_position), fixed_point_position), fixed_point_position), + vqmulq_qs16(a, vqtanhq_qs16(vqmulq_qs16(b, in.val[1], fixed_point_position), fixed_point_position), fixed_point_position), + } + }; + break; + default: + break; + } + + vst2q_qs16(output_ptr, tmp); + }, + input, output); +} + void NEActivationLayerKernel::run(const Window &window) { ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); diff --git a/src/core/Utils.cpp b/src/core/Utils.cpp index f6230c0199..11b41aa178 100644 --- a/src/core/Utils.cpp +++ b/src/core/Utils.cpp @@ -286,6 +286,7 @@ void arm_compute::print_consecutive_elements(std::ostream &s, DataType dt, const case DataType::U16: print_consecutive_elements_impl(s, reinterpret_cast(ptr), n, stream_width, element_delim); break; + case DataType::QS16: case DataType::S16: print_consecutive_elements_impl(s, reinterpret_cast(ptr), n, stream_width, element_delim); break; @@ -316,6 +317,7 @@ int arm_compute::max_consecutive_elements_display_width(std::ostream &s, DataTyp return max_consecutive_elements_display_width_impl(s, reinterpret_cast(ptr), n); case DataType::U16: return max_consecutive_elements_display_width_impl(s, reinterpret_cast(ptr), n); + case DataType::QS16: case DataType::S16: return max_consecutive_elements_display_width_impl(s, reinterpret_cast(ptr), n); case DataType::U32: -- cgit v1.2.1