diff options
author | Georgios Pinitas <georgios.pinitas@arm.com> | 2017-06-27 17:39:11 +0100 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-09-17 14:15:39 +0100 |
commit | ccc65d44a53eaa61c718cbc4d826c811e2ccebda (patch) | |
tree | a5394c05c43ece384d629ceae644f5c7b5d23280 /src/core/NEON | |
parent | 0745a980c6a5e2d294bcd09f3c704e6cf4fe316d (diff) | |
download | ComputeLibrary-ccc65d44a53eaa61c718cbc4d826c811e2ccebda.tar.gz |
COMPMID-427: Port NEActivationLayer in 16bit fixed point.
Change-Id: Iebd61807f7b597c6bd990673bc7655c68ee16f4b
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/79085
Reviewed-by: Moritz Pflanzer <moritz.pflanzer@arm.com>
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Diffstat (limited to 'src/core/NEON')
-rw-r--r-- | src/core/NEON/kernels/NEActivationLayerKernel.cpp | 149 |
1 file changed, 138 insertions(+), 11 deletions(-)
diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.cpp b/src/core/NEON/kernels/NEActivationLayerKernel.cpp index 492d197925..f530413453 100644 --- a/src/core/NEON/kernels/NEActivationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEActivationLayerKernel.cpp @@ -47,7 +47,7 @@ NEActivationLayerKernel::NEActivationLayerKernel() void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, ActivationLayerInfo activation_info) { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::QS8); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F32); _input = input; _act_info = activation_info; @@ -78,7 +78,6 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat { ActivationFunction::SQUARE, &NEActivationLayerKernel::activation<ActivationFunction::SQUARE, float> }, { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, float> }, }; - // Activation functions : QS8 static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_qs8 = { @@ -92,15 +91,31 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat { ActivationFunction::SQUARE, &NEActivationLayerKernel::activation<ActivationFunction::SQUARE, qint8_t> }, { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, qint8_t> }, }; + // Activation functions : QS16 + static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_qs16 = + { + { ActivationFunction::ABS, &NEActivationLayerKernel::activation<ActivationFunction::ABS, qint16_t> }, + { ActivationFunction::LINEAR, &NEActivationLayerKernel::activation<ActivationFunction::LINEAR, qint16_t> }, + { ActivationFunction::LOGISTIC, &NEActivationLayerKernel::activation<ActivationFunction::LOGISTIC, qint16_t> }, + { ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, qint16_t> }, + { 
ActivationFunction::BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::BOUNDED_RELU, qint16_t> }, + { ActivationFunction::SOFT_RELU, &NEActivationLayerKernel::activation<ActivationFunction::SOFT_RELU, qint16_t> }, + { ActivationFunction::SQRT, &NEActivationLayerKernel::activation<ActivationFunction::SQRT, qint16_t> }, + { ActivationFunction::SQUARE, &NEActivationLayerKernel::activation<ActivationFunction::SQUARE, qint16_t> }, + { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, qint16_t> }, + }; switch(input->info()->data_type()) { - case DataType::F32: - _func = act_map_f32[activation_info.activation()]; - break; case DataType::QS8: _func = act_map_qs8[activation_info.activation()]; break; + case DataType::QS16: + _func = act_map_qs16[activation_info.activation()]; + break; + case DataType::F32: + _func = act_map_f32[activation_info.activation()]; + break; default: ARM_COMPUTE_ERROR("Unsupported data type."); } @@ -262,9 +277,9 @@ typename std::enable_if<std::is_same<T, float>::value, void>::type NEActivationL template <ActivationLayerInfo::ActivationFunction F, typename T> typename std::enable_if<std::is_same<T, int8_t>::value, void>::type NEActivationLayerKernel::activation(const Window &window) { - Iterator input(_input, window); - Iterator output(_output, window); - int fixed_point_position = _input->info()->fixed_point_position(); + Iterator input(_input, window); + Iterator output(_output, window); + const int fixed_point_position = _input->info()->fixed_point_position(); static const qint8x16_t CONST_0 = vdupq_n_qs8(0); const qint8x16_t CONST_1 = vdupq_n_qs8(sqcvt_qs8_f32(1.f, fixed_point_position)); @@ -291,7 +306,7 @@ typename std::enable_if<std::is_same<T, int8_t>::value, void>::type NEActivation tmp = vqmlaq_qs8(b, a, in, fixed_point_position); break; case ActivationFunction::LOGISTIC: - tmp = vrecipq_qs8(vqaddq_qs8(CONST_1, vqexpq_qs8(vnegq_s8(in), fixed_point_position)), 
fixed_point_position); + tmp = vqrecipq_qs8(vqaddq_qs8(CONST_1, vqexpq_qs8(vnegq_s8(in), fixed_point_position)), fixed_point_position); break; case ActivationFunction::RELU: tmp = vmaxq_qs8(CONST_0, in); @@ -300,13 +315,13 @@ typename std::enable_if<std::is_same<T, int8_t>::value, void>::type NEActivation tmp = vlogq_qs8(vqaddq_qs8(CONST_1, vqexpq_qs8(in, fixed_point_position)), fixed_point_position); break; case ActivationFunction::SQRT: - tmp = vrecipq_qs8(vinvsqrtq_qs8(in, fixed_point_position), fixed_point_position); + tmp = vqrecipq_qs8(vqinvsqrtq_qs8(in, fixed_point_position), fixed_point_position); break; case ActivationFunction::SQUARE: tmp = vqmulq_qs8(in, in, fixed_point_position); break; case ActivationFunction::TANH: - tmp = vtanhq_qs8(in, fixed_point_position); + tmp = vqmulq_qs8(a, vqtanhq_qs8(vqmulq_qs8(b, in, fixed_point_position), fixed_point_position), fixed_point_position); break; default: break; @@ -317,6 +332,118 @@ typename std::enable_if<std::is_same<T, int8_t>::value, void>::type NEActivation input, output); } +template <ActivationLayerInfo::ActivationFunction F, typename T> +typename std::enable_if<std::is_same<T, int16_t>::value, void>::type NEActivationLayerKernel::activation(const Window &window) +{ + Iterator input(_input, window); + Iterator output(_output, window); + const int fixed_point_position = _input->info()->fixed_point_position(); + + static const qint16x8_t CONST_0 = vdupq_n_qs16(0); + const qint16x8_t CONST_1 = vdupq_n_qs16(sqcvt_qs16_f32(1.f, fixed_point_position)); + const qint16x8_t a = vdupq_n_qs16(sqcvt_qs16_f32(_act_info.a(), fixed_point_position)); + const qint16x8_t b = vdupq_n_qs16(sqcvt_qs16_f32(_act_info.b(), fixed_point_position)); + + execute_window_loop(window, [&](const Coordinates & id) + { + const auto input_ptr = reinterpret_cast<const int16_t *>(input.ptr()); + const auto output_ptr = reinterpret_cast<int16_t *>(output.ptr()); + + const qint16x8x2_t in = vld2q_s16(input_ptr); + qint16x8x2_t tmp = { {} }; + 
+ switch(F) + { + case ActivationFunction::ABS: + tmp = + { + { + vqabsq_qs16(in.val[0]), + vqabsq_qs16(in.val[1]), + } + }; + break; + case ActivationFunction::BOUNDED_RELU: + tmp = + { + { + vminq_qs16(a, vmaxq_qs16(CONST_0, in.val[0])), + vminq_qs16(a, vmaxq_qs16(CONST_0, in.val[1])), + } + }; + break; + case ActivationFunction::LINEAR: + tmp = + { + { + vqmlaq_qs16(b, a, in.val[0], fixed_point_position), + vqmlaq_qs16(b, a, in.val[1], fixed_point_position), + } + }; + break; + case ActivationFunction::LOGISTIC: + tmp = + { + { + vqrecipq_qs16(vqaddq_qs16(CONST_1, vqexpq_qs16(vnegq_s16(in.val[0]), fixed_point_position)), fixed_point_position), + vqrecipq_qs16(vqaddq_qs16(CONST_1, vqexpq_qs16(vnegq_s16(in.val[1]), fixed_point_position)), fixed_point_position), + } + }; + break; + case ActivationFunction::RELU: + tmp = + { + { + vmaxq_qs16(CONST_0, in.val[0]), + vmaxq_qs16(CONST_0, in.val[1]), + } + }; + break; + case ActivationFunction::SOFT_RELU: + tmp = + { + { + vlogq_qs16(vqaddq_qs16(CONST_1, vqexpq_qs16(in.val[0], fixed_point_position)), fixed_point_position), + vlogq_qs16(vqaddq_qs16(CONST_1, vqexpq_qs16(in.val[1], fixed_point_position)), fixed_point_position), + } + }; + break; + case ActivationFunction::SQRT: + tmp = + { + { + vqrecipq_qs16(vqinvsqrtq_qs16(in.val[0], fixed_point_position), fixed_point_position), + vqrecipq_qs16(vqinvsqrtq_qs16(in.val[1], fixed_point_position), fixed_point_position), + } + }; + break; + case ActivationFunction::SQUARE: + tmp = + { + { + vqmulq_qs16(in.val[0], in.val[0], fixed_point_position), + vqmulq_qs16(in.val[1], in.val[1], fixed_point_position), + } + }; + break; + case ActivationFunction::TANH: + tmp = + { + { + vqmulq_qs16(a, vqtanhq_qs16(vqmulq_qs16(b, in.val[0], fixed_point_position), fixed_point_position), fixed_point_position), + vqmulq_qs16(a, vqtanhq_qs16(vqmulq_qs16(b, in.val[1], fixed_point_position), fixed_point_position), fixed_point_position), + } + }; + break; + default: + break; + } + + 
vst2q_qs16(output_ptr, tmp); + }, + input, output); +} + void NEActivationLayerKernel::run(const Window &window) { ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); |