From 91654c45cf1de5f41127536a0fdd310c17fdfc8e Mon Sep 17 00:00:00 2001
From: Pablo Tello
Date: Wed, 5 Jul 2017 11:32:17 +0100
Subject: COMPMID-421: Added FP16 support in ActivationLayer.

Change-Id: I7ba573b19d56e3c87996edb5218a00e5bfca451e
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/79755
Reviewed-by: Anthony Barbier
Tested-by: Kaizen
---
 src/core/NEON/kernels/NEActivationLayerKernel.cpp | 151 +++++++++++++++++++++-
 1 file changed, 149 insertions(+), 2 deletions(-)

(limited to 'src/core/NEON/kernels')

diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.cpp b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
index 70b7057fcd..3195411e18 100644
--- a/src/core/NEON/kernels/NEActivationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
@@ -47,7 +47,7 @@ NEActivationLayerKernel::NEActivationLayerKernel()
 
 void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, ActivationLayerInfo activation_info)
 {
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
 
     _input    = input;
     _act_info = activation_info;
@@ -79,6 +79,23 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat
         { ActivationFunction::SQUARE, &NEActivationLayerKernel::activation<ActivationFunction::SQUARE, float> },
         { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, float> },
     };
+
+#ifdef ARM_COMPUTE_ENABLE_FP16
+    // Activation functions : FP16
+    static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_f16 =
+    {
+        { ActivationFunction::ABS, &NEActivationLayerKernel::activation<ActivationFunction::ABS, float16_t> },
+        { ActivationFunction::LINEAR, &NEActivationLayerKernel::activation<ActivationFunction::LINEAR, float16_t> },
+        { ActivationFunction::LOGISTIC, &NEActivationLayerKernel::activation<ActivationFunction::LOGISTIC, float16_t> },
+        { ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, float16_t> },
+        { ActivationFunction::BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::BOUNDED_RELU, float16_t> },
+        { ActivationFunction::SOFT_RELU, &NEActivationLayerKernel::activation<ActivationFunction::SOFT_RELU, float16_t> },
+        { ActivationFunction::SQRT, &NEActivationLayerKernel::activation<ActivationFunction::SQRT, float16_t> },
+        { ActivationFunction::SQUARE, &NEActivationLayerKernel::activation<ActivationFunction::SQUARE, float16_t> },
+        { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, float16_t> },
+    };
+#endif /* ARM_COMPUTE_ENABLE_FP16*/
+
     // Activation functions : QS8
     static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_qs8 =
     {
@@ -119,6 +136,11 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat
         case DataType::F32:
             _func = act_map_f32[activation_info.activation()];
             break;
+#ifdef ARM_COMPUTE_ENABLE_FP16
+        case DataType::F16:
+            _func = act_map_f16[activation_info.activation()];
+            break;
+#endif /* ARM_COMPUTE_ENABLE_FP16 */
         default:
             ARM_COMPUTE_ERROR("Unsupported data type.");
     }
@@ -148,6 +170,130 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat
 
     ICPPKernel::configure(win);
 }
+#ifdef ARM_COMPUTE_ENABLE_FP16
+template <ActivationLayerInfo::ActivationFunction F, typename T>
+typename std::enable_if<std::is_same<T, float16_t>::value, void>::type NEActivationLayerKernel::activation(const Window &window)
+{
+    Iterator input(_input, window);
+    Iterator output(_output, window);
+
+    static const float16x8_t CONST_0 = vdupq_n_f16(0.f);
+    static const float16x8_t CONST_1 = vdupq_n_f16(1.f);
+
+    const float16x8_t a = vdupq_n_f16(_act_info.a());
+    const float16x8_t b = vdupq_n_f16(_act_info.b());
+
+    execute_window_loop(window, [&](const Coordinates &)
+    {
+        const auto input_ptr  = reinterpret_cast<const float16_t *>(input.ptr());
+        const auto output_ptr = reinterpret_cast<float16_t *>(output.ptr());
+
+        const float16x8x2_t in  = vld2q_f16(input_ptr);
+        float16x8x2_t       tmp = { {} };
+        switch(F)
+        {
+            case ActivationFunction::ABS:
+                tmp =
+                {
+                    {
+                        vabsq_f16(in.val[0]),
+                        vabsq_f16(in.val[1]),
+                    }
+                };
+                break;
+            case ActivationFunction::BOUNDED_RELU:
+                tmp =
+                {
+                    {
+                        vminq_f16(a, vmaxq_f16(CONST_0, in.val[0])),
+                        vminq_f16(a, vmaxq_f16(CONST_0, in.val[1]))
+                    }
+                };
+                break;
+            case ActivationFunction::LINEAR:
+                tmp =
+                {
+                    {
+                        vaddq_f16(b, vmulq_f16(a, in.val[0])),
+                        vaddq_f16(b, vmulq_f16(a, in.val[1]))
+                    }
+                };
+                break;
+            case ActivationFunction::LOGISTIC:
+                tmp =
+                {
+                    {
+                        vinvq_f16(vaddq_f16(CONST_1, vexpq_f16(vnegq_f16(in.val[0])))),
+                        vinvq_f16(vaddq_f16(CONST_1, vexpq_f16(vnegq_f16(in.val[1])))),
+                    }
+                };
+                break;
+            case ActivationFunction::RELU:
+                tmp =
+                {
+                    {
+                        vmaxq_f16(CONST_0, in.val[0]),
+                        vmaxq_f16(CONST_0, in.val[1])
+                    }
+                };
+                break;
+            case ActivationFunction::LEAKY_RELU:
+                tmp =
+                {
+                    {
+                        vbslq_f16(vcgtq_f16(in.val[0], CONST_0), in.val[0], vmulq_f16(a, in.val[0])),
+                        vbslq_f16(vcgtq_f16(in.val[1], CONST_0), in.val[1], vmulq_f16(a, in.val[1]))
+                    }
+                };
+                break;
+            case ActivationFunction::SOFT_RELU:
+                tmp =
+                {
+                    {
+                        vlogq_f16(vaddq_f16(CONST_1, vexpq_f16(in.val[0]))),
+                        vlogq_f16(vaddq_f16(CONST_1, vexpq_f16(in.val[1]))),
+                    }
+                };
+                break;
+            case ActivationFunction::SQRT:
+                tmp =
+                {
+                    {
+                        vinvq_f16(vinvsqrtq_f16(in.val[0])),
+                        vinvq_f16(vinvsqrtq_f16(in.val[1])),
+                    }
+                };
+                break;
+            case ActivationFunction::SQUARE:
+                tmp =
+                {
+                    {
+                        vmulq_f16(in.val[0], in.val[0]),
+                        vmulq_f16(in.val[1], in.val[1])
+                    }
+                };
+                break;
+            case ActivationFunction::TANH:
+                tmp =
+                {
+                    {
+                        vmulq_f16(a, vtanhq_f16(vmulq_f16(b, in.val[0]))),
+                        vmulq_f16(a, vtanhq_f16(vmulq_f16(b, in.val[1]))),
+                    }
+                };
+                break;
+            default:
+                ARM_COMPUTE_ERROR("Not implemented");
+                break;
+        }
+
+        vst2q_f16(output_ptr, tmp);
+    },
+    input, output);
+}
+#endif /* ARM_COMPUTE_ENABLE_FP16 */
+
+
 template <ActivationLayerInfo::ActivationFunction F, typename T>
 typename std::enable_if<std::is_same<T, float>::value, void>::type NEActivationLayerKernel::activation(const Window &window)
 {
@@ -350,7 +496,7 @@ typename std::enable_if::value, void>::type NEActivation
 }
 
 template <ActivationLayerInfo::ActivationFunction F, typename T>
-typename std::enable_if::value, void>::type NEActivationLayerKernel::activation(const Window &window)
+typename std::enable_if::value, void>::type NEActivationLayerKernel::activation(const Window &window)
 {
     Iterator input(_input, window);
     Iterator output(_output, window);
@@ -462,6 +608,7 @@ typename std::enable_if::value, void>::type NEActivatio
                 };
                 break;
             default:
+                ARM_COMPUTE_ERROR("Function not implemented");
                 break;
         }
 
-- 
cgit v1.2.1
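For context, below is a minimal, illustrative sketch of how the FP16 path added by this patch could be driven through the library's public runtime API. It is not part of the patch: it assumes a build with ARM_COMPUTE_ENABLE_FP16 defined and a CPU with FP16 support, and uses the NEActivationLayer, Tensor, TensorInfo and ActivationLayerInfo types from this version of the library; the tensor shape and the choice of RELU are arbitrary.

    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // Half-precision (DataType::F16) source and destination tensors.
        Tensor src{};
        Tensor dst{};
        src.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::F16));
        dst.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::F16));

        // With this patch, configuring an activation on F16 tensors dispatches to the
        // new float16_t kernel specialisations registered in act_map_f16.
        NEActivationLayer act{};
        act.configure(&src, &dst, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));

        // Allocate backing memory, fill src with FP16 data, then run the kernel.
        src.allocator()->allocate();
        dst.allocator()->allocate();
        act.run();

        return 0;
    }

Note that the new kernel code is compiled only when ARM_COMPUTE_ENABLE_FP16 is defined; in builds without it, an F16 input still passes the updated data-type check but falls through to the "Unsupported data type." error in configure().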