author     Pablo Tello <pablo.tello@arm.com>          2017-07-05 11:32:17 +0100
committer  Anthony Barbier <anthony.barbier@arm.com>  2018-09-17 14:16:42 +0100
commit     91654c45cf1de5f41127536a0fdd310c17fdfc8e (patch)
tree       1cf914061c456282f0ba899ebbdc591cabc7f0fc /src/core/NEON/kernels/NEActivationLayerKernel.cpp
parent     ec69f93dc63408933d322ec27d0b7049b9a6e07c (diff)
download   ComputeLibrary-91654c45cf1de5f41127536a0fdd310c17fdfc8e.tar.gz
COMPMID-421: Added FP16 support in ActivationLayer.
Change-Id: I7ba573b19d56e3c87996edb5218a00e5bfca451e
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/79755
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/NEActivationLayerKernel.cpp')
-rw-r--r--  src/core/NEON/kernels/NEActivationLayerKernel.cpp  151
1 file changed, 149 insertions, 2 deletions
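
For context, here is a minimal usage sketch of the code path this patch enables: running the NEON activation layer on half-precision tensors. It is not part of the patch; it assumes the library is built with ARM_COMPUTE_ENABLE_FP16 and runs on an FP16-capable core, and the tensor shape and activation parameters are purely illustrative.

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void run_fp16_activation()
{
    Tensor src{}, dst{};
    // F16 tensors; shape 16x16 chosen only for illustration.
    src.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::F16));
    dst.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::F16));

    NEActivationLayer act{};
    // BOUNDED_RELU clamps the input to [0, a]; a = 6.f is an arbitrary example value.
    act.configure(&src, &dst, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f));

    src.allocator()->allocate();
    dst.allocator()->allocate();

    // ... fill src with float16_t data ...

    act.run();
}

With this change in place, configure() dispatches DataType::F16 inputs to the new float16_t kernel shown in the patch below instead of rejecting them.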
diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.cpp b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
index 70b7057fcd..3195411e18 100644
--- a/src/core/NEON/kernels/NEActivationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
@@ -47,7 +47,7 @@ NEActivationLayerKernel::NEActivationLayerKernel()
 
 void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, ActivationLayerInfo activation_info)
 {
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
 
     _input    = input;
     _act_info = activation_info;
@@ -79,6 +79,23 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat
         { ActivationFunction::SQUARE, &NEActivationLayerKernel::activation<ActivationFunction::SQUARE, float> },
         { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, float> },
     };
+
+#ifdef ARM_COMPUTE_ENABLE_FP16
+    // Activation functions : FP16
+    static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_f16 =
+    {
+        { ActivationFunction::ABS, &NEActivationLayerKernel::activation<ActivationFunction::ABS, float16_t> },
+        { ActivationFunction::LINEAR, &NEActivationLayerKernel::activation<ActivationFunction::LINEAR, float16_t> },
+        { ActivationFunction::LOGISTIC, &NEActivationLayerKernel::activation<ActivationFunction::LOGISTIC, float16_t> },
+        { ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, float16_t> },
+        { ActivationFunction::BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::BOUNDED_RELU, float16_t> },
+        { ActivationFunction::SOFT_RELU, &NEActivationLayerKernel::activation<ActivationFunction::SOFT_RELU, float16_t> },
+        { ActivationFunction::SQRT, &NEActivationLayerKernel::activation<ActivationFunction::SQRT, float16_t> },
+        { ActivationFunction::SQUARE, &NEActivationLayerKernel::activation<ActivationFunction::SQUARE, float16_t> },
+        { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, float16_t> },
+    };
+#endif /* ARM_COMPUTE_ENABLE_FP16 */
+
     // Activation functions : QS8
     static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_qs8 =
     {
@@ -119,6 +136,11 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat
         case DataType::F32:
             _func = act_map_f32[activation_info.activation()];
            break;
+#ifdef ARM_COMPUTE_ENABLE_FP16
+        case DataType::F16:
+            _func = act_map_f16[activation_info.activation()];
+            break;
+#endif /* ARM_COMPUTE_ENABLE_FP16 */
         default:
             ARM_COMPUTE_ERROR("Unsupported data type.");
     }
@@ -148,6 +170,130 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat
     ICPPKernel::configure(win);
 }
 
+#ifdef ARM_COMPUTE_ENABLE_FP16
+template <ActivationLayerInfo::ActivationFunction F, typename T>
+typename std::enable_if<std::is_same<T, float16_t>::value, void>::type NEActivationLayerKernel::activation(const Window &window)
+{
+    Iterator input(_input, window);
+    Iterator output(_output, window);
+
+    static const float16x8_t CONST_0 = vdupq_n_f16(0.f);
+    static const float16x8_t CONST_1 = vdupq_n_f16(1.f);
+
+    const float16x8_t a = vdupq_n_f16(_act_info.a());
+    const float16x8_t b = vdupq_n_f16(_act_info.b());
+
+    execute_window_loop(window, [&](const Coordinates &)
+    {
+        const auto input_ptr  = reinterpret_cast<const float16_t *>(input.ptr());
+        const auto output_ptr = reinterpret_cast<float16_t *>(output.ptr());
+
+        const float16x8x2_t in  = vld2q_f16(input_ptr);
+        float16x8x2_t       tmp = { {} };
+
+        switch(F)
+        {
+            case ActivationFunction::ABS:
+                tmp =
+                {
+                    {
+                        vabsq_f16(in.val[0]),
+                        vabsq_f16(in.val[1]),
+                    }
+                };
+                break;
+            case ActivationFunction::BOUNDED_RELU:
+                tmp =
+                {
+                    {
+                        vminq_f16(a, vmaxq_f16(CONST_0, in.val[0])),
+                        vminq_f16(a, vmaxq_f16(CONST_0, in.val[1]))
+                    }
+                };
+                break;
+            case ActivationFunction::LINEAR:
+                tmp =
+                {
+                    {
+                        vaddq_f16(b, vmulq_f16(a, in.val[0])),
+                        vaddq_f16(b, vmulq_f16(a, in.val[1]))
+                    }
+                };
+                break;
+            case ActivationFunction::LOGISTIC:
+                tmp =
+                {
+                    {
+                        vinvq_f16(vaddq_f16(CONST_1, vexpq_f16(vnegq_f16(in.val[0])))),
+                        vinvq_f16(vaddq_f16(CONST_1, vexpq_f16(vnegq_f16(in.val[1])))),
+                    }
+                };
+                break;
+            case ActivationFunction::RELU:
+                tmp =
+                {
+                    {
+                        vmaxq_f16(CONST_0, in.val[0]),
+                        vmaxq_f16(CONST_0, in.val[1])
+                    }
+                };
+                break;
+            case ActivationFunction::LEAKY_RELU:
+                tmp =
+                {
+                    {
+                        vbslq_f16(vcgtq_f16(in.val[0], CONST_0), in.val[0], vmulq_f16(a, in.val[0])),
+                        vbslq_f16(vcgtq_f16(in.val[1], CONST_0), in.val[1], vmulq_f16(a, in.val[1]))
+                    }
+                };
+                break;
+            case ActivationFunction::SOFT_RELU:
+                tmp =
+                {
+                    {
+                        vlogq_f16(vaddq_f16(CONST_1, vexpq_f16(in.val[0]))),
+                        vlogq_f16(vaddq_f16(CONST_1, vexpq_f16(in.val[1]))),
+                    }
+                };
+                break;
+            case ActivationFunction::SQRT:
+                tmp =
+                {
+                    {
+                        vinvq_f16(vinvsqrtq_f16(in.val[0])),
+                        vinvq_f16(vinvsqrtq_f16(in.val[1])),
+                    }
+                };
+                break;
+            case ActivationFunction::SQUARE:
+                tmp =
+                {
+                    {
+                        vmulq_f16(in.val[0], in.val[0]),
+                        vmulq_f16(in.val[1], in.val[1])
+                    }
+                };
+                break;
+            case ActivationFunction::TANH:
+                tmp =
+                {
+                    {
+                        vmulq_f16(a, vtanhq_f16(vmulq_f16(b, in.val[0]))),
+                        vmulq_f16(a, vtanhq_f16(vmulq_f16(b, in.val[1]))),
+                    }
+                };
+                break;
+            default:
+                ARM_COMPUTE_ERROR("Not implemented");
+                break;
+        }
+
+        vst2q_f16(output_ptr, tmp);
+    },
+    input, output);
+}
+#endif /* ARM_COMPUTE_ENABLE_FP16 */
+
 template <ActivationLayerInfo::ActivationFunction F, typename T>
 typename std::enable_if<std::is_same<T, float>::value, void>::type NEActivationLayerKernel::activation(const Window &window)
 {
@@ -350,7 +496,7 @@ typename std::enable_if<std::is_same<T, int8_t>::value, void>::type NEActivation
 }
 
 template <ActivationLayerInfo::ActivationFunction F, typename T>
-typename std::enable_if<std::is_same<T, int16_t>::value, void>::type NEActivationLayerKernel::activation(const Window &window)
+typename std::enable_if<std::is_same<T, qint16_t>::value, void>::type NEActivationLayerKernel::activation(const Window &window)
 {
     Iterator input(_input, window);
     Iterator output(_output, window);
@@ -462,6 +608,7 @@ typename std::enable_if<std::is_same<T, int16_t>::value, void>::type NEActivatio
                 };
                 break;
             default:
+                ARM_COMPUTE_ERROR("Function not implemented");
                 break;
         }
 
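
The float16_t kernel above processes two float16x8_t lanes per iteration and maps each ActivationFunction to its half-precision NEON form. As a quick cross-check of what those vector expressions compute, the sketch below gives a hypothetical scalar reference for the same formulas (it is not part of the patch or the library; the enum and function names are made up for illustration, and a/b correspond to ActivationLayerInfo::a() and ActivationLayerInfo::b()):

#include <algorithm>
#include <cmath>

// Hypothetical scalar reference for the vectorised FP16 activations above.
enum class Act { ABS, BOUNDED_RELU, LINEAR, LOGISTIC, RELU, LEAKY_RELU, SOFT_RELU, SQRT, SQUARE, TANH };

float activation_ref(Act f, float x, float a, float b)
{
    switch(f)
    {
        case Act::ABS:          return std::abs(x);                       // vabsq_f16
        case Act::BOUNDED_RELU: return std::min(a, std::max(0.f, x));     // vminq_f16(a, vmaxq_f16(0, x))
        case Act::LINEAR:       return a * x + b;                         // vaddq_f16(b, vmulq_f16(a, x))
        case Act::LOGISTIC:     return 1.f / (1.f + std::exp(-x));        // vinvq_f16(1 + vexpq_f16(-x))
        case Act::RELU:         return std::max(0.f, x);                  // vmaxq_f16(0, x)
        case Act::LEAKY_RELU:   return x > 0.f ? x : a * x;               // vbslq_f16(x > 0, x, a*x)
        case Act::SOFT_RELU:    return std::log(1.f + std::exp(x));       // vlogq_f16(1 + vexpq_f16(x))
        case Act::SQRT:         return std::sqrt(x);                      // vinvq_f16(vinvsqrtq_f16(x))
        case Act::SQUARE:       return x * x;                             // vmulq_f16(x, x)
        case Act::TANH:         return a * std::tanh(b * x);              // a * vtanhq_f16(b * x)
    }
    return x;
}

Note that LEAKY_RELU is handled in the switch of the new kernel but is not registered in act_map_f16, so only the mapped functions are reachable through configure() in this patch.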