| author | Georgios Pinitas <georgios.pinitas@arm.com> | 2018-08-23 13:11:53 +0100 |
|---|---|---|
| committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:54:54 +0000 |
| commit | 3463a8b9eed57340366743340d2d06df3aa1ae88 (patch) | |
| tree | dda328b4fc59fb0a9f7a930a25ef65ce8f20e179 /src | |
| parent | 2113b9d9ada3a392b1215c4afd7715249d629bfc (diff) | |
| download | ComputeLibrary-3463a8b9eed57340366743340d2d06df3aa1ae88.tar.gz | |
COMPMID-1534: Fix NEActivationLayer for FP16
Simulates Logistic, Tanh and SoftRelu in FP32
Change-Id: I9950f7636b8ff2f3e054937e5ef414e45dfe06f5
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/145357
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src')
-rw-r--r-- | src/core/NEON/kernels/NEActivationLayerKernel.cpp | 80 |
1 file changed, 67 insertions, 13 deletions
```diff
diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.cpp b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
index 1dad531a40..2163f7bb63 100644
--- a/src/core/NEON/kernels/NEActivationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
@@ -138,6 +138,7 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat
         { ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, float16_t> },
         { ActivationFunction::BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::BOUNDED_RELU, float16_t> },
         { ActivationFunction::LU_BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LU_BOUNDED_RELU, float16_t> },
+        { ActivationFunction::LEAKY_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LEAKY_RELU, float16_t> },
         { ActivationFunction::SOFT_RELU, &NEActivationLayerKernel::activation<ActivationFunction::SOFT_RELU, float16_t> },
         { ActivationFunction::SQRT, &NEActivationLayerKernel::activation<ActivationFunction::SQRT, float16_t> },
         { ActivationFunction::SQUARE, &NEActivationLayerKernel::activation<ActivationFunction::SQUARE, float16_t> },
@@ -182,11 +183,14 @@ typename std::enable_if<std::is_same<T, float16_t>::value, void>::type NEActivat
     Iterator input(_input, window);
     Iterator output(_output, window);
 
-    static const float16x8_t CONST_0 = vdupq_n_f16(0.f);
-    static const float16x8_t CONST_1 = vdupq_n_f16(1.f);
+    static const float16x8_t CONST_0   = vdupq_n_f16(0.f);
+    static const float16x4_t CONST_1_H = vdup_n_f16(1.f);
 
-    const float16x8_t a = vdupq_n_f16(_act_info.a());
-    const float16x8_t b = vdupq_n_f16(_act_info.b());
+    static const float32x4_t CONST_1_F32 = vdupq_n_f32(1.f);
+
+    const float16x8_t a   = vdupq_n_f16(_act_info.a());
+    const float16x4_t a_h = vdup_n_f16(_act_info.a());
+    const float16x8_t b   = vdupq_n_f16(_act_info.b());
 
     execute_window_loop(window, [&](const Coordinates &)
     {
@@ -235,14 +239,29 @@ typename std::enable_if<std::is_same<T, float16_t>::value, void>::type NEActivat
                 };
                 break;
             case ActivationFunction::LOGISTIC:
+            {
+                // TODO (COMPMID-1535) : Revisit FP16 approximations
+                const float16x4x2_t in0 =
+                {
+                    vinv_f16(vadd_f16(CONST_1_H, vcvt_f16_f32(vexpq_f32(vcvt_f32_f16(vneg_f16(vget_low_f16(in.val[0]))))))),
+                    vinv_f16(vadd_f16(CONST_1_H, vcvt_f16_f32(vexpq_f32(vcvt_f32_f16(vneg_f16(vget_high_f16(in.val[0]))))))),
+                };
+
+                const float16x4x2_t in1 =
+                {
+                    vinv_f16(vadd_f16(CONST_1_H, vcvt_f16_f32(vexpq_f32(vcvt_f32_f16(vneg_f16(vget_low_f16(in.val[1]))))))),
+                    vinv_f16(vadd_f16(CONST_1_H, vcvt_f16_f32(vexpq_f32(vcvt_f32_f16(vneg_f16(vget_high_f16(in.val[1]))))))),
+                };
+
                 tmp =
                 {
                     {
-                        vinvq_f16(vaddq_f16(CONST_1, vexpq_f16(vnegq_f16(in.val[0])))),
-                        vinvq_f16(vaddq_f16(CONST_1, vexpq_f16(vnegq_f16(in.val[1])))),
+                        vcombine_f16(in0.val[0], in0.val[1]),
+                        vcombine_f16(in1.val[0], in1.val[1]),
                     }
                 };
-                break;
+            }
+            break;
             case ActivationFunction::RELU:
                 tmp =
                 {
@@ -262,14 +281,29 @@ typename std::enable_if<std::is_same<T, float16_t>::value, void>::type NEActivat
                 };
                 break;
             case ActivationFunction::SOFT_RELU:
+            {
+                // TODO (COMPMID-1535) : Revisit FP16 approximations
+                const float16x4x2_t in0 =
+                {
+                    vcvt_f16_f32(vlogq_f32(vaddq_f32(CONST_1_F32, vexpq_f32(vcvt_f32_f16(vget_low_f16(in.val[0])))))),
+                    vcvt_f16_f32(vlogq_f32(vaddq_f32(CONST_1_F32, vexpq_f32(vcvt_f32_f16(vget_high_f16(in.val[0])))))),
+                };
+
+                const float16x4x2_t in1 =
+                {
+                    vcvt_f16_f32(vlogq_f32(vaddq_f32(CONST_1_F32, vexpq_f32(vcvt_f32_f16(vget_low_f16(in.val[1])))))),
+                    vcvt_f16_f32(vlogq_f32(vaddq_f32(CONST_1_F32, vexpq_f32(vcvt_f32_f16(vget_high_f16(in.val[1])))))),
+                };
+
                 tmp =
                 {
                     {
-                        vlogq_f16(vaddq_f16(CONST_1, vexpq_f16(in.val[0]))),
-                        vlogq_f16(vaddq_f16(CONST_1, vexpq_f16(in.val[1]))),
+                        vcombine_f16(in0.val[0], in0.val[1]),
+                        vcombine_f16(in1.val[0], in1.val[1]),
                     }
                 };
-                break;
+            }
+            break;
             case ActivationFunction::SQRT:
                 tmp =
                 {
@@ -289,14 +323,34 @@ typename std::enable_if<std::is_same<T, float16_t>::value, void>::type NEActivat
                 };
                 break;
             case ActivationFunction::TANH:
+            {
+                // TODO (COMPMID-1535) : Revisit FP16 approximations
+                const float16x8x2_t mul =
+                {
+                    vmulq_f16(b, in.val[0]),
+                    vmulq_f16(b, in.val[1])
+                };
+                const float16x4x2_t in0 =
+                {
+                    vmul_f16(a_h, vcvt_f16_f32(vtanhq_f32(vcvt_f32_f16(vget_low_f16(mul.val[0]))))),
+                    vmul_f16(a_h, vcvt_f16_f32(vtanhq_f32(vcvt_f32_f16(vget_high_f16(mul.val[0]))))),
+                };
+
+                const float16x4x2_t in1 =
+                {
+                    vmul_f16(a_h, vcvt_f16_f32(vtanhq_f32(vcvt_f32_f16(vget_low_f16(mul.val[1]))))),
+                    vmul_f16(a_h, vcvt_f16_f32(vtanhq_f32(vcvt_f32_f16(vget_high_f16(mul.val[1]))))),
+                };
+
                 tmp =
                 {
                     {
-                        vmulq_f16(a, vtanhq_f16(vmulq_f16(b, in.val[0]))),
-                        vmulq_f16(a, vtanhq_f16(vmulq_f16(b, in.val[1]))),
+                        vcombine_f16(in0.val[0], in0.val[1]),
+                        vcombine_f16(in1.val[0], in1.val[1]),
                     }
                 };
-                break;
+            }
+            break;
             default:
                 ARM_COMPUTE_ERROR("Not implemented");
                 break;
```
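The patch follows a widen/compute/narrow pattern for the transcendental activations (Logistic, Tanh, SoftRelu), which lose too much accuracy when evaluated directly in FP16: each `float16x8_t` register is split into two `float16x4_t` halves, converted to `float32x4_t`, the exp/log/tanh part is evaluated in FP32, and the result is converted back and recombined with `vcombine_f16`. Below is a minimal, self-contained sketch of that pattern for the logistic case. It assumes an AArch64 compiler with FP16 vector support and `<arm_neon.h>`; the per-lane `exp_f32_lanes` helper is a hypothetical stand-in for the library's own vectorized `vexpq_f32` approximation (NEMath.h), used here only so the sketch compiles on its own.

```cpp
// Sketch of the FP16 -> FP32 -> FP16 "simulate in FP32" pattern, assuming an
// AArch64 target with FP16 vector support and <arm_neon.h>.
#include <arm_neon.h>
#include <cmath>

// Hypothetical helper: element-wise exp on a float32x4_t. The library uses a
// vectorized polynomial approximation (vexpq_f32); per-lane std::exp is used
// here purely for clarity.
static inline float32x4_t exp_f32_lanes(float32x4_t x)
{
    float lanes[4];
    vst1q_f32(lanes, x);
    for(float &v : lanes)
    {
        v = std::exp(v);
    }
    return vld1q_f32(lanes);
}

// Logistic (sigmoid) on 8 half-precision lanes: widen each half to FP32,
// evaluate 1 / (1 + exp(-x)) there, then narrow back to FP16.
static inline float16x8_t logistic_f16(float16x8_t x)
{
    const float32x4_t one = vdupq_n_f32(1.f);

    // Split the 8 FP16 lanes into two groups of 4 and widen to FP32.
    const float32x4_t lo = vcvt_f32_f16(vget_low_f16(x));
    const float32x4_t hi = vcvt_f32_f16(vget_high_f16(x));

    // Do the numerically sensitive work in FP32.
    const float32x4_t out_lo = vdivq_f32(one, vaddq_f32(one, exp_f32_lanes(vnegq_f32(lo))));
    const float32x4_t out_hi = vdivq_f32(one, vaddq_f32(one, exp_f32_lanes(vnegq_f32(hi))));

    // Narrow back to FP16 and recombine into a single register.
    return vcombine_f16(vcvt_f16_f32(out_lo), vcvt_f16_f32(out_hi));
}
```

The kernel above applies the same idea, but keeps the cheap surrounding arithmetic (the final `1/x` for Logistic, the `a *` and `b *` scalings for Tanh) in FP16 and only widens the exp/log/tanh evaluation itself, as the diff shows.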