author     Georgios Pinitas <georgios.pinitas@arm.com>   2018-08-23 13:11:53 +0100
committer  Anthony Barbier <anthony.barbier@arm.com>      2018-11-02 16:54:54 +0000
commit     3463a8b9eed57340366743340d2d06df3aa1ae88 (patch)
tree       dda328b4fc59fb0a9f7a930a25ef65ce8f20e179 /src/core/NEON/kernels/NEActivationLayerKernel.cpp
parent     2113b9d9ada3a392b1215c4afd7715249d629bfc (diff)
download   ComputeLibrary-3463a8b9eed57340366743340d2d06df3aa1ae88.tar.gz
COMPMID-1534: Fix NEActivationLayer for FP16
Simulates Logistic, Tanh and SoftRelu in FP32.

Change-Id: I9950f7636b8ff2f3e054937e5ef414e45dfe06f5
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/145357
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
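All three rewritten cases in this patch follow the same round-trip: split the 8-lane FP16 vector into its low and high halves, promote them to FP32, evaluate the transcendental there, and narrow the results back into a single FP16 vector. A minimal sketch of that pattern for the Logistic case is below; it assumes a toolchain with NEON FP16 vector types, an AArch64 target (for vdivq_f32), and the library's vexpq_f32 helper from NEMath, which is only forward-declared here.

    #include <arm_neon.h>

    // Forward declaration of the library's FP32 vector exp approximation
    // (NEMath.h); it is assumed to be available, as it is inside the kernel.
    float32x4_t vexpq_f32(float32x4_t x);

    // Sketch of the widen-compute-narrow pattern used by the patch.
    // Note: vdivq_f32 is AArch64-only; the kernel itself uses the library's
    // reciprocal helpers (vinv_f16) instead.
    inline float16x8_t logistic_f16_via_f32(float16x8_t x)
    {
        const float32x4_t one = vdupq_n_f32(1.f);

        // Low/high halves promoted to FP32 so exp() cannot overflow the FP16 range.
        const float32x4_t lo = vcvt_f32_f16(vget_low_f16(x));
        const float32x4_t hi = vcvt_f32_f16(vget_high_f16(x));

        // 1 / (1 + exp(-x)) evaluated entirely in FP32.
        const float32x4_t res_lo = vdivq_f32(one, vaddq_f32(one, vexpq_f32(vnegq_f32(lo))));
        const float32x4_t res_hi = vdivq_f32(one, vaddq_f32(one, vexpq_f32(vnegq_f32(hi))));

        // Narrow back to FP16 and re-combine into a single 8-lane vector.
        return vcombine_f16(vcvt_f16_f32(res_lo), vcvt_f16_f32(res_hi));
    }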
Diffstat (limited to 'src/core/NEON/kernels/NEActivationLayerKernel.cpp')
-rw-r--r--  src/core/NEON/kernels/NEActivationLayerKernel.cpp  80
1 file changed, 67 insertions(+), 13 deletions(-)
diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.cpp b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
index 1dad531a40..2163f7bb63 100644
--- a/src/core/NEON/kernels/NEActivationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
@@ -138,6 +138,7 @@ void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, Activat
{ ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, float16_t> },
{ ActivationFunction::BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::BOUNDED_RELU, float16_t> },
{ ActivationFunction::LU_BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LU_BOUNDED_RELU, float16_t> },
+ { ActivationFunction::LEAKY_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LEAKY_RELU, float16_t> },
{ ActivationFunction::SOFT_RELU, &NEActivationLayerKernel::activation<ActivationFunction::SOFT_RELU, float16_t> },
{ ActivationFunction::SQRT, &NEActivationLayerKernel::activation<ActivationFunction::SQRT, float16_t> },
{ ActivationFunction::SQUARE, &NEActivationLayerKernel::activation<ActivationFunction::SQUARE, float16_t> },
@@ -182,11 +183,14 @@ typename std::enable_if<std::is_same<T, float16_t>::value, void>::type NEActivat
Iterator input(_input, window);
Iterator output(_output, window);
- static const float16x8_t CONST_0 = vdupq_n_f16(0.f);
- static const float16x8_t CONST_1 = vdupq_n_f16(1.f);
+ static const float16x8_t CONST_0 = vdupq_n_f16(0.f);
+ static const float16x4_t CONST_1_H = vdup_n_f16(1.f);
- const float16x8_t a = vdupq_n_f16(_act_info.a());
- const float16x8_t b = vdupq_n_f16(_act_info.b());
+ static const float32x4_t CONST_1_F32 = vdupq_n_f32(1.f);
+
+ const float16x8_t a = vdupq_n_f16(_act_info.a());
+ const float16x4_t a_h = vdup_n_f16(_act_info.a());
+ const float16x8_t b = vdupq_n_f16(_act_info.b());
execute_window_loop(window, [&](const Coordinates &)
{
@@ -235,14 +239,29 @@ typename std::enable_if<std::is_same<T, float16_t>::value, void>::type NEActivat
};
break;
case ActivationFunction::LOGISTIC:
+ {
+ // TODO (COMPMID-1535) : Revisit FP16 approximations
+ const float16x4x2_t in0 =
+ {
+ vinv_f16(vadd_f16(CONST_1_H, vcvt_f16_f32(vexpq_f32(vcvt_f32_f16(vneg_f16(vget_low_f16(in.val[0]))))))),
+ vinv_f16(vadd_f16(CONST_1_H, vcvt_f16_f32(vexpq_f32(vcvt_f32_f16(vneg_f16(vget_high_f16(in.val[0]))))))),
+ };
+
+ const float16x4x2_t in1 =
+ {
+ vinv_f16(vadd_f16(CONST_1_H, vcvt_f16_f32(vexpq_f32(vcvt_f32_f16(vneg_f16(vget_low_f16(in.val[1]))))))),
+ vinv_f16(vadd_f16(CONST_1_H, vcvt_f16_f32(vexpq_f32(vcvt_f32_f16(vneg_f16(vget_high_f16(in.val[1]))))))),
+ };
+
tmp =
{
{
- vinvq_f16(vaddq_f16(CONST_1, vexpq_f16(vnegq_f16(in.val[0])))),
- vinvq_f16(vaddq_f16(CONST_1, vexpq_f16(vnegq_f16(in.val[1])))),
+ vcombine_f16(in0.val[0], in0.val[1]),
+ vcombine_f16(in1.val[0], in1.val[1]),
}
};
- break;
+ }
+ break;
case ActivationFunction::RELU:
tmp =
{
@@ -262,14 +281,29 @@ typename std::enable_if<std::is_same<T, float16_t>::value, void>::type NEActivat
};
break;
case ActivationFunction::SOFT_RELU:
+ {
+ // TODO (COMPMID-1535) : Revisit FP16 approximations
+ const float16x4x2_t in0 =
+ {
+ vcvt_f16_f32(vlogq_f32(vaddq_f32(CONST_1_F32, vexpq_f32(vcvt_f32_f16(vget_low_f16(in.val[0])))))),
+ vcvt_f16_f32(vlogq_f32(vaddq_f32(CONST_1_F32, vexpq_f32(vcvt_f32_f16(vget_high_f16(in.val[0])))))),
+ };
+
+ const float16x4x2_t in1 =
+ {
+ vcvt_f16_f32(vlogq_f32(vaddq_f32(CONST_1_F32, vexpq_f32(vcvt_f32_f16(vget_low_f16(in.val[1])))))),
+ vcvt_f16_f32(vlogq_f32(vaddq_f32(CONST_1_F32, vexpq_f32(vcvt_f32_f16(vget_high_f16(in.val[1])))))),
+ };
+
tmp =
{
{
- vlogq_f16(vaddq_f16(CONST_1, vexpq_f16(in.val[0]))),
- vlogq_f16(vaddq_f16(CONST_1, vexpq_f16(in.val[1]))),
+ vcombine_f16(in0.val[0], in0.val[1]),
+ vcombine_f16(in1.val[0], in1.val[1]),
}
};
- break;
+ }
+ break;
case ActivationFunction::SQRT:
tmp =
{
@@ -289,14 +323,34 @@ typename std::enable_if<std::is_same<T, float16_t>::value, void>::type NEActivat
};
break;
case ActivationFunction::TANH:
+ {
+ // TODO (COMPMID-1535) : Revisit FP16 approximations
+ const float16x8x2_t mul =
+ {
+ vmulq_f16(b, in.val[0]),
+ vmulq_f16(b, in.val[1])
+ };
+ const float16x4x2_t in0 =
+ {
+ vmul_f16(a_h, vcvt_f16_f32(vtanhq_f32(vcvt_f32_f16(vget_low_f16(mul.val[0]))))),
+ vmul_f16(a_h, vcvt_f16_f32(vtanhq_f32(vcvt_f32_f16(vget_high_f16(mul.val[0]))))),
+ };
+
+ const float16x4x2_t in1 =
+ {
+ vmul_f16(a_h, vcvt_f16_f32(vtanhq_f32(vcvt_f32_f16(vget_low_f16(mul.val[1]))))),
+ vmul_f16(a_h, vcvt_f16_f32(vtanhq_f32(vcvt_f32_f16(vget_high_f16(mul.val[1]))))),
+ };
+
tmp =
{
{
- vmulq_f16(a, vtanhq_f16(vmulq_f16(b, in.val[0]))),
- vmulq_f16(a, vtanhq_f16(vmulq_f16(b, in.val[1]))),
+ vcombine_f16(in0.val[0], in0.val[1]),
+ vcombine_f16(in1.val[0], in1.val[1]),
}
};
- break;
+ }
+ break;
default:
ARM_COMPUTE_ERROR("Not implemented");
break;
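A note on why the FP32 detour matters: the largest finite FP16 value is 65504, so a native FP16 exp(x) already overflows for x greater than roughly 11.1 (exp(11.1) is about 6.6e4). In pure FP16, SoftRelu log(1 + exp(x)) then produces infinity instead of roughly x, and Logistic 1/(1 + exp(-x)) collapses to exactly 0 for x below roughly -11.1. Widening to FP32 (maximum about 3.4e38) keeps those intermediates representable, and precision is lost only once, on the final narrowing back to FP16.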