aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/NEON')
-rw-r--r-- src/core/NEON/kernels/NEActivationLayerKernel.cpp 19
-rw-r--r-- src/core/NEON/kernels/NEReductionOperationKernel.cpp 5
2 files changed, 7 insertions, 17 deletions
diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.cpp b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
index 2163f7bb63..5ce79f1007 100644
--- a/src/core/NEON/kernels/NEActivationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
@@ -184,7 +184,7 @@ typename std::enable_if<std::is_same<T, float16_t>::value, void>::type NEActivat
Iterator output(_output, window);
static const float16x8_t CONST_0 = vdupq_n_f16(0.f);
- static const float16x4_t CONST_1_H = vdup_n_f16(1.f);
+ static const float16x8_t CONST_1_H = vdupq_n_f16(1.f);
static const float32x4_t CONST_1_F32 = vdupq_n_f32(1.f);
@@ -240,24 +240,11 @@ typename std::enable_if<std::is_same<T, float16_t>::value, void>::type NEActivat
break;
case ActivationFunction::LOGISTIC:
{
- // TODO (COMPMID-1535) : Revisit FP16 approximations
- const float16x4x2_t in0 =
- {
- vinv_f16(vadd_f16(CONST_1_H, vcvt_f16_f32(vexpq_f32(vcvt_f32_f16(vneg_f16(vget_low_f16(in.val[0]))))))),
- vinv_f16(vadd_f16(CONST_1_H, vcvt_f16_f32(vexpq_f32(vcvt_f32_f16(vneg_f16(vget_high_f16(in.val[0]))))))),
- };
-
- const float16x4x2_t in1 =
- {
- vinv_f16(vadd_f16(CONST_1_H, vcvt_f16_f32(vexpq_f32(vcvt_f32_f16(vneg_f16(vget_low_f16(in.val[1]))))))),
- vinv_f16(vadd_f16(CONST_1_H, vcvt_f16_f32(vexpq_f32(vcvt_f32_f16(vneg_f16(vget_high_f16(in.val[1]))))))),
- };
-
tmp =
{
{
- vcombine_f16(in0.val[0], in0.val[1]),
- vcombine_f16(in1.val[0], in1.val[1]),
+ vinvq_f16(vaddq_f16(CONST_1_H, vexpq_f16(vnegq_f16(in.val[0])))),
+ vinvq_f16(vaddq_f16(CONST_1_H, vexpq_f16(vnegq_f16(in.val[1]))))
}
};
}
diff --git a/src/core/NEON/kernels/NEReductionOperationKernel.cpp b/src/core/NEON/kernels/NEReductionOperationKernel.cpp
index b77219cd79..182e93d177 100644
--- a/src/core/NEON/kernels/NEReductionOperationKernel.cpp
+++ b/src/core/NEON/kernels/NEReductionOperationKernel.cpp
@@ -154,7 +154,10 @@ struct RedOpX
input);
auto carry_addition = wrapper::vpadd(wrapper::vgethigh(vec_sum_value), wrapper::vgetlow(vec_sum_value));
- carry_addition = wrapper::vpadd(carry_addition, carry_addition);
+ for(int i = 0; i < S / 4; ++i)
+ {
+ carry_addition = wrapper::vpadd(carry_addition, carry_addition);
+ }
auto res = wrapper::vgetlane(carry_addition, 0);
if(op == ReductionOperation::MEAN_SUM)