aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/kernels/NEActivationLayerKernel.cpp
diff options
context:
space:
mode:
author: SiCong Li <sicong.li@arm.com> 2020-06-08 17:30:51 +0100
committer: SiCong Li <sicong.li@arm.com> 2020-06-10 17:33:25 +0000
commit: a32e2aef81cfcba9f5ae1770ceeb4a8d26fdc1f4 (patch)
tree: b791e466df9dff4032be409d6765cb8a9af9319d /src/core/NEON/kernels/NEActivationLayerKernel.cpp
parent: 8aa8764982d23ed8b8c8810bbfda30542f21e034 (diff)
download: ComputeLibrary-a32e2aef81cfcba9f5ae1770ceeb4a8d26fdc1f4.tar.gz
COMPMID-3523: Fix validation fails on armv8.2-a
* Fix NEON sqrt activation delta (epsilon)
* Fix NEON Hard Swish validation tolerance
* Fix NEON FP16 LogSoftmaxLayer validation test typo
* Raise NEON reduction (sum) f16 tolerance

Change-Id: Ia33d69ce5f0b78be1893fb8e13d2761a8e7fceff
Signed-off-by: SiCong Li <sicong.li@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3318
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/NEActivationLayerKernel.cpp')
-rw-r--r-- src/core/NEON/kernels/NEActivationLayerKernel.cpp | 9
1 files changed, 7 insertions, 2 deletions
diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.cpp b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
index 8e91e6b4d1..ffbfd710f9 100644
--- a/src/core/NEON/kernels/NEActivationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
@@ -265,7 +265,12 @@ NEActivationLayerKernel::activation(const Window &window)
Iterator input(_input, win_collapsed);
Iterator output(_output, win_collapsed);
- const auto epsilon = wrapper::vdup_n(static_cast<T>(1e-24), ExactTagType{});
+ // A small delta added to the input to prevent NAN values caused by zeros in inputs to SQRT
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+ const auto delta = wrapper::vdup_n(static_cast<T>(1e-7), ExactTagType{});
+#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+ const auto delta = wrapper::vdup_n(static_cast<T>(1e-24), ExactTagType{});
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
const auto const_1 = wrapper::vdup_n(static_cast<T>(1.f), ExactTagType{});
const auto const_0 = wrapper::vdup_n(static_cast<T>(0.f), ExactTagType{});
const auto const_6 = wrapper::vdup_n(static_cast<T>(6.f), ExactTagType{});
@@ -318,7 +323,7 @@ NEActivationLayerKernel::activation(const Window &window)
tmp = wrapper::vbsl(wrapper::vcge(vin, const_0), vin, wrapper::vmul(va, wrapper::vsub(wrapper::vexpq(vin), const_1)));
break;
case ActivationFunction::SQRT:
- tmp = wrapper::vinv(wrapper::vinvsqrt(vin + epsilon));
+ tmp = wrapper::vinv(wrapper::vinvsqrt(wrapper::vadd(vin, delta)));
break;
case ActivationFunction::SQUARE:
tmp = wrapper::vmul(vin, vin);