From a32e2aef81cfcba9f5ae1770ceeb4a8d26fdc1f4 Mon Sep 17 00:00:00 2001
From: SiCong Li
Date: Mon, 8 Jun 2020 17:30:51 +0100
Subject: COMPMID-3523: Fix validation fails on armv8.2-a

* Fix neon sqrt activation delta(epsilon)
* Fix NEON Hard Swish validation tolerance
* Fix NEON FP16 LogSoftmaxLayer validation test typo
* Raise NEON reduction (sum) f16 tolerance

Change-Id: Ia33d69ce5f0b78be1893fb8e13d2761a8e7fceff
Signed-off-by: SiCong Li
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3318
Comments-Addressed: Arm Jenkins
Reviewed-by: Michele Di Giorgio
Tested-by: Arm Jenkins
---
 src/core/NEON/kernels/NEActivationLayerKernel.cpp | 9 +++++++--
 tests/validation/NEON/ActivationLayer.cpp         | 2 ++
 tests/validation/NEON/LogSoftmaxLayer.cpp         | 6 +++---
 tests/validation/NEON/ReductionOperation.cpp      | 2 +-
 4 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.cpp b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
index 8e91e6b4d1..ffbfd710f9 100644
--- a/src/core/NEON/kernels/NEActivationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
@@ -265,7 +265,12 @@ NEActivationLayerKernel::activation(const Window &window)
     Iterator input(_input, win_collapsed);
     Iterator output(_output, win_collapsed);
 
-    const auto epsilon = wrapper::vdup_n(static_cast<T>(1e-24), ExactTagType{});
+    // A small delta added to the input to prevent NAN values caused by zeros in inputs to SQRT
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+    const auto delta = wrapper::vdup_n(static_cast<T>(1e-7), ExactTagType{});
+#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+    const auto delta = wrapper::vdup_n(static_cast<T>(1e-24), ExactTagType{});
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
     const auto const_1 = wrapper::vdup_n(static_cast<T>(1.f), ExactTagType{});
     const auto const_0 = wrapper::vdup_n(static_cast<T>(0.f), ExactTagType{});
     const auto const_6 = wrapper::vdup_n(static_cast<T>(6.f), ExactTagType{});
@@ -318,7 +323,7 @@ NEActivationLayerKernel::activation(const Window &window)
                 tmp = wrapper::vbsl(wrapper::vcge(vin, const_0), vin, wrapper::vmul(va, wrapper::vsub(wrapper::vexpq(vin), const_1)));
                 break;
             case ActivationFunction::SQRT:
-                tmp = wrapper::vinv(wrapper::vinvsqrt(vin + epsilon));
+                tmp = wrapper::vinv(wrapper::vinvsqrt(wrapper::vadd(vin, delta)));
                 break;
             case ActivationFunction::SQUARE:
                 tmp = wrapper::vmul(vin, vin);
diff --git a/tests/validation/NEON/ActivationLayer.cpp b/tests/validation/NEON/ActivationLayer.cpp
index 063bfaa2cd..e3a8db167c 100644
--- a/tests/validation/NEON/ActivationLayer.cpp
+++ b/tests/validation/NEON/ActivationLayer.cpp
@@ -60,6 +60,7 @@ RelativeTolerance<float> relative_tolerance(DataType data_type, ActivationLayerI
         case ActivationLayerInfo::ActivationFunction::ELU:
         case ActivationLayerInfo::ActivationFunction::SQRT:
         case ActivationLayerInfo::ActivationFunction::TANH:
+        case ActivationLayerInfo::ActivationFunction::HARD_SWISH:
             switch(data_type)
             {
                 case DataType::F16:
@@ -87,6 +88,7 @@ AbsoluteTolerance<float> absolute_tolerance(DataType data_type, ActivationLayerI
         case ActivationLayerInfo::ActivationFunction::SOFT_RELU:
         case ActivationLayerInfo::ActivationFunction::SQRT:
         case ActivationLayerInfo::ActivationFunction::TANH:
+        case ActivationLayerInfo::ActivationFunction::HARD_SWISH:
             switch(data_type)
            {
                 case DataType::F16:
diff --git a/tests/validation/NEON/LogSoftmaxLayer.cpp b/tests/validation/NEON/LogSoftmaxLayer.cpp
index e35c8fd8a2..43e98ae4ab 100644
--- a/tests/validation/NEON/LogSoftmaxLayer.cpp
+++ b/tests/validation/NEON/LogSoftmaxLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -77,12 +77,12 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NELogSoftmaxLayerFixture<half>, framework::Data
     validate(Accessor(_target), _reference, tolerance_f16);
 }
 FIXTURE_DATA_TEST_CASE(RunSmall4D, NELogSoftmaxLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small4DShapes(),
-                                                                                                                              framework::dataset::make("DataType", DataType::F32)),
+                                                                                                                              framework::dataset::make("DataType", DataType::F16)),
                                                                                                                               framework::dataset::make("Beta", { 1.0f, 2.0f })),
                                                                                                                               framework::dataset::make("Axis", { 1, 2, 3 })))
 {
     // Validate output
-    validate(Accessor(_target), _reference, tolerance_f32);
+    validate(Accessor(_target), _reference, tolerance_f16);
 }
 FIXTURE_DATA_TEST_CASE(RunLarge, NELogSoftmaxLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SoftmaxLayerLargeShapes(),
                                                                                                                           framework::dataset::make("DataType", DataType::F16)),
diff --git a/tests/validation/NEON/ReductionOperation.cpp b/tests/validation/NEON/ReductionOperation.cpp
index f155e97d0d..4c99daab77 100644
--- a/tests/validation/NEON/ReductionOperation.cpp
+++ b/tests/validation/NEON/ReductionOperation.cpp
@@ -46,7 +46,7 @@ namespace
 AbsoluteTolerance<float> tolerance_f32(0.0001f);
 RelativeTolerance<float> rel_tolerance_f32(0.0001f);
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-AbsoluteTolerance<float> tolerance_f16(0.1f);
+AbsoluteTolerance<float> tolerance_f16(0.2f);
 RelativeTolerance<float> rel_tolerance_f16(0.1f);
 #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 /** Tolerance for quantized operations */
-- 
cgit v1.2.1
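
Why the FP16 build needs a larger SQRT guard value: IEEE binary16 cannot represent anything smaller than about 5.96e-8 (2^-24), so the old delta of 1e-24 converts to exactly zero in half precision and no longer keeps zero inputs away from the reciprocal-based SQRT path, whereas 1e-7 survives the conversion as a small FP16 subnormal. The sketch below is not part of the patch or the Compute Library; it is a minimal standalone C++11 illustration of that representability argument, assuming default round-to-nearest and no flush-to-zero of FP16 subnormals.

// Minimal sketch (assumption: plain C++11, no Compute Library headers) showing
// why 1e-24 is unusable as an FP16 guard value while 1e-7 still works.
#include <cmath>
#include <cstdio>

int main()
{
    // IEEE 754 binary16 limits.
    const double fp16_min_subnormal = std::ldexp(1.0, -24); // ~5.96e-8, smallest positive fp16 value
    const double fp16_min_normal    = std::ldexp(1.0, -14); // ~6.10e-5, smallest normal fp16 value

    const double deltas[] = { 1e-24, 1e-7 }; // old and new guard values from the patch
    for(double d : deltas)
    {
        // Values below half of the smallest subnormal round to zero under round-to-nearest.
        const char *fate = (d < fp16_min_subnormal / 2) ? "rounds to 0 in fp16"
                           : (d < fp16_min_normal)      ? "survives as an fp16 subnormal"
                                                        : "survives as a normal fp16 value";
        std::printf("delta %g: %s\n", d, fate);
    }
    return 0;
}

Note that 1e-7 is not exactly representable in FP16 either; it rounds to roughly 1.19e-7, which is still enough to keep the SQRT input strictly positive.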