2 files changed, 24 insertions, 7 deletions
diff --git a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
index 484e58b79b..0f416defab 100644
--- a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
+++ b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
@@ -210,10 +210,6 @@ T sqmul(T a, T b);
     {                                                                             \
         return vsubq_##TAG(a, b);                                                 \
     }                                                                             \
-    inline vec_16_byte_t<TYPE> vexp(vec_16_byte_t<TYPE> vec)                      \
-    {                                                                             \
-        return vexpq_##TAG(vec);                                                  \
-    }                                                                             \
     inline vec_16_byte_t<TYPE> vmul_n(vec_16_byte_t<TYPE> vec, TYPE val)          \
     {                                                                             \
         return vmulq_n_##TAG(vec, val);                                           \
@@ -280,6 +276,26 @@ float32x4x4_t vexp(float32x4x4_t vec)
     return res;
 }
 
+float32x4_t vexp(const float32x4_t &vec)
+{
+    return vexpq_f32(vec);
+}
+
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+// TODO (COMPMID-1535) : Revisit FP16 approximations
+float16x8_t vexp(const float16x8_t &vec)
+{
+    // Widen each 4-lane half to FP32 so the exponential is evaluated by vexpq_f32
+    const float32x4_t lo_f32 = vcvt_f32_f16(vget_low_f16(vec));
+    const float32x4_t hi_f32 = vcvt_f32_f16(vget_high_f16(vec));
+    const float16x4_t lo_exp = vcvt_f16_f32(vexpq_f32(lo_f32));
+    const float16x4_t hi_exp = vcvt_f16_f32(vexpq_f32(hi_f32));
+
+    // Narrowed halves are stitched back together in the original lane order
+    return vcombine_f16(lo_exp, hi_exp);
+}
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+
 template <>
 float32x4x4_t vdup_n<float32x4x4_t>(float val)
 {
diff --git a/tests/validation/NEON/SoftmaxLayer.cpp b/tests/validation/NEON/SoftmaxLayer.cpp
index 8c0d46bc41..a5d6344423 100644
--- a/tests/validation/NEON/SoftmaxLayer.cpp
+++ b/tests/validation/NEON/SoftmaxLayer.cpp
@@ -45,7 +45,8 @@ namespace
 /** Tolerance for float operations */
 constexpr AbsoluteTolerance<float> tolerance_f32(0.000001f);
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-constexpr AbsoluteTolerance<float> tolerance_f16(0.0001f);
+constexpr RelativeTolerance<float> rel_tolerance_f16(0.1f);
+constexpr AbsoluteTolerance<float> abs_tolerance_f16(0.01f);
 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC*/
 
 /** Tolerance for quantized operations */
@@ -122,14 +123,14 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NESoftmaxLayerFixture<half>, framework::Dataset
                                                                                                          framework::dataset::make("Beta", { 1.0f, 2.0f })))
 {
     // Validate output
-    validate(Accessor(_target), _reference, tolerance_f16);
+    validate(Accessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
 }
 FIXTURE_DATA_TEST_CASE(RunLarge, NESoftmaxLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SoftmaxLayerSmallShapes(),
                                                                                                                framework::dataset::make("DataType", DataType::F16)),
                                                                                                        framework::dataset::make("Beta", { 1.0f, 2.0f })))
 {
     // Validate output
-    validate(Accessor(_target), _reference, tolerance_f16);
+    validate(Accessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
 }
 TEST_SUITE_END()
 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */