COMPMID-1534 - Fix NENormalizationLayer for FP16

Implemented vpowq_f16 with fp32 data type in order to avoid accuracy issues.

Change-Id: Ibfffd12e4a941c1388a982fc7bbe3e1832351feb
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/145416
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
author: Gian Marco Iodice <gianmarco.iodice@arm.com> 2018-08-23 15:29:16 +0100
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:54:54 +0000
commit: f2cde9b29deee6423ea6fe9a1a9afc9ef61d2663 (patch)
tree: 8a770ff88dc93cee025c44de3c1b8d6f35b9e317
parent: 13d96e061fe3be14f9693e6761f1795a2399b249 (diff)
download: ComputeLibrary-f2cde9b29deee6423ea6fe9a1a9afc9ef61d2663.tar.gz
3 files changed, 14 insertions, 2 deletions
diff --git a/arm_compute/core/NEON/NEMath.inl b/arm_compute/core/NEON/NEMath.inl
index 61d25d115c..1ebc9c10af 100644
--- a/arm_compute/core/NEON/NEMath.inl
+++ b/arm_compute/core/NEON/NEMath.inl
@@ -303,7 +303,16 @@ inline float16x8_t vlogq_f16(float16x8_t x)
 
 inline float16x8_t vpowq_f16(float16x8_t val, float16x8_t n)
 {
-    return vexpq_f16(vmulq_f16(n, vlogq_f16(val)));
+    // TODO (giaiod01) - COMPMID-1535
+    float32x4_t n0_f32   = vcvt_f32_f16(vget_low_f16(n));
+    float32x4_t n1_f32   = vcvt_f32_f16(vget_high_f16(n));
+    float32x4_t val0_f32 = vcvt_f32_f16(vget_low_f16(val));
+    float32x4_t val1_f32 = vcvt_f32_f16(vget_high_f16(val));
+
+    float32x4_t res0_f32 = vexpq_f32(vmulq_f32(n0_f32, vlogq_f32(val0_f32)));
+    float32x4_t res1_f32 = vexpq_f32(vmulq_f32(n1_f32, vlogq_f32(val1_f32)));
+
+    return vcombine_f16(vcvt_f16_f32(res0_f32), vcvt_f16_f32(res1_f32));
 }
 #endif /* DOXYGEN_SKIP_THIS */
 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
diff --git a/src/core/NEON/kernels/NENormalizationLayerKernel.cpp b/src/core/NEON/kernels/NENormalizationLayerKernel.cpp
index 15e8298e2d..fe6b69c455 100644
--- a/src/core/NEON/kernels/NENormalizationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NENormalizationLayerKernel.cpp
@@ -145,6 +145,7 @@ void NENormalizationLayerKernel::configure(const ITensor *input, const ITensor *
             }
             break;
         }
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
         case DataType::F16:
         {
             switch(norm_idx)
@@ -169,6 +170,7 @@ void NENormalizationLayerKernel::configure(const ITensor *input, const ITensor *
             }
             break;
         }
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
         default:
             ARM_COMPUTE_ERROR("NOT SUPPORTED!");
     }
diff --git a/tests/validation/fixtures/NormalizationLayerFixture.h b/tests/validation/fixtures/NormalizationLayerFixture.h
index f4f9c64944..318b77e1a7 100644
--- a/tests/validation/fixtures/NormalizationLayerFixture.h
+++ b/tests/validation/fixtures/NormalizationLayerFixture.h
@@ -59,7 +59,8 @@ protected:
     template <typename U>
     void fill(U &&tensor)
     {
-        library->fill_tensor_uniform(tensor, 0);
+        std::uniform_real_distribution<> distribution(-1.0f, 1.0f);
+        library->fill(tensor, distribution, 0);
     }
 
     TensorType compute_target(const TensorShape &shape, NormalizationLayerInfo info, DataType data_type)
author	Gian Marco Iodice <gianmarco.iodice@arm.com>	2018-08-23 15:29:16 +0100
committer	Anthony Barbier <anthony.barbier@arm.com>	2018-11-02 16:54:54 +0000
commit	f2cde9b29deee6423ea6fe9a1a9afc9ef61d2663 (patch)
tree	8a770ff88dc93cee025c44de3c1b8d6f35b9e317
parent	13d96e061fe3be14f9693e6761f1795a2399b249 (diff)
download	ComputeLibrary-f2cde9b29deee6423ea6fe9a1a9afc9ef61d2663.tar.gz