about | summary | refs | log | tree | commit | diff
path: root/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
diff options
context:
space:
mode:
author: Georgios Pinitas <georgios.pinitas@arm.com>, 2018-08-23 13:38:59 +0100
committer: Anthony Barbier <anthony.barbier@arm.com>, 2018-11-02 16:54:54 +0000
commit: 31fa0d6b52fc8c189e559fe1525b61e55f6494de (patch)
tree: 6b15328f03de2898f3129df047793b9213214e6d /src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
parent: 3463a8b9eed57340366743340d2d06df3aa1ae88 (diff)
download: ComputeLibrary-31fa0d6b52fc8c189e559fe1525b61e55f6494de.tar.gz
COMPMID-1534: Fix NESoftmaxLayer for FP16
Simulates the exp function in FP32.

Change-Id: Ieffceeab64fda6f466f212b56f794cc44d477afa
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/145367
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/NESoftmaxLayerKernel.cpp')
-rw-r--r-- src/core/NEON/kernels/NESoftmaxLayerKernel.cpp 24
1 file changed, 20 insertions, 4 deletions
diff --git a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
index 484e58b79b..0f416defab 100644
--- a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
+++ b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
@@ -210,10 +210,6 @@ T sqmul(T a, T b);
{ \
return vsubq_##TAG(a, b); \
} \
- inline vec_16_byte_t<TYPE> vexp(vec_16_byte_t<TYPE> vec) \
- { \
- return vexpq_##TAG(vec); \
- } \
inline vec_16_byte_t<TYPE> vmul_n(vec_16_byte_t<TYPE> vec, TYPE val) \
{ \
return vmulq_n_##TAG(vec, val); \
@@ -280,6 +276,26 @@ float32x4x4_t vexp(float32x4x4_t vec)
return res;
}
+float32x4_t vexp(const float32x4_t &vec)
+{
+ return vexpq_f32(vec);
+}
+
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+// TODO (COMPMID-1535) : Revisit FP16 approximations
+float16x8_t vexp(const float16x8_t &vec)
+{
+ float16x4x2_t res =
+ {
+ {
+ vcvt_f16_f32(vexpq_f32(vcvt_f32_f16(vget_low_f16(vec)))),
+ vcvt_f16_f32(vexpq_f32(vcvt_f32_f16(vget_high_f16(vec))))
+ }
+ };
+ return vcombine_f16(res.val[0], res.val[1]);
+}
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+
template <>
float32x4x4_t vdup_n<float32x4x4_t>(float val)
{