author    Pablo Tello <pablo.tello@arm.com>    2017-07-05 11:32:17 +0100
committer Anthony Barbier <anthony.barbier@arm.com>    2018-09-17 14:16:42 +0100
commit    91654c45cf1de5f41127536a0fdd310c17fdfc8e (patch)
tree      1cf914061c456282f0ba899ebbdc591cabc7f0fc /arm_compute/core
parent    ec69f93dc63408933d322ec27d0b7049b9a6e07c (diff)
COMPMID-421: Added FP16 support in ActivationLayer.
Change-Id: I7ba573b19d56e3c87996edb5218a00e5bfca451e
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/79755
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Diffstat (limited to 'arm_compute/core')
-rw-r--r--  arm_compute/core/NEON/NEMath.h                            | 18
-rw-r--r--  arm_compute/core/NEON/NEMath.inl                          | 23
-rw-r--r--  arm_compute/core/NEON/kernels/NEActivationLayerKernel.h   | 12
3 files changed, 53 insertions(+), 0 deletions(-)
diff --git a/arm_compute/core/NEON/NEMath.h b/arm_compute/core/NEON/NEMath.h
index 8dd9d609e7..b467a600d6 100644
--- a/arm_compute/core/NEON/NEMath.h
+++ b/arm_compute/core/NEON/NEMath.h
@@ -93,6 +93,24 @@ float32x4_t vtanhq_f32(float32x4_t val);
float32x4_t vpowq_f32(float32x4_t val, float32x4_t n);
#ifdef ARM_COMPUTE_ENABLE_FP16
+/** Calculate hyperbolic tangent.
+ *
+ * tanh(x) = (e^2x - 1)/(e^2x + 1)
+ *
+ * @note We clamp x to [-10,10] to avoid overflow issues.
+ *
+ * @param[in] val Input vector value in F16 format.
+ *
+ * @return The calculated hyperbolic tangent.
+ */
+float16x8_t vtanhq_f16(float16x8_t val);
+/** Calculate inverse square root.
+ *
+ * @param[in] x Input value.
+ *
+ * @return The calculated inverse square root.
+ */
+float16x8_t vinvsqrtq_f16(float16x8_t x);
/** Calculate exponential
*
* @param[in] x Input vector value in F16 format.
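The header above documents the new vtanhq_f16 through the identity tanh(x) = (e^2x - 1)/(e^2x + 1), applied to a clamped input. A minimal scalar reference of that formula, purely illustrative and not part of the library, assuming the same [-10,10] clamp used by the NEON implementation further down:

#include <cmath>
#include <cstdio>

// Scalar sketch of the clamped tanh formula documented in NEMath.h (illustrative only).
float tanh_ref(float x)
{
    const float clamped = std::fmin(std::fmax(x, -10.0f), 10.0f); // same clamp range as the NEON code
    const float exp2x   = std::exp(2.0f * clamped);               // e^(2x)
    return (exp2x - 1.0f) / (exp2x + 1.0f);                       // (e^2x - 1) / (e^2x + 1)
}

int main()
{
    std::printf("tanh_ref(0.5) = %f, std::tanh(0.5) = %f\n", tanh_ref(0.5f), std::tanh(0.5f));
    return 0;
}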
diff --git a/arm_compute/core/NEON/NEMath.inl b/arm_compute/core/NEON/NEMath.inl
index c73c54501f..1d90029147 100644
--- a/arm_compute/core/NEON/NEMath.inl
+++ b/arm_compute/core/NEON/NEMath.inl
@@ -172,6 +172,14 @@ const std::array<float16x8_t, 8> log_tab_f16 =
vdupq_n_f16(0.0141278216615f),
}
};
+inline float16x8_t vinvsqrtq_f16(float16x8_t x)
+{
+ float16x8_t sqrt_reciprocal = vrsqrteq_f16(x);
+ sqrt_reciprocal = vmulq_f16(vrsqrtsq_f16(vmulq_f16(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal);
+ sqrt_reciprocal = vmulq_f16(vrsqrtsq_f16(vmulq_f16(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal);
+
+ return sqrt_reciprocal;
+}
inline float16x8_t vinvq_f16(float16x8_t x)
{
@@ -181,6 +189,21 @@ inline float16x8_t vinvq_f16(float16x8_t x)
return recip;
}
+inline float16x8_t vtanhq_f16(float16x8_t val)
+{
+ const float16x8_t CONST_1 = vdupq_n_f16(1.f);
+ const float16x8_t CONST_2 = vdupq_n_f16(2.f);
+ const float16x8_t CONST_MIN_TANH = vdupq_n_f16(-10.f);
+ const float16x8_t CONST_MAX_TANH = vdupq_n_f16(10.f);
+
+ const float16x8_t x = vminq_f16(vmaxq_f16(val, CONST_MIN_TANH), CONST_MAX_TANH);
+ const float16x8_t exp2x = vexpq_f16(vmulq_f16(CONST_2, x));
+ const float16x8_t num = vsubq_f16(exp2x, CONST_1);
+ const float16x8_t den = vaddq_f16(exp2x, CONST_1);
+ const float16x8_t tanh = vmulq_f16(num, vinvq_f16(den));
+ return tanh;
+}
+
inline float16x8_t vtaylor_polyq_f16(float16x8_t x, const std::array<float16x8_t, 8> &coeffs)
{
const float16x8_t A = vaddq_f16(coeffs[0], vmulq_f16(coeffs[4], x));
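The refinement inside vinvsqrtq_f16 above is two Newton-Raphson steps: vrsqrteq_f16 produces a rough estimate of 1/sqrt(x), and each vrsqrtsq_f16/vmulq_f16 pair applies r <- r * (3 - x*r*r) / 2. A self-contained scalar sketch of that iteration (illustrative only; the starting value below merely stands in for the hardware estimate):

#include <cmath>
#include <cstdio>

// One Newton-Raphson step for 1/sqrt(x): vrsqrtsq computes (3 - a*b)/2, and the
// surrounding multiplies turn that into r * (3 - x*r*r) / 2 per lane.
float rsqrt_step(float x, float r)
{
    return r * (3.0f - x * r * r) * 0.5f;
}

int main()
{
    const float x = 2.0f;
    float r = 0.7f;          // rough guess, standing in for the vrsqrteq_f16 estimate
    r = rsqrt_step(x, r);    // first refinement (first vrsqrtsq_f16 line above)
    r = rsqrt_step(x, r);    // second refinement (second vrsqrtsq_f16 line above)
    std::printf("refined %f vs exact %f\n", r, 1.0f / std::sqrt(x));
    return 0;
}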
diff --git a/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h b/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h
index e995f1e5e0..2c88debfb4 100644
--- a/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h
@@ -27,6 +27,10 @@
#include "arm_compute/core/FixedPoint.h"
#include "arm_compute/core/NEON/INEKernel.h"
+#ifdef ARM_COMPUTE_ENABLE_FP16
+#include <arm_fp16.h>
+#endif /* ARM_COMPUTE_ENABLE_FP16 */
+
namespace arm_compute
{
class ITensor;
@@ -72,6 +76,14 @@ private:
*/
template <ActivationLayerInfo::ActivationFunction F, typename T>
typename std::enable_if<std::is_same<T, float>::value, void>::type activation(const Window &window);
+#ifdef ARM_COMPUTE_ENABLE_FP16
+ /** Function to apply an activation function on a tensor.
+ *
+ * @param[in] window Region on which to execute the kernel
+ */
+ template <ActivationLayerInfo::ActivationFunction F, typename T>
+ typename std::enable_if<std::is_same<T, float16_t>::value, void>::type activation(const Window &window);
+#endif /* ARM_COMPUTE_ENABLE_FP16 */
/** Function to apply an activation function on a tensor.
*
* @param[in] window Region on which to execute the kernel
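The kernel header selects between the F32 and FP16 activation paths with std::enable_if on the element type. A minimal, self-contained sketch of that SFINAE dispatch (illustrative only; the hypothetical Half struct stands in for float16_t, which on ARM comes from <arm_fp16.h>):

#include <cstdio>
#include <type_traits>

struct Half { unsigned short bits; }; // placeholder for float16_t in this sketch

struct ExampleKernel
{
    // Overload enabled only when T is float (mirrors the F32 activation() above).
    template <typename T>
    typename std::enable_if<std::is_same<T, float>::value, void>::type activation()
    {
        std::printf("F32 activation path\n");
    }

    // Overload enabled only when T is the half-precision stand-in (mirrors the FP16 path).
    template <typename T>
    typename std::enable_if<std::is_same<T, Half>::value, void>::type activation()
    {
        std::printf("F16 activation path\n");
    }
};

int main()
{
    ExampleKernel k;
    k.activation<float>(); // SFINAE selects the F32 overload
    k.activation<Half>();  // SFINAE selects the F16 overload
    return 0;
}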