aboutsummaryrefslogtreecommitdiff
path: root/arm_compute/core
diff options
context:
space:
mode:
author Manuel Bottini <manuel.bottini@arm.com> 2019-05-15 15:30:47 +0100
committer Manuel Bottini <manuel.bottini@arm.com> 2019-07-17 10:19:00 +0000
commit ed753266948314922ee56b0d4a3e801264011a12 (patch)
tree 24c509710ed2a7082f6ccecdc7ed20c6ae314595 /arm_compute/core
parent 2ea3761416aab259d9d84620dba2e011bcb5d880 (diff)
download ComputeLibrary-ed753266948314922ee56b0d4a3e801264011a12.tar.gz
COMPMID-2283: Implement SIN operator for NEON
Change-Id: I31ee0e7c9a30540cfd2cad76993afb66abfccc4d Signed-off-by: Manuel Bottini <manuel.bottini@arm.com> Reviewed-on: https://review.mlplatform.org/c/1169 Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
Diffstat (limited to 'arm_compute/core')
-rw-r--r-- arm_compute/core/NEON/NEMath.h 25
-rw-r--r-- arm_compute/core/NEON/NEMath.inl 123
-rw-r--r-- arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h 1
-rw-r--r-- arm_compute/core/NEON/wrapper/intrinsics/sin.h 48
4 files changed, 197 insertions, 0 deletions
diff --git a/arm_compute/core/NEON/NEMath.h b/arm_compute/core/NEON/NEMath.h
index 59a03c9d11..560abd6cdc 100644
--- a/arm_compute/core/NEON/NEMath.h
+++ b/arm_compute/core/NEON/NEMath.h
@@ -146,6 +146,22 @@ int32x4_t rounding_divide_by_pow2(int32x4_t x, int exponent);
*/
int32_t rounding_divide_by_pow2(int32_t x, int exponent);
+/** Calculate the sine of each of the four F32 lanes of a 128-bit vector.
+ *
+ * @param[in] val Input vector (four lanes) of angles in radians, F32 format.
+ *
+ * @return Vector holding the approximated sine of each lane (polynomial approximation, not an exact libm result).
+ */
+float32x4_t vsinq_f32(float32x4_t val);
+
+/** Calculate the sine of each of the two F32 lanes of a 64-bit vector.
+ *
+ * @param[in] val Input vector (two lanes) of angles in radians, F32 format.
+ *
+ * @return Vector holding the approximated sine of each lane (polynomial approximation, not an exact libm result).
+ */
+float32x2_t vsin_f32(float32x2_t val);
+
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
/** Calculate hyperbolic tangent.
*
@@ -217,6 +233,15 @@ float16x8_t vexpq_f16(float16x8_t x);
* @return The calculated power.
*/
float16x8_t vpowq_f16(float16x8_t val, float16x8_t n);
+
+/** Calculate the sine of each of the eight F16 lanes of a 128-bit vector.
+ *
+ * @param[in] val Input vector (eight lanes) of angles in radians, F16 format.
+ *
+ * @return Vector holding the approximated sine of each lane, evaluated in F32 and converted back to F16.
+ */
+float16x8_t vsinq_f16(float16x8_t val);
+
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
} // namespace arm_compute
#include "arm_compute/core/NEON/NEMath.inl"
diff --git a/arm_compute/core/NEON/NEMath.inl b/arm_compute/core/NEON/NEMath.inl
index 2247c14f47..eebcdf864f 100644
--- a/arm_compute/core/NEON/NEMath.inl
+++ b/arm_compute/core/NEON/NEMath.inl
@@ -22,6 +22,8 @@
* SOFTWARE.
*/
+#include <cmath>
+
namespace arm_compute
{
/** Exponent polynomial coefficients */
@@ -54,6 +56,12 @@ const std::array<float32x4_t, 8> log_tab =
}
};
+/** Sin polynomial coefficients: ratios between consecutive Taylor terms, used as term(k) = term(k-1) * x^2 * coeff */
+constexpr float te_sin_coeff2 = 0.166666666666f; // 1/(2*3): x      -> x^3/3!
+constexpr float te_sin_coeff3 = 0.05f; // 1/(4*5): x^3/3! -> x^5/5!
+constexpr float te_sin_coeff4 = 0.023809523810f; // 1/(6*7): x^5/5! -> x^7/7!
+constexpr float te_sin_coeff5 = 0.013888888889f; // 1/(8*9): x^7/7! -> x^9/9!
+
#ifndef DOXYGEN_SKIP_THIS
inline float32x4_t vfloorq_f32(float32x4_t val)
{
@@ -190,6 +198,97 @@ inline float32x4_t vpowq_f32(float32x4_t val, float32x4_t n)
{
return vexpq_f32(vmulq_f32(n, vlogq_f32(val)));
}
+
+inline float32x4_t vsinq_f32(float32x4_t val) // Per-lane sine: reduce |val| into [0, pi/2], sum a 5-term Taylor series, then restore the sign
+{
+ const float32x4_t pi_v = vdupq_n_f32(M_PI);
+ const float32x4_t pio2_v = vdupq_n_f32(M_PI / 2);
+ const float32x4_t ipi_v = vdupq_n_f32(1 / M_PI);
+
+ //Find positive or negative: count the whole multiples of pi in |val| (truncated) and note the sign of val
+ const int32x4_t c_v = vabsq_s32(vcvtq_s32_f32(vmulq_f32(val, ipi_v)));
+ const uint32x4_t sign_v = vcleq_f32(val, vdupq_n_f32(0)); // all-ones in lanes where val <= 0 (zero included)
+ const uint32x4_t odd_v = vandq_u32(vreinterpretq_u32_s32(c_v), vdupq_n_u32(1)); // 1 in lanes where the multiple count is odd
+
+ uint32x4_t neg_v = veorq_u32(odd_v, sign_v); // only bit 0 matters below: set when the result has to be negated
+
+ //Modulus a - (n * int(a*(1/n))), with a = |val| and n = pi
+ float32x4_t ma = vsubq_f32(vabsq_f32(val), vmulq_f32(pi_v, vcvtq_f32_s32(c_v)));
+ const uint32x4_t reb_v = vcgeq_f32(ma, pio2_v);
+
+ //Rebase a between 0 and pi/2: lanes with a >= pi/2 are mirrored to pi - a
+ ma = vbslq_f32(reb_v, vsubq_f32(pi_v, ma), ma);
+
+ //Taylor series: every term is built from the previous one as elem * ma^2 * ratio
+ const float32x4_t ma2 = vmulq_f32(ma, ma);
+
+ //2nd elem: x^3 / 3!
+ float32x4_t elem = vmulq_f32(vmulq_f32(ma, ma2), vdupq_n_f32(te_sin_coeff2));
+ float32x4_t res = vsubq_f32(ma, elem);
+
+ //3rd elem: x^5 / 5!
+ elem = vmulq_f32(vmulq_f32(elem, ma2), vdupq_n_f32(te_sin_coeff3));
+ res = vaddq_f32(res, elem);
+
+ //4th elem: x^7 / 7!
+ elem = vmulq_f32(vmulq_f32(elem, ma2), vdupq_n_f32(te_sin_coeff4));
+ res = vsubq_f32(res, elem);
+
+ //5th elem: x^9 / 9!
+ elem = vmulq_f32(vmulq_f32(elem, ma2), vdupq_n_f32(te_sin_coeff5));
+ res = vaddq_f32(res, elem);
+
+ //Change of sign: shift bit 0 of neg_v into the float sign-bit position and XOR it into the result
+ neg_v = vshlq_n_u32(neg_v, 31);
+ res = vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(res), neg_v));
+ return res;
+}
+
+inline float32x2_t vsin_f32(float32x2_t val) // Two-lane variant of the per-lane sine: reduce |val| into [0, pi/2], sum a 5-term Taylor series, then restore the sign
+{
+ const float32x2_t pi_v = vdup_n_f32(M_PI);
+ const float32x2_t pio2_v = vdup_n_f32(M_PI / 2);
+ const float32x2_t ipi_v = vdup_n_f32(1 / M_PI);
+
+ //Find positive or negative: count the whole multiples of pi in |val| (truncated) and note the sign of val
+ const int32x2_t c_v = vabs_s32(vcvt_s32_f32(vmul_f32(val, ipi_v)));
+ const uint32x2_t sign_v = vcle_f32(val, vdup_n_f32(0)); // all-ones in lanes where val <= 0 (zero included)
+ const uint32x2_t odd_v = vand_u32(vreinterpret_u32_s32(c_v), vdup_n_u32(1)); // 1 in lanes where the multiple count is odd
+
+ uint32x2_t neg_v = veor_u32(odd_v, sign_v); // only bit 0 matters below: set when the result has to be negated
+
+ //Modulus a - (n * int(a*(1/n))), with a = |val| and n = pi
+ float32x2_t ma = vsub_f32(vabs_f32(val), vmul_f32(pi_v, vcvt_f32_s32(c_v)));
+ const uint32x2_t reb_v = vcge_f32(ma, pio2_v);
+
+ //Rebase a between 0 and pi/2: lanes with a >= pi/2 are mirrored to pi - a
+ ma = vbsl_f32(reb_v, vsub_f32(pi_v, ma), ma);
+
+ //Taylor series: every term is built from the previous one as elem * ma^2 * ratio
+ const float32x2_t ma2 = vmul_f32(ma, ma);
+
+ //2nd elem: x^3 / 3!
+ float32x2_t elem = vmul_f32(vmul_f32(ma, ma2), vdup_n_f32(te_sin_coeff2));
+ float32x2_t res = vsub_f32(ma, elem);
+
+ //3rd elem: x^5 / 5!
+ elem = vmul_f32(vmul_f32(elem, ma2), vdup_n_f32(te_sin_coeff3));
+ res = vadd_f32(res, elem);
+
+ //4th elem: x^7 / 7!
+ elem = vmul_f32(vmul_f32(elem, ma2), vdup_n_f32(te_sin_coeff4));
+ res = vsub_f32(res, elem);
+
+ //5th elem: x^9 / 9!
+ elem = vmul_f32(vmul_f32(elem, ma2), vdup_n_f32(te_sin_coeff5));
+ res = vadd_f32(res, elem);
+
+ //Change of sign: shift bit 0 of neg_v into the float sign-bit position and XOR it into the result
+ neg_v = vshl_n_u32(neg_v, 31);
+ res = vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(res), neg_v));
+ return res;
+}
+
#endif /* DOXYGEN_SKIP_THIS */
inline int32x4_t rounding_divide_by_pow2(int32x4_t x, int exponent)
@@ -318,6 +417,30 @@ inline float16x8_t vpowq_f16(float16x8_t val, float16x8_t n)
return vcombine_f16(vcvt_f16_f32(res0_f32), vcvt_f16_f32(res1_f32));
}
+
+inline float16x8_t vsinq_f16(float16x8_t val) // Eight-lane F16 sine, evaluated through two calls to the F32 implementation
+{
+ const float32x4_t val_high = vcvt_f32_f16(vget_high_f16(val)); // upper four lanes widened to F32
+ const float32x4_t val_low = vcvt_f32_f16(vget_low_f16(val)); // lower four lanes widened to F32
+
+ const float32x4_t res_high = vsinq_f32(val_high);
+ const float32x4_t res_low = vsinq_f32(val_low);
+
+ return vcombine_f16(vcvt_f16_f32(res_low), vcvt_f16_f32(res_high)); // narrow to F16; low half goes first so lane order matches the input
+}
+
+inline float16x4_t vsin_f16(float16x4_t val) // Four-lane F16 sine, evaluated through two calls to the two-lane F32 implementation
+{
+ const float32x4_t val_f32 = vcvt_f32_f16(val); // widen all four lanes to F32
+ const float32x2_t val_high = vget_high_f32(val_f32);
+ const float32x2_t val_low = vget_low_f32(val_f32);
+
+ const float32x2_t res_high = vsin_f32(val_high);
+ const float32x2_t res_low = vsin_f32(val_low);
+
+ return vcvt_f16_f32(vcombine_f32(res_low, res_high)); // low half goes first so lane order matches the input, then narrow to F16
+}
+
#endif /* DOXYGEN_SKIP_THIS */
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
} // namespace arm_compute
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h b/arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h
index c9dbb2fa81..0362ca125f 100644
--- a/arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h
+++ b/arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h
@@ -59,6 +59,7 @@
#include "arm_compute/core/NEON/wrapper/intrinsics/rev64.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/round.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/setlane.h"
+#include "arm_compute/core/NEON/wrapper/intrinsics/sin.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/store.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/sub.h"
#include "arm_compute/core/NEON/wrapper/intrinsics/tanh.h"
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/sin.h b/arm_compute/core/NEON/wrapper/intrinsics/sin.h
new file mode 100644
index 0000000000..da98876e11
--- /dev/null
+++ b/arm_compute/core/NEON/wrapper/intrinsics/sin.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_WRAPPER_SIN_H__
+#define __ARM_COMPUTE_WRAPPER_SIN_H__
+
+#include "arm_compute/core/NEON/NEMath.h"
+#include <arm_neon.h>
+
+namespace arm_compute
+{
+namespace wrapper
+{
+#define VSIN_IMPL(vtype, prefix, postfix) /* emits a wrapper::vsin() overload for vtype that forwards to prefix##_##postfix */ \
+ inline vtype vsin(const vtype &a) \
+ { \
+ return prefix##_##postfix(a); \
+ }
+
+VSIN_IMPL(float32x4_t, vsinq, f32) // vsin(float32x4_t) -> vsinq_f32
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+VSIN_IMPL(float16x8_t, vsinq, f16) // vsin(float16x8_t) -> vsinq_f16
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+
+#undef VSIN_IMPL // the helper macro is private to this header; undefine it so it does not leak into includers
+} // namespace wrapper
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_WRAPPER_SIN_H__ */ \ No newline at end of file