aboutsummaryrefslogtreecommitdiff
path: root/arm_compute/core/NEON/wrapper/intrinsics/cvt.h
diff options
context:
space:
mode:
authorGeorgios Pinitas <georgios.pinitas@arm.com>2020-02-26 09:58:13 +0000
committerGeorgios Pinitas <georgios.pinitas@arm.com>2020-03-05 15:15:15 +0000
commite8291acc1d9e89c9274d31f0d5bb4779eb95588c (patch)
tree5a0fef36d6daabe387174e55b60de54557c75291 /arm_compute/core/NEON/wrapper/intrinsics/cvt.h
parentaa85cdf22802cb892d7fa422ca505a43d84adb38 (diff)
downloadComputeLibrary-e8291acc1d9e89c9274d31f0d5bb4779eb95588c.tar.gz
COMPMID-3152: Initial Bfloat16 support
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com> Change-Id: Ie6959e37e13731c86b2ee29392a99a293450a1b4 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/2824 Tested-by: Arm Jenkins <bsgcomp@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
Diffstat (limited to 'arm_compute/core/NEON/wrapper/intrinsics/cvt.h')
-rw-r--r--arm_compute/core/NEON/wrapper/intrinsics/cvt.h19
1 files changed, 19 insertions, 0 deletions
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/cvt.h b/arm_compute/core/NEON/wrapper/intrinsics/cvt.h
index 1f22e09a11..5ea9a5dedd 100644
--- a/arm_compute/core/NEON/wrapper/intrinsics/cvt.h
+++ b/arm_compute/core/NEON/wrapper/intrinsics/cvt.h
@@ -56,6 +56,25 @@ vcvt(const float32x4_t &a)
return vcvtq_s32_f32(a);
}
+#if defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16)
+/** Convert 2x128-bit floating point vectors into 1x128-bit bfloat16 vector
+ *
+ * @param[in] inptr Pointer to the input memory to load values from
+ * @param[in,out] outptr Pointer to the output memory to store values to
+ */
+inline void vcvt_bf16_f32(const float *inptr, uint16_t *outptr)
+{
+ __asm __volatile(
+ "ldp q0, q1, [%[inptr]]\n"
+ ".inst 0xea16800\n" // BFCVTN v0, v0
+ ".inst 0x4ea16820\n" // BFCVTN2 v0, v1
+ "str q0, [%[outptr]]\n"
+ : [inptr] "+r"(inptr)
+ : [outptr] "r"(outptr)
+ : "v0", "v1", "memory");
+}
+#endif /* defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16) */
+
} // namespace wrapper
} // namespace arm_compute
#endif /* ARM_COMPUTE_WRAPPER_CVT_H */