aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON
diff options
context:
space:
mode:
authorPablo Marquez Tello <pablo.tello@arm.com>2023-01-11 09:54:00 +0000
committerPablo Marquez Tello <pablo.tello@arm.com>2023-01-11 13:57:06 +0000
commit6bcdc578a388782f5ec80ec348c5dd3f5c1f8363 (patch)
tree07221f61b69faa7efb3280bf053667ef1906a470 /src/core/NEON
parent1b2f868b7b55e3e952520f0380e9174696c3ad1b (diff)
downloadComputeLibrary-6bcdc578a388782f5ec80ec348c5dd3f5c1f8363.tar.gz
Deprecated BF16 support in DepthConvert
* Removed BF16 validation tests for DepthConvert * Revert back to using inline assembly to convert to/from BF16 * Resolves COMPMID-5800 Change-Id: I803b2ad19ead297417f780c97c5b724cca6b394c Signed-off-by: Pablo Marquez Tello <pablo.tello@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8929 Reviewed-by: Jakub Sujak <jakub.sujak@arm.com> Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Benchmark: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON')
-rw-r--r--src/core/NEON/wrapper/intrinsics/cvt.h22
1 files changed, 21 insertions, 1 deletions
diff --git a/src/core/NEON/wrapper/intrinsics/cvt.h b/src/core/NEON/wrapper/intrinsics/cvt.h
index c75d43dbf2..1c77a9e9f0 100644
--- a/src/core/NEON/wrapper/intrinsics/cvt.h
+++ b/src/core/NEON/wrapper/intrinsics/cvt.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020, 2022 Arm Limited.
+ * Copyright (c) 2020, 2022-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -87,6 +87,26 @@ vcvta(const float32x4_t &a)
return vcvtaq_s32_f32(a);
}
#endif //__aarch64__
+
+#if defined(ARM_COMPUTE_ENABLE_BF16)
+/** Convert 2x128-bit floating point vectors (8 floats) into 1x128-bit bfloat16 vector (8 values)
+ *
+ * @param[in] inptr Pointer to the input memory to load values from (32 bytes are read)
+ * @param[out] outptr Pointer to the output memory to store values to (16 bytes are written)
+ */
+inline void vcvt_bf16_f32(const float *inptr, uint16_t *outptr)
+{
+ __asm __volatile(
+ "ldp q0, q1, [%[inptr]]\n" // Load two 128-bit registers (q0, q1) from inptr, no address writeback
+ ".inst 0xea16800\n" // BFCVTN v0, v0 (emitted as a raw instruction word)
+ ".inst 0x4ea16820\n" // BFCVTN2 v0, v1 (emitted as a raw instruction word)
+ "str q0, [%[outptr]]\n" // Store the 128-bit result held in q0 to outptr
+ : [inptr] "+r"(inptr) // NOTE(review): bound as read-write, yet no instruction above writes this register - confirm "+r" is intended
+ : [outptr] "r"(outptr)
+ : "v0", "v1", "memory"); // v0/v1 are overwritten by the asm; "memory" covers the load and store done through the pointers
+}
+#endif /* defined(ARM_COMPUTE_ENABLE_BF16) */
+
} // namespace wrapper
} // namespace arm_compute
#endif /* ARM_COMPUTE_WRAPPER_CVT_H */