diff options
author | Felix Thomasmathibalan <felixjohnny.thomasmathibalan@arm.com> | 2023-09-27 17:46:17 +0100 |
---|---|---|
committer | felixjohnny.thomasmathibalan <felixjohnny.thomasmathibalan@arm.com> | 2023-09-28 12:08:05 +0000 |
commit | afd38f0c617d6f89b2b4532c6c44f116617e2b6f (patch) | |
tree | 03bc7d5a762099989b16a656fa8d397b490ed70e /src/core/NEON/wrapper/intrinsics/cvt.h | |
parent | bdcb4c148ee2fdeaaddf4cf1e57bbb0de02bb894 (diff) | |
download | ComputeLibrary-afd38f0c617d6f89b2b4532c6c44f116617e2b6f.tar.gz |
Apply clang-format on repository
Code is formatted as per a revised clang format configuration
file(not part of this delivery). Version 14.0.6 is used.
Exclusion List:
- files with .cl extension
- files that are not strictly C/C++ (e.g. Android.bp, Sconscript ...)
And the following directories
- compute_kernel_writer/validation/
- tests/
- include/
- src/core/NEON/kernels/convolution/
- src/core/NEON/kernels/arm_gemm/
- src/core/NEON/kernels/arm_conv/
- data/
There will be a follow up for formatting of .cl files and the
files under tests/ and compute_kernel_writer/validation/.
Signed-off-by: Felix Thomasmathibalan <felixjohnny.thomasmathibalan@arm.com>
Change-Id: Ib7eb1fcf4e7537b9feaefcfc15098a804a3fde0a
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10391
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Diffstat (limited to 'src/core/NEON/wrapper/intrinsics/cvt.h')
-rw-r--r-- | src/core/NEON/wrapper/intrinsics/cvt.h | 47 |
1 files changed, 21 insertions, 26 deletions
diff --git a/src/core/NEON/wrapper/intrinsics/cvt.h b/src/core/NEON/wrapper/intrinsics/cvt.h index 1c77a9e9f0..381de2284a 100644 --- a/src/core/NEON/wrapper/intrinsics/cvt.h +++ b/src/core/NEON/wrapper/intrinsics/cvt.h @@ -30,12 +30,11 @@ namespace arm_compute { namespace wrapper { -#define VCVT_TO_F32_IMPL(ptype, vtype, prefix, postfix1, postfix2) \ - template <typename T> \ - inline typename std::enable_if<std::is_same<T, float>::value, float32x4_t>::type \ - vcvt(const vtype &a) \ - { \ - return prefix##_##postfix1##_##postfix2(a); \ +#define VCVT_TO_F32_IMPL(ptype, vtype, prefix, postfix1, postfix2) \ + template <typename T> \ + inline typename std::enable_if<std::is_same<T, float>::value, float32x4_t>::type vcvt(const vtype &a) \ + { \ + return prefix##_##postfix1##_##postfix2(a); \ } VCVT_TO_F32_IMPL(float32x4_t, uint32x4_t, vcvtq, f32, u32) @@ -46,12 +45,11 @@ VCVT_TO_F32_IMPL(float32x4_t, float16x4_t, vcvt, f32, f16) #undef VCVT_TO_F32_IMPL #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -#define VCVT_TO_F16_IMPL(ptype, vtype, prefix, postfix1, postfix2) \ - template <typename T> \ - inline typename std::enable_if<std::is_same<T, float16_t>::value, float16x4_t>::type \ - vcvt(const vtype &a) \ - { \ - return prefix##_##postfix1##_##postfix2(a); \ +#define VCVT_TO_F16_IMPL(ptype, vtype, prefix, postfix1, postfix2) \ + template <typename T> \ + inline typename std::enable_if<std::is_same<T, float16_t>::value, float16x4_t>::type vcvt(const vtype &a) \ + { \ + return prefix##_##postfix1##_##postfix2(a); \ } VCVT_TO_F16_IMPL(float16x4_t, float32x4_t, vcvt, f16, f32) @@ -59,14 +57,14 @@ VCVT_TO_F16_IMPL(float16x4_t, float32x4_t, vcvt, f16, f32) #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC template <typename T> -inline typename std::enable_if < std::is_same<T, uint8_t>::value || std::is_same<T, uint32_t>::value, uint32x4_t >::type +inline typename std::enable_if<std::is_same<T, uint8_t>::value || std::is_same<T, uint32_t>::value, uint32x4_t>::type vcvt(const float32x4_t &a) { return vcvtq_u32_f32(a); } template <typename T> -inline typename std::enable_if < std::is_same<T, int8_t>::value || std::is_same<T, int32_t>::value, int32x4_t >::type +inline typename std::enable_if<std::is_same<T, int8_t>::value || std::is_same<T, int32_t>::value, int32x4_t>::type vcvt(const float32x4_t &a) { return vcvtq_s32_f32(a); @@ -74,15 +72,13 @@ vcvt(const float32x4_t &a) #ifdef __aarch64__ template <typename T> -inline typename std::enable_if<std::is_same<T, uint32_t>::value, uint32x4_t>::type -vcvta(const float32x4_t &a) +inline typename std::enable_if<std::is_same<T, uint32_t>::value, uint32x4_t>::type vcvta(const float32x4_t &a) { return vcvtaq_u32_f32(a); } template <typename T> -inline typename std::enable_if<std::is_same<T, int32_t>::value, int32x4_t>::type -vcvta(const float32x4_t &a) +inline typename std::enable_if<std::is_same<T, int32_t>::value, int32x4_t>::type vcvta(const float32x4_t &a) { return vcvtaq_s32_f32(a); } @@ -96,14 +92,13 @@ vcvta(const float32x4_t &a) */ inline void vcvt_bf16_f32(const float *inptr, uint16_t *outptr) { - __asm __volatile( - "ldp q0, q1, [%[inptr]]\n" - ".inst 0xea16800\n" // BFCVTN v0, v0 - ".inst 0x4ea16820\n" // BFCVTN2 v0, v1 - "str q0, [%[outptr]]\n" - : [inptr] "+r"(inptr) - : [outptr] "r"(outptr) - : "v0", "v1", "memory"); + __asm __volatile("ldp q0, q1, [%[inptr]]\n" + ".inst 0xea16800\n" // BFCVTN v0, v0 + ".inst 0x4ea16820\n" // BFCVTN2 v0, v1 + "str q0, [%[outptr]]\n" + : [inptr] "+r"(inptr) + : [outptr] "r"(outptr) + : "v0", "v1", "memory"); } #endif /* defined(ARM_COMPUTE_ENABLE_BF16) */ |