author    | Georgios Pinitas <georgios.pinitas@arm.com> | 2021-04-22 16:42:03 +0100
committer | Michalis Spyrou <michalis.spyrou@arm.com> | 2021-06-07 13:21:17 +0000
commit    | bdcdc39d89b6a6556f5c0483af5379f75eae0c55 (patch)
tree      | 454cd50afa81da3ca3382701619fef023911e3f7 /src/core/NEON
parent    | 5a643320b79f15a5d09b5366c4744579cf71e303 (diff)
download  | ComputeLibrary-bdcdc39d89b6a6556f5c0483af5379f75eae0c55.tar.gz
Enable fat binary support
Changes our build system to allow building both Neon(TM) and SVE
kernels and packaging them in the same binary. This allows
runtime selection of the underlying architecture.
Adds a new build option, fat_binary, to enable this feature.
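For readers unfamiliar with the pattern, the sketch below illustrates the kind of runtime dispatch a fat binary relies on. It is a simplified, self-contained rendition of the BatchNormalizationKernel selection table touched in this diff; the SelectorData fields, kernel names, and the way cpu_has_sve gets populated are illustrative assumptions, not the library's actual API. In the diff itself the table entries are guarded by ENABLE_SVE / ENABLE_NEON instead of __ARM_FEATURE_SVE, so both families can be registered in one build.

```cpp
// Minimal sketch of table-based runtime kernel selection (illustrative only).
#include <cstdio>
#include <functional>
#include <string>
#include <vector>

struct SelectorData
{
    bool cpu_has_sve; // assumed to be filled in from runtime CPU detection
};

using KernelFn = void (*)();

struct KernelEntry
{
    std::string                               name;
    std::function<bool(const SelectorData &)> is_selected;
    KernelFn                                  ukernel;
};

// Stand-ins for the SVE and Neon implementations; in a fat binary both are
// compiled in, each from its own translation unit.
void sve_kernel()  { std::puts("running SVE kernel"); }
void neon_kernel() { std::puts("running Neon kernel"); }

// First entry whose predicate matches the runtime CPU wins.
static const std::vector<KernelEntry> available_kernels = {
    { "sve_kernel",  [](const SelectorData &d) { return d.cpu_has_sve; }, &sve_kernel  },
    { "neon_kernel", [](const SelectorData &)  { return true; },          &neon_kernel },
};

const KernelEntry *get_implementation(const SelectorData &data)
{
    for(const auto &k : available_kernels)
    {
        if(k.is_selected(data))
        {
            return &k;
        }
    }
    return nullptr;
}

int main()
{
    SelectorData data{ /* cpu_has_sve = */ false }; // pretend detection found no SVE
    if(const KernelEntry *k = get_implementation(data))
    {
        k->ukernel(); // dispatches to the Neon fallback here
    }
}
```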
Change-Id: I8e8386149773ce28e071a2fb7ddd8c8ae0f28a4a
Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5704
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON')
-rw-r--r-- | src/core/NEON/SVEMath.h                                     |  4
-rw-r--r-- | src/core/NEON/SVEMath.inl                                   |  4
-rw-r--r-- | src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp   |  7
-rw-r--r-- | src/core/NEON/kernels/batchnormalization/impl/SVE/fp16.cpp  |  4
-rw-r--r-- | src/core/NEON/kernels/batchnormalization/impl/SVE/fp32.cpp  |  4
-rw-r--r-- | src/core/NEON/wrapper/intrinsics/svpow.h                    | 10
-rw-r--r-- | src/core/NEON/wrapper/svtraits.h                            |  4
-rw-r--r-- | src/core/NEON/wrapper/traits.h                              |  8
8 files changed, 27 insertions, 18 deletions
diff --git a/src/core/NEON/SVEMath.h b/src/core/NEON/SVEMath.h
index b73043a435..dde75e8088 100644
--- a/src/core/NEON/SVEMath.h
+++ b/src/core/NEON/SVEMath.h
@@ -24,7 +24,7 @@
 #ifndef ARM_COMPUTE_SVEMATH_H
 #define ARM_COMPUTE_SVEMATH_H
 
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ENABLE_SVE)
 #include "src/core/NEON/wrapper/intrinsics/svcvt.h"
 #include "src/core/NEON/wrapper/intrinsics/svdup_n.h"
 #include "src/core/NEON/wrapper/intrinsics/svreinterpret.h"
@@ -185,5 +185,5 @@ int_vec_type convert_float_to_int(const svfloat32_t &in_0, const svfloat32_t &in
 } // namespace arm_compute
 #include "src/core/NEON/SVEMath.inl"
-#endif /* defined(__ARM_FEATURE_SVE) */
+#endif /* defined(ENABLE_SVE) */
 #endif /* ARM_COMPUTE_SVEMATH_H */
\ No newline at end of file
diff --git a/src/core/NEON/SVEMath.inl b/src/core/NEON/SVEMath.inl
index d909adfeb5..7625e5be34 100644
--- a/src/core/NEON/SVEMath.inl
+++ b/src/core/NEON/SVEMath.inl
@@ -24,7 +24,7 @@
 #include <cmath>
 #include <limits>
 
-#if defined(__ARM_FEATURE_SVE)
+#if defined(__ARM_FEATURE_SVE) && defined(ENABLE_SVE)
 
 #ifndef M_PI
 #define M_PI (3.14159265358979323846)
@@ -388,4 +388,4 @@ inline svint8_t convert_float_to_int<svint8_t>(const svfloat32_t &in_0, const sv
 #endif /* defined(__ARM_FEATURE_SVE2) */
 } // namespace arm_compute
 
-#endif /* defined(__ARM_FEATURE_SVE) */
+#endif /* defined(ENABLE_SVE) */
diff --git a/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp b/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp
index 1691943b07..92000bb2f6 100644
--- a/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp
@@ -63,7 +63,7 @@ struct BatchNormalizationKernel
 
 static const BatchNormalizationKernel available_kernels[] =
 {
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ENABLE_SVE)
     {
         "fp16_sve_batch_normalization",
         [](const BatchNormalizationSelectorData & data) { return data.dt == DataType::F16; },
@@ -74,7 +74,8 @@ static const BatchNormalizationKernel available_kernels[] =
         [](const BatchNormalizationSelectorData & data) { return data.dt == DataType::F32; },
         REGISTER_FP32_SVE(arm_compute::cpu::fp32_sve_batch_normalization)
     },
-#else /* !defined(__ARM_FEATURE_SVE) */
+#endif /* !defined(ENABLE_SVE) */
+#if defined(ENABLE_NEON)
 #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
     {
         "fp16_neon_batch_normalization",
@@ -87,7 +88,7 @@ static const BatchNormalizationKernel available_kernels[] =
         [](const BatchNormalizationSelectorData & data) { return data.dt == DataType::F32; },
         REGISTER_FP32_NEON(arm_compute::cpu::fp32_neon_batch_normalization)
     },
-#endif /* !defined(__ARM_FEATURE_SVE) */
+#endif /* !defined(ENABLE_NEON) */
 };
 
 const BatchNormalizationKernel *get_implementation(const BatchNormalizationSelectorData &data)
diff --git a/src/core/NEON/kernels/batchnormalization/impl/SVE/fp16.cpp b/src/core/NEON/kernels/batchnormalization/impl/SVE/fp16.cpp
index 3e3e81d044..a715b9d3ee 100644
--- a/src/core/NEON/kernels/batchnormalization/impl/SVE/fp16.cpp
+++ b/src/core/NEON/kernels/batchnormalization/impl/SVE/fp16.cpp
@@ -29,7 +29,7 @@
 #include <cmath>
 #include <cstddef>
 
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ENABLE_SVE)
 #include <arm_sve.h>
 
 namespace arm_compute
@@ -114,4 +114,4 @@ void fp16_sve_batch_normalization(ITensor *src, ITensor *dst, const ITensor *mea
 }
 } // namespace cpu
 } // namespace arm_compute
-#endif // __ARM_FEATURE_SVE
+#endif // ENABLE_SVE
diff --git a/src/core/NEON/kernels/batchnormalization/impl/SVE/fp32.cpp b/src/core/NEON/kernels/batchnormalization/impl/SVE/fp32.cpp
index b0d4cbb684..7cc570d8aa 100644
--- a/src/core/NEON/kernels/batchnormalization/impl/SVE/fp32.cpp
+++ b/src/core/NEON/kernels/batchnormalization/impl/SVE/fp32.cpp
@@ -29,7 +29,7 @@
 #include <cmath>
 #include <cstddef>
 
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ENABLE_SVE)
 #include <arm_sve.h>
 
 namespace arm_compute
@@ -114,4 +114,4 @@ void fp32_sve_batch_normalization(ITensor *src, ITensor *dst, const ITensor *mea
 }
 } // namespace cpu
 } // namespace arm_compute
-#endif // __ARM_FEATURE_SVE
+#endif // ENABLE_SVE
diff --git a/src/core/NEON/wrapper/intrinsics/svpow.h b/src/core/NEON/wrapper/intrinsics/svpow.h
index e89a4ab8f6..0f58d758cb 100644
--- a/src/core/NEON/wrapper/intrinsics/svpow.h
+++ b/src/core/NEON/wrapper/intrinsics/svpow.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020 Arm Limited.
+ * Copyright (c) 2020-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -35,8 +35,16 @@ namespace wrapper
         return svpow_##postfix##_z(pg, a, b); \
     }
 
+#define SVPOW_Z_IMPL_INT(type, postfix)                             \
+    inline type svpow_z(svbool_t pg, const type &a, const type &b)  \
+    {                                                               \
+        ARM_COMPUTE_UNUSED(pg, a, b);                               \
+        ARM_COMPUTE_ERROR("Not supported");                         \
+    }
+
 SVPOW_Z_IMPL(svfloat32_t, f32)
 SVPOW_Z_IMPL(svfloat16_t, f16)
+SVPOW_Z_IMPL_INT(svint16_t, s16)
 
 #undef SVPOW_Z_IMPL
 
diff --git a/src/core/NEON/wrapper/svtraits.h b/src/core/NEON/wrapper/svtraits.h
index 465983d16f..8d2d660659 100644
--- a/src/core/NEON/wrapper/svtraits.h
+++ b/src/core/NEON/wrapper/svtraits.h
@@ -23,7 +23,7 @@
  */
 #ifndef SRC_CORE_NEON_WRAPPER_SVTRAITS_H
 #define SRC_CORE_NEON_WRAPPER_SVTRAITS_H
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ENABLE_SVE)
 #include "src/core/NEON/SVEMath.h"
 #include <arm_sve.h>
 
@@ -66,5 +66,5 @@ DEFINE_TYPES(bfloat16_t)
 } // namespace wrapper
 } // namespace arm_compute
 
-#endif /* defined(__ARM_FEATURE_SVE) */
+#endif /* defined(ENABLE_SVE) */
 #endif /* #ifndef SRC_CORE_NEON_WRAPPER_SVTRAITS_H */
diff --git a/src/core/NEON/wrapper/traits.h b/src/core/NEON/wrapper/traits.h
index 3452b76761..81685140f1 100644
--- a/src/core/NEON/wrapper/traits.h
+++ b/src/core/NEON/wrapper/traits.h
@@ -26,9 +26,9 @@
 
 #include <arm_neon.h>
 
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ENABLE_SVE)
 #include <arm_sve.h>
-#endif /* defined(__ARM_FEATURE_SVE) */
+#endif /* defined(ENABLE_SVE) */
 
 namespace arm_compute
 {
@@ -116,13 +116,13 @@ template <> struct neon_bitvector<float16_t, BitWidth::W128>{ using type = float
 #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ENABLE_SVE)
 /** Create the appropriate SVE vector given its type */
 template <typename T> struct sve_vector;
 
 template <> struct sve_vector<uint8_t>{ using scalar_type = uint8_t; using type = svuint8_t; };
 template <> struct sve_vector<int8_t>{ using scalar_type = int8_t; using type = svint8_t; };
-#endif /* defined(__ARM_FEATURE_SVE) */
+#endif /* defined(ENABLE_SVE) */
 
 #endif /* DOXYGEN_SKIP_THIS */
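A closing note on the macro rename: __ARM_FEATURE_SVE is defined by the compiler only for translation units built with SVE code generation, so it cannot gate kernel registration once SVE and baseline Neon objects are linked into a single binary. The sketch below shows the guard convention after this patch; how the build system defines ENABLE_SVE and ENABLE_NEON under fat_binary is an assumption here, not something shown in this diff, and the function parameters are elided.

```cpp
// Illustrative guard layout (assumed build wiring, not code from the diff):
// each kernel family lives in its own translation unit and is gated by a
// build-system macro rather than by the compiler-defined __ARM_FEATURE_SVE.

// SVE translation unit (e.g. the SVE/fp32.cpp touched above), compiled with SVE enabled.
#if defined(ENABLE_SVE)
#include <arm_sve.h>
namespace cpu
{
void fp32_sve_batch_normalization(/* parameters elided */) { /* SVE intrinsics */ }
} // namespace cpu
#endif // ENABLE_SVE

// Neon translation unit, compiled for the baseline architecture.
#if defined(ENABLE_NEON)
#include <arm_neon.h>
namespace cpu
{
void fp32_neon_batch_normalization(/* parameters elided */) { /* Neon intrinsics */ }
} // namespace cpu
#endif // ENABLE_NEON
```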