From f2cdce30ca782cac7caebc43c7e67caf677b7358 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Mon, 9 Dec 2019 18:35:57 +0000 Subject: COMPMID-2841: Enable aarch32 builds Decouples the execution state from the architecture. Now architectures can be set as (armv7a, armv8a, etc) and execution state using the `estate` flag with the following options (auto, 32, 64). Change-Id: Ie7f757b3565495a39c7e20fb350a72fd9c5a2a4f Signed-off-by: Georgios Pinitas Reviewed-on: https://review.mlplatform.org/c/2438 Reviewed-by: Pablo Marquez Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins --- SConscript | 5 +- SConstruct | 90 +++++++++++++--------- arm_compute/core/NEON/NEMath.inl | 4 +- docs/Doxyfile | 4 +- src/core/NEON/kernels/NESelectKernel.cpp | 4 +- .../arm_gemm/merges/a32_merge_float_8x6.hpp | 4 + 6 files changed, 66 insertions(+), 45 deletions(-) diff --git a/SConscript b/SConscript index ed22f6eefe..0b7729cced 100644 --- a/SConscript +++ b/SConscript @@ -216,11 +216,10 @@ if env['neon']: graph_files += Glob('src/graph/backends/NEON/*.cpp') - if env['arch'] == "armv7a": + if env['estate'] == '32': core_files += Glob('src/core/NEON/kernels/arm_gemm/kernels/a32_*/*.cpp') - - if "arm64-v8" in env['arch']: + if env['estate'] == '64': core_files += Glob('src/core/NEON/kernels/arm_gemm/kernels/a64_*/*.cpp') if "sve" in env['arch']: core_files += Glob('src/core/NEON/kernels/arm_gemm/kernels/sve_*/*.cpp') diff --git a/SConstruct b/SConstruct index e63d33e9fd..216920f059 100644 --- a/SConstruct +++ b/SConstruct @@ -40,7 +40,9 @@ vars.AddVariables( BoolVariable("debug", "Debug", False), BoolVariable("asserts", "Enable asserts (this flag is forced to 1 for debug=1)", False), BoolVariable("logging", "Logging (this flag is forced to 1 for debug=1)", False), - EnumVariable("arch", "Target Architecture", "armv7a", allowed_values=("armv7a", "arm64-v8a", "arm64-v8.2-a", "arm64-v8.2-a-sve", "x86_32", "x86_64")), + EnumVariable("arch", "Target Architecture", "armv7a", + 
allowed_values=("armv7a", "arm64-v8a", "arm64-v8.2-a", "arm64-v8.2-a-sve", "x86_32", "x86_64", "armv8a", "armv8.2-a", "armv8.2-a-sve", "x86")), + EnumVariable("estate", "Execution State", "auto", allowed_values=("auto", "32", "64")), EnumVariable("os", "Target OS", "linux", allowed_values=("linux", "android", "bare_metal")), EnumVariable("build", "Build type", "cross_compile", allowed_values=("native", "cross_compile", "embed_only")), BoolVariable("examples", "Build example programs", True), @@ -164,48 +166,66 @@ if env['openmp']: env.Append(CXXFLAGS = ['-fopenmp']) env.Append(LINKFLAGS = ['-fopenmp']) +# Resolve the execution state: 'auto' selects 32-bit for v7a and 64-bit otherwise +if env['estate'] == 'auto': + if 'v7a' in env['arch']: + env['estate'] = '32' + else: + env['estate'] = '64' + +# Legacy arm64-* arch names always force the 64-bit execution state +if 'arm64' in env['arch']: + env['estate'] = '64' + +if 'v7a' in env['arch'] and env['estate'] == '64': + print("ERROR: armv7a architecture has only 32-bit execution state") + Exit(1) + # Add architecture specific flags prefix = "" -if env['arch'] == 'armv7a': +if 'v7a' in env['arch']: env.Append(CXXFLAGS = ['-march=armv7-a', '-mthumb', '-mfpu=neon']) - - if env['os'] == 'linux': - prefix = "arm-linux-gnueabihf-" - env.Append(CXXFLAGS = ['-mfloat-abi=hard']) - elif env['os'] == 'bare_metal': - prefix = "arm-eabi-" - env.Append(CXXFLAGS = ['-mfloat-abi=hard']) - elif env['os'] == 'android': - prefix = "arm-linux-androideabi-" + if env['os'] == 'android': env.Append(CXXFLAGS = ['-mfloat-abi=softfp']) -elif env['arch'] == 'arm64-v8a': + else: + env.Append(CXXFLAGS = ['-mfloat-abi=hard']) +elif 'v8a' in env['arch']: env.Append(CXXFLAGS = ['-march=armv8-a']) - env.Append(CPPDEFINES = ['ARM_COMPUTE_AARCH64_V8A']) - if env['os'] == 'linux': - prefix = "aarch64-linux-gnu-" - elif env['os'] == 'bare_metal': - prefix = "aarch64-elf-" - elif env['os'] == 'android': - prefix = "aarch64-linux-android-" -elif 'arm64-v8.2-a' in env['arch']: - if env['arch'] == 'arm64-v8.2-a-sve': + if env['estate'] == '32': + 
env.Append(CXXFLAGS = ['-mfpu=neon-fp-armv8']) +elif 'v8.2-a' in env['arch']: + if env['estate'] == '32': + env.Append(CXXFLAGS = ['-mfpu=neon-fp-armv8']) + if 'sve' in env['arch']: env.Append(CXXFLAGS = ['-march=armv8.2-a+sve+fp16+dotprod']) else: env.Append(CXXFLAGS = ['-march=armv8.2-a+fp16']) # explicitly enable fp16 extension otherwise __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is undefined - if env['os'] == 'linux': - prefix = "aarch64-linux-gnu-" - elif env['os'] == 'bare_metal': - prefix = "aarch64-elf-" - elif env['os'] == 'android': - prefix = "aarch64-linux-android-" - env.Append(CPPDEFINES = ['ARM_COMPUTE_AARCH64_V8_2']) -elif env['arch'] == 'x86_32': - env.Append(CCFLAGS = ['-m32']) - env.Append(LINKFLAGS = ['-m32']) -elif env['arch'] == 'x86_64': - env.Append(CXXFLAGS = ['-fPIC']) - env.Append(CCFLAGS = ['-m64']) - env.Append(LINKFLAGS = ['-m64']) +elif 'x86' in env['arch']: + if env['estate'] == '32': + env.Append(CCFLAGS = ['-m32']) + env.Append(LINKFLAGS = ['-m32']) + else: + env.Append(CXXFLAGS = ['-fPIC']) + env.Append(CCFLAGS = ['-m64']) + env.Append(LINKFLAGS = ['-m64']) + +# Define toolchain +prefix = "" +if 'x86' not in env['arch']: + if env['estate'] == '32': + if env['os'] == 'linux': + prefix = "arm-linux-gnueabihf-" if 'v7' in env['arch'] else "armv8l-linux-gnueabihf-" + elif env['os'] == 'bare_metal': + prefix = "arm-eabi-" + elif env['os'] == 'android': + prefix = "arm-linux-androideabi-" + elif env['estate'] == '64' and 'v8' in env['arch']: + if env['os'] == 'linux': + prefix = "aarch64-linux-gnu-" + elif env['os'] == 'bare_metal': + prefix = "aarch64-elf-" + elif env['os'] == 'android': + prefix = "aarch64-linux-android-" if env['build'] == 'native': prefix = "" diff --git a/arm_compute/core/NEON/NEMath.inl b/arm_compute/core/NEON/NEMath.inl index a3601f6a25..179f1b6299 100644 --- a/arm_compute/core/NEON/NEMath.inl +++ b/arm_compute/core/NEON/NEMath.inl @@ -434,7 +434,7 @@ inline float16x8_t vexpq_f16(float16x8_t x) const float32x4_t x_high 
= vcvt_f32_f16(vget_high_f16(x)); const float32x4_t x_low = vcvt_f32_f16(vget_low_f16(x)); - const float16x8_t res = vcvt_high_f16_f32(vcvt_f16_f32(vexpq_f32(x_low)), vexpq_f32(x_high)); + const float16x8_t res = vcombine_f16(vcvt_f16_f32(vexpq_f32(x_low)), vcvt_f16_f32(vexpq_f32(x_high))); return res; } @@ -444,7 +444,7 @@ inline float16x8_t vlogq_f16(float16x8_t x) const float32x4_t x_high = vcvt_f32_f16(vget_high_f16(x)); const float32x4_t x_low = vcvt_f32_f16(vget_low_f16(x)); - const float16x8_t res = vcvt_high_f16_f32(vcvt_f16_f32(vlogq_f32(x_low)), vlogq_f32(x_high)); + const float16x8_t res = vcombine_f16(vcvt_f16_f32(vlogq_f32(x_low)), vcvt_f16_f32(vlogq_f32(x_high))); return res; } diff --git a/docs/Doxyfile b/docs/Doxyfile index 835aa00a05..7f5aa5bdbe 100644 --- a/docs/Doxyfile +++ b/docs/Doxyfile @@ -2087,9 +2087,7 @@ PREDEFINED = DOXYGEN_SKIP_THIS \ LOCATE_MIN \ LOCATE_MAX \ HAS_BIAS \ - POOL_AVG \ - ARM_COMPUTE_AARCH64_V8_2 \ - ARM_COMPUTE_AARCH64_V8A + POOL_AVG # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this diff --git a/src/core/NEON/kernels/NESelectKernel.cpp b/src/core/NEON/kernels/NESelectKernel.cpp index 72afe4f054..191d182002 100644 --- a/src/core/NEON/kernels/NESelectKernel.cpp +++ b/src/core/NEON/kernels/NESelectKernel.cpp @@ -197,7 +197,7 @@ void NESelectKernel::configure(const ITensor *c, const ITensor *x, const ITensor }; #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC map_function["op_F16"] = &select_op_16; -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ } else { @@ -213,7 +213,7 @@ void NESelectKernel::configure(const ITensor *c, const ITensor *x, const ITensor }; #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC map_function["op_F16"] = &select_op_not_same_rank; -#endif /* ARM_COMPUTE_AARCH64_V8_2 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ } auto it = map_function.find(function_to_call); diff --git a/src/core/NEON/kernels/arm_gemm/merges/a32_merge_float_8x6.hpp 
b/src/core/NEON/kernels/arm_gemm/merges/a32_merge_float_8x6.hpp index 9409646818..16bdbb5986 100644 --- a/src/core/NEON/kernels/arm_gemm/merges/a32_merge_float_8x6.hpp +++ b/src/core/NEON/kernels/arm_gemm/merges/a32_merge_float_8x6.hpp @@ -83,12 +83,16 @@ void MergeResults<8, 6, false>(float *out, const float *in, const int ldout, con switch ((y + 5) - ymax) { case 4: outptr1 = dummyres; + /* fall through */ case 3: outptr2 = dummyres; + /* fall through */ case 2: outptr3 = dummyres; + /* fall through */ case 1: outptr4 = dummyres; + /* fall through */ case 0: outptr5 = dummyres; break; -- cgit v1.2.1