From 9f7aca97f41a1aa683141f14f19f605f122f7561 Mon Sep 17 00:00:00 2001 From: Pablo Marquez Tello Date: Wed, 16 Aug 2023 15:21:44 +0100 Subject: Changes to enable FP16 in armv8a multi_isa * This is the initial patch to start working on enabling fp16 in all multi_isa builds. More changes are required in the way we register the kernels using the macro REGISTER_FP16_NEON. * In this patch we add the capability to build the fp16 files listed in filelist.json with the correct arch option to enable FP16 * This patch is required towards building a universal multi_isa binary where fp16 is enabled. * Enable REGISTER_FP16_NEON macro for all builds by removing __ARM_FEATURE_FP16_VECTOR_ARITHMETIC guard from the macro definition. The macro has to be used across all types of builds. Change-Id: I99f4c273f6ee04cad3c097e5e374200f48568fa9 Signed-off-by: Pablo Marquez Tello Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10682 Tested-by: Arm Jenkins Reviewed-by: Jakub Sujak Comments-Addressed: Arm Jenkins Benchmark: Arm Jenkins --- SConscript | 101 +++++++++++++++------ SConstruct | 20 ++-- docs/user_guide/release_version_and_change_log.dox | 2 + src/core/common/Registrars.h | 12 +-- 4 files changed, 93 insertions(+), 42 deletions(-) diff --git a/SConscript b/SConscript index 9069df901b..31e7a5b4f4 100644 --- a/SConscript +++ b/SConscript @@ -82,7 +82,7 @@ def build_obj_list(arch_info, sources, static=False): # A list of static objects # A list of shared objects -def build_lib_objects(): +def build_multiisa_lib_objects(): lib_static_objs = [] # static objects lib_shared_objs = [] # shared objects @@ -93,20 +93,30 @@ def build_lib_objects(): # Build all the common files for the base architecture if env['arch'] == 'armv8a': - lib_static_objs += build_obj_list(filedefs["armv8-a"], lib_files, static=True) - lib_shared_objs += build_obj_list(filedefs["armv8-a"], lib_files, static=False) + lib_static_objs += build_obj_list(filedefs["armv8-a"], misa_lib_files, 
static=True) + lib_shared_objs += build_obj_list(filedefs["armv8-a"], misa_lib_files, static=False) else: - lib_static_objs += build_obj_list(filedefs["armv8.2-a"], lib_files, static=True) - lib_shared_objs += build_obj_list(filedefs["armv8.2-a"], lib_files, static=False) + lib_static_objs += build_obj_list(filedefs["armv8.2-a"], misa_lib_files, static=True) + lib_shared_objs += build_obj_list(filedefs["armv8.2-a"], misa_lib_files, static=False) + + # Build the FP16 specific files + lib_static_objs += build_obj_list(filedefs["armv8.2-a"], misa_lib_files_neon_fp16, static=True) + lib_shared_objs += build_obj_list(filedefs["armv8.2-a"], misa_lib_files_neon_fp16, static=False) # Build the SVE specific files - lib_static_objs += build_obj_list(filedefs["armv8.2-a-sve"], lib_files_sve, static=True) - lib_shared_objs += build_obj_list(filedefs["armv8.2-a-sve"], lib_files_sve, static=False) + lib_static_objs += build_obj_list(filedefs["armv8.2-a-sve"], misa_lib_files_sve, static=True) + lib_shared_objs += build_obj_list(filedefs["armv8.2-a-sve"], misa_lib_files_sve, static=False) + lib_static_objs += build_obj_list(filedefs["armv8.2-a-sve"], misa_lib_files_sve_fp16, static=True) + lib_shared_objs += build_obj_list(filedefs["armv8.2-a-sve"], misa_lib_files_sve_fp16, static=False) + # Build the SVE2 specific files arm_compute_env.Append(CPPDEFINES = ['ARM_COMPUTE_ENABLE_SVE2']) - lib_static_objs += build_obj_list(filedefs["armv8.6-a-sve2"], lib_files_sve2, static=True) - lib_shared_objs += build_obj_list(filedefs["armv8.6-a-sve2"], lib_files_sve2, static=False) + lib_static_objs += build_obj_list(filedefs["armv8.6-a-sve2"], misa_lib_files_sve2, static=True) + lib_shared_objs += build_obj_list(filedefs["armv8.6-a-sve2"], misa_lib_files_sve2, static=False) + lib_static_objs += build_obj_list(filedefs["armv8.6-a-sve2"], misa_lib_files_sve2_fp16, static=True) + lib_shared_objs += build_obj_list(filedefs["armv8.6-a-sve2"], misa_lib_files_sve2_fp16, static=False) + return 
lib_static_objs, lib_shared_objs @@ -284,29 +294,29 @@ def get_attrs_list(env, data_types, data_layouts): return attrs -def get_operator_backend_files(filelist, operators, backend='', techs=[], attrs=[]): +def get_operator_backend_files(filelist, operators, backend='', techs=[], attrs=[], include_common=True): files = { "common" : [] } - # Early return if filelist is empty if backend not in filelist: return files - # Iterate over operators and create the file lists to compiler for operator in operators: if operator in filelist[backend]['operators']: - files['common'] += filelist[backend]['operators'][operator]["files"]["common"] + if include_common : + files['common'] += filelist[backend]['operators'][operator]["files"]["common"] for tech in techs: if tech in filelist[backend]['operators'][operator]["files"]: # Add tech as a key to dictionary if not there if tech not in files: files[tech] = [] - # Add tech files to the tech file list tech_files = filelist[backend]['operators'][operator]["files"][tech] - files[tech] += tech_files.get('common', []) + if include_common: + files[tech] += tech_files.get('common', []) for attr in attrs: files[tech] += tech_files.get(attr, []) + # Remove duplicates if they exist return {k: list(set(v)) for k,v in files.items()} @@ -608,6 +618,17 @@ if env['opencl']: lib_files_sve = [] lib_files_sve2 = [] +# the variables below are used for the multi_isa builds +# please note that the variables names without the _fp16 suffix +# do not hold any fp16 files. 
+ +misa_lib_files = lib_files +misa_lib_files_sve = [] +misa_lib_files_sve2 = [] +misa_lib_files_neon_fp16 = [] +misa_lib_files_sve_fp16 = [] +misa_lib_files_sve2_fp16 = [] + if env['neon']: # build winograd/depthwise sources for either v7a / v8a arm_compute_env.Append(CPPPATH = ["src/core/NEON/kernels/arm_gemm", @@ -620,8 +641,6 @@ if env['neon']: "arm_compute/core/NEON/kernels/assembly/", "src/cpu/kernels/assembly/"]) - lib_files += filelist['cpu']['common'] - # Setup SIMD file list to include simd = ['neon'] if env['multi_isa']: @@ -636,7 +655,6 @@ if env['neon']: else: attrs = get_attrs_list(env, env['data_type_support'], env['data_layout_support']) - if env['fixed_format_kernels']: attrs.append("fixed_format_kernels") @@ -644,19 +662,46 @@ if env['neon']: cpu_operators = custom_operators if use_custom_ops else filelist['cpu']['operators'].keys() cpu_ops_to_build = resolve_operator_dependencies(filelist, cpu_operators, 'cpu') - cpu_files = get_operator_backend_files(filelist, cpu_ops_to_build, 'cpu', simd, attrs) + if env['multi_isa']: + misa_lib_files += filelist['cpu']['common'] + + # For multi_isa builds we need to build fp16 files for armv8.2-a+fp16 so we filter them out of cpu_files removing the attribute fp16 + attrs.remove('fp16') + cpu_files = get_operator_backend_files(filelist, cpu_ops_to_build, 'cpu', simd, attrs) + + # Shared among ALL CPU files + misa_lib_files += cpu_files.get('common', []) + + # Arm® Neon™ specific files + misa_lib_files += cpu_files.get('neon', []) + + # Get all the fp16 files + fp16_cpu_files = get_operator_backend_files(filelist, cpu_ops_to_build, 'cpu', simd, ['fp16'],False) + + misa_lib_files_neon_fp16 = fp16_cpu_files.get('neon',[]) + misa_lib_files_sve_fp16 = fp16_cpu_files.get('sve',[]) + misa_lib_files_sve2_fp16 = fp16_cpu_files.get('sve2',[]) + + # SVE files only minus FP16 + misa_lib_files_sve = cpu_files.get('sve', []) + + # SVE2 files only minus FP16 + misa_lib_files_sve2 = cpu_files.get('sve2', []) + else: + 
lib_files += filelist['cpu']['common'] + + # Non multi_isa build + cpu_files = get_operator_backend_files(filelist, cpu_ops_to_build, 'cpu', simd, attrs) - # Shared among ALL CPU files - lib_files += cpu_files.get('common', []) + # Shared among ALL CPU files + lib_files += cpu_files.get('common', []) - # Arm® Neon™ specific files - lib_files += cpu_files.get('neon', []) + # Arm® Neon™ specific files + lib_files += cpu_files.get('neon', []) - # SVE files only - lib_files_sve = cpu_files.get('sve', []) + lib_files_sve = cpu_files.get('sve', []) - # SVE2 files only - lib_files_sve2 = cpu_files.get('sve2', []) + lib_files_sve2 = cpu_files.get('sve2', []) graph_files += Glob('src/graph/backends/NEON/*.cpp') @@ -674,7 +719,7 @@ Export('bootcode_o') if (env['multi_isa']): - lib_static_objs, lib_shared_objs = build_lib_objects() + lib_static_objs, lib_shared_objs = build_multiisa_lib_objects() # STATIC library build. diff --git a/SConstruct b/SConstruct index 68c518a4a0..3eee4c0561 100644 --- a/SConstruct +++ b/SConstruct @@ -62,8 +62,14 @@ def read_build_config_json(build_config): def update_data_type_layout_flags(env, data_types, data_layouts): # Manage data-types - if any(i in data_types for i in ['all', 'fp16']): - env.Append(CXXFLAGS = ['-DENABLE_FP16_KERNELS']) + if env['multi_isa']: + if any(i in data_types for i in ['all', 'fp16']): + env.Append(CXXFLAGS = ['-DENABLE_FP16_KERNELS']) + else: + if not 'v8a' in env['arch'] and not 'v7a' in env['arch']: + if any(i in data_types for i in ['all', 'fp16']): + env.Append(CXXFLAGS = ['-DENABLE_FP16_KERNELS']) + if any(i in data_types for i in ['all', 'fp32']): env.Append(CXXFLAGS = ['-DENABLE_FP32_KERNELS']) if any(i in data_types for i in ['all', 'qasymm8']): @@ -112,7 +118,7 @@ vars.AddVariables( BoolVariable("exceptions", "Enable/disable C++ exception support", True), BoolVariable("high_priority", "Generate a library containing only the high priority operators", False), PathVariable("linker_script", "Use an external 
linker script", "", PathVariable.PathAccept), - PathVariable("external_tests_dir", """Add examples, benchmarks and tests to the tests suite from an external path. In order to use this option, the external tests directory must have the following structure: + PathVariable("external_tests_dir", """Add examples, benchmarks and tests to the tests suite from an external path. In order to use this option, the external tests directory must have the following structure: EXTERNAL_TESTS_DIR: └── tests ├── benchmark @@ -240,7 +246,6 @@ env.Append(CXXFLAGS = ['-DARCH_ARM', if not 'windows' in env['os']: env.Append(CXXFLAGS = ['-Wall','-std=c++14', '-pedantic' ]) -env.Append(CPPDEFINES = ['_GLIBCXX_USE_NANOSLEEP']) cpp_tool = {'linux': 'g++', 'android' : 'clang++', 'tizen': 'g++', 'macos':'clang++', @@ -312,8 +317,7 @@ if env['multi_isa']: Exit(1) if 'v8a' in env['arch']: - print("INFO: multi_isa armv8-a architecture build doesn't enable __ARM_FEATURE_FP16_VECTOR_ARITHMETIC. Use armv8.2-a or beyond to enable FP16 vector arithmetic support") - env.Append(CXXFLAGS = ['-march=armv8-a']) # note: this will disable fp16 extension __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + env.Append(CXXFLAGS = ['-march=armv8-a']) else: if 'v8.6-a' in env['arch']: if "disable_mmla_fp" not in env['custom_options']: @@ -536,7 +540,7 @@ if env['standalone']: if not 'windows' in env['os']: env.Append(CXXFLAGS = ['-fPIC']) env.Append(LINKFLAGS = ['-static-libgcc','-static-libstdc++']) - + if env['Werror']: env.Append(CXXFLAGS = ['-Werror']) @@ -597,7 +601,7 @@ if env['debug']: else: env.Append(CXXFLAGS = ['-Z7','-MTd','-fms-compatibility','-fdelayed-template-parsing']) env.Append(LINKFLAGS = ['-DEBUG']) - + env.Append(CPPDEFINES = ['ARM_COMPUTE_DEBUG_ENABLED']) else: if not 'windows' in env['os']: diff --git a/docs/user_guide/release_version_and_change_log.dox b/docs/user_guide/release_version_and_change_log.dox index 6d27ae31ad..13f4e9ea2a 100644 --- a/docs/user_guide/release_version_and_change_log.dox +++ 
b/docs/user_guide/release_version_and_change_log.dox @@ -44,6 +44,8 @@ If there is more than one release in a month then an extra sequential number is v24.01 Public major release - Remove the legacy 'libarm_compute_core' library. This library is an artifact of Compute Library's legacy library architecture and no longer serves any purpose. You should link only to the main `libarm_compute` library for core functionality. + - New features + - Add support for FP16 in all multi_isa builds. v23.11 Public major release - New features diff --git a/src/core/common/Registrars.h b/src/core/common/Registrars.h index 686304b8d7..50b3fc1284 100644 --- a/src/core/common/Registrars.h +++ b/src/core/common/Registrars.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022 Arm Limited. + * Copyright (c) 2020-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef SRC_CORE_COMMON_REGISTRARS_H -#define SRC_CORE_COMMON_REGISTRARS_H +#ifndef ACL_SRC_CORE_COMMON_REGISTRARS_H +#define ACL_SRC_CORE_COMMON_REGISTRARS_H #if defined(ENABLE_FP16_KERNELS) @@ -38,11 +38,11 @@ #define REGISTER_FP16_SVE2(func_name) nullptr #endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */ -#if defined(ARM_COMPUTE_ENABLE_NEON) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +#if defined(ARM_COMPUTE_ENABLE_NEON) #define REGISTER_FP16_NEON(func_name) &(func_name) #else /* !defined(ARM_COMPUTE_ENABLE_NEON) */ #define REGISTER_FP16_NEON(func_name) nullptr -#endif /* defined(ARM_COMPUTE_ENABLE_NEON) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */ +#endif /* defined(ARM_COMPUTE_ENABLE_NEON) */ #else /* !defined(ENABLE_FP16_KERNELS) */ #define REGISTER_FP16_NEON(func_name) nullptr @@ -179,4 +179,4 @@ #define REGISTER_BF16_NEON(func_name) nullptr #endif /* defined(ARM_COMPUTE_ENABLE_BF16)*/ -#endif /* SRC_CORE_COMMON_REGISTRARS_H */ +#endif // ACL_SRC_CORE_COMMON_REGISTRARS_H -- cgit v1.2.1