aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Pablo Marquez Tello <pablo.tello@arm.com> 2023-08-16 15:21:44 +0100
committer Pablo Marquez Tello <pablo.tello@arm.com> 2023-11-28 10:15:44 +0000
commit 9f7aca97f41a1aa683141f14f19f605f122f7561 (patch)
tree bac1b5abd9b5324d8d2f9583e41660a81c398597
parent 8d4cdd43a74574e0f99f83f1adb1d391c0c85abe (diff)
download ComputeLibrary-9f7aca97f41a1aa683141f14f19f605f122f7561.tar.gz
Changes to enable FP16 in armv8a multi_isa
* This is the initial patch to start working on enabling fp16 in all multi_isa builds. More changes are required in the way we register the kernels using the macro REGISTER_FP16_NEON.
* In this patch we add the capability to build the fp16 files listed in filelist.json with the correct arch option to enable FP16.
* This patch is required towards building a universal multi_isa binary where fp16 is enabled.
* Enable REGISTER_FP16_NEON macro for all builds by removing the __ARM_FEATURE_FP16_VECTOR_ARITHMETIC guard from the macro definition. The macro has to be used across all types of builds.

Change-Id: I99f4c273f6ee04cad3c097e5e374200f48568fa9
Signed-off-by: Pablo Marquez Tello <pablo.tello@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10682
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Jakub Sujak <jakub.sujak@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r-- SConscript 101
-rw-r--r-- SConstruct 20
-rw-r--r-- docs/user_guide/release_version_and_change_log.dox 2
-rw-r--r-- src/core/common/Registrars.h 12
4 files changed, 93 insertions, 42 deletions
diff --git a/SConscript b/SConscript
index 9069df901b..31e7a5b4f4 100644
--- a/SConscript
+++ b/SConscript
@@ -82,7 +82,7 @@ def build_obj_list(arch_info, sources, static=False):
# A list of static objects
# A list of shared objects
-def build_lib_objects():
+def build_multiisa_lib_objects():
lib_static_objs = [] # static objects
lib_shared_objs = [] # shared objects
@@ -93,20 +93,30 @@ def build_lib_objects():
# Build all the common files for the base architecture
if env['arch'] == 'armv8a':
- lib_static_objs += build_obj_list(filedefs["armv8-a"], lib_files, static=True)
- lib_shared_objs += build_obj_list(filedefs["armv8-a"], lib_files, static=False)
+ lib_static_objs += build_obj_list(filedefs["armv8-a"], misa_lib_files, static=True)
+ lib_shared_objs += build_obj_list(filedefs["armv8-a"], misa_lib_files, static=False)
else:
- lib_static_objs += build_obj_list(filedefs["armv8.2-a"], lib_files, static=True)
- lib_shared_objs += build_obj_list(filedefs["armv8.2-a"], lib_files, static=False)
+ lib_static_objs += build_obj_list(filedefs["armv8.2-a"], misa_lib_files, static=True)
+ lib_shared_objs += build_obj_list(filedefs["armv8.2-a"], misa_lib_files, static=False)
+
+ # Build the FP16 specific files
+ lib_static_objs += build_obj_list(filedefs["armv8.2-a"], misa_lib_files_neon_fp16, static=True)
+ lib_shared_objs += build_obj_list(filedefs["armv8.2-a"], misa_lib_files_neon_fp16, static=False)
# Build the SVE specific files
- lib_static_objs += build_obj_list(filedefs["armv8.2-a-sve"], lib_files_sve, static=True)
- lib_shared_objs += build_obj_list(filedefs["armv8.2-a-sve"], lib_files_sve, static=False)
+ lib_static_objs += build_obj_list(filedefs["armv8.2-a-sve"], misa_lib_files_sve, static=True)
+ lib_shared_objs += build_obj_list(filedefs["armv8.2-a-sve"], misa_lib_files_sve, static=False)
+ lib_static_objs += build_obj_list(filedefs["armv8.2-a-sve"], misa_lib_files_sve_fp16, static=True)
+ lib_shared_objs += build_obj_list(filedefs["armv8.2-a-sve"], misa_lib_files_sve_fp16, static=False)
+
# Build the SVE2 specific files
arm_compute_env.Append(CPPDEFINES = ['ARM_COMPUTE_ENABLE_SVE2'])
- lib_static_objs += build_obj_list(filedefs["armv8.6-a-sve2"], lib_files_sve2, static=True)
- lib_shared_objs += build_obj_list(filedefs["armv8.6-a-sve2"], lib_files_sve2, static=False)
+ lib_static_objs += build_obj_list(filedefs["armv8.6-a-sve2"], misa_lib_files_sve2, static=True)
+ lib_shared_objs += build_obj_list(filedefs["armv8.6-a-sve2"], misa_lib_files_sve2, static=False)
+ lib_static_objs += build_obj_list(filedefs["armv8.6-a-sve2"], misa_lib_files_sve2_fp16, static=True)
+ lib_shared_objs += build_obj_list(filedefs["armv8.6-a-sve2"], misa_lib_files_sve2_fp16, static=False)
+
return lib_static_objs, lib_shared_objs
@@ -284,29 +294,29 @@ def get_attrs_list(env, data_types, data_layouts):
return attrs
-def get_operator_backend_files(filelist, operators, backend='', techs=[], attrs=[]):
+def get_operator_backend_files(filelist, operators, backend='', techs=[], attrs=[], include_common=True):
files = { "common" : [] }
-
# Early return if filelist is empty
if backend not in filelist:
return files
-
# Iterate over operators and create the file lists to compiler
for operator in operators:
if operator in filelist[backend]['operators']:
- files['common'] += filelist[backend]['operators'][operator]["files"]["common"]
+ if include_common :
+ files['common'] += filelist[backend]['operators'][operator]["files"]["common"]
for tech in techs:
if tech in filelist[backend]['operators'][operator]["files"]:
# Add tech as a key to dictionary if not there
if tech not in files:
files[tech] = []
-
# Add tech files to the tech file list
tech_files = filelist[backend]['operators'][operator]["files"][tech]
- files[tech] += tech_files.get('common', [])
+ if include_common:
+ files[tech] += tech_files.get('common', [])
for attr in attrs:
files[tech] += tech_files.get(attr, [])
+
# Remove duplicates if they exist
return {k: list(set(v)) for k,v in files.items()}
@@ -608,6 +618,17 @@ if env['opencl']:
lib_files_sve = []
lib_files_sve2 = []
+# The variables below are used for the multi_isa builds.
+# Please note that the variables whose names lack the _fp16 suffix
+# do not hold any fp16 files.
+
+misa_lib_files = lib_files
+misa_lib_files_sve = []
+misa_lib_files_sve2 = []
+misa_lib_files_neon_fp16 = []
+misa_lib_files_sve_fp16 = []
+misa_lib_files_sve2_fp16 = []
+
if env['neon']:
# build winograd/depthwise sources for either v7a / v8a
arm_compute_env.Append(CPPPATH = ["src/core/NEON/kernels/arm_gemm",
@@ -620,8 +641,6 @@ if env['neon']:
"arm_compute/core/NEON/kernels/assembly/",
"src/cpu/kernels/assembly/"])
- lib_files += filelist['cpu']['common']
-
# Setup SIMD file list to include
simd = ['neon']
if env['multi_isa']:
@@ -636,7 +655,6 @@ if env['neon']:
else:
attrs = get_attrs_list(env, env['data_type_support'], env['data_layout_support'])
-
if env['fixed_format_kernels']:
attrs.append("fixed_format_kernels")
@@ -644,19 +662,46 @@ if env['neon']:
cpu_operators = custom_operators if use_custom_ops else filelist['cpu']['operators'].keys()
cpu_ops_to_build = resolve_operator_dependencies(filelist, cpu_operators, 'cpu')
- cpu_files = get_operator_backend_files(filelist, cpu_ops_to_build, 'cpu', simd, attrs)
+ if env['multi_isa']:
+ misa_lib_files += filelist['cpu']['common']
+
+ # For multi_isa builds we need to build fp16 files for armv8.2-a+fp16 so we filter them out of cpu_files removing the attribute fp16
+ attrs.remove('fp16')
+ cpu_files = get_operator_backend_files(filelist, cpu_ops_to_build, 'cpu', simd, attrs)
+
+ # Shared among ALL CPU files
+ misa_lib_files += cpu_files.get('common', [])
+
+ # Arm® Neon™ specific files
+ misa_lib_files += cpu_files.get('neon', [])
+
+ # Get all the fp16 files
+ fp16_cpu_files = get_operator_backend_files(filelist, cpu_ops_to_build, 'cpu', simd, ['fp16'],False)
+
+ misa_lib_files_neon_fp16 = fp16_cpu_files.get('neon',[])
+ misa_lib_files_sve_fp16 = fp16_cpu_files.get('sve',[])
+ misa_lib_files_sve2_fp16 = fp16_cpu_files.get('sve2',[])
+
+ # SVE files only minus FP16
+ misa_lib_files_sve = cpu_files.get('sve', [])
+
+ # SVE2 files only minus FP16
+ misa_lib_files_sve2 = cpu_files.get('sve2', [])
+ else:
+ lib_files += filelist['cpu']['common']
+
+ # Non multi_isa build
+ cpu_files = get_operator_backend_files(filelist, cpu_ops_to_build, 'cpu', simd, attrs)
- # Shared among ALL CPU files
- lib_files += cpu_files.get('common', [])
+ # Shared among ALL CPU files
+ lib_files += cpu_files.get('common', [])
- # Arm® Neon™ specific files
- lib_files += cpu_files.get('neon', [])
+ # Arm® Neon™ specific files
+ lib_files += cpu_files.get('neon', [])
- # SVE files only
- lib_files_sve = cpu_files.get('sve', [])
+ lib_files_sve = cpu_files.get('sve', [])
- # SVE2 files only
- lib_files_sve2 = cpu_files.get('sve2', [])
+ lib_files_sve2 = cpu_files.get('sve2', [])
graph_files += Glob('src/graph/backends/NEON/*.cpp')
@@ -674,7 +719,7 @@ Export('bootcode_o')
if (env['multi_isa']):
- lib_static_objs, lib_shared_objs = build_lib_objects()
+ lib_static_objs, lib_shared_objs = build_multiisa_lib_objects()
# STATIC library build.
diff --git a/SConstruct b/SConstruct
index 68c518a4a0..3eee4c0561 100644
--- a/SConstruct
+++ b/SConstruct
@@ -62,8 +62,14 @@ def read_build_config_json(build_config):
def update_data_type_layout_flags(env, data_types, data_layouts):
# Manage data-types
- if any(i in data_types for i in ['all', 'fp16']):
- env.Append(CXXFLAGS = ['-DENABLE_FP16_KERNELS'])
+ if env['multi_isa']:
+ if any(i in data_types for i in ['all', 'fp16']):
+ env.Append(CXXFLAGS = ['-DENABLE_FP16_KERNELS'])
+ else:
+ if not 'v8a' in env['arch'] and not 'v7a' in env['arch']:
+ if any(i in data_types for i in ['all', 'fp16']):
+ env.Append(CXXFLAGS = ['-DENABLE_FP16_KERNELS'])
+
if any(i in data_types for i in ['all', 'fp32']):
env.Append(CXXFLAGS = ['-DENABLE_FP32_KERNELS'])
if any(i in data_types for i in ['all', 'qasymm8']):
@@ -112,7 +118,7 @@ vars.AddVariables(
BoolVariable("exceptions", "Enable/disable C++ exception support", True),
BoolVariable("high_priority", "Generate a library containing only the high priority operators", False),
PathVariable("linker_script", "Use an external linker script", "", PathVariable.PathAccept),
- PathVariable("external_tests_dir", """Add examples, benchmarks and tests to the tests suite from an external path. In order to use this option, the external tests directory must have the following structure:
+ PathVariable("external_tests_dir", """Add examples, benchmarks and tests to the tests suite from an external path. In order to use this option, the external tests directory must have the following structure:
EXTERNAL_TESTS_DIR:
└── tests
├── benchmark
@@ -240,7 +246,6 @@ env.Append(CXXFLAGS = ['-DARCH_ARM',
if not 'windows' in env['os']:
env.Append(CXXFLAGS = ['-Wall','-std=c++14', '-pedantic' ])
-env.Append(CPPDEFINES = ['_GLIBCXX_USE_NANOSLEEP'])
cpp_tool = {'linux': 'g++', 'android' : 'clang++',
'tizen': 'g++', 'macos':'clang++',
@@ -312,8 +317,7 @@ if env['multi_isa']:
Exit(1)
if 'v8a' in env['arch']:
- print("INFO: multi_isa armv8-a architecture build doesn't enable __ARM_FEATURE_FP16_VECTOR_ARITHMETIC. Use armv8.2-a or beyond to enable FP16 vector arithmetic support")
- env.Append(CXXFLAGS = ['-march=armv8-a']) # note: this will disable fp16 extension __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+ env.Append(CXXFLAGS = ['-march=armv8-a'])
else:
if 'v8.6-a' in env['arch']:
if "disable_mmla_fp" not in env['custom_options']:
@@ -536,7 +540,7 @@ if env['standalone']:
if not 'windows' in env['os']:
env.Append(CXXFLAGS = ['-fPIC'])
env.Append(LINKFLAGS = ['-static-libgcc','-static-libstdc++'])
-
+
if env['Werror']:
env.Append(CXXFLAGS = ['-Werror'])
@@ -597,7 +601,7 @@ if env['debug']:
else:
env.Append(CXXFLAGS = ['-Z7','-MTd','-fms-compatibility','-fdelayed-template-parsing'])
env.Append(LINKFLAGS = ['-DEBUG'])
-
+
env.Append(CPPDEFINES = ['ARM_COMPUTE_DEBUG_ENABLED'])
else:
if not 'windows' in env['os']:
diff --git a/docs/user_guide/release_version_and_change_log.dox b/docs/user_guide/release_version_and_change_log.dox
index 6d27ae31ad..13f4e9ea2a 100644
--- a/docs/user_guide/release_version_and_change_log.dox
+++ b/docs/user_guide/release_version_and_change_log.dox
@@ -44,6 +44,8 @@ If there is more than one release in a month then an extra sequential number is
v24.01 Public major release
- Remove the legacy 'libarm_compute_core' library. This library is an artifact of Compute Library's legacy library architecture and no longer serves any purpose.
You should link only to the main `libarm_compute` library for core functionality.
+ - New features
+ - Add support for FP16 in all multi_isa builds.
v23.11 Public major release
- New features
diff --git a/src/core/common/Registrars.h b/src/core/common/Registrars.h
index 686304b8d7..50b3fc1284 100644
--- a/src/core/common/Registrars.h
+++ b/src/core/common/Registrars.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2022 Arm Limited.
+ * Copyright (c) 2020-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef SRC_CORE_COMMON_REGISTRARS_H
-#define SRC_CORE_COMMON_REGISTRARS_H
+#ifndef ACL_SRC_CORE_COMMON_REGISTRARS_H
+#define ACL_SRC_CORE_COMMON_REGISTRARS_H
#if defined(ENABLE_FP16_KERNELS)
@@ -38,11 +38,11 @@
#define REGISTER_FP16_SVE2(func_name) nullptr
#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */
-#if defined(ARM_COMPUTE_ENABLE_NEON) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+#if defined(ARM_COMPUTE_ENABLE_NEON)
#define REGISTER_FP16_NEON(func_name) &(func_name)
#else /* !defined(ARM_COMPUTE_ENABLE_NEON) */
#define REGISTER_FP16_NEON(func_name) nullptr
-#endif /* defined(ARM_COMPUTE_ENABLE_NEON) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */
+#endif /* defined(ARM_COMPUTE_ENABLE_NEON) */
#else /* !defined(ENABLE_FP16_KERNELS) */
#define REGISTER_FP16_NEON(func_name) nullptr
@@ -179,4 +179,4 @@
#define REGISTER_BF16_NEON(func_name) nullptr
#endif /* defined(ARM_COMPUTE_ENABLE_BF16)*/
-#endif /* SRC_CORE_COMMON_REGISTRARS_H */
+#endif // ACL_SRC_CORE_COMMON_REGISTRARS_H