From 6f3a9f5f4ef6ec7aa8e91df3c1f373d95931dd7b Mon Sep 17 00:00:00 2001 From: Motti Gondabi Date: Tue, 9 Nov 2021 15:47:17 +0200 Subject: Add Multi ISA support for SCons build System (part #1) - Enhance the SCons build system to support V8 SVE/SVE2 architecture in a single binary - Add additional filedefs.json to include build definitions Resolves: COMPMID-4921 Signed-off-by: Motti Gondabi Change-Id: Ie3c0ef444303270ba560ca3f43c6e22d50b86679 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6689 Tested-by: Arm Jenkins Reviewed-by: Giorgio Arena Comments-Addressed: Arm Jenkins --- SConscript | 67 ++++++++++++++++++---- SConstruct | 95 +++++++++++++++++--------------- docs/user_guide/introduction.dox | 2 +- docs/user_guide/library.dox | 4 +- filedefs.json | 41 ++++++++++++++ src/cpu/kernels/add/generic/sve/fp16.cpp | 2 + 6 files changed, 153 insertions(+), 58 deletions(-) create mode 100644 filedefs.json diff --git a/SConscript b/SConscript index 7e901019cb..afff4e58bc 100644 --- a/SConscript +++ b/SConscript @@ -19,6 +19,7 @@ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. + import collections import os.path import re @@ -44,11 +45,42 @@ def build_bootcode_objs(sources): Default(obj) return obj +# @brief Generates SVE/SVE2 shared object files for a specific V8 architecture. +# +# @param sources The target source files +# @param arch_v8_info A tuple representing the architecture info +# such as the compiler flags and defines. +# +# @return A list of objects for the corresponding architecture. 
+def build_multi_isa_objs(sources, arch_v8_info): + + arch_v8 = arch_v8_info[0] -def build_sve_objs(sources): + # Create a temp environment tmp_env = arm_compute_env.Clone() - tmp_env.Append(CXXFLAGS = "-march=armv8.2-a+sve+fp16") - obj = tmp_env.SharedObject(sources) + + if 'cxxflags' in arch_v8_info[1] and len(arch_v8_info[1]['cxxflags']) > 0: + tmp_env.Append(CXXFLAGS = arch_v8_info[1]['cxxflags']) + if 'cppdefines' in arch_v8_info[1] and len(arch_v8_info[1]['cppdefines']) > 0: + tmp_env.Append(CPPDEFINES = arch_v8_info[1]['cppdefines']) + + if 'sve' in arch_v8: + # Toggle SVE/SVE2 specific extensions + tmp_env.Append(CPPDEFINES = ['ENABLE_SVE', 'ARM_COMPUTE_ENABLE_SVE']) + if 'sve2' in arch_v8: + tmp_env.Append(CPPDEFINES = ['ARM_COMPUTE_ENABLE_SVE2']) + else: + # FIXME: The NEON flags should be always defined for CPU. + # however, build fails when SVE/SVE2 & NEON flags + # defined together. + tmp_env.Append(CPPDEFINES = ['ENABLE_NEON', 'ARM_COMPUTE_ENABLE_NEON']) + + # we must differentiate the file object names + # as we accumulate the set. 
+ obj = [] + for src in sources: + obj += tmp_env.SharedObject(target='{}-{}'.format(src, arch_v8), source=src) + Default(obj) return obj @@ -422,6 +454,11 @@ arm_compute_env.Append(CPPPATH =[Dir("./src/core/").path] ) arm_compute_env.Append(LIBS = ['dl']) +# Load build definitions file +with (open(Dir('#').path + '/filedefs.json')) as fd: + filedefs = json.load(fd) + + with (open(Dir('#').path + '/filelist.json')) as fp: filelist = json.load(fp) @@ -472,8 +509,9 @@ if env['opencl']: graph_files += Glob('src/graph/backends/CL/*.cpp') -sve_o = [] +multi_isa_objs_list = [] lib_files_sve = [] + if env['neon']: # build winograd/depthwise sources for either v7a / v8a arm_compute_env.Append(CPPPATH = ["src/core/NEON/kernels/convolution/common/", @@ -481,14 +519,14 @@ if env['neon']: "src/core/NEON/kernels/convolution/depthwise/", "src/core/NEON/kernels/assembly/", "arm_compute/core/NEON/kernels/assembly/", - "src/cpu/kernels/assembly/",]) + "src/cpu/kernels/assembly/"]) lib_files += filelist['cpu']['common'] # Setup SIMD file list to include simd = [] - if 'sve' in env['arch'] or env['fat_binary']: simd += ['sve'] - if 'sve' not in env['arch'] or env['fat_binary']: simd += ['neon'] + if 'sve' in env['arch'] or env['multi_isa']: simd += ['sve'] + if 'sve' not in env['arch'] or env['multi_isa']: simd += ['neon'] # Get attributes if(use_custom_ops): @@ -501,6 +539,7 @@ if env['neon']: cpu_ops_to_build = resolve_operator_dependencies(filelist, cpu_operators, 'cpu') cpu_files = get_operator_backend_files(filelist, cpu_ops_to_build, 'cpu', simd, attrs) + lib_files += cpu_files.get('common', []) lib_files += cpu_files.get('neon', []) lib_files_sve += cpu_files.get('sve', []) @@ -520,17 +559,21 @@ if env['os'] == 'bare_metal': Export('bootcode_o') # Build static libraries -if (env['fat_binary']): - sve_o = build_sve_objs(lib_files_sve) - arm_compute_a = build_library('arm_compute-static', arm_compute_env, lib_files + sve_o, static=True) +if (env['multi_isa']): + # Available 
architecture + arch_v8s = filedefs['cpu']['arch'] + for arch_v8_info in arch_v8s.items(): + multi_isa_objs_list += build_multi_isa_objs(lib_files_sve, arch_v8_info) + + arm_compute_a = build_library('arm_compute-static', arm_compute_env, lib_files + multi_isa_objs_list, static=True) else: arm_compute_a = build_library('arm_compute-static', arm_compute_env, lib_files + lib_files_sve, static=True) Export('arm_compute_a') # Build shared libraries if env['os'] != 'bare_metal' and not env['standalone']: - if (env['fat_binary']): - arm_compute_so = build_library('arm_compute', arm_compute_env, lib_files + sve_o, static=False) + if (env['multi_isa']): + arm_compute_so = build_library('arm_compute', arm_compute_env, lib_files + multi_isa_objs_list, static=False) else: arm_compute_so = build_library('arm_compute', arm_compute_env, lib_files + lib_files_sve, static=False) diff --git a/SConstruct b/SConstruct index 400228c71a..2a8403f8ce 100644 --- a/SConstruct +++ b/SConstruct @@ -99,7 +99,7 @@ vars.AddVariables( BoolVariable("examples", "Build example programs", True), BoolVariable("gemm_tuner", "Build gemm_tuner programs", True), BoolVariable("Werror", "Enable/disable the -Werror compilation flag", True), - BoolVariable("fat_binary", "Build fat binary version of library. Note works only for armv8.2-a", False), + BoolVariable("multi_isa", "Build Multi ISA binary version of library. 
Note works only for armv8.2-a", False), BoolVariable("standalone", "Builds the tests as standalone executables, links statically with libgcc, libstdc++ and libarm_compute", False), BoolVariable("opencl", "Enable OpenCL support", True), BoolVariable("neon", "Enable Arm® Neon™ support", False), @@ -250,40 +250,63 @@ if 'v7a' in env['estate'] and env['estate'] == '64': # Add architecture specific flags prefix = "" -if 'v7a' in env['arch']: - env.Append(CXXFLAGS = ['-march=armv7-a', '-mthumb', '-mfpu=neon']) - if (env['os'] == 'android' or env['os'] == 'tizen') and not 'hf' in env['arch']: - env.Append(CXXFLAGS = ['-mfloat-abi=softfp']) - else: - env.Append(CXXFLAGS = ['-mfloat-abi=hard']) -elif 'v8' in env['arch']: - if 'sve2' in env['arch']: - env.Append(CXXFLAGS = ['-march=armv8.2-a+sve2+fp16+dotprod']) - env.Append(CPPDEFINES = ['ARM_COMPUTE_ENABLE_SVE2']) - elif 'sve' in env['arch']: - env.Append(CXXFLAGS = ['-march=armv8.2-a+sve+fp16+dotprod']) - elif 'armv8r64' in env['arch']: - env.Append(CXXFLAGS = ['-march=armv8.4-a']) - elif 'v8.' in env['arch']: - env.Append(CXXFLAGS = ['-march=armv8.2-a+fp16']) # explicitly enable fp16 extension otherwise __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is undefined - else: - env.Append(CXXFLAGS = ['-march=armv8-a']) +if env['multi_isa']: + # assert arch version is v8 + if 'v8' not in env['arch']: + print("Currently Multi ISA binary is only supported for arm v8 family") + Exit(1) if 'v8.6-a' in env['arch']: - env.Append(CPPDEFINES = ['ARM_COMPUTE_ENABLE_I8MM', 'ARM_COMPUTE_ENABLE_BF16']) if "disable_mmla_fp" not in env['custom_options']: env.Append(CPPDEFINES = ['ARM_COMPUTE_ENABLE_SVEF32MM']) - if 'v8.' 
in env['arch']: - env.Append(CPPDEFINES = ['ARM_COMPUTE_ENABLE_FP16']) -elif 'x86' in env['arch']: - if env['estate'] == '32': - env.Append(CCFLAGS = ['-m32']) - env.Append(LINKFLAGS = ['-m32']) +else: # Non "multi_isa" builds + + if 'sve' in env['arch']: + env.Append(CPPDEFINES = ['ENABLE_SVE', 'ARM_COMPUTE_ENABLE_SVE']) + if 'sve2' in env['arch']: + env.Append(CPPDEFINES = ['ARM_COMPUTE_ENABLE_SVE2']) else: - env.Append(CXXFLAGS = ['-fPIC']) - env.Append(CCFLAGS = ['-m64']) - env.Append(LINKFLAGS = ['-m64']) + # FIXME: The NEON flags should be always defined for CPU. + # however, build fails when SVE/SVE2 & NEON flags + # defined together. + env.Append(CPPDEFINES = ['ENABLE_NEON', 'ARM_COMPUTE_ENABLE_NEON']) + + + if 'v7a' in env['arch']: + env.Append(CXXFLAGS = ['-march=armv7-a', '-mthumb', '-mfpu=neon']) + if (env['os'] == 'android' or env['os'] == 'tizen') and not 'hf' in env['arch']: + env.Append(CXXFLAGS = ['-mfloat-abi=softfp']) + else: + env.Append(CXXFLAGS = ['-mfloat-abi=hard']) + elif 'v8' in env['arch']: + # Preserve the V8 archs for non-multi-ISA variants + if 'sve2' in env['arch']: + env.Append(CXXFLAGS = ['-march=armv8.2-a+sve2+fp16+dotprod']) + elif 'sve' in env['arch']: + env.Append(CXXFLAGS = ['-march=armv8.2-a+sve+fp16+dotprod']) + elif 'armv8r64' in env['arch']: + env.Append(CXXFLAGS = ['-march=armv8.4-a']) + elif 'v8.' in env['arch']: + env.Append(CXXFLAGS = ['-march=armv8.2-a+fp16']) # explicitly enable fp16 extension otherwise __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is undefined + else: + env.Append(CXXFLAGS = ['-march=armv8-a']) + + if 'v8.6-a' in env['arch']: + env.Append(CPPDEFINES = ['ARM_COMPUTE_ENABLE_I8MM', 'ARM_COMPUTE_ENABLE_BF16']) + if "disable_mmla_fp" not in env['custom_options']: + env.Append(CPPDEFINES = ['ARM_COMPUTE_ENABLE_SVEF32MM']) + if 'v8.' 
in env['arch']: + env.Append(CPPDEFINES = ['ARM_COMPUTE_ENABLE_FP16']) + + elif 'x86' in env['arch']: + if env['estate'] == '32': + env.Append(CCFLAGS = ['-m32']) + env.Append(LINKFLAGS = ['-m32']) + else: + env.Append(CXXFLAGS = ['-fPIC']) + env.Append(CCFLAGS = ['-m64']) + env.Append(LINKFLAGS = ['-m64']) # Define toolchain prefix = "" @@ -307,11 +330,6 @@ if 'x86' not in env['arch']: elif env['os'] == 'tizen': prefix = "aarch64-tizen-linux-gnu-" -if 'sve' in env['arch']: - env.Append(CXXFLAGS = ['-DENABLE_SVE', '-DARM_COMPUTE_ENABLE_SVE']) -else: - env.Append(CXXFLAGS = ['-DENABLE_NEON', '-DARM_COMPUTE_ENABLE_NEON']) - if env['build'] == 'native': prefix = "" @@ -355,15 +373,6 @@ if not GetOption("help"): if not version_at_least(compiler_ver, '7.0.0') and env['os'] == 'bare_metal': env.Append(LINKFLAGS = ['-fstack-protector-strong']) -if env['fat_binary']: - if env['arch'] != 'armv8.2-a': - print("Currently fat binary is only supported with armv8.2-a") - Exit(1) - env.Append(CXXFLAGS = ['-DENABLE_NEON', '-DARM_COMPUTE_ENABLE_NEON', - '-DENABLE_SVE', '-DARM_COMPUTE_ENABLE_SVE', - '-DARM_COMPUTE_ENABLE_FP16', '-DARM_COMPUTE_ENABLE_BF16', - '-DARM_COMPUTE_ENABLE_I8MM', '-DARM_COMPUTE_ENABLE_SVEF32MM']) - if env['high_priority'] and env['build_config']: print("The high priority library cannot be built in conjuction with a user-specified build configuration") Exit(1) diff --git a/docs/user_guide/introduction.dox b/docs/user_guide/introduction.dox index a8c9926b8b..d685a49ba9 100644 --- a/docs/user_guide/introduction.dox +++ b/docs/user_guide/introduction.dox @@ -85,7 +85,7 @@ These binaries have been built using the following toolchains: - Linux armv7a: gcc-linaro-7.2.1-2017.11-x86_64_arm-linux-gnueabihf - Linux arm64-v8a: gcc-linaro-7.2.1-2017.11-x86_64_aarch64-linux-gnu - Linux arm64-v8.2-a: gcc-linaro-7.2.1-2017.11-x86_64_aarch64-linux-gnu - - Linux arm64-v8.2-a (fat binary): gcc-arm-10.2-2020.11-x86_64-aarch64-none-linux-gnu + - Linux arm64-v8.2-a (multi-ISA 
binary): gcc-arm-10.2-2020.11-x86_64-aarch64-none-linux-gnu - Linux armv8.2a-sve: gcc-arm-10.2-2020.11-x86_64-aarch64-none-linux-gnu - Android armv7a: clang++ / libc++ NDK r20b - Android am64-v8a: clang++ / libc++ NDK r20b diff --git a/docs/user_guide/library.dox b/docs/user_guide/library.dox index fc08dbc437..7a45fe9d9d 100644 --- a/docs/user_guide/library.dox +++ b/docs/user_guide/library.dox @@ -555,9 +555,9 @@ The responsibilities of the operators can be summarized as follows: - Providing information to the caller required by the computation (e.g., memory requirements) - Allocation of any required auxiliary memory if it isn't given by its caller explicitly -@subsection architecture_experimental_build_fat_binary Build fat binary +@subsection architecture_experimental_build_multi_isa Build multi-ISA binary -Selecting fat_binary when building Compute Library, will create a library that contains all the supported ISA features. +Selecting multi_isa when building Compute Library will create a library that contains all the supported ISA features. Based on the CPU support, the appropriate kernel will be selected at runtime for execution. Currently this option is only supported with armv8.2-a as the base architecture. 
diff --git a/filedefs.json b/filedefs.json new file mode 100644 index 0000000000..0bc030e1d3 --- /dev/null +++ b/filedefs.json @@ -0,0 +1,41 @@ +{ + "cpu": { + "arch" : { + "armv8-a": { + "cxxflags": ["-march=armv8.2-a"] + }, + "armv8.2-a": { + "cxxflags": ["-march=armv8.2-a+fp16"], + "cppdefines": ["ARM_COMPUTE_ENABLE_FP16"] + }, + "armv8.2-a-sve": { + "cxxflags": ["-march=armv8.2-a+sve+fp16+dotprod"], + "cppdefines": ["ARM_COMPUTE_ENABLE_FP16", "ARM_COMPUTE_ENABLE_BF16", + "ARM_COMPUTE_ENABLE_I8MM", "ARM_COMPUTE_ENABLE_SVEF32MM"] + }, + "armv8.2-a-sve2": { + "cxxflags": ["-march=armv8.2-a+sve2+fp16+dotprod"], + "cppdefines": ["ARM_COMPUTE_ENABLE_FP16", "ARM_COMPUTE_ENABLE_BF16", + "ARM_COMPUTE_ENABLE_I8MM", "ARM_COMPUTE_ENABLE_SVEF32MM"] + }, + "armv8r64": { + "cxxflags": ["-march=armv8.4-a"] + }, + "armv8.6-a": { + "cxxflags": ["-march=armv8.6-a+fp16"], + "cppdefines": ["ARM_COMPUTE_ENABLE_FP16"] + }, + "armv8.6-a-sve": { + "cxxflags": ["-march=armv8.6-a+sve+fp16+dotprod"], + "cppdefines": ["ARM_COMPUTE_ENABLE_FP16", "ARM_COMPUTE_ENABLE_BF16", + "ARM_COMPUTE_ENABLE_I8MM"] + }, + "armv8.6-a-sve2": { + "cxxflags": ["-march=armv8.6-a+sve2+fp16+dotprod"], + "cppdefines": ["ARM_COMPUTE_ENABLE_FP16", "ARM_COMPUTE_ENABLE_BF16", + "ARM_COMPUTE_ENABLE_I8MM"] + + } + } + } +} diff --git a/src/cpu/kernels/add/generic/sve/fp16.cpp b/src/cpu/kernels/add/generic/sve/fp16.cpp index 71056a0a48..28f4d2ba8e 100644 --- a/src/cpu/kernels/add/generic/sve/fp16.cpp +++ b/src/cpu/kernels/add/generic/sve/fp16.cpp @@ -21,6 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ +#if defined(ARM_COMPUTE_ENABLE_SVE) #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) #include "src/cpu/kernels/add/generic/sve/impl.h" @@ -36,3 +37,4 @@ void add_fp16_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, const } } // namespace arm_compute #endif /* (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) */ +#endif /* #if defined(ARM_COMPUTE_ENABLE_SVE) */ \ No newline at end of file -- cgit v1.2.1