about summary refs log tree commit diff
path: root/SConscript
diff options
context:
space:
mode:
Diffstat (limited to 'SConscript')
-rw-r--r-- SConscript 707
1 files changed, 555 insertions, 152 deletions
diff --git a/SConscript b/SConscript
index 94ba6d423f..80aa87cae8 100644
--- a/SConscript
+++ b/SConscript
@@ -1,4 +1,7 @@
-# Copyright (c) 2016, 2017 Arm Limited.
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2016-2024 Arm Limited.
#
# SPDX-License-Identifier: MIT
#
@@ -19,17 +22,18 @@
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
+
import collections
import os.path
import re
import subprocess
import zlib
-import base64
-import string
import json
+import codecs
+import platform
VERSION = "v0.0-unreleased"
-LIBRARY_VERSION_MAJOR = 23
+LIBRARY_VERSION_MAJOR = 36
LIBRARY_VERSION_MINOR = 0
LIBRARY_VERSION_PATCH = 0
SONAME_VERSION = str(LIBRARY_VERSION_MAJOR) + "." + str(LIBRARY_VERSION_MINOR) + "." + str(LIBRARY_VERSION_PATCH)
@@ -38,6 +42,13 @@ Import('env')
Import('vars')
Import('install_lib')
+# Workaround to enable cross-compiling from macOS® to Android™ using the Android NDK.
+if platform.system() == 'Darwin' and env['os'] == 'android':
+ # SCons assumes that a macOS host always wants to build a macOS dynamic library (.dylib, linked with -dynamiclib).
+ # When targeting Android, we overwrite the following construction variables to build a .so shared library instead.
+ env.Replace(SHLIBSUFFIX = '.so') # overwrites .dylib
+ env.Replace(SHLINKFLAGS = ['$LINKFLAGS', '-shared']) # overwrites -dynamiclib
+
def build_bootcode_objs(sources):
arm_compute_env.Append(ASFLAGS = "-I bootcode/")
obj = arm_compute_env.Object(sources)
@@ -45,27 +56,140 @@ def build_bootcode_objs(sources):
Default(obj)
return obj
-def build_sve_objs(sources):
+
+# @brief Create a list of objects from a given file list.
+#
+# @param arch_info A dictionary representing the architecture info, such as the
+# compiler flags and defines (filedefs.json).
+#
+# @param sources A list of files to build
+#
+# @return A list of objects for the corresponding architecture.
+
+def build_obj_list(arch_info, sources, static=False):
+
+ # Clone environment
tmp_env = arm_compute_env.Clone()
- tmp_env.Append(CXXFLAGS = "-march=armv8.2-a+sve+fp16")
- obj = tmp_env.SharedObject(sources)
- obj = install_lib(obj)
- Default(obj)
- return obj
+
+ # Append architecture spec
+ if 'cxxflags' in arch_info and len(arch_info['cxxflags']) > 0:
+ tmp_env.Append(CXXFLAGS = arch_info['cxxflags'])
+
+ # Build and return objects
+ if static:
+ objs = tmp_env.StaticObject(sources)
+ else:
+ objs = tmp_env.SharedObject(sources)
+
+ tmp_env.Default(objs)
+ return objs
+
+# @brief Build multi-ISA files with the respective architecture.
+#
+# @return Two distinct lists:
+# A list of static objects
+# A list of shared objects
+
+def build_multiisa_lib_objects():
+ lib_static_objs = [] # static objects
+ lib_shared_objs = [] # shared objects
+
+ # note that ARM_COMPUTE_ENABLE_FP16 is enabled in update_data_type_layout_flags() to make
+ # sure the environment is propagated to the validation suite
+ arm_compute_env.Append(CPPDEFINES = ['ENABLE_NEON', 'ARM_COMPUTE_ENABLE_NEON',
+ 'ENABLE_SVE', 'ARM_COMPUTE_ENABLE_SVE','ARM_COMPUTE_ENABLE_BF16',
+ 'ARM_COMPUTE_ENABLE_I8MM', 'ARM_COMPUTE_ENABLE_SVEF32MM'])
+
+ # Build all the common files for the base architecture
+ if env['arch'] == 'armv8a' or env['arch'] == 'arm64-v8a':
+ lib_static_objs += build_obj_list(filedefs["armv8-a"], misa_lib_files, static=True)
+ lib_shared_objs += build_obj_list(filedefs["armv8-a"], misa_lib_files, static=False)
+ else:
+ lib_static_objs += build_obj_list(filedefs["armv8.2-a"], misa_lib_files, static=True)
+ lib_shared_objs += build_obj_list(filedefs["armv8.2-a"], misa_lib_files, static=False)
+
+ # Build the FP16 specific files
+ lib_static_objs += build_obj_list(filedefs["armv8.2-a"], misa_lib_files_neon_fp16, static=True)
+ lib_shared_objs += build_obj_list(filedefs["armv8.2-a"], misa_lib_files_neon_fp16, static=False)
+
+ # Build the SVE specific files
+ lib_static_objs += build_obj_list(filedefs["armv8.2-a-sve"], misa_lib_files_sve, static=True)
+ lib_shared_objs += build_obj_list(filedefs["armv8.2-a-sve"], misa_lib_files_sve, static=False)
+ lib_static_objs += build_obj_list(filedefs["armv8.2-a-sve"], misa_lib_files_sve_fp16, static=True)
+ lib_shared_objs += build_obj_list(filedefs["armv8.2-a-sve"], misa_lib_files_sve_fp16, static=False)
+
+
+ # Build the SVE2 specific files
+ arm_compute_env.Append(CPPDEFINES = ['ARM_COMPUTE_ENABLE_SVE2'])
+ lib_static_objs += build_obj_list(filedefs["armv8.6-a-sve2"], misa_lib_files_sve2, static=True)
+ lib_shared_objs += build_obj_list(filedefs["armv8.6-a-sve2"], misa_lib_files_sve2, static=False)
+ lib_static_objs += build_obj_list(filedefs["armv8.6-a-sve2"], misa_lib_files_sve2_fp16, static=True)
+ lib_shared_objs += build_obj_list(filedefs["armv8.6-a-sve2"], misa_lib_files_sve2_fp16, static=False)
+
+
+ return lib_static_objs, lib_shared_objs
+
+
+# The built-in SCons Glob() method does not support recursive searching of directories, thus we implement our own:
+def recursive_glob(root_dir, pattern):
+ files = []
+ regex = re.compile(pattern)
+
+ for dirpath, _, filenames in os.walk(root_dir):
+ for f in filenames:
+ f = os.path.join(dirpath, f)
+ if regex.match(f):
+ files.append(f)
+
+ return files
+
+
+def get_ckw_obj_list():
+ cmake_obj_dir = os.path.abspath("CMakeFiles/ckw.dir/src")
+ return recursive_glob(root_dir=cmake_obj_dir, pattern=".*.o$")
+
def build_library(name, build_env, sources, static=False, libs=[]):
+ cloned_build_env = build_env.Clone()
+ if env['os'] == 'android' and static == False:
+ cloned_build_env["LINKFLAGS"].remove('-pie')
+ cloned_build_env["LINKFLAGS"].remove('-static-libstdc++')
+
+ # -- Static Library --
if static:
- obj = build_env.StaticLibrary(name, source=sources, LIBS = arm_compute_env["LIBS"] + libs)
+ # Recreate the list to avoid mutating the original
+ static_sources = list(sources)
+
+ # Dynamic Fusion has a direct dependency on the Compute Kernel Writer (CKW) subproject, therefore we collect the
+ # built CKW objects to pack into the Compute Library archive.
+ if env['experimental_dynamic_fusion'] and name == "arm_compute-static":
+ static_sources += get_ckw_obj_list()
+
+ obj = cloned_build_env.StaticLibrary(name, source=static_sources, LIBS=arm_compute_env["LIBS"] + libs)
+
+ # -- Shared Library --
else:
+ # Always statically link Compute Library against CKW
+ if env['experimental_dynamic_fusion'] and name == "arm_compute":
+ libs.append('libckw.a')
+
+ # Add shared library versioning
if env['set_soname']:
- obj = build_env.SharedLibrary(name, source=sources, SHLIBVERSION = SONAME_VERSION, LIBS = arm_compute_env["LIBS"] + libs)
+ obj = cloned_build_env.SharedLibrary(name, source=sources, SHLIBVERSION = SONAME_VERSION, LIBS = arm_compute_env["LIBS"] + libs)
+ else:
+ obj = cloned_build_env.SharedLibrary(name, source=sources, LIBS = arm_compute_env["LIBS"] + libs)
+
+ if env['mapfile']:
+ if not 'windows' in env['os'] and not 'macos' in env['os']:
+ cloned_build_env['LINKFLAGS'].append('"-Wl,-Map='+ name + '.map"')
else:
- obj = build_env.SharedLibrary(name, source=sources, LIBS = arm_compute_env["LIBS"] + libs)
+ cloned_build_env['LINKFLAGS'].append('-Wl,-map,' + name + '.map')
obj = install_lib(obj)
- Default(obj)
+ build_env.Default(obj)
return obj
+
def remove_incode_comments(code):
def replace_with_empty(match):
s = match.group(0)
@@ -77,6 +201,7 @@ def remove_incode_comments(code):
comment_regex = re.compile(r'//.*?$|/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"', re.DOTALL | re.MULTILINE)
return re.sub(comment_regex, replace_with_empty, code)
+
def resolve_includes(target, source, env):
# File collection
FileEntry = collections.namedtuple('FileEntry', 'target_name file_contents')
@@ -108,7 +233,9 @@ def resolve_includes(target, source, env):
for line in tmp_file:
found = pattern.search(line)
if found:
- include_file = found.group(1)
+ # Only get the header file name and discard the relative path.
+ # E.g. "src/core/CL/cl_kernels/activation_float_helpers.h" -> "activation_float_helpers.h"
+ include_file = found.group(1).split('/')[-1]
data = files_dict[include_file].file_contents
updated_file.extend(data)
else:
@@ -131,10 +258,12 @@ def resolve_includes(target, source, env):
with open(file[1].target_name.get_path(), 'w+') as out_file:
file_to_write = "\n".join( file[1].file_contents )
if env['compress_kernels']:
- file_to_write = zlib.compress(file_to_write, 9).encode("base64").replace("\n", "")
+ file_to_write = zlib.compress(file_to_write.encode('utf-8'), 9)
+ file_to_write = codecs.encode(file_to_write, "base64").decode('utf-8').replace("\n", "")
file_to_write = "R\"(" + file_to_write + ")\""
out_file.write(file_to_write)
+
def create_version_file(target, source, env):
# Generate string with build options library version to embed in the library:
try:
@@ -142,24 +271,261 @@ def create_version_file(target, source, env):
except (OSError, subprocess.CalledProcessError):
git_hash="unknown"
- build_info = "\"arm_compute_version=%s Build options: %s Git hash=%s\"" % (VERSION, vars.args, git_hash.strip())
+ build_options = str(vars.args).replace('"', '\\"')
+ build_info = "\"arm_compute_version=%s Build options: %s Git hash=%s\"" % (VERSION,build_options, git_hash.strip())
with open(target[0].get_path(), "w") as fd:
fd.write(build_info)
+
+def get_attrs_list(env, data_types, data_layouts):
+ attrs = []
+
+ # Manage data-types
+ if 'all' in data_types:
+ attrs += ['fp16', 'fp32', 'integer', 'qasymm8', 'qasymm8_signed', 'qsymm16']
+ else:
+ if 'fp16' in data_types: attrs += ['fp16']
+ if 'fp32' in data_types: attrs += ['fp32']
+ if 'integer' in data_types: attrs += ['integer']
+ if 'qasymm8' in data_types: attrs += ['qasymm8']
+ if 'qasymm8_signed' in data_types: attrs += ['qasymm8_signed']
+ if 'qsymm16' in data_types: attrs += ['qsymm16']
+ # Manage data-layouts
+ if 'all' in data_layouts:
+ attrs += ['nhwc', 'nchw']
+ else:
+ if 'nhwc' in data_layouts: attrs += ['nhwc']
+ if 'nchw' in data_layouts: attrs += ['nchw']
+
+ # Manage execution state
+ attrs += ['estate32' if (env['estate'] == 'auto' and 'v7a' in env['arch']) or '32' in env['estate'] else 'estate64']
+
+ return attrs
+
+
+def get_operator_backend_files(filelist, operators, backend='', techs=[], attrs=[], include_common=True):
+ files = { "common" : [] }
+ # Early return if the requested backend has no entry in filelist
+ if backend not in filelist:
+ return files
+ # Iterate over operators and create the file lists to compile
+ for operator in operators:
+ if operator in filelist[backend]['operators']:
+ if include_common :
+ files['common'] += filelist[backend]['operators'][operator]["files"]["common"]
+ for tech in techs:
+ if tech in filelist[backend]['operators'][operator]["files"]:
+ # Add tech as a key to dictionary if not there
+ if tech not in files:
+ files[tech] = []
+ # Add tech files to the tech file list
+ tech_files = filelist[backend]['operators'][operator]["files"][tech]
+ if include_common:
+ files[tech] += tech_files.get('common', [])
+ for attr in attrs:
+ files[tech] += tech_files.get(attr, [])
+
+
+ # Remove duplicates if they exist
+ return {k: list(set(v)) for k,v in files.items()}
+
+def collect_operators(filelist, operators, backend=''):
+ ops = set()
+ for operator in operators:
+ if operator in filelist[backend]['operators']:
+ ops.add(operator)
+ if 'deps' in filelist[backend]['operators'][operator]:
+ ops.update(filelist[backend]['operators'][operator]['deps'])
+ else:
+ print("Operator {0} is unsupported on {1} backend!".format(operator, backend))
+
+ return ops
+
+
+def resolve_operator_dependencies(filelist, operators, backend=''):
+ resolved_operators = collect_operators(filelist, operators, backend)
+
+ are_ops_resolved = False
+ while not are_ops_resolved:
+ resolution_pass = collect_operators(filelist, resolved_operators, backend)
+ if len(resolution_pass) != len(resolved_operators):
+ resolved_operators.update(resolution_pass)
+ else:
+ are_ops_resolved = True
+
+ return resolved_operators
+
+def read_build_config_json(build_config):
+ build_config_contents = {}
+ custom_operators = []
+ custom_types = []
+ custom_layouts = []
+ if os.path.isfile(build_config):
+ with open(build_config) as f:
+ try:
+ build_config_contents = json.load(f)
+ except:
+ print("Warning: Build configuration file is of invalid JSON format!")
+ else:
+ try:
+ build_config_contents = json.loads(build_config)
+ except:
+ print("Warning: Build configuration string is of invalid JSON format!")
+ if build_config_contents:
+ custom_operators = build_config_contents.get("operators", [])
+ custom_types = build_config_contents.get("data_types", [])
+ custom_layouts = build_config_contents.get("data_layouts", [])
+ return custom_operators, custom_types, custom_layouts
+
arm_compute_env = env.Clone()
version_file = arm_compute_env.Command("src/core/arm_compute_version.embed", "", action=create_version_file)
arm_compute_env.AlwaysBuild(version_file)
-default_cpp_compiler = 'g++' if env['os'] not in ['android', 'macos'] else 'clang++'
+default_cpp_compiler = 'g++' if env['os'] not in ['android', 'macos', 'openbsd'] else 'clang++'
cpp_compiler = os.environ.get('CXX', default_cpp_compiler)
# Generate embed files
generate_embed = [ version_file ]
if env['opencl'] and env['embed_kernels']:
- cl_files = Glob('src/core/CL/cl_kernels/*.cl')
- cl_files += Glob('src/core/CL/cl_kernels/*.h')
- embed_files = [ f.get_path()+"embed" for f in cl_files ]
+ # Header files
+ cl_helper_files = [ 'src/core/CL/cl_kernels/activation_float_helpers.h',
+ 'src/core/CL/cl_kernels/activation_quant_helpers.h',
+ 'src/core/CL/cl_kernels/gemm_helpers.h',
+ 'src/core/CL/cl_kernels/helpers_asymm.h',
+ 'src/core/CL/cl_kernels/helpers.h',
+ 'src/core/CL/cl_kernels/load_store_utility.h',
+ 'src/core/CL/cl_kernels/repeat.h',
+ 'src/core/CL/cl_kernels/tile_helpers.h',
+ 'src/core/CL/cl_kernels/types.h',
+ 'src/core/CL/cl_kernels/warp_helpers.h',
+ ]
+
+ # Common kernels
+ cl_files_common = ['src/core/CL/cl_kernels/common/activation_layer.cl',
+ 'src/core/CL/cl_kernels/common/activation_layer_quant.cl',
+ 'src/core/CL/cl_kernels/common/arg_min_max.cl',
+ 'src/core/CL/cl_kernels/common/batchnormalization_layer.cl',
+ 'src/core/CL/cl_kernels/common/bounding_box_transform.cl',
+ 'src/core/CL/cl_kernels/common/bounding_box_transform_quantized.cl',
+ 'src/core/CL/cl_kernels/common/bitwise_op.cl',
+ 'src/core/CL/cl_kernels/common/cast.cl',
+ 'src/core/CL/cl_kernels/common/comparisons.cl',
+ 'src/core/CL/cl_kernels/common/concatenate.cl',
+ 'src/core/CL/cl_kernels/common/convolution_layer.cl',
+ 'src/core/CL/cl_kernels/common/col2im.cl',
+ 'src/core/CL/cl_kernels/common/convert_fc_weights.cl',
+ 'src/core/CL/cl_kernels/common/copy_tensor.cl',
+ 'src/core/CL/cl_kernels/common/crop_tensor.cl',
+ 'src/core/CL/cl_kernels/common/deconvolution_layer.cl',
+ 'src/core/CL/cl_kernels/common/dequantization_layer.cl',
+ 'src/core/CL/cl_kernels/common/elementwise_operation.cl',
+ 'src/core/CL/cl_kernels/common/elementwise_operation_quantized.cl',
+ 'src/core/CL/cl_kernels/common/elementwise_unary.cl',
+ 'src/core/CL/cl_kernels/common/elementwise_unary_quantized.cl',
+ 'src/core/CL/cl_kernels/common/fft_digit_reverse.cl',
+ 'src/core/CL/cl_kernels/common/fft.cl',
+ 'src/core/CL/cl_kernels/common/fft_scale.cl',
+ 'src/core/CL/cl_kernels/common/fill_border.cl',
+ 'src/core/CL/cl_kernels/common/floor.cl',
+ 'src/core/CL/cl_kernels/common/gather.cl',
+ 'src/core/CL/cl_kernels/common/scatter.cl',
+ 'src/core/CL/cl_kernels/common/gemm.cl',
+ 'src/core/CL/cl_kernels/common/gemm_reshaped_only_rhs_mmul.cl',
+ 'src/core/CL/cl_kernels/common/gemm_utils.cl',
+ 'src/core/CL/cl_kernels/common/gemmlowp.cl',
+ 'src/core/CL/cl_kernels/common/gemmlowp_reshaped_only_rhs_mmul.cl',
+ 'src/core/CL/cl_kernels/common/gemv.cl',
+ 'src/core/CL/cl_kernels/common/generate_proposals.cl',
+ 'src/core/CL/cl_kernels/common/generate_proposals_quantized.cl',
+ 'src/core/CL/cl_kernels/common/instance_normalization.cl',
+ 'src/core/CL/cl_kernels/common/l2_normalize.cl',
+ 'src/core/CL/cl_kernels/common/mat_mul.cl',
+ 'src/core/CL/cl_kernels/common/mat_mul_mmul.cl',
+ 'src/core/CL/cl_kernels/common/mat_mul_quantized.cl',
+ 'src/core/CL/cl_kernels/common/mat_mul_quantized_mmul.cl',
+ 'src/core/CL/cl_kernels/common/mean_stddev_normalization.cl',
+ 'src/core/CL/cl_kernels/common/memset.cl',
+ 'src/core/CL/cl_kernels/common/minmax_layer.cl',
+ 'src/core/CL/cl_kernels/common/nonmax.cl',
+ 'src/core/CL/cl_kernels/common/pad_layer.cl',
+ 'src/core/CL/cl_kernels/common/permute.cl',
+ 'src/core/CL/cl_kernels/common/pixelwise_mul_float.cl',
+ 'src/core/CL/cl_kernels/common/pixelwise_mul_int.cl',
+ 'src/core/CL/cl_kernels/common/qlstm_layer_normalization.cl',
+ 'src/core/CL/cl_kernels/common/quantization_layer.cl',
+ 'src/core/CL/cl_kernels/common/range.cl',
+ 'src/core/CL/cl_kernels/common/reduction_operation.cl',
+ 'src/core/CL/cl_kernels/common/reshape_layer.cl',
+ 'src/core/CL/cl_kernels/common/reverse.cl',
+ 'src/core/CL/cl_kernels/common/roi_align_layer.cl',
+ 'src/core/CL/cl_kernels/common/roi_align_layer_quantized.cl',
+ 'src/core/CL/cl_kernels/common/roi_pooling_layer.cl',
+ 'src/core/CL/cl_kernels/common/select.cl',
+ 'src/core/CL/cl_kernels/common/slice_ops.cl',
+ 'src/core/CL/cl_kernels/common/softmax_layer.cl',
+ 'src/core/CL/cl_kernels/common/stack_layer.cl',
+ 'src/core/CL/cl_kernels/common/tile.cl',
+ 'src/core/CL/cl_kernels/common/transpose.cl',
+ 'src/core/CL/cl_kernels/common/unpooling_layer.cl'
+ ]
+
+ # NCHW kernels
+ cl_files_nchw = ['src/core/CL/cl_kernels/nchw/batch_to_space.cl',
+ 'src/core/CL/cl_kernels/nchw/batchnormalization_layer.cl',
+ 'src/core/CL/cl_kernels/nchw/channel_shuffle.cl',
+ 'src/core/CL/cl_kernels/nchw/depth_to_space.cl',
+ 'src/core/CL/cl_kernels/nchw/direct_convolution.cl',
+ 'src/core/CL/cl_kernels/nchw/dequantization_layer.cl',
+ 'src/core/CL/cl_kernels/nchw/im2col.cl',
+ 'src/core/CL/cl_kernels/nchw/normalization_layer.cl',
+ 'src/core/CL/cl_kernels/nchw/normalize_planar_yuv_layer.cl',
+ 'src/core/CL/cl_kernels/nchw/normalize_planar_yuv_layer_quantized.cl',
+ 'src/core/CL/cl_kernels/nchw/pooling_layer.cl',
+ 'src/core/CL/cl_kernels/nchw/prior_box_layer.cl',
+ 'src/core/CL/cl_kernels/nchw/reorg_layer.cl',
+ 'src/core/CL/cl_kernels/nchw/scale.cl',
+ 'src/core/CL/cl_kernels/nchw/space_to_batch.cl',
+ 'src/core/CL/cl_kernels/nchw/space_to_depth.cl',
+ 'src/core/CL/cl_kernels/nchw/upsample_layer.cl',
+ 'src/core/CL/cl_kernels/nchw/winograd_filter_transform.cl',
+ 'src/core/CL/cl_kernels/nchw/winograd_input_transform.cl',
+ 'src/core/CL/cl_kernels/nchw/winograd_output_transform.cl'
+ ]
+
+ # NHWC kernels
+ cl_files_nhwc = ['src/core/CL/cl_kernels/nhwc/batch_to_space.cl',
+ 'src/core/CL/cl_kernels/nhwc/batchnormalization_layer.cl',
+ 'src/core/CL/cl_kernels/nhwc/channel_shuffle.cl',
+ 'src/core/CL/cl_kernels/nhwc/direct_convolution.cl',
+ 'src/core/CL/cl_kernels/nhwc/direct_convolution3d.cl',
+ 'src/core/CL/cl_kernels/nhwc/depth_to_space.cl',
+ 'src/core/CL/cl_kernels/nhwc/dequantization_layer.cl',
+ 'src/core/CL/cl_kernels/nhwc/dwc_native_fp_nhwc.cl',
+ 'src/core/CL/cl_kernels/nhwc/dwc_native_quantized_nhwc.cl',
+ 'src/core/CL/cl_kernels/nhwc/im2col.cl',
+ 'src/core/CL/cl_kernels/nhwc/indirect_convolution.cl',
+ 'src/core/CL/cl_kernels/nhwc/normalization_layer.cl',
+ 'src/core/CL/cl_kernels/nhwc/normalize_planar_yuv_layer.cl',
+ 'src/core/CL/cl_kernels/nhwc/normalize_planar_yuv_layer_quantized.cl',
+ 'src/core/CL/cl_kernels/nhwc/pooling_layer.cl',
+ 'src/core/CL/cl_kernels/nhwc/pooling_3d_layer.cl',
+ 'src/core/CL/cl_kernels/nhwc/pooling_3d_layer_quantized.cl',
+ 'src/core/CL/cl_kernels/nhwc/pooling_layer_quantized.cl',
+ 'src/core/CL/cl_kernels/nhwc/reorg_layer.cl',
+ 'src/core/CL/cl_kernels/nhwc/scale.cl',
+ 'src/core/CL/cl_kernels/nhwc/space_to_batch.cl',
+ 'src/core/CL/cl_kernels/nhwc/space_to_depth.cl',
+ 'src/core/CL/cl_kernels/nhwc/transposed_convolution.cl',
+ 'src/core/CL/cl_kernels/nhwc/upsample_layer.cl',
+ 'src/core/CL/cl_kernels/nhwc/winograd_filter_transform.cl',
+ 'src/core/CL/cl_kernels/nhwc/winograd_input_transform.cl',
+ 'src/core/CL/cl_kernels/nhwc/winograd_output_transform.cl'
+ ]
+
+ cl_files = cl_helper_files + cl_files_common + cl_files_nchw + cl_files_nhwc
+
+ embed_files = [ f+"embed" for f in cl_files ]
arm_compute_env.Append(CPPPATH =[Dir("./src/core/CL/").path] )
generate_embed.append(arm_compute_env.Command(embed_files, cl_files, action=resolve_includes))
@@ -175,187 +541,224 @@ arm_compute_env.Append(CPPDEFINES = [('ARM_COMPUTE_VERSION_MAJOR', LIBRARY_VERSI
# Don't allow undefined references in the libraries:
undefined_flag = '-Wl,-undefined,error' if 'macos' in arm_compute_env["os"] else '-Wl,--no-undefined'
-arm_compute_env.Append(LINKFLAGS=[undefined_flag])
+if not env['thread_sanitizer'] and not env['address_sanitizer'] and not env['undefined_sanitizer']:
+ arm_compute_env.Append(LINKFLAGS=[undefined_flag])
+
arm_compute_env.Append(CPPPATH =[Dir("./src/core/").path] )
-arm_compute_env.Append(LIBS = ['dl'])
+if env['os'] != 'openbsd':
+ if env['os'] == 'windows':
+ arm_compute_env.Append(LIBS = [])
+ else:
+ arm_compute_env.Append(LIBS = ['dl'])
+
+
+# Load build definitions file
+with (open(Dir('#').path + '/filedefs.json')) as fd:
+ filedefs = json.load(fd)
+ filedefs = filedefs['cpu']['arch']
+
with (open(Dir('#').path + '/filelist.json')) as fp:
filelist = json.load(fp)
-core_files = Glob('src/core/*.cpp')
-core_files += Glob('src/core/CPP/*.cpp')
-core_files += Glob('src/core/CPP/kernels/*.cpp')
-core_files += Glob('src/core/helpers/*.cpp')
-core_files += Glob('src/core/utils/*.cpp')
-core_files += Glob('src/core/utils/helpers/*.cpp')
-core_files += Glob('src/core/utils/io/*.cpp')
-core_files += Glob('src/core/utils/quantization/*.cpp')
-core_files += Glob('src/core/utils/misc/*.cpp')
-if env["logging"]:
- core_files += Glob('src/core/utils/logging/*.cpp')
+# Common backend files
+lib_files = filelist['common']
-runtime_files = Glob('src/runtime/*.cpp')
-runtime_files += Glob('src/runtime/CPP/ICPPSimpleFunction.cpp')
-runtime_files += Glob('src/runtime/CPP/functions/*.cpp')
+# Fixed format GEMM kernels.
+if env['fixed_format_kernels']:
+ arm_compute_env.Append(CPPDEFINES = ['ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS'])
-# C API files
-runtime_files += filelist['c_api']['cpu']
+# Experimental files
+# Dynamic fusion
+if env['experimental_dynamic_fusion']:
+ lib_files += filelist['experimental']['dynamic_fusion']['common']
+ lib_files += filelist['experimental']['dynamic_fusion']['ckw_driver']
-if env['opencl']:
- runtime_files += filelist['c_api']['gpu']
+# Logging files
+if env["logging"]:
+ lib_files += filelist['logging']
-# Common backend files
-core_files += filelist['common']
+# C API files
+lib_files += filelist['c_api']['common']
+lib_files += filelist['c_api']['operators']
-runtime_files += Glob('src/runtime/CPP/SingleThreadScheduler.cpp')
+# Scheduler infrastructure
+lib_files += filelist['scheduler']['single']
+if env['cppthreads']:
+ lib_files += filelist['scheduler']['threads']
+if env['openmp']:
+ lib_files += filelist['scheduler']['omp']
+# Graph files
graph_files = Glob('src/graph/*.cpp')
graph_files += Glob('src/graph/*/*.cpp')
-if env['cppthreads']:
- runtime_files += Glob('src/runtime/CPP/CPPScheduler.cpp')
+# Specify user-defined priority operators
+custom_operators = []
+custom_types = []
+custom_layouts = []
-if env['openmp']:
- runtime_files += Glob('src/runtime/OMP/OMPScheduler.cpp')
+use_custom_ops = env['high_priority'] or env['build_config']
+
+if env['high_priority']:
+ custom_operators = filelist['high_priority']
+ custom_types = ['all']
+ custom_layouts = ['all']
+
+if env['build_config']:
+ custom_operators, custom_types, custom_layouts = read_build_config_json(env['build_config'])
if env['opencl']:
- cl_kernel_hp_files = ['src/core/gpu/cl/kernels/gemm/ClGemmHelpers.cpp',
- 'src/core/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeBifrost.cpp',
- 'src/core/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeMidgard.cpp',
- 'src/core/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeValhall.cpp',
- 'src/core/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedBifrost.cpp',
- 'src/core/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedValhall.cpp',
- 'src/core/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyBifrost.cpp',
- 'src/core/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyValhall.cpp',
- ]
- core_files += cl_kernel_hp_files
- core_files += Glob('src/core/CL/*.cpp')
- core_files += Glob('src/core/gpu/cl/*.cpp')
-
- runtime_files += Glob('src/runtime/CL/*.cpp')
- runtime_files += Glob('src/runtime/CL/functions/*.cpp')
- runtime_files += Glob('src/runtime/CL/gemm/*.cpp')
- runtime_files += Glob('src/runtime/CL/tuners/*.cpp')
- runtime_files += Glob('src/runtime/gpu/cl/*.cpp')
- runtime_files += Glob('src/runtime/gpu/cl/operators/*.cpp')
- runtime_files += Glob('src/runtime/CL/mlgo/*.cpp')
- runtime_files += Glob('src/runtime/CL/gemm_auto_heuristics/*.cpp')
-
- runtime_files += Glob('src/gpu/cl/*.cpp')
+ lib_files += filelist['c_api']['gpu']
+ lib_files += filelist['gpu']['common']
+
+ cl_operators = custom_operators if use_custom_ops else filelist['gpu']['operators'].keys()
+ cl_ops_to_build = resolve_operator_dependencies(filelist, cl_operators, 'gpu')
+ lib_files += get_operator_backend_files(filelist, cl_ops_to_build, 'gpu')['common']
+
graph_files += Glob('src/graph/backends/CL/*.cpp')
- core_files += filelist['gpu']['core']['kernels']['high_priority'] + filelist['gpu']['core']['kernels']['all']
-sve_o = []
-core_files_sve = []
-if env['neon']:
- core_files += Glob('src/core/NEON/*.cpp')
- core_files += Glob('src/core/NEON/kernels/*.cpp')
- core_files += Glob('src/core/NEON/kernels/assembly/*.cpp')
+lib_files_sve = []
+lib_files_sve2 = []
- core_files += Glob('src/core/NEON/kernels/arm_gemm/*.cpp')
- core_files += Glob('src/core/NEON/kernels/arm_conv/*.cpp')
- core_files += Glob('src/core/NEON/kernels/arm_conv/pooling/*.cpp')
- core_files += Glob('src/core/NEON/kernels/arm_conv/pooling/kernels/cpp_*/*.cpp')
+# the variables below are used for the multi_isa builds
+# please note that the variable names without the _fp16 suffix
+# do not hold any fp16 files.
+misa_lib_files = lib_files
+misa_lib_files_sve = []
+misa_lib_files_sve2 = []
+misa_lib_files_neon_fp16 = []
+misa_lib_files_sve_fp16 = []
+misa_lib_files_sve2_fp16 = []
+
+if env['neon']:
# build winograd/depthwise sources for either v7a / v8a
- core_files += Glob('src/core/NEON/kernels/convolution/*/*.cpp')
- core_files += Glob('src/core/NEON/kernels/convolution/winograd/*/*.cpp')
- arm_compute_env.Append(CPPPATH = ["src/core/NEON/kernels/convolution/common/",
+ arm_compute_env.Append(CPPPATH = ["src/core/NEON/kernels/arm_gemm",
+ "src/core/NEON/kernels/convolution/common/",
"src/core/NEON/kernels/convolution/winograd/",
- "src/core/NEON/kernels/convolution/depthwise/",
+ "src/core/NEON/kernels/arm_conv/depthwise/",
+ "src/core/NEON/kernels/arm_conv/pooling/",
+ "src/core/NEON/kernels/arm_conv/",
"src/core/NEON/kernels/assembly/",
"arm_compute/core/NEON/kernels/assembly/",
- "src/core/cpu/kernels/assembly/",])
+ "src/cpu/kernels/assembly/"])
+
+ # Setup SIMD file list to include
+ simd = ['neon']
+ if env['multi_isa']:
+ simd += ['sve', 'sve2']
+ else:
+ if 'sve' in env['arch']: simd += ['sve']
+ if 'sve2' in env['arch']: simd += ['sve2']
+
+ # Get attributes
+ if(use_custom_ops):
+ attrs = get_attrs_list(env, custom_types, custom_layouts)
+ else:
+ attrs = get_attrs_list(env, env['data_type_support'], env['data_layout_support'])
+
+ if env['fixed_format_kernels']:
+ attrs.append("fixed_format_kernels")
+
+ # Setup data-type and data-layout files to include
+ cpu_operators = custom_operators if use_custom_ops else filelist['cpu']['operators'].keys()
+ cpu_ops_to_build = resolve_operator_dependencies(filelist, cpu_operators, 'cpu')
+
+ if env['multi_isa']:
+ misa_lib_files += filelist['cpu']['common']
+
+ # For multi_isa builds we need to build fp16 files for armv8.2-a+fp16, so we filter them out of cpu_files by removing the fp16 attribute
+ attrs.remove('fp16')
+ cpu_files = get_operator_backend_files(filelist, cpu_ops_to_build, 'cpu', simd, attrs)
+
+ # Shared among ALL CPU files
+ misa_lib_files += cpu_files.get('common', [])
+
+ # Arm® Neon™ specific files
+ misa_lib_files += cpu_files.get('neon', [])
+
+ # Get all the fp16 files
+ fp16_cpu_files = get_operator_backend_files(filelist, cpu_ops_to_build, 'cpu', simd, ['fp16'],False)
+
+ misa_lib_files_neon_fp16 = fp16_cpu_files.get('neon',[])
+ misa_lib_files_sve_fp16 = fp16_cpu_files.get('sve',[])
+ misa_lib_files_sve2_fp16 = fp16_cpu_files.get('sve2',[])
+
+ # SVE files only minus FP16
+ misa_lib_files_sve = cpu_files.get('sve', [])
+
+ # SVE2 files only minus FP16
+ misa_lib_files_sve2 = cpu_files.get('sve2', [])
+ else:
+ lib_files += filelist['cpu']['common']
+
+ # Non multi_isa build
+ cpu_files = get_operator_backend_files(filelist, cpu_ops_to_build, 'cpu', simd, attrs)
+
+ # Shared among ALL CPU files
+ lib_files += cpu_files.get('common', [])
+
+ # Arm® Neon™ specific files
+ lib_files += cpu_files.get('neon', [])
+
+ lib_files_sve = cpu_files.get('sve', [])
+
+ lib_files_sve2 = cpu_files.get('sve2', [])
graph_files += Glob('src/graph/backends/NEON/*.cpp')
- if env['estate'] == '32':
- core_files += Glob('src/core/NEON/kernels/arm_gemm/kernels/a32_*/*.cpp')
-
- if env['estate'] == '64':
- core_files += Glob('src/core/NEON/kernels/arm_gemm/kernels/a64_*/*.cpp')
- core_files += Glob('src/core/NEON/kernels/arm_conv/pooling/kernels/a64_*/*.cpp')
- if "sve" in env['arch'] or env['fat_binary']:
- core_files_sve += filelist['cpu']['core']['sve']['all']
- core_files_sve += Glob('src/core/NEON/kernels/arm_gemm/kernels/sve_*/*.cpp')
- core_files_sve += Glob('src/core/NEON/kernels/arm_conv/pooling/kernels/sve_*/*.cpp')
-
- if any(i in env['data_layout_support'] for i in ['all', 'nchw']):
- core_files += filelist['cpu']['core']['neon']['nchw']
-
- if any(i in env['data_type_support'] for i in ['all', 'fp16']):
- if not "sve" in env['arch'] or env['fat_binary']:
- core_files += filelist['cpu']['core']['neon']['fp16']
- if "sve" in env['arch'] or env['fat_binary']:
- core_files_sve += filelist['cpu']['core']['sve']['fp16']
- if any(i in env['data_type_support'] for i in ['all', 'fp32']):
- if not "sve" in env['arch'] or env['fat_binary']:
- core_files += filelist['cpu']['core']['neon']['fp32']
- if "sve" in env['arch'] or env['fat_binary']:
- core_files_sve += filelist['cpu']['core']['sve']['fp32']
- if any(i in env['data_type_support'] for i in ['all', 'qasymm8']):
- core_files += filelist['cpu']['core']['neon']['qasymm8']
- core_files_sve += filelist['cpu']['core']['sve']['qasymm8']
- if any(i in env['data_type_support'] for i in ['all', 'qasymm8_signed']):
- core_files += filelist['cpu']['core']['neon']['qasymm8_signed']
- core_files_sve += filelist['cpu']['core']['sve']['qasymm8_signed']
- if any(i in env['data_type_support'] for i in ['all', 'qsymm16']):
- core_files += filelist['cpu']['core']['neon']['qsymm16']
- core_files_sve += filelist['cpu']['core']['sve']['qsymm16']
- if any(i in env['data_type_support'] for i in ['all', 'integer']):
- if not "sve" in env['arch'] or env['fat_binary']:
- core_files += filelist['cpu']['core']['neon']['integer']
- if "sve" in env['arch'] or env['fat_binary']:
- core_files_sve += filelist['cpu']['core']['sve']['integer']
-
- core_files += Glob('src/core/cpu/kernels/*/*.cpp')
- core_files += filelist['cpu']['core']['kernels']['high_priority'] + filelist['cpu']['core']['kernels']['all']
-
- runtime_files += Glob('src/runtime/NEON/*.cpp')
- runtime_files += Glob('src/runtime/NEON/functions/*.cpp')
- runtime_files += Glob('src/runtime/NEON/functions/assembly/*.cpp')
- runtime_files += filelist['cpu']['runtime']['all'] + filelist['cpu']['runtime']['operators']['high_priority'] \
- + filelist['cpu']['runtime']['operators']['all'] + filelist['cpu']['runtime']['operators']['internal']
+# Skip building the graph API if a reduced operator list has been provided
+if use_custom_ops:
+ print("WARNING: Graph library requires all operators to be built")
+ graph_files = []
+# Build bootcode in case of bare-metal
bootcode_o = []
if env['os'] == 'bare_metal':
bootcode_files = Glob('bootcode/*.s')
bootcode_o = build_bootcode_objs(bootcode_files)
Export('bootcode_o')
-if (env['fat_binary']):
- sve_o = build_sve_objs(core_files_sve)
- arm_compute_core_a = build_library('arm_compute_core-static', arm_compute_env, core_files + sve_o, static=True)
+
+if (env['multi_isa']):
+ lib_static_objs, lib_shared_objs = build_multiisa_lib_objects()
+ # STATIC library build.
+ arm_compute_a = build_library('arm_compute-static', arm_compute_env, lib_static_objs, static=True)
else:
- arm_compute_core_a = build_library('arm_compute_core-static', arm_compute_env, core_files + core_files_sve, static=True)
-Export('arm_compute_core_a')
+ if 'sve2' in env['arch']:
+ lib_files += lib_files_sve
+ lib_files += lib_files_sve2
+ elif 'sve' in env['arch']:
+ lib_files += lib_files_sve
-if env['os'] != 'bare_metal' and not env['standalone']:
- if (env['fat_binary']):
- arm_compute_core_so = build_library('arm_compute_core', arm_compute_env, core_files + sve_o, static=False)
- else:
- arm_compute_core_so = build_library('arm_compute_core', arm_compute_env, core_files + core_files_sve, static=False)
- Export('arm_compute_core_so')
+ arm_compute_a = build_library('arm_compute-static', arm_compute_env, lib_files, static=True)
-arm_compute_a = build_library('arm_compute-static', arm_compute_env, runtime_files, static=True, libs = [ arm_compute_core_a ])
Export('arm_compute_a')
+# SHARED library build.
if env['os'] != 'bare_metal' and not env['standalone']:
- arm_compute_so = build_library('arm_compute', arm_compute_env, runtime_files, static=False, libs = [ "arm_compute_core" ])
- Depends(arm_compute_so, arm_compute_core_so)
+ if (env['multi_isa']):
+
+ arm_compute_so = build_library('arm_compute', arm_compute_env, lib_shared_objs, static=False)
+ else:
+ arm_compute_so = build_library('arm_compute', arm_compute_env, lib_files, static=False)
+
Export('arm_compute_so')
+
arm_compute_graph_env = arm_compute_env.Clone()
+# Build graph libraries
arm_compute_graph_env.Append(CXXFLAGS = ['-Wno-redundant-move', '-Wno-pessimizing-move'])
-arm_compute_graph_a = build_library('arm_compute_graph-static', arm_compute_graph_env, graph_files, static=True, libs = [ arm_compute_a])
+arm_compute_graph_a = build_library('arm_compute_graph-static', arm_compute_graph_env, graph_files, static=True)
Export('arm_compute_graph_a')
if env['os'] != 'bare_metal' and not env['standalone']:
- arm_compute_graph_so = build_library('arm_compute_graph', arm_compute_graph_env, graph_files, static=False, libs = [ "arm_compute" , "arm_compute_core"])
+ arm_compute_graph_so = build_library('arm_compute_graph', arm_compute_graph_env, graph_files, static=False, libs = [ "arm_compute" ])
Depends(arm_compute_graph_so, arm_compute_so)
Export('arm_compute_graph_so')
@@ -367,6 +770,6 @@ else:
Default(alias)
if env['standalone']:
- Depends([alias,arm_compute_core_a], generate_embed)
+ Depends([alias], generate_embed)
else:
- Depends([alias,arm_compute_core_so, arm_compute_core_a], generate_embed)
+ Depends([alias], generate_embed)