aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFreddie Liardet <frederick.liardet@arm.com>2021-09-21 12:36:43 +0100
committerFreddie Liardet <frederick.liardet@arm.com>2021-10-18 11:13:39 +0000
commit487d390b94ee93e1d89f5066ef8ca9442ab0a590 (patch)
tree4ecd1d31ee118a6e708dbff33df12cbcb6e58908
parent841c3e9f68b1954828a9137d6780b6d021354eb7 (diff)
downloadComputeLibrary-487d390b94ee93e1d89f5066ef8ca9442ab0a590.tar.gz
Add user provided JSON operator list build
Allow ACL to be built via a user provided JSON file containing operators, data types and data layouts. Modify TFLite file to JSON file script to output data layouts. Fix build issue with "fat_binary" and "high_priority" options. Resolves: COMPMID-4697, COMPMID-4837 Signed-off-by: Freddie Liardet <frederick.liardet@arm.com> Change-Id: I08d494151c98f804325707ffd922ffe216813023 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6427 Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
-rw-r--r--SConscript95
-rw-r--r--SConstruct82
-rw-r--r--filelist.json459
-rwxr-xr-x[-rw-r--r--]python/scripts/report-model-ops/report_model_ops.py63
-rw-r--r--python/scripts/utils/model_identification.py4
-rw-r--r--python/scripts/utils/tflite_helpers.py44
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp32.cpp2
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/depthwise_s8q.cpp2
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_generic_output9_mla_depthfirst/generic.cpp2
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst/generic.cpp2
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst/generic.cpp3
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp2
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_generic_output9_mla_depthfirst/generic.cpp2
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp2
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp2
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp2
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_generic_output9_mla_depthfirst/generic.cpp2
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp2
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp2
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp2
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_generic_output9_mla_depthfirst/generic.cpp2
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp2
-rw-r--r--src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp2
-rw-r--r--src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp2
-rw-r--r--src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp2
-rw-r--r--src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp2
26 files changed, 399 insertions, 389 deletions
diff --git a/SConscript b/SConscript
index 6672caee9f..bcb93fde62 100644
--- a/SConscript
+++ b/SConscript
@@ -157,31 +157,28 @@ def create_version_file(target, source, env):
fd.write(build_info)
-def get_attrs_list(arch, estate, data_types, data_layouts):
+def get_attrs_list(env, data_types, data_layouts):
attrs = []
# Manage data-types
- if any(i in data_types for i in ['all']):
+ if 'all' in data_types:
attrs += ['fp16', 'fp32', 'integer', 'qasymm8', 'qasymm8_signed', 'qsymm16']
else:
- if any(i in data_types for i in ['fp16']): attrs += ['fp16']
- if any(i in data_types for i in ['fp32']): attrs += ['fp32']
- if any(i in data_types for i in ['integer']): attrs += ['integer']
- if any(i in data_types for i in ['qasymm8']): attrs += ['qasymm8']
- if any(i in data_types for i in ['qasymm8_signed']): attrs += ['qasymm8_signed']
- if any(i in data_types for i in ['qsymm16']): attrs += ['qsymm16']
-
+ if 'fp16' in data_types: attrs += ['fp16']
+ if 'fp32' in data_types: attrs += ['fp32']
+ if 'integer' in data_types: attrs += ['integer']
+ if 'qasymm8' in data_types: attrs += ['qasymm8']
+ if 'qasymm8_signed' in data_types: attrs += ['qasymm8_signed']
+ if 'qsymm16' in data_types: attrs += ['qsymm16']
# Manage data-layouts
- if any(i in data_layouts for i in ['all']):
+ if 'all' in data_layouts:
attrs += ['nhwc', 'nchw']
else:
- if any(i in data_layouts for i in ['nhwc']): attrs += ['nhwc']
- if any(i in data_layouts for i in ['nchw']): attrs += ['nchw']
+ if 'nhwc' in data_layouts: attrs += ['nhwc']
+ if 'nchw' in data_layouts: attrs += ['nchw']
# Manage execution state
- estate_attr = 'estate32' if (estate == 'auto' and 'v7a' in arch) or '32' in estate else 'estate64'
- attrs += [ estate_attr ]
-
+ attrs += ['estate32' if (env['estate'] == 'auto' and 'v7a' in env['arch']) or '32' in env['estate'] else 'estate64']
return attrs
@@ -237,6 +234,27 @@ def resolve_operator_dependencies(filelist, operators, backend=''):
return resolved_operators
+def read_build_config_json(build_config):
+ build_config_contents = {}
+ custom_operators = []
+ custom_types = []
+ custom_layouts = []
+ if os.path.isfile(build_config):
+ with open(build_config) as f:
+ try:
+ build_config_contents = json.load(f)
+ except:
+ print("Warning: Build configuration file is of invalid JSON format!")
+ else:
+ try:
+ build_config_contents = json.loads(build_config)
+ except:
+ print("Warning: Build configuration string is of invalid JSON format!")
+ if build_config_contents:
+ custom_operators = build_config_contents.get("operators", [])
+ custom_types = build_config_contents.get("data_types", [])
+ custom_layouts = build_config_contents.get("data_layouts", [])
+ return custom_operators, custom_types, custom_layouts
arm_compute_env = env.Clone()
version_file = arm_compute_env.Command("src/core/arm_compute_version.embed", "", action=create_version_file)
@@ -427,30 +445,25 @@ graph_files = Glob('src/graph/*.cpp')
graph_files += Glob('src/graph/*/*.cpp')
# Specify user-defined priority operators
-use_priority_ops = env['high_priority']
-priority_operators = filelist['high_priority']
-if env['build_config'] != "":
- build_config = env['build_config']
- build_config_contents = {}
- if os.path.isfile(build_config):
- with open(build_config) as f:
- try:
- build_config_contents = json.load(f)
- except:
- print("Warning: Build configuration file is of invalid JSON format!")
- else:
- try:
- build_config_contents = json.loads(build_config)
- except:
- print("Warning: Build configuration string is of invalid JSON format!")
- if build_config_contents:
- priority_operators = build_config_contents.get("operators", [])
+custom_operators = []
+custom_types = []
+custom_layouts = []
+
+use_custom_ops = env['high_priority'] or env['build_config'];
+
+if env['high_priority']:
+ custom_operators = filelist['high_priority']
+ custom_types = ['all']
+ custom_layouts = ['all']
+
+if env['build_config']:
+ custom_operators, custom_types, custom_layouts = read_build_config_json(env['build_config'])
if env['opencl']:
lib_files += filelist['c_api']['gpu']
lib_files += filelist['gpu']['common']
- cl_operators = priority_operators if use_priority_ops else filelist['gpu']['operators'].keys()
+ cl_operators = custom_operators if use_custom_ops else filelist['gpu']['operators'].keys()
cl_ops_to_build = resolve_operator_dependencies(filelist, cl_operators, 'gpu')
lib_files += get_operator_backend_files(filelist, cl_ops_to_build, 'gpu')['common']
@@ -475,11 +488,15 @@ if env['neon']:
if 'sve' not in env['arch'] or env['fat_binary']: simd += ['neon']
# Get attributes
- attrs = get_attrs_list(env['arch'], env['estate'], env['data_type_support'], env['data_layout_support'])
+ if(use_custom_ops):
+ attrs = get_attrs_list(env, custom_types, custom_layouts)
+ else:
+ attrs = get_attrs_list(env, env['data_type_support'], env['data_layout_support'])
# Setup data-type and data-layout files to include
- cpu_operators = priority_operators if use_priority_ops else filelist['cpu']['operators'].keys()
- cpu_ops_to_build = resolve_operator_dependencies(filelist, filelist['cpu']['operators'], 'cpu')
+ cpu_operators = custom_operators if use_custom_ops else filelist['cpu']['operators'].keys()
+ cpu_ops_to_build = resolve_operator_dependencies(filelist, cpu_operators, 'cpu')
+
cpu_files = get_operator_backend_files(filelist, cpu_ops_to_build, 'cpu', simd, attrs)
lib_files += cpu_files.get('common', [])
lib_files += cpu_files.get('neon', [])
@@ -488,8 +505,8 @@ if env['neon']:
graph_files += Glob('src/graph/backends/NEON/*.cpp')
# Restrict from building graph API if a reduced operator list has been provided
-if use_priority_ops:
- print("Graph library requires all operators to be built")
+if use_custom_ops:
+ print("WARNING: Graph library requires all operators to be built")
graph_files = []
# Build bootcode in case of bare-metal
diff --git a/SConstruct b/SConstruct
index 7591075cd1..400228c71a 100644
--- a/SConstruct
+++ b/SConstruct
@@ -41,6 +41,50 @@ def version_at_least(version, required):
return True
+def read_build_config_json(build_config):
+ build_config_contents = {}
+ custom_types = []
+ custom_layouts = []
+ if os.path.isfile(build_config):
+ with open(build_config) as f:
+ try:
+ build_config_contents = json.load(f)
+ except:
+ print("Warning: Build configuration file is of invalid JSON format!")
+ else:
+ try:
+ build_config_contents = json.loads(build_config)
+ except:
+ print("Warning: Build configuration string is of invalid JSON format!")
+ if build_config_contents:
+ custom_types = build_config_contents.get("data_types", [])
+ custom_layouts = build_config_contents.get("data_layouts", [])
+ return custom_types, custom_layouts
+
+def update_data_type_layout_flags(env, data_types, data_layouts):
+ # Manage data-types
+ if any(i in data_types for i in ['all', 'fp16']):
+ env.Append(CXXFLAGS = ['-DENABLE_FP16_KERNELS'])
+ if any(i in data_types for i in ['all', 'fp32']):
+ env.Append(CXXFLAGS = ['-DENABLE_FP32_KERNELS'])
+ if any(i in data_types for i in ['all', 'qasymm8']):
+ env.Append(CXXFLAGS = ['-DENABLE_QASYMM8_KERNELS'])
+ if any(i in data_types for i in ['all', 'qasymm8_signed']):
+ env.Append(CXXFLAGS = ['-DENABLE_QASYMM8_SIGNED_KERNELS'])
+ if any(i in data_types for i in ['all', 'qsymm16']):
+ env.Append(CXXFLAGS = ['-DENABLE_QSYMM16_KERNELS'])
+ if any(i in data_types for i in ['all', 'integer']):
+ env.Append(CXXFLAGS = ['-DENABLE_INTEGER_KERNELS'])
+
+ # Manage data-layouts
+ if any(i in data_layouts for i in ['all', 'nhwc']):
+ env.Append(CXXFLAGS = ['-DENABLE_NHWC_KERNELS'])
+ if any(i in data_layouts for i in ['all', 'nchw']):
+ env.Append(CXXFLAGS = ['-DENABLE_NCHW_KERNELS'])
+
+ return env
+
+
vars = Variables("scons")
vars.AddVariables(
BoolVariable("debug", "Debug", False),
@@ -327,25 +371,20 @@ if env['high_priority'] and env['build_config']:
if not env['high_priority'] and not env['build_config']:
env.Append(CPPDEFINES = ['ARM_COMPUTE_GRAPH_ENABLED'])
-if env['data_type_support']:
- if any(i in env['data_type_support'] for i in ['all', 'fp16']):
- env.Append(CXXFLAGS = ['-DENABLE_FP16_KERNELS'])
- if any(i in env['data_type_support'] for i in ['all', 'fp32']):
- env.Append(CXXFLAGS = ['-DENABLE_FP32_KERNELS'])
- if any(i in env['data_type_support'] for i in ['all', 'qasymm8']):
- env.Append(CXXFLAGS = ['-DENABLE_QASYMM8_KERNELS'])
- if any(i in env['data_type_support'] for i in ['all', 'qasymm8_signed']):
- env.Append(CXXFLAGS = ['-DENABLE_QASYMM8_SIGNED_KERNELS'])
- if any(i in env['data_type_support'] for i in ['all', 'qsymm16']):
- env.Append(CXXFLAGS = ['-DENABLE_QSYMM16_KERNELS'])
- if any(i in env['data_type_support'] for i in ['all', 'integer']):
- env.Append(CXXFLAGS = ['-DENABLE_INTEGER_KERNELS'])
+data_types = []
+data_layouts = []
-if env['data_layout_support']:
- if any(i in env['data_layout_support'] for i in ['all', 'nhwc']):
- env.Append(CXXFLAGS = ['-DENABLE_NHWC_KERNELS'])
- if any(i in env['data_layout_support'] for i in ['all', 'nchw']):
- env.Append(CXXFLAGS = ['-DENABLE_NCHW_KERNELS'])
+# Set correct data types / layouts to build
+if env['high_priority']:
+ data_types = ['all']
+ data_layouts = ['all']
+elif env['build_config']:
+ data_types, data_layouts = read_build_config_json(env['build_config'])
+else:
+ data_types = env['data_type_support']
+ data_layouts = env['data_layout_support']
+
+env = update_data_type_layout_flags(env, data_types, data_layouts)
if env['standalone']:
env.Append(CXXFLAGS = ['-fPIC'])
@@ -417,6 +456,10 @@ Export('version_at_least')
SConscript('./SConscript', variant_dir=build_path, duplicate=0)
+if env['examples'] and (env['build_config'] or env['high_priority']):
+ print("WARNING: Building examples for selected operators not supported. Use examples=0")
+ Return()
+
if env['examples'] and env['exceptions']:
if env['os'] == 'bare_metal' and env['arch'] == 'armv7a':
print("WARNING: Building examples for bare metal and armv7a is not supported. Use examples=0")
@@ -424,6 +467,9 @@ if env['examples'] and env['exceptions']:
SConscript('./examples/SConscript', variant_dir='%s/examples' % build_path, duplicate=0)
if env['exceptions']:
+ if env['build_config'] or env['high_priority']:
+ print("WARNING: Building tests for selected operators not supported")
+ Return()
if env['os'] == 'bare_metal' and env['arch'] == 'armv7a':
print("WARNING: Building tests for bare metal and armv7a is not supported")
Return()
diff --git a/filelist.json b/filelist.json
index e52b7c824c..bcc7ecb37a 100644
--- a/filelist.json
+++ b/filelist.json
@@ -845,21 +845,21 @@
"common": [
"src/cpu/operators/CpuActivation.cpp",
"src/cpu/kernels/CpuActivationKernel.cpp",
- "src/runtime/NEON/functions/NEActivationLayer.cpp"
+ "src/runtime/NEON/functions/NEActivationLayer.cpp",
+ "src/cpu/kernels/activation/neon/qasymm8.cpp",
+ "src/cpu/kernels/activation/neon/qasymm8_signed.cpp",
+ "src/cpu/kernels/activation/neon/qsymm16.cpp"
],
"neon": {
"fp16": [ "src/cpu/kernels/activation/neon/fp16.cpp" ],
- "fp32": [ "src/cpu/kernels/activation/neon/fp32.cpp" ],
- "qasymm8": [ "src/cpu/kernels/activation/neon/qasymm8.cpp" ],
- "qasymm8_signed": [ "src/cpu/kernels/activation/neon/qasymm8_signed.cpp" ],
- "qsymm16": [ "src/cpu/kernels/activation/neon/qsymm16.cpp" ]
+ "fp32": [ "src/cpu/kernels/activation/neon/fp32.cpp" ]
},
"sve": {
"fp16": [ "src/cpu/kernels/activation/sve/fp16.cpp" ],
"fp32": [ "src/cpu/kernels/activation/sve/fp32.cpp" ],
- "qasymm8": [ "src/cpu/kernels/activation/neon/qasymm8.cpp", "src/cpu/kernels/activation/sve/qasymm8.cpp" ],
- "qasymm8_signed": [ "src/cpu/kernels/activation/neon/qasymm8_signed.cpp", "src/cpu/kernels/activation/sve/qasymm8_signed.cpp" ],
- "qsymm16": [ "src/cpu/kernels/activation/neon/qsymm16.cpp", "src/cpu/kernels/activation/sve/qsymm16.cpp" ]
+ "qasymm8": [ "src/cpu/kernels/activation/sve/qasymm8.cpp" ],
+ "qasymm8_signed": [ "src/cpu/kernels/activation/sve/qasymm8_signed.cpp" ],
+ "qsymm16": [ "src/cpu/kernels/activation/sve/qsymm16.cpp" ]
}
}
},
@@ -874,18 +874,16 @@
"common": [
"src/cpu/operators/CpuAdd.cpp",
"src/cpu/kernels/CpuAddKernel.cpp",
- "src/runtime/NEON/functions/NEArithmeticAddition.cpp"
+ "src/runtime/NEON/functions/NEArithmeticAddition.cpp",
+ "src/cpu/kernels/add/neon/qasymm8.cpp",
+ "src/cpu/kernels/add/neon/qasymm8_signed.cpp",
+ "src/cpu/kernels/add/neon/qsymm16.cpp"
],
- "neon": {
- "qasymm8": [ "src/cpu/kernels/add/neon/qasymm8.cpp" ],
- "qasymm8_signed": [ "src/cpu/kernels/add/neon/qasymm8_signed.cpp" ],
- "qsymm16": [ "src/cpu/kernels/add/neon/qsymm16.cpp" ]
- },
"sve": {
"common": [ "src/cpu/kernels/add/sve/impl.cpp" ],
- "qasymm8": [ "src/cpu/kernels/add/neon/qasymm8.cpp", "src/cpu/kernels/add/sve/qasymm8.cpp" ],
- "qasymm8_signed": [ "src/cpu/kernels/add/neon/qasymm8_signed.cpp", "src/cpu/kernels/add/sve/qasymm8_signed.cpp" ],
- "qsymm16": [ "src/cpu/kernels/add/neon/qsymm16.cpp", "src/cpu/kernels/add/sve/qsymm16.cpp" ]
+ "qasymm8": [ "src/cpu/kernels/add/sve/qasymm8.cpp" ],
+ "qasymm8_signed": [ "src/cpu/kernels/add/sve/qasymm8_signed.cpp" ],
+ "qsymm16": [ "src/cpu/kernels/add/sve/qsymm16.cpp" ]
}
}
},
@@ -1103,68 +1101,62 @@
"src/core/NEON/kernels/arm_conv/depthwise/depthwise_u8q.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/depthwise_u8s8u8q.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/interleaves/8b_mla.cpp",
- "src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp"
+ "src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/depthwise_s8q.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp32.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_generic_output9_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_generic_output9_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_generic_output9_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_generic_output9_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_generic_output9_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/interleaves/a64_s8q_3x3_dot.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/interleaves/a64_u8q_3x3_dot.cpp"
],
- "neon": {
- "estate64": [
- "src/core/NEON/kernels/arm_conv/depthwise/depthwise_s8q.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp32.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/interleaves/a64_s8q_3x3_dot.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/interleaves/a64_u8q_3x3_dot.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_generic_output9_mla_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_generic_output9_mla_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_generic_output9_mla_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_generic_output9_mla_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_generic_output9_mla_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp"
- ]
- },
"sve": {
"common": [
- "src/core/NEON/kernels/arm_conv/depthwise/depthwise_s8q.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp32.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/interleaves/sve_8b_mla.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/interleaves/sve_s8q_3x3_dot.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/interleaves/sve_u8q_3x3_dot.cpp",
@@ -1209,57 +1201,7 @@
"src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/interleaves/a64_s8q_3x3_dot.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/interleaves/a64_u8q_3x3_dot.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_generic_output9_mla_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_generic_output9_mla_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_generic_output9_mla_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_generic_output9_mla_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_generic_output9_mla_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp"
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp"
]
}
}
@@ -1316,6 +1258,7 @@
}
},
"FFT1D": {
+ "deps": [ "Reduction" ],
"files": {
"common": [
"src/core/NEON/kernels/NEFFTDigitReverseKernel.cpp",
@@ -1385,6 +1328,7 @@
}
},
"Gemm": {
+ "deps": [ "Quantize", "Add"],
"files": {
"common": [
"src/cpu/kernels/CpuConvertQuantizedSignednessKernel.cpp",
@@ -1422,7 +1366,61 @@
"src/core/NEON/kernels/arm_gemm/transform.cpp",
"src/runtime/NEON/functions/NEGEMM.cpp",
"src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp",
- "src/runtime/NEON/functions/NEGEMMLowpOutputStage.cpp"
+ "src/runtime/NEON/functions/NEGEMMLowpOutputStage.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s16_8x12/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_4x4/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12/a55r1.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12/x1.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u16_8x12/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_4x4/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/a55r1.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/x1.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/a55r1.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/x1.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16_mla_6x32/a55.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16_mla_6x32/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_4x24/a55.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_4x24/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_6x16/a55.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_6x16/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_8x4/a55.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_8x4/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_dot_4x16/a55.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_dot_4x16/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_mmla_4x16/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_dot_6x16/a55.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_dot_6x16/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_mmla_6x16/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_dot_6x16/a55.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_dot_6x16/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_mmla_6x16/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_dot_4x16/a55.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_dot_4x16/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_mmla_4x16/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_dot_6x16/a55.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_dot_6x16/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_mmla_6x16/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_s8s32_mmla_8x12/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_u8u32_mmla_8x12/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a53.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a55.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a55r1.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/x1.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x6/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_fp32_mla_6x4/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_fp32_mla_8x4/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_6x4/a55.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_6x4/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_8x4/a55.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_8x4/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_6x4/a55.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_6x4/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_8x4/a55.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_8x4/generic.cpp"
],
"neon": {
"estate32": [
@@ -1431,68 +1429,14 @@
"src/core/NEON/kernels/arm_gemm/kernels/a32_sgemm_8x6/generic.cpp"
],
"estate64": [
- "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s16_8x12/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_4x4/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12/a55r1.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12/x1.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u16_8x12/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_4x4/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/a55r1.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/x1.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/a55r1.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/x1.cpp",
"src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_bf16fp32_dot_6x16/generic.cpp",
"src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_bf16fp32_mmla_6x16/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16_mla_6x32/a55.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16_mla_6x32/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_4x24/a55.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_4x24/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_6x16/a55.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_6x16/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_8x4/a55.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_8x4/generic.cpp",
"src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32bf16fp32_mmla_4x24/generic.cpp",
"src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32bf16fp32_mmla_6x16/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_dot_4x16/a55.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_dot_4x16/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_mmla_4x16/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_dot_6x16/a55.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_dot_6x16/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_mmla_6x16/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_dot_6x16/a55.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_dot_6x16/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_mmla_6x16/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_dot_4x16/a55.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_dot_4x16/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_mmla_4x16/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_dot_6x16/a55.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_dot_6x16/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_mmla_6x16/generic.cpp",
"src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_dot_8x12/generic.cpp",
"src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_dot_8x12/x1.cpp",
"src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_mmla_8x12/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_s8s32_mmla_8x12/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_u8u32_mmla_8x12/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a53.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a55.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a55r1.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/x1.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x6/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemv_pretransposed/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_fp32_mla_6x4/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_fp32_mla_8x4/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_6x4/a55.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_6x4/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_8x4/a55.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_8x4/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_6x4/a55.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_6x4/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_8x4/a55.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_8x4/generic.cpp"
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemv_pretransposed/generic.cpp"
]
},
"sve": {
@@ -1536,69 +1480,7 @@
"src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_s8s32_dot_8x1VL/generic.cpp",
"src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_u8u32_dot_8x1VL/generic.cpp",
"src/core/NEON/kernels/arm_gemm/mergeresults-sve.cpp",
- "src/core/NEON/kernels/arm_gemm/transform-sve.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s16_8x12/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_4x4/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12/a55r1.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12/x1.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u16_8x12/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_4x4/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/a55r1.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/x1.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/a55r1.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/x1.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_bf16fp32_dot_6x16/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_bf16fp32_mmla_6x16/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16_mla_6x32/a55.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16_mla_6x32/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_4x24/a55.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_4x24/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_6x16/a55.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_6x16/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_8x4/a55.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_8x4/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32bf16fp32_mmla_4x24/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32bf16fp32_mmla_6x16/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_dot_4x16/a55.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_dot_4x16/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_mmla_4x16/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_dot_6x16/a55.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_dot_6x16/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_mmla_6x16/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_dot_6x16/a55.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_dot_6x16/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_mmla_6x16/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_dot_4x16/a55.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_dot_4x16/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_mmla_4x16/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_dot_6x16/a55.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_dot_6x16/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_mmla_6x16/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_dot_8x12/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_dot_8x12/x1.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_mmla_8x12/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_s8s32_mmla_8x12/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_u8u32_mmla_8x12/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a53.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a55.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a55r1.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/x1.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x6/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemv_pretransposed/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_fp32_mla_6x4/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_fp32_mla_8x4/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_6x4/a55.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_6x4/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_8x4/a55.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_8x4/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_6x4/a55.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_6x4/generic.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_8x4/a55.cpp",
- "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_8x4/generic.cpp"
+ "src/core/NEON/kernels/arm_gemm/transform-sve.cpp"
]
}
}
@@ -1735,38 +1617,34 @@
"src/core/NEON/kernels/arm_conv/pooling/pooling_s8q.cpp",
"src/core/NEON/kernels/arm_conv/pooling/pooling_u8.cpp",
"src/core/NEON/kernels/arm_conv/pooling/pooling_u8q.cpp",
- "src/runtime/NEON/functions/NEPoolingLayer.cpp"
+ "src/runtime/NEON/functions/NEPoolingLayer.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_avg_generic_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_max_generic_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_avg_generic_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_max_generic_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_avg_generic_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_max_generic_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8q_nhwc_avg_generic_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8q_nhwc_max_generic_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_avg_generic_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_max_generic_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8q_nhwc_avg_generic_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8q_nhwc_max_generic_depthfirst/generic.cpp",
+ "src/cpu/kernels/pool2d/neon/qasymm8.cpp",
+ "src/cpu/kernels/pool2d/neon/qasymm8_signed.cpp"
],
"neon": {
"nchw": [ "src/cpu/kernels/pool2d/neon/nchw/all.cpp" ],
"fp16": [ "src/cpu/kernels/pool2d/neon/fp16.cpp" ],
- "fp32": [ "src/cpu/kernels/pool2d/neon/fp32.cpp" ],
- "qasymm8": [ "src/cpu/kernels/pool2d/neon/qasymm8.cpp" ],
- "qasymm8_signed": [ "src/cpu/kernels/pool2d/neon/qasymm8_signed.cpp" ],
- "estate64": [
- "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_avg_generic_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_max_generic_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_avg_generic_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_max_generic_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_avg_generic_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_max_generic_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8q_nhwc_avg_generic_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8q_nhwc_max_generic_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_avg_generic_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_max_generic_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8q_nhwc_avg_generic_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8q_nhwc_max_generic_depthfirst/generic.cpp"
- ]
+ "fp32": [ "src/cpu/kernels/pool2d/neon/fp32.cpp" ]
},
"sve": {
- "qasymm8": [ "src/cpu/kernels/pool2d/neon/qasymm8.cpp" ],
- "qasymm8_signed": [ "src/cpu/kernels/pool2d/neon/qasymm8_signed.cpp" ],
"common": [
"src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp",
"src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_avg_generic_depthfirst/generic.cpp",
@@ -1785,25 +1663,7 @@
"src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp",
"src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_max_generic_depthfirst/generic.cpp",
"src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_avg_generic_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_max_generic_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_avg_generic_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_max_generic_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_avg_generic_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_max_generic_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_avg_generic_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_max_generic_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8q_nhwc_avg_generic_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8q_nhwc_max_generic_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_avg_generic_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_max_generic_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8q_nhwc_avg_generic_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8q_nhwc_max_generic_depthfirst/generic.cpp"
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_max_generic_depthfirst/generic.cpp"
]
}
}
@@ -2002,18 +1862,11 @@
"common": [
"src/cpu/operators/CpuSub.cpp",
"src/cpu/kernels/CpuSubKernel.cpp",
- "src/runtime/NEON/functions/NEArithmeticSubtraction.cpp"
- ],
- "sve": {
- "qasymm8": [ "src/cpu/kernels/sub/neon/qasymm8.cpp" ],
- "qasymm8_signed": [ "src/cpu/kernels/sub/neon/qasymm8_signed.cpp" ],
- "qsymm16": [ "src/cpu/kernels/sub/neon/qsymm16.cpp" ]
- },
- "neon": {
- "qasymm8": [ "src/cpu/kernels/sub/neon/qasymm8.cpp" ],
- "qasymm8_signed": [ "src/cpu/kernels/sub/neon/qasymm8_signed.cpp" ],
- "qsymm16": [ "src/cpu/kernels/sub/neon/qsymm16.cpp" ]
- }
+ "src/runtime/NEON/functions/NEArithmeticSubtraction.cpp",
+ "src/cpu/kernels/sub/neon/qasymm8.cpp",
+ "src/cpu/kernels/sub/neon/qasymm8_signed.cpp",
+ "src/cpu/kernels/sub/neon/qsymm16.cpp"
+ ]
}
},
"Tile": {
diff --git a/python/scripts/report-model-ops/report_model_ops.py b/python/scripts/report-model-ops/report_model_ops.py
index 3888b801e6..1549005da5 100644..100755
--- a/python/scripts/report-model-ops/report_model_ops.py
+++ b/python/scripts/report-model-ops/report_model_ops.py
@@ -31,20 +31,20 @@ import tflite
sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../")
from utils.model_identification import identify_model_type
-from utils.tflite_helpers import tflite_op2acl, tflite_typecode2name
+from utils.tflite_helpers import tflite_op2acl, tflite_typecode2name, tflite_typecode2aclname
SUPPORTED_MODEL_TYPES = ["tflite"]
logger = logging.getLogger("report_model_ops")
-def get_ops_from_tflite_graph(model):
+def get_ops_types_from_tflite_graph(model):
"""
- Helper function that extract operator related meta-data from a TfLite model
+ Helper function that extract operator related meta-data from a TFLite model
Parameters
----------
model: str
- Respective TfLite model to analyse
+ Respective TFLite model to analyse
Returns
----------
@@ -52,7 +52,7 @@ def get_ops_from_tflite_graph(model):
A tuple with the sets of unique operator types and data-types that are present in the model
"""
- logger.debug(f"Analysing TfLite mode '{model}'!")
+ logger.debug(f"Analysing TFLite mode '{model}'!")
with open(model, "rb") as f:
buf = f.read()
@@ -63,11 +63,16 @@ def get_ops_from_tflite_graph(model):
unique_ops = {tflite.opcode2name(model.OperatorCodes(op_id).BuiltinCode()) for op_id in range(0, nr_unique_ops)}
# Extract IO data-types
- data_types = set()
+ supported_data_types = set()
+ unsupported_data_types = set()
for subgraph_id in range(0, model.SubgraphsLength()):
subgraph = model.Subgraphs(subgraph_id)
for tensor_id in range(0, subgraph.TensorsLength()):
- data_types.add(tflite_typecode2name(subgraph.Tensors(tensor_id).Type()))
+ try:
+ supported_data_types.add(tflite_typecode2aclname(subgraph.Tensors(tensor_id).Type()))
+ except ValueError:
+ unsupported_data_types.add(tflite_typecode2name(subgraph.Tensors(tensor_id).Type()))
+ logger.warning(f"Data type {tflite_typecode2name(subgraph.Tensors(tensor_id).Type())} is not supported by ComputeLibrary")
# Perform mapping between TfLite ops to ComputeLibrary ones
supported_ops = set()
@@ -75,17 +80,17 @@ def get_ops_from_tflite_graph(model):
for top in unique_ops:
try:
supported_ops.add(tflite_op2acl(top))
- except:
+ except ValueError:
unsupported_ops.add(top)
- logger.warning(f"Operator {top} has not ComputeLibrary mapping")
+ logger.warning(f"Operator {top} does not have ComputeLibrary mapping")
- return (supported_ops, unsupported_ops, data_types)
+ return (supported_ops, unsupported_ops, supported_data_types, unsupported_data_types)
def extract_model_meta(model, model_type):
"""
Function that calls the appropriate model parser to extract model related meta-data
- Supported parsers: TfLite
+ Supported parsers: TFLite
Parameters
----------
@@ -101,13 +106,13 @@ def extract_model_meta(model, model_type):
"""
if model_type == "tflite":
- return get_ops_from_tflite_graph(model)
+ return get_ops_types_from_tflite_graph(model)
else:
logger.warning(f"Model type '{model_type}' is unsupported!")
return ()
-def generate_build_config(ops, data_types):
+def generate_build_config(ops, data_types, data_layouts):
"""
Function that generates a compatible ComputeLibrary operator-based build configuration
@@ -117,6 +122,8 @@ def generate_build_config(ops, data_types):
Set with the operators to add in the build configuration
data_types:
Set with the data types to add in the build configuration
+ data_layouts:
+ Set with the data layouts to add in the build configuration
Returns
----------
@@ -126,6 +133,7 @@ def generate_build_config(ops, data_types):
config_data = {}
config_data["operators"] = list(ops)
config_data["data_types"] = list(data_types)
+ config_data["data_layouts"] = list(data_layouts)
return config_data
@@ -134,7 +142,7 @@ if __name__ == "__main__":
parser = ArgumentParser(
description="""Report map of operations in a list of models.
The script consumes deep learning models and reports the type of operations and data-types used
- Supported model types: TfLite """
+ Supported model types: TFLite """
)
parser.add_argument(
@@ -163,26 +171,35 @@ if __name__ == "__main__":
# Extract operator mapping
final_supported_ops = set()
final_unsupported_ops = set()
- final_dts = set()
+ final_supported_dts = set()
+ final_unsupported_dts = set()
+ final_layouts = {"nhwc"} # Data layout for TFLite is always NHWC
for model in args.models:
logger.debug(f"Starting analyzing {model} model")
model_type = identify_model_type(model)
- supported_model_ops, unsupported_mode_ops, model_dts = extract_model_meta(model, model_type)
+ supported_model_ops, unsupported_mode_ops, supported_model_dts, unsupported_model_dts = extract_model_meta(model, model_type)
final_supported_ops.update(supported_model_ops)
final_unsupported_ops.update(unsupported_mode_ops)
- final_dts.update(model_dts)
+ final_supported_dts.update(supported_model_dts)
+ final_unsupported_dts.update(unsupported_model_dts)
logger.info("=== Supported Operators")
logger.info(final_supported_ops)
- logger.info("=== Unsupported Operators")
- logger.info(final_unsupported_ops)
+ if(len(final_unsupported_ops)):
+ logger.info("=== Unsupported Operators")
+ logger.info(final_unsupported_ops)
logger.info("=== Data Types")
- logger.info(final_dts)
-
- # Generate json file
+ logger.info(final_supported_dts)
+ if(len(final_unsupported_dts)):
+ logger.info("=== Unsupported Data Types")
+ logger.info(final_unsupported_dts)
+ logger.info("=== Data Layouts")
+ logger.info(final_layouts)
+
+ # Generate JSON file
if args.config:
logger.debug("Generating JSON build configuration file")
- config_data = generate_build_config(final_supported_ops, final_dts)
+ config_data = generate_build_config(final_supported_ops, final_supported_dts, final_layouts)
with open(args.config, "w") as f:
json.dump(config_data, f)
diff --git a/python/scripts/utils/model_identification.py b/python/scripts/utils/model_identification.py
index 43e7d20f61..84a6e1a097 100644
--- a/python/scripts/utils/model_identification.py
+++ b/python/scripts/utils/model_identification.py
@@ -24,7 +24,7 @@ import os
def is_tflite_model(model_path):
- """Check if a model is of TfLite type
+ """Check if a model is of TFLite type
Parameters:
----------
@@ -34,7 +34,7 @@ def is_tflite_model(model_path):
Returns
----------
bool:
- True if given path is a valid TfLite model
+ True if given path is a valid TFLite model
"""
try:
diff --git a/python/scripts/utils/tflite_helpers.py b/python/scripts/utils/tflite_helpers.py
index 8f8d422743..c2aeaac6a7 100644
--- a/python/scripts/utils/tflite_helpers.py
+++ b/python/scripts/utils/tflite_helpers.py
@@ -20,6 +20,19 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
+_TFLITE_TYPECODE2ACLNAME = {
+ 0: "fp32", # Float32
+ 1: "fp16", # Float16
+ 2: "integer", # Int32
+ 3: "qasymm8", # Uint8
+ # 4: "Unsupported", # Int64
+ # 5: "Unsupported", # String
+ 6: "integer", # Bool
+ 7: "qsymm16", # Int16
+ # 8: "Unsupported", # Complex64
+ 9: "qasymm8_signed", # Int8
+}
+
_TFLITE_TYPECODE2NAME = {
0: "Float32",
1: "Float16",
@@ -182,13 +195,36 @@ _TFLITE_TO_ACL = {
}
+def tflite_typecode2aclname(toc):
+ """Stringify TFLite data-type opcodes to ACL versions
+
+ Parameters:
+ ----------
+ toc: int
+ TFLite type opcode
+
+ Returns
+ ----------
+ str
+ Stringified opcode
+
+ Raises
+ ------
+ ValueError
+ If opcode does not exist in the map
+ """
+ if toc in _TFLITE_TYPECODE2ACLNAME:
+ return _TFLITE_TYPECODE2ACLNAME[toc]
+ else:
+ raise ValueError("Unknown ACL typecode %d" % toc)
+
def tflite_typecode2name(toc):
- """Stringify TfLite data-type opcodes
+ """Stringify TFLite data-type opcodes
Parameters:
----------
toc: int
- TfLite type opcode
+ TFLite type opcode
Returns
----------
@@ -207,12 +243,12 @@ def tflite_typecode2name(toc):
def tflite_op2acl(top):
- """Map TfLite operators to ComputeLibrary ones
+ """Map TFLite operators to ComputeLibrary ones
Parameters:
----------
top: str
- TfLite operator name
+ TFLite operator name
Returns
----------
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp32.cpp b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp32.cpp
index 7a26ba4230..5107ddacbc 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp32.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp32.cpp
@@ -72,6 +72,7 @@ namespace
);
}
+#if defined(__aarch64__)
unsigned int not_preferred(const DepthwiseArgs &, const Nothing &)
{
return std::numeric_limits<unsigned int>::max();
@@ -81,6 +82,7 @@ namespace
{
return args.channel_multiplier > 1 ? 0 : std::numeric_limits<unsigned int>::max();
}
+#endif // defined(__aarch64__)
}
static const DepthwiseImplementation<float, float> depthwise_fp32_methods[] = {
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_s8q.cpp b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_s8q.cpp
index 1c4c7576f5..46a31185d7 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_s8q.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_s8q.cpp
@@ -62,11 +62,13 @@ namespace depthwise {
namespace
{
+#if defined(__aarch64__)
bool qp_weights_are_symmetric(const DepthwiseArgs &, const void *_qp)
{
const auto qp = static_cast<const arm_gemm::Requantize32 *>(_qp);
return qp->b_offset == 0;
}
+#endif // defined(__aarch64__)
}
static const DepthwiseImplementation<int8_t, int8_t, int8_t, Requantize32> depthwise_s8q_methods[] = {
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_generic_output9_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_generic_output9_mla_depthfirst/generic.cpp
index e8e817e9cc..c0b87ada75 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_generic_output9_mla_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_generic_output9_mla_depthfirst/generic.cpp
@@ -22,6 +22,7 @@
* SOFTWARE.
*/
+#if defined(__aarch64__)
#include <cstddef>
#include <cstdint>
@@ -377,3 +378,4 @@ void a64_fp32_nhwc_generic_output9_mla_depthfirst_impl(
} // namespace depthwise
} // namespace arm_conv
+#endif // defined(__aarch64__)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst/generic.cpp
index 5e334ec7b8..04a7abd3bd 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst/generic.cpp
@@ -22,6 +22,7 @@
* SOFTWARE.
*/
+#if defined(__aarch64__)
#include <cstddef>
#include <cstdint>
@@ -530,3 +531,4 @@ void a64_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst_imp
} // namespace depthwise
} // namespace arm_conv
+#endif // defined(__aarch64__)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst/generic.cpp
index 6e9e97fa29..67fc09b2ee 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst/generic.cpp
@@ -22,6 +22,8 @@
* SOFTWARE.
*/
+#if defined(__aarch64__)
+
#include <cstddef>
#include <cstdint>
@@ -914,3 +916,4 @@ void a64_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst_imp
} // namespace depthwise
} // namespace arm_conv
+#endif // defined(__aarch64__)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp
index c93037d183..46210e2964 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp
@@ -22,6 +22,7 @@
* SOFTWARE.
*/
+#if defined(__aarch64__)
#include <cstddef>
#include <cstdint>
@@ -849,3 +850,4 @@ void a64_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst_im
} // namespace depthwise
} // namespace arm_conv
+#endif // defined(__aarch64__)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_generic_output9_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_generic_output9_mla_depthfirst/generic.cpp
index ad5545a304..78f748ad58 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_generic_output9_mla_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_generic_output9_mla_depthfirst/generic.cpp
@@ -22,6 +22,7 @@
* SOFTWARE.
*/
+#if defined(__aarch64__)
#include "arm_gemm.hpp"
#include <cstddef>
@@ -622,3 +623,4 @@ void a64_s8q_nhwc_generic_output9_mla_depthfirst_impl(
} // namespace depthwise
} // namespace arm_conv
+#endif // defined(__aarch64__)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp
index 2fb6d3538f..cbe3d2cd1c 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp
@@ -22,6 +22,7 @@
* SOFTWARE.
*/
+#if defined(__aarch64__)
#include "arm_gemm.hpp"
#include <cstddef>
@@ -525,3 +526,4 @@ void a64_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst_impl
} // namespace depthwise
} // namespace arm_conv
+#endif // defined(__aarch64__)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp
index 95ad78cf6c..b198eff6ac 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp
@@ -22,6 +22,7 @@
* SOFTWARE.
*/
+#if defined(__aarch64__)
#include "arm_gemm.hpp"
#include <cstddef>
@@ -660,3 +661,4 @@ void a64_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst_impl
} // namespace depthwise
} // namespace arm_conv
+#endif // defined(__aarch64__)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp
index c0acd8805e..bbfa9f439f 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp
@@ -22,6 +22,7 @@
* SOFTWARE.
*/
+#if defined(__aarch64__)
#include "arm_gemm.hpp"
#include <cstddef>
@@ -1482,3 +1483,4 @@ void a64_s8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst_imp
} // namespace depthwise
} // namespace arm_conv
+#endif // defined(__aarch64__)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_generic_output9_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_generic_output9_mla_depthfirst/generic.cpp
index 42d9b2f408..9cebfe8f03 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_generic_output9_mla_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_generic_output9_mla_depthfirst/generic.cpp
@@ -22,6 +22,7 @@
* SOFTWARE.
*/
+#if defined(__aarch64__)
#include "arm_gemm.hpp"
#include <cstddef>
@@ -622,3 +623,4 @@ void a64_u8q_nhwc_generic_output9_mla_depthfirst_impl(
} // namespace depthwise
} // namespace arm_conv
+#endif // defined(__aarch64__)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp
index 2106cf7086..057b1ef492 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp
@@ -22,6 +22,7 @@
* SOFTWARE.
*/
+#if defined(__aarch64__)
#include "arm_gemm.hpp"
#include <cstddef>
@@ -525,3 +526,4 @@ void a64_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst_impl
} // namespace depthwise
} // namespace arm_conv
+#endif // defined(__aarch64__)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp
index 8bcd682e3c..40242e9718 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp
@@ -22,6 +22,7 @@
* SOFTWARE.
*/
+#if defined(__aarch64__)
#include "arm_gemm.hpp"
#include <cstddef>
@@ -660,3 +661,4 @@ void a64_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst_impl
} // namespace depthwise
} // namespace arm_conv
+#endif // defined(__aarch64__)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp
index ada1818eba..e896304c59 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp
@@ -22,6 +22,7 @@
* SOFTWARE.
*/
+#if defined(__aarch64__)
#include "arm_gemm.hpp"
#include <cstddef>
@@ -1482,3 +1483,4 @@ void a64_u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst_imp
} // namespace depthwise
} // namespace arm_conv
+#endif // defined(__aarch64__)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_generic_output9_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_generic_output9_mla_depthfirst/generic.cpp
index 1633639ad5..08a2b7a98e 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_generic_output9_mla_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_generic_output9_mla_depthfirst/generic.cpp
@@ -22,6 +22,7 @@
* SOFTWARE.
*/
+#if defined(__aarch64__)
#include "arm_gemm.hpp"
#include <cstddef>
@@ -622,3 +623,4 @@ void a64_u8s8u8q_nhwc_generic_output9_mla_depthfirst_impl(
} // namespace depthwise
} // namespace arm_conv
+#endif // defined(__aarch64__)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp
index 152999dd1a..09b274056f 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp
@@ -22,6 +22,7 @@
* SOFTWARE.
*/
+#if defined(__aarch64__)
#include "arm_gemm.hpp"
#include <cstddef>
@@ -1482,3 +1483,4 @@ void a64_u8s8u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst
} // namespace depthwise
} // namespace arm_conv
+#endif // defined(__aarch64__)
diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp
index ff8d7d8ba1..71a8c7496a 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp
@@ -22,6 +22,7 @@
* SOFTWARE.
*/
+#if defined(__aarch64__)
#include <algorithm>
#include <cstddef>
#include <cstdint>
@@ -249,3 +250,4 @@ void a64_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst_impl(
} // namespace pooling
} // namespace arm_conv
+#endif // defined(__aarch64__)
diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp
index ea7e2195d1..a924c9a7a6 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp
@@ -22,6 +22,7 @@
* SOFTWARE.
*/
+#if defined(__aarch64__)
#include <cstddef>
#include <cstdint>
@@ -172,3 +173,4 @@ void a64_fp32_nhwc_max_2x2_s1_output2x2_depthfirst_impl(
} // namespace pooling
} // namespace arm_conv
+#endif // defined(__aarch64__)
diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp
index 298db96861..e344e14e34 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp
@@ -22,6 +22,7 @@
* SOFTWARE.
*/
+#if defined(__aarch64__)
#include <cstddef>
#include <cstdint>
@@ -172,3 +173,4 @@ void a64_s8_nhwc_max_2x2_s1_output2x2_depthfirst_impl(
} // namespace pooling
} // namespace arm_conv
+#endif // defined(__aarch64__)
diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp
index 02c43ccaba..9d379d183e 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp
@@ -22,6 +22,7 @@
* SOFTWARE.
*/
+#if defined(__aarch64__)
#include <cstddef>
#include <cstdint>
@@ -172,3 +173,4 @@ void a64_u8_nhwc_max_2x2_s1_output2x2_depthfirst_impl(
} // namespace pooling
} // namespace arm_conv
+#endif // defined(__aarch64__)