From 487d390b94ee93e1d89f5066ef8ca9442ab0a590 Mon Sep 17 00:00:00 2001 From: Freddie Liardet Date: Tue, 21 Sep 2021 12:36:43 +0100 Subject: Add user provided JSON operator list build Allow ACL to be built via a user provided JSON file containing operators, data types and data layouts. Modify TFLite file to JSON file script to output data layouts. Fix build issue with "fat_binary" and "high_priority" options. Resolves: COMPMID-4697, COMPMID-4837 Signed-off-by: Freddie Liardet Change-Id: I08d494151c98f804325707ffd922ffe216813023 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6427 Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins Reviewed-by: Gunes Bayir --- SConscript | 95 +++-- SConstruct | 82 +++- filelist.json | 459 +++++++-------------- .../scripts/report-model-ops/report_model_ops.py | 63 +-- python/scripts/utils/model_identification.py | 4 +- python/scripts/utils/tflite_helpers.py | 44 +- .../kernels/arm_conv/depthwise/depthwise_fp32.cpp | 2 + .../kernels/arm_conv/depthwise/depthwise_s8q.cpp | 2 + .../generic.cpp | 2 + .../generic.cpp | 2 + .../generic.cpp | 3 + .../generic.cpp | 2 + .../generic.cpp | 2 + .../generic.cpp | 2 + .../generic.cpp | 2 + .../generic.cpp | 2 + .../generic.cpp | 2 + .../generic.cpp | 2 + .../generic.cpp | 2 + .../generic.cpp | 2 + .../generic.cpp | 2 + .../generic.cpp | 2 + .../generic.cpp | 2 + .../generic.cpp | 2 + .../generic.cpp | 2 + .../generic.cpp | 2 + 26 files changed, 399 insertions(+), 389 deletions(-) mode change 100644 => 100755 python/scripts/report-model-ops/report_model_ops.py diff --git a/SConscript b/SConscript index 6672caee9f..bcb93fde62 100644 --- a/SConscript +++ b/SConscript @@ -157,31 +157,28 @@ def create_version_file(target, source, env): fd.write(build_info) -def get_attrs_list(arch, estate, data_types, data_layouts): +def get_attrs_list(env, data_types, data_layouts): attrs = [] # Manage data-types - if any(i in data_types for i in ['all']): + if 'all' in data_types: attrs += ['fp16', 
'fp32', 'integer', 'qasymm8', 'qasymm8_signed', 'qsymm16'] else: - if any(i in data_types for i in ['fp16']): attrs += ['fp16'] - if any(i in data_types for i in ['fp32']): attrs += ['fp32'] - if any(i in data_types for i in ['integer']): attrs += ['integer'] - if any(i in data_types for i in ['qasymm8']): attrs += ['qasymm8'] - if any(i in data_types for i in ['qasymm8_signed']): attrs += ['qasymm8_signed'] - if any(i in data_types for i in ['qsymm16']): attrs += ['qsymm16'] - + if 'fp16' in data_types: attrs += ['fp16'] + if 'fp32' in data_types: attrs += ['fp32'] + if 'integer' in data_types: attrs += ['integer'] + if 'qasymm8' in data_types: attrs += ['qasymm8'] + if 'qasymm8_signed' in data_types: attrs += ['qasymm8_signed'] + if 'qsymm16' in data_types: attrs += ['qsymm16'] # Manage data-layouts - if any(i in data_layouts for i in ['all']): + if 'all' in data_layouts: attrs += ['nhwc', 'nchw'] else: - if any(i in data_layouts for i in ['nhwc']): attrs += ['nhwc'] - if any(i in data_layouts for i in ['nchw']): attrs += ['nchw'] + if 'nhwc' in data_layouts: attrs += ['nhwc'] + if 'nchw' in data_layouts: attrs += ['nchw'] # Manage execution state - estate_attr = 'estate32' if (estate == 'auto' and 'v7a' in arch) or '32' in estate else 'estate64' - attrs += [ estate_attr ] - + attrs += ['estate32' if (env['estate'] == 'auto' and 'v7a' in env['arch']) or '32' in env['estate'] else 'estate64'] return attrs @@ -237,6 +234,27 @@ def resolve_operator_dependencies(filelist, operators, backend=''): return resolved_operators +def read_build_config_json(build_config): + build_config_contents = {} + custom_operators = [] + custom_types = [] + custom_layouts = [] + if os.path.isfile(build_config): + with open(build_config) as f: + try: + build_config_contents = json.load(f) + except: + print("Warning: Build configuration file is of invalid JSON format!") + else: + try: + build_config_contents = json.loads(build_config) + except: + print("Warning: Build configuration string 
is of invalid JSON format!") + if build_config_contents: + custom_operators = build_config_contents.get("operators", []) + custom_types = build_config_contents.get("data_types", []) + custom_layouts = build_config_contents.get("data_layouts", []) + return custom_operators, custom_types, custom_layouts arm_compute_env = env.Clone() version_file = arm_compute_env.Command("src/core/arm_compute_version.embed", "", action=create_version_file) @@ -427,30 +445,25 @@ graph_files = Glob('src/graph/*.cpp') graph_files += Glob('src/graph/*/*.cpp') # Specify user-defined priority operators -use_priority_ops = env['high_priority'] -priority_operators = filelist['high_priority'] -if env['build_config'] != "": - build_config = env['build_config'] - build_config_contents = {} - if os.path.isfile(build_config): - with open(build_config) as f: - try: - build_config_contents = json.load(f) - except: - print("Warning: Build configuration file is of invalid JSON format!") - else: - try: - build_config_contents = json.loads(build_config) - except: - print("Warning: Build configuration string is of invalid JSON format!") - if build_config_contents: - priority_operators = build_config_contents.get("operators", []) +custom_operators = [] +custom_types = [] +custom_layouts = [] + +use_custom_ops = env['high_priority'] or env['build_config']; + +if env['high_priority']: + custom_operators = filelist['high_priority'] + custom_types = ['all'] + custom_layouts = ['all'] + +if env['build_config']: + custom_operators, custom_types, custom_layouts = read_build_config_json(env['build_config']) if env['opencl']: lib_files += filelist['c_api']['gpu'] lib_files += filelist['gpu']['common'] - cl_operators = priority_operators if use_priority_ops else filelist['gpu']['operators'].keys() + cl_operators = custom_operators if use_custom_ops else filelist['gpu']['operators'].keys() cl_ops_to_build = resolve_operator_dependencies(filelist, cl_operators, 'gpu') lib_files += 
get_operator_backend_files(filelist, cl_ops_to_build, 'gpu')['common'] @@ -475,11 +488,15 @@ if env['neon']: if 'sve' not in env['arch'] or env['fat_binary']: simd += ['neon'] # Get attributes - attrs = get_attrs_list(env['arch'], env['estate'], env['data_type_support'], env['data_layout_support']) + if(use_custom_ops): + attrs = get_attrs_list(env, custom_types, custom_layouts) + else: + attrs = get_attrs_list(env, env['data_type_support'], env['data_layout_support']) # Setup data-type and data-layout files to include - cpu_operators = priority_operators if use_priority_ops else filelist['cpu']['operators'].keys() - cpu_ops_to_build = resolve_operator_dependencies(filelist, filelist['cpu']['operators'], 'cpu') + cpu_operators = custom_operators if use_custom_ops else filelist['cpu']['operators'].keys() + cpu_ops_to_build = resolve_operator_dependencies(filelist, cpu_operators, 'cpu') + cpu_files = get_operator_backend_files(filelist, cpu_ops_to_build, 'cpu', simd, attrs) lib_files += cpu_files.get('common', []) lib_files += cpu_files.get('neon', []) @@ -488,8 +505,8 @@ if env['neon']: graph_files += Glob('src/graph/backends/NEON/*.cpp') # Restrict from building graph API if a reduced operator list has been provided -if use_priority_ops: - print("Graph library requires all operators to be built") +if use_custom_ops: + print("WARNING: Graph library requires all operators to be built") graph_files = [] # Build bootcode in case of bare-metal diff --git a/SConstruct b/SConstruct index 7591075cd1..400228c71a 100644 --- a/SConstruct +++ b/SConstruct @@ -41,6 +41,50 @@ def version_at_least(version, required): return True +def read_build_config_json(build_config): + build_config_contents = {} + custom_types = [] + custom_layouts = [] + if os.path.isfile(build_config): + with open(build_config) as f: + try: + build_config_contents = json.load(f) + except: + print("Warning: Build configuration file is of invalid JSON format!") + else: + try: + build_config_contents = 
json.loads(build_config) + except: + print("Warning: Build configuration string is of invalid JSON format!") + if build_config_contents: + custom_types = build_config_contents.get("data_types", []) + custom_layouts = build_config_contents.get("data_layouts", []) + return custom_types, custom_layouts + +def update_data_type_layout_flags(env, data_types, data_layouts): + # Manage data-types + if any(i in data_types for i in ['all', 'fp16']): + env.Append(CXXFLAGS = ['-DENABLE_FP16_KERNELS']) + if any(i in data_types for i in ['all', 'fp32']): + env.Append(CXXFLAGS = ['-DENABLE_FP32_KERNELS']) + if any(i in data_types for i in ['all', 'qasymm8']): + env.Append(CXXFLAGS = ['-DENABLE_QASYMM8_KERNELS']) + if any(i in data_types for i in ['all', 'qasymm8_signed']): + env.Append(CXXFLAGS = ['-DENABLE_QASYMM8_SIGNED_KERNELS']) + if any(i in data_types for i in ['all', 'qsymm16']): + env.Append(CXXFLAGS = ['-DENABLE_QSYMM16_KERNELS']) + if any(i in data_types for i in ['all', 'integer']): + env.Append(CXXFLAGS = ['-DENABLE_INTEGER_KERNELS']) + + # Manage data-layouts + if any(i in data_layouts for i in ['all', 'nhwc']): + env.Append(CXXFLAGS = ['-DENABLE_NHWC_KERNELS']) + if any(i in data_layouts for i in ['all', 'nchw']): + env.Append(CXXFLAGS = ['-DENABLE_NCHW_KERNELS']) + + return env + + vars = Variables("scons") vars.AddVariables( BoolVariable("debug", "Debug", False), @@ -327,25 +371,20 @@ if env['high_priority'] and env['build_config']: if not env['high_priority'] and not env['build_config']: env.Append(CPPDEFINES = ['ARM_COMPUTE_GRAPH_ENABLED']) -if env['data_type_support']: - if any(i in env['data_type_support'] for i in ['all', 'fp16']): - env.Append(CXXFLAGS = ['-DENABLE_FP16_KERNELS']) - if any(i in env['data_type_support'] for i in ['all', 'fp32']): - env.Append(CXXFLAGS = ['-DENABLE_FP32_KERNELS']) - if any(i in env['data_type_support'] for i in ['all', 'qasymm8']): - env.Append(CXXFLAGS = ['-DENABLE_QASYMM8_KERNELS']) - if any(i in env['data_type_support'] for 
i in ['all', 'qasymm8_signed']): - env.Append(CXXFLAGS = ['-DENABLE_QASYMM8_SIGNED_KERNELS']) - if any(i in env['data_type_support'] for i in ['all', 'qsymm16']): - env.Append(CXXFLAGS = ['-DENABLE_QSYMM16_KERNELS']) - if any(i in env['data_type_support'] for i in ['all', 'integer']): - env.Append(CXXFLAGS = ['-DENABLE_INTEGER_KERNELS']) +data_types = [] +data_layouts = [] -if env['data_layout_support']: - if any(i in env['data_layout_support'] for i in ['all', 'nhwc']): - env.Append(CXXFLAGS = ['-DENABLE_NHWC_KERNELS']) - if any(i in env['data_layout_support'] for i in ['all', 'nchw']): - env.Append(CXXFLAGS = ['-DENABLE_NCHW_KERNELS']) +# Set correct data types / layouts to build +if env['high_priority']: + data_types = ['all'] + data_layouts = ['all'] +elif env['build_config']: + data_types, data_layouts = read_build_config_json(env['build_config']) +else: + data_types = env['data_type_support'] + data_layouts = env['data_layout_support'] + +env = update_data_type_layout_flags(env, data_types, data_layouts) if env['standalone']: env.Append(CXXFLAGS = ['-fPIC']) @@ -417,6 +456,10 @@ Export('version_at_least') SConscript('./SConscript', variant_dir=build_path, duplicate=0) +if env['examples'] and (env['build_config'] or env['high_priority']): + print("WARNING: Building examples for selected operators not supported. Use examples=0") + Return() + if env['examples'] and env['exceptions']: if env['os'] == 'bare_metal' and env['arch'] == 'armv7a': print("WARNING: Building examples for bare metal and armv7a is not supported. 
Use examples=0") @@ -424,6 +467,9 @@ if env['examples'] and env['exceptions']: SConscript('./examples/SConscript', variant_dir='%s/examples' % build_path, duplicate=0) if env['exceptions']: + if env['build_config'] or env['high_priority']: + print("WARNING: Building tests for selected operators not supported") + Return() if env['os'] == 'bare_metal' and env['arch'] == 'armv7a': print("WARNING: Building tests for bare metal and armv7a is not supported") Return() diff --git a/filelist.json b/filelist.json index e52b7c824c..bcc7ecb37a 100644 --- a/filelist.json +++ b/filelist.json @@ -845,21 +845,21 @@ "common": [ "src/cpu/operators/CpuActivation.cpp", "src/cpu/kernels/CpuActivationKernel.cpp", - "src/runtime/NEON/functions/NEActivationLayer.cpp" + "src/runtime/NEON/functions/NEActivationLayer.cpp", + "src/cpu/kernels/activation/neon/qasymm8.cpp", + "src/cpu/kernels/activation/neon/qasymm8_signed.cpp", + "src/cpu/kernels/activation/neon/qsymm16.cpp" ], "neon": { "fp16": [ "src/cpu/kernels/activation/neon/fp16.cpp" ], - "fp32": [ "src/cpu/kernels/activation/neon/fp32.cpp" ], - "qasymm8": [ "src/cpu/kernels/activation/neon/qasymm8.cpp" ], - "qasymm8_signed": [ "src/cpu/kernels/activation/neon/qasymm8_signed.cpp" ], - "qsymm16": [ "src/cpu/kernels/activation/neon/qsymm16.cpp" ] + "fp32": [ "src/cpu/kernels/activation/neon/fp32.cpp" ] }, "sve": { "fp16": [ "src/cpu/kernels/activation/sve/fp16.cpp" ], "fp32": [ "src/cpu/kernels/activation/sve/fp32.cpp" ], - "qasymm8": [ "src/cpu/kernels/activation/neon/qasymm8.cpp", "src/cpu/kernels/activation/sve/qasymm8.cpp" ], - "qasymm8_signed": [ "src/cpu/kernels/activation/neon/qasymm8_signed.cpp", "src/cpu/kernels/activation/sve/qasymm8_signed.cpp" ], - "qsymm16": [ "src/cpu/kernels/activation/neon/qsymm16.cpp", "src/cpu/kernels/activation/sve/qsymm16.cpp" ] + "qasymm8": [ "src/cpu/kernels/activation/sve/qasymm8.cpp" ], + "qasymm8_signed": [ "src/cpu/kernels/activation/sve/qasymm8_signed.cpp" ], + "qsymm16": [ 
"src/cpu/kernels/activation/sve/qsymm16.cpp" ] } } }, @@ -874,18 +874,16 @@ "common": [ "src/cpu/operators/CpuAdd.cpp", "src/cpu/kernels/CpuAddKernel.cpp", - "src/runtime/NEON/functions/NEArithmeticAddition.cpp" + "src/runtime/NEON/functions/NEArithmeticAddition.cpp", + "src/cpu/kernels/add/neon/qasymm8.cpp", + "src/cpu/kernels/add/neon/qasymm8_signed.cpp", + "src/cpu/kernels/add/neon/qsymm16.cpp" ], - "neon": { - "qasymm8": [ "src/cpu/kernels/add/neon/qasymm8.cpp" ], - "qasymm8_signed": [ "src/cpu/kernels/add/neon/qasymm8_signed.cpp" ], - "qsymm16": [ "src/cpu/kernels/add/neon/qsymm16.cpp" ] - }, "sve": { "common": [ "src/cpu/kernels/add/sve/impl.cpp" ], - "qasymm8": [ "src/cpu/kernels/add/neon/qasymm8.cpp", "src/cpu/kernels/add/sve/qasymm8.cpp" ], - "qasymm8_signed": [ "src/cpu/kernels/add/neon/qasymm8_signed.cpp", "src/cpu/kernels/add/sve/qasymm8_signed.cpp" ], - "qsymm16": [ "src/cpu/kernels/add/neon/qsymm16.cpp", "src/cpu/kernels/add/sve/qsymm16.cpp" ] + "qasymm8": [ "src/cpu/kernels/add/sve/qasymm8.cpp" ], + "qasymm8_signed": [ "src/cpu/kernels/add/sve/qasymm8_signed.cpp" ], + "qsymm16": [ "src/cpu/kernels/add/sve/qsymm16.cpp" ] } } }, @@ -1103,68 +1101,62 @@ "src/core/NEON/kernels/arm_conv/depthwise/depthwise_u8q.cpp", "src/core/NEON/kernels/arm_conv/depthwise/depthwise_u8s8u8q.cpp", "src/core/NEON/kernels/arm_conv/depthwise/interleaves/8b_mla.cpp", - "src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp" + "src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/depthwise_s8q.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp32.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst/generic.cpp", + 
"src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp", + 
"src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_generic_output9_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_generic_output9_mla_depthfirst/generic.cpp", + 
"src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_generic_output9_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_generic_output9_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_generic_output9_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/interleaves/a64_s8q_3x3_dot.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/interleaves/a64_u8q_3x3_dot.cpp" ], - "neon": { - "estate64": [ - "src/core/NEON/kernels/arm_conv/depthwise/depthwise_s8q.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp32.cpp", - 
"src/core/NEON/kernels/arm_conv/depthwise/interleaves/a64_s8q_3x3_dot.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/interleaves/a64_u8q_3x3_dot.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_generic_output9_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp", - 
"src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_generic_output9_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_generic_output9_mla_depthfirst/generic.cpp", - 
"src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_generic_output9_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_generic_output9_mla_depthfirst/generic.cpp", - 
"src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp" - ] - }, "sve": { "common": [ - "src/core/NEON/kernels/arm_conv/depthwise/depthwise_s8q.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp32.cpp", "src/core/NEON/kernels/arm_conv/depthwise/interleaves/sve_8b_mla.cpp", "src/core/NEON/kernels/arm_conv/depthwise/interleaves/sve_s8q_3x3_dot.cpp", "src/core/NEON/kernels/arm_conv/depthwise/interleaves/sve_u8q_3x3_dot.cpp", @@ -1209,57 +1201,7 @@ "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp", "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp", "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/interleaves/a64_s8q_3x3_dot.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/interleaves/a64_u8q_3x3_dot.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp", - 
"src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_generic_output9_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp", - 
"src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_generic_output9_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_generic_output9_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp", - 
"src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_generic_output9_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_generic_output9_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp" + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp" ] } } @@ -1316,6 +1258,7 @@ } }, "FFT1D": { + "deps": [ "Reduction" ], "files": { "common": [ "src/core/NEON/kernels/NEFFTDigitReverseKernel.cpp", @@ -1385,6 +1328,7 @@ } }, "Gemm": { + "deps": [ "Quantize", "Add"], "files": { "common": [ "src/cpu/kernels/CpuConvertQuantizedSignednessKernel.cpp", @@ -1422,7 +1366,61 @@ "src/core/NEON/kernels/arm_gemm/transform.cpp", "src/runtime/NEON/functions/NEGEMM.cpp", "src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp", - "src/runtime/NEON/functions/NEGEMMLowpOutputStage.cpp" + "src/runtime/NEON/functions/NEGEMMLowpOutputStage.cpp", + 
"src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s16_8x12/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_4x4/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12/a55r1.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12/x1.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u16_8x12/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_4x4/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/a55r1.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/x1.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/a55r1.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/x1.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16_mla_6x32/a55.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16_mla_6x32/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_4x24/a55.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_4x24/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_6x16/a55.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_6x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_8x4/a55.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_8x4/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_dot_4x16/a55.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_dot_4x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_mmla_4x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_dot_6x16/a55.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_dot_6x16/generic.cpp", + 
"src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_mmla_6x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_dot_6x16/a55.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_dot_6x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_mmla_6x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_dot_4x16/a55.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_dot_4x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_mmla_4x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_dot_6x16/a55.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_dot_6x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_mmla_6x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_s8s32_mmla_8x12/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_u8u32_mmla_8x12/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a53.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a55.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a55r1.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/x1.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x6/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_fp32_mla_6x4/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_fp32_mla_8x4/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_6x4/a55.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_6x4/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_8x4/a55.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_8x4/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_6x4/a55.cpp", + 
"src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_6x4/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_8x4/a55.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_8x4/generic.cpp" ], "neon": { "estate32": [ @@ -1431,68 +1429,14 @@ "src/core/NEON/kernels/arm_gemm/kernels/a32_sgemm_8x6/generic.cpp" ], "estate64": [ - "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s16_8x12/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_4x4/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12/a55r1.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12/x1.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u16_8x12/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_4x4/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/a55r1.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/x1.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/a55r1.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/x1.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_bf16fp32_dot_6x16/generic.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_bf16fp32_mmla_6x16/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16_mla_6x32/a55.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16_mla_6x32/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_4x24/a55.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_4x24/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_6x16/a55.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_6x16/generic.cpp", - 
"src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_8x4/a55.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_8x4/generic.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32bf16fp32_mmla_4x24/generic.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32bf16fp32_mmla_6x16/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_dot_4x16/a55.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_dot_4x16/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_mmla_4x16/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_dot_6x16/a55.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_dot_6x16/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_mmla_6x16/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_dot_6x16/a55.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_dot_6x16/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_mmla_6x16/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_dot_4x16/a55.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_dot_4x16/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_mmla_4x16/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_dot_6x16/a55.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_dot_6x16/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_mmla_6x16/generic.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_dot_8x12/generic.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_dot_8x12/x1.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_mmla_8x12/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_s8s32_mmla_8x12/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_u8u32_mmla_8x12/generic.cpp", - 
"src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a53.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a55.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a55r1.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/x1.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x6/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemv_pretransposed/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_fp32_mla_6x4/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_fp32_mla_8x4/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_6x4/a55.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_6x4/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_8x4/a55.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_8x4/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_6x4/a55.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_6x4/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_8x4/a55.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_8x4/generic.cpp" + "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemv_pretransposed/generic.cpp" ] }, "sve": { @@ -1536,69 +1480,7 @@ "src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_s8s32_dot_8x1VL/generic.cpp", "src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_u8u32_dot_8x1VL/generic.cpp", "src/core/NEON/kernels/arm_gemm/mergeresults-sve.cpp", - "src/core/NEON/kernels/arm_gemm/transform-sve.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s16_8x12/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_4x4/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12/a55r1.cpp", - 
"src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12/x1.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u16_8x12/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_4x4/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/a55r1.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/x1.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/a55r1.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/x1.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_bf16fp32_dot_6x16/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_bf16fp32_mmla_6x16/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16_mla_6x32/a55.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16_mla_6x32/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_4x24/a55.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_4x24/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_6x16/a55.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_6x16/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_8x4/a55.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_8x4/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32bf16fp32_mmla_4x24/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32bf16fp32_mmla_6x16/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_dot_4x16/a55.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_dot_4x16/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_mmla_4x16/generic.cpp", - 
"src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_dot_6x16/a55.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_dot_6x16/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_mmla_6x16/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_dot_6x16/a55.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_dot_6x16/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_mmla_6x16/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_dot_4x16/a55.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_dot_4x16/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_mmla_4x16/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_dot_6x16/a55.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_dot_6x16/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_mmla_6x16/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_dot_8x12/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_dot_8x12/x1.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_mmla_8x12/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_s8s32_mmla_8x12/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_u8u32_mmla_8x12/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a53.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a55.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a55r1.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/x1.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x6/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemv_pretransposed/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_fp32_mla_6x4/generic.cpp", - 
"src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_fp32_mla_8x4/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_6x4/a55.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_6x4/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_8x4/a55.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_8x4/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_6x4/a55.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_6x4/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_8x4/a55.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_8x4/generic.cpp" + "src/core/NEON/kernels/arm_gemm/transform-sve.cpp" ] } } @@ -1735,38 +1617,34 @@ "src/core/NEON/kernels/arm_conv/pooling/pooling_s8q.cpp", "src/core/NEON/kernels/arm_conv/pooling/pooling_u8.cpp", "src/core/NEON/kernels/arm_conv/pooling/pooling_u8q.cpp", - "src/runtime/NEON/functions/NEPoolingLayer.cpp" + "src/runtime/NEON/functions/NEPoolingLayer.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_avg_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_max_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_avg_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_max_generic_depthfirst/generic.cpp", + 
"src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_avg_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_max_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8q_nhwc_avg_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8q_nhwc_max_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_avg_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_max_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8q_nhwc_avg_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8q_nhwc_max_generic_depthfirst/generic.cpp", + "src/cpu/kernels/pool2d/neon/qasymm8.cpp", + "src/cpu/kernels/pool2d/neon/qasymm8_signed.cpp" ], "neon": { "nchw": [ "src/cpu/kernels/pool2d/neon/nchw/all.cpp" ], "fp16": [ "src/cpu/kernels/pool2d/neon/fp16.cpp" ], - "fp32": [ "src/cpu/kernels/pool2d/neon/fp32.cpp" ], - "qasymm8": [ "src/cpu/kernels/pool2d/neon/qasymm8.cpp" ], - "qasymm8_signed": [ "src/cpu/kernels/pool2d/neon/qasymm8_signed.cpp" ], - "estate64": [ - "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_avg_generic_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_max_generic_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp", - 
"src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_avg_generic_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_max_generic_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_avg_generic_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_max_generic_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8q_nhwc_avg_generic_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8q_nhwc_max_generic_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_avg_generic_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_max_generic_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8q_nhwc_avg_generic_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8q_nhwc_max_generic_depthfirst/generic.cpp" - ] + "fp32": [ "src/cpu/kernels/pool2d/neon/fp32.cpp" ] }, "sve": { - "qasymm8": [ "src/cpu/kernels/pool2d/neon/qasymm8.cpp" ], - "qasymm8_signed": [ "src/cpu/kernels/pool2d/neon/qasymm8_signed.cpp" ], "common": [ "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp", "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_avg_generic_depthfirst/generic.cpp", @@ -1785,25 +1663,7 @@ "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp", "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_max_generic_depthfirst/generic.cpp", 
"src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_avg_generic_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_max_generic_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_avg_generic_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_max_generic_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_avg_generic_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_max_generic_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_avg_generic_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_max_generic_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8q_nhwc_avg_generic_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8q_nhwc_max_generic_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_avg_generic_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_max_generic_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8q_nhwc_avg_generic_depthfirst/generic.cpp", - 
"src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8q_nhwc_max_generic_depthfirst/generic.cpp" + "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_max_generic_depthfirst/generic.cpp" ] } } @@ -2002,18 +1862,11 @@ "common": [ "src/cpu/operators/CpuSub.cpp", "src/cpu/kernels/CpuSubKernel.cpp", - "src/runtime/NEON/functions/NEArithmeticSubtraction.cpp" - ], - "sve": { - "qasymm8": [ "src/cpu/kernels/sub/neon/qasymm8.cpp" ], - "qasymm8_signed": [ "src/cpu/kernels/sub/neon/qasymm8_signed.cpp" ], - "qsymm16": [ "src/cpu/kernels/sub/neon/qsymm16.cpp" ] - }, - "neon": { - "qasymm8": [ "src/cpu/kernels/sub/neon/qasymm8.cpp" ], - "qasymm8_signed": [ "src/cpu/kernels/sub/neon/qasymm8_signed.cpp" ], - "qsymm16": [ "src/cpu/kernels/sub/neon/qsymm16.cpp" ] - } + "src/runtime/NEON/functions/NEArithmeticSubtraction.cpp", + "src/cpu/kernels/sub/neon/qasymm8.cpp", + "src/cpu/kernels/sub/neon/qasymm8_signed.cpp", + "src/cpu/kernels/sub/neon/qsymm16.cpp" + ] } }, "Tile": { diff --git a/python/scripts/report-model-ops/report_model_ops.py b/python/scripts/report-model-ops/report_model_ops.py old mode 100644 new mode 100755 index 3888b801e6..1549005da5 --- a/python/scripts/report-model-ops/report_model_ops.py +++ b/python/scripts/report-model-ops/report_model_ops.py @@ -31,20 +31,20 @@ import tflite sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../") from utils.model_identification import identify_model_type -from utils.tflite_helpers import tflite_op2acl, tflite_typecode2name +from utils.tflite_helpers import tflite_op2acl, tflite_typecode2name, tflite_typecode2aclname SUPPORTED_MODEL_TYPES = ["tflite"] logger = logging.getLogger("report_model_ops") -def get_ops_from_tflite_graph(model): +def get_ops_types_from_tflite_graph(model): """ - Helper function that extract operator related meta-data from a TfLite model + Helper function that extract operator related meta-data from a TFLite model Parameters ---------- model: str - Respective TfLite model to analyse 
+ Respective TFLite model to analyse Returns ---------- @@ -52,7 +52,7 @@ def get_ops_from_tflite_graph(model): A tuple with the sets of unique operator types and data-types that are present in the model """ - logger.debug(f"Analysing TfLite mode '{model}'!") + logger.debug(f"Analysing TFLite mode '{model}'!") with open(model, "rb") as f: buf = f.read() @@ -63,11 +63,16 @@ def get_ops_from_tflite_graph(model): unique_ops = {tflite.opcode2name(model.OperatorCodes(op_id).BuiltinCode()) for op_id in range(0, nr_unique_ops)} # Extract IO data-types - data_types = set() + supported_data_types = set() + unsupported_data_types = set() for subgraph_id in range(0, model.SubgraphsLength()): subgraph = model.Subgraphs(subgraph_id) for tensor_id in range(0, subgraph.TensorsLength()): - data_types.add(tflite_typecode2name(subgraph.Tensors(tensor_id).Type())) + try: + supported_data_types.add(tflite_typecode2aclname(subgraph.Tensors(tensor_id).Type())) + except ValueError: + unsupported_data_types.add(tflite_typecode2name(subgraph.Tensors(tensor_id).Type())) + logger.warning(f"Data type {tflite_typecode2name(subgraph.Tensors(tensor_id).Type())} is not supported by ComputeLibrary") # Perform mapping between TfLite ops to ComputeLibrary ones supported_ops = set() @@ -75,17 +80,17 @@ def get_ops_from_tflite_graph(model): for top in unique_ops: try: supported_ops.add(tflite_op2acl(top)) - except: + except ValueError: unsupported_ops.add(top) - logger.warning(f"Operator {top} has not ComputeLibrary mapping") + logger.warning(f"Operator {top} does not have ComputeLibrary mapping") - return (supported_ops, unsupported_ops, data_types) + return (supported_ops, unsupported_ops, supported_data_types, unsupported_data_types) def extract_model_meta(model, model_type): """ Function that calls the appropriate model parser to extract model related meta-data - Supported parsers: TfLite + Supported parsers: TFLite Parameters ---------- @@ -101,13 +106,13 @@ def extract_model_meta(model, 
model_type): """ if model_type == "tflite": - return get_ops_from_tflite_graph(model) + return get_ops_types_from_tflite_graph(model) else: logger.warning(f"Model type '{model_type}' is unsupported!") return () -def generate_build_config(ops, data_types): +def generate_build_config(ops, data_types, data_layouts): """ Function that generates a compatible ComputeLibrary operator-based build configuration @@ -117,6 +122,8 @@ def generate_build_config(ops, data_types): Set with the operators to add in the build configuration data_types: Set with the data types to add in the build configuration + data_layouts: + Set with the data layouts to add in the build configuration Returns ---------- @@ -126,6 +133,7 @@ def generate_build_config(ops, data_types): config_data = {} config_data["operators"] = list(ops) config_data["data_types"] = list(data_types) + config_data["data_layouts"] = list(data_layouts) return config_data @@ -134,7 +142,7 @@ if __name__ == "__main__": parser = ArgumentParser( description="""Report map of operations in a list of models. 
The script consumes deep learning models and reports the type of operations and data-types used - Supported model types: TfLite """ + Supported model types: TFLite """ ) parser.add_argument( @@ -163,26 +171,35 @@ if __name__ == "__main__": # Extract operator mapping final_supported_ops = set() final_unsupported_ops = set() - final_dts = set() + final_supported_dts = set() + final_unsupported_dts = set() + final_layouts = {"nhwc"} # Data layout for TFLite is always NHWC for model in args.models: logger.debug(f"Starting analyzing {model} model") model_type = identify_model_type(model) - supported_model_ops, unsupported_mode_ops, model_dts = extract_model_meta(model, model_type) + supported_model_ops, unsupported_mode_ops, supported_model_dts, unsupported_model_dts = extract_model_meta(model, model_type) final_supported_ops.update(supported_model_ops) final_unsupported_ops.update(unsupported_mode_ops) - final_dts.update(model_dts) + final_supported_dts.update(supported_model_dts) + final_unsupported_dts.update(unsupported_model_dts) logger.info("=== Supported Operators") logger.info(final_supported_ops) - logger.info("=== Unsupported Operators") - logger.info(final_unsupported_ops) + if(len(final_unsupported_ops)): + logger.info("=== Unsupported Operators") + logger.info(final_unsupported_ops) logger.info("=== Data Types") - logger.info(final_dts) - - # Generate json file + logger.info(final_supported_dts) + if(len(final_unsupported_dts)): + logger.info("=== Unsupported Data Types") + logger.info(final_unsupported_dts) + logger.info("=== Data Layouts") + logger.info(final_layouts) + + # Generate JSON file if args.config: logger.debug("Generating JSON build configuration file") - config_data = generate_build_config(final_supported_ops, final_dts) + config_data = generate_build_config(final_supported_ops, final_supported_dts, final_layouts) with open(args.config, "w") as f: json.dump(config_data, f) diff --git a/python/scripts/utils/model_identification.py 
b/python/scripts/utils/model_identification.py index 43e7d20f61..84a6e1a097 100644 --- a/python/scripts/utils/model_identification.py +++ b/python/scripts/utils/model_identification.py @@ -24,7 +24,7 @@ import os def is_tflite_model(model_path): - """Check if a model is of TfLite type + """Check if a model is of TFLite type Parameters: ---------- @@ -34,7 +34,7 @@ def is_tflite_model(model_path): Returns ---------- bool: - True if given path is a valid TfLite model + True if given path is a valid TFLite model """ try: diff --git a/python/scripts/utils/tflite_helpers.py b/python/scripts/utils/tflite_helpers.py index 8f8d422743..c2aeaac6a7 100644 --- a/python/scripts/utils/tflite_helpers.py +++ b/python/scripts/utils/tflite_helpers.py @@ -20,6 +20,19 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. +_TFLITE_TYPECODE2ACLNAME = { + 0: "fp32", # Float32 + 1: "fp16", # Float16 + 2: "integer", # Int32 + 3: "qasymm8", # Uint8 + # 4: "Unsupported", # Int64 + # 5: "Unsupported", # String + 6: "integer", # Bool + 7: "qsymm16", # Int16 + # 8: "Unsupported", # Complex64 + 9: "qasymm8_signed", # Int8 +} + _TFLITE_TYPECODE2NAME = { 0: "Float32", 1: "Float16", @@ -182,13 +195,36 @@ _TFLITE_TO_ACL = { } +def tflite_typecode2aclname(toc): + """Stringify TFLite data-type opcodes to ACL versions + + Parameters: + ---------- + toc: int + TFLite type opcode + + Returns + ---------- + str + Stringified opcode + + Raises + ------ + ValueError + If opcode does not exist in the map + """ + if toc in _TFLITE_TYPECODE2ACLNAME: + return _TFLITE_TYPECODE2ACLNAME[toc] + else: + raise ValueError("Unknown ACL typecode %d" % toc) + def tflite_typecode2name(toc): - """Stringify TfLite data-type opcodes + """Stringify TFLite data-type opcodes Parameters: ---------- toc: int - TfLite type opcode + TFLite type opcode Returns ---------- @@ -207,12 +243,12 @@ def tflite_typecode2name(toc): def tflite_op2acl(top): - """Map TfLite operators to ComputeLibrary 
ones + """Map TFLite operators to ComputeLibrary ones Parameters: ---------- top: str - TfLite operator name + TFLite operator name Returns ---------- diff --git a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp32.cpp b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp32.cpp index 7a26ba4230..5107ddacbc 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp32.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp32.cpp @@ -72,6 +72,7 @@ namespace ); } +#if defined(__aarch64__) unsigned int not_preferred(const DepthwiseArgs &, const Nothing &) { return std::numeric_limits<unsigned int>::max(); @@ -81,6 +82,7 @@ namespace { return args.channel_multiplier > 1 ? 0 : std::numeric_limits<unsigned int>::max(); } +#endif // defined(__aarch64__) } static const DepthwiseImplementation depthwise_fp32_methods[] = { diff --git a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_s8q.cpp b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_s8q.cpp index 1c4c7576f5..46a31185d7 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_s8q.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_s8q.cpp @@ -62,11 +62,13 @@ namespace depthwise { namespace { +#if defined(__aarch64__) bool qp_weights_are_symmetric(const DepthwiseArgs &, const void *_qp) { const auto qp = static_cast(_qp); return qp->b_offset == 0; } +#endif // defined(__aarch64__) } static const DepthwiseImplementation depthwise_s8q_methods[] = { diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_generic_output9_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_generic_output9_mla_depthfirst/generic.cpp index e8e817e9cc..c0b87ada75 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_generic_output9_mla_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_generic_output9_mla_depthfirst/generic.cpp @@ -22,6 +22,7 @@ * SOFTWARE.
*/ +#if defined(__aarch64__) #include #include @@ -377,3 +378,4 @@ void a64_fp32_nhwc_generic_output9_mla_depthfirst_impl( } // namespace depthwise } // namespace arm_conv +#endif // defined(__aarch64__) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst/generic.cpp index 5e334ec7b8..04a7abd3bd 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst/generic.cpp @@ -22,6 +22,7 @@ * SOFTWARE. */ +#if defined(__aarch64__) #include #include @@ -530,3 +531,4 @@ void a64_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst_imp } // namespace depthwise } // namespace arm_conv +#endif // defined(__aarch64__) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst/generic.cpp index 6e9e97fa29..67fc09b2ee 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst/generic.cpp @@ -22,6 +22,8 @@ * SOFTWARE. 
*/ +#if defined(__aarch64__) + #include #include @@ -914,3 +916,4 @@ void a64_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst_imp } // namespace depthwise } // namespace arm_conv +#endif // defined(__aarch64__) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp index c93037d183..46210e2964 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp @@ -22,6 +22,7 @@ * SOFTWARE. */ +#if defined(__aarch64__) #include #include @@ -849,3 +850,4 @@ void a64_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst_im } // namespace depthwise } // namespace arm_conv +#endif // defined(__aarch64__) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_generic_output9_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_generic_output9_mla_depthfirst/generic.cpp index ad5545a304..78f748ad58 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_generic_output9_mla_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_generic_output9_mla_depthfirst/generic.cpp @@ -22,6 +22,7 @@ * SOFTWARE. 
*/ +#if defined(__aarch64__) #include "arm_gemm.hpp" #include @@ -622,3 +623,4 @@ void a64_s8q_nhwc_generic_output9_mla_depthfirst_impl( } // namespace depthwise } // namespace arm_conv +#endif // defined(__aarch64__) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp index 2fb6d3538f..cbe3d2cd1c 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp @@ -22,6 +22,7 @@ * SOFTWARE. */ +#if defined(__aarch64__) #include "arm_gemm.hpp" #include @@ -525,3 +526,4 @@ void a64_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst_impl } // namespace depthwise } // namespace arm_conv +#endif // defined(__aarch64__) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp index 95ad78cf6c..b198eff6ac 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp @@ -22,6 +22,7 @@ * SOFTWARE. 
*/ +#if defined(__aarch64__) #include "arm_gemm.hpp" #include @@ -660,3 +661,4 @@ void a64_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst_impl } // namespace depthwise } // namespace arm_conv +#endif // defined(__aarch64__) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp index c0acd8805e..bbfa9f439f 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp @@ -22,6 +22,7 @@ * SOFTWARE. */ +#if defined(__aarch64__) #include "arm_gemm.hpp" #include @@ -1482,3 +1483,4 @@ void a64_s8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst_imp } // namespace depthwise } // namespace arm_conv +#endif // defined(__aarch64__) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_generic_output9_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_generic_output9_mla_depthfirst/generic.cpp index 42d9b2f408..9cebfe8f03 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_generic_output9_mla_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_generic_output9_mla_depthfirst/generic.cpp @@ -22,6 +22,7 @@ * SOFTWARE. 
*/ +#if defined(__aarch64__) #include "arm_gemm.hpp" #include @@ -622,3 +623,4 @@ void a64_u8q_nhwc_generic_output9_mla_depthfirst_impl( } // namespace depthwise } // namespace arm_conv +#endif // defined(__aarch64__) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp index 2106cf7086..057b1ef492 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp @@ -22,6 +22,7 @@ * SOFTWARE. */ +#if defined(__aarch64__) #include "arm_gemm.hpp" #include @@ -525,3 +526,4 @@ void a64_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst_impl } // namespace depthwise } // namespace arm_conv +#endif // defined(__aarch64__) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp index 8bcd682e3c..40242e9718 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp @@ -22,6 +22,7 @@ * SOFTWARE. 
*/ +#if defined(__aarch64__) #include "arm_gemm.hpp" #include @@ -660,3 +661,4 @@ void a64_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst_impl } // namespace depthwise } // namespace arm_conv +#endif // defined(__aarch64__) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp index ada1818eba..e896304c59 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp @@ -22,6 +22,7 @@ * SOFTWARE. */ +#if defined(__aarch64__) #include "arm_gemm.hpp" #include @@ -1482,3 +1483,4 @@ void a64_u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst_imp } // namespace depthwise } // namespace arm_conv +#endif // defined(__aarch64__) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_generic_output9_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_generic_output9_mla_depthfirst/generic.cpp index 1633639ad5..08a2b7a98e 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_generic_output9_mla_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_generic_output9_mla_depthfirst/generic.cpp @@ -22,6 +22,7 @@ * SOFTWARE. 
*/ +#if defined(__aarch64__) #include "arm_gemm.hpp" #include @@ -622,3 +623,4 @@ void a64_u8s8u8q_nhwc_generic_output9_mla_depthfirst_impl( } // namespace depthwise } // namespace arm_conv +#endif // defined(__aarch64__) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp index 152999dd1a..09b274056f 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp @@ -22,6 +22,7 @@ * SOFTWARE. */ +#if defined(__aarch64__) #include "arm_gemm.hpp" #include @@ -1482,3 +1483,4 @@ void a64_u8s8u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst } // namespace depthwise } // namespace arm_conv +#endif // defined(__aarch64__) diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp index ff8d7d8ba1..71a8c7496a 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp @@ -22,6 +22,7 @@ * SOFTWARE. 
*/ +#if defined(__aarch64__) #include #include #include @@ -249,3 +250,4 @@ void a64_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst_impl( } // namespace pooling } // namespace arm_conv +#endif // defined(__aarch64__) diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp index ea7e2195d1..a924c9a7a6 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp @@ -22,6 +22,7 @@ * SOFTWARE. */ +#if defined(__aarch64__) #include #include @@ -172,3 +173,4 @@ void a64_fp32_nhwc_max_2x2_s1_output2x2_depthfirst_impl( } // namespace pooling } // namespace arm_conv +#endif // defined(__aarch64__) diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp index 298db96861..e344e14e34 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp @@ -22,6 +22,7 @@ * SOFTWARE. 
*/ +#if defined(__aarch64__) #include #include @@ -172,3 +173,4 @@ void a64_s8_nhwc_max_2x2_s1_output2x2_depthfirst_impl( } // namespace pooling } // namespace arm_conv +#endif // defined(__aarch64__) diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp index 02c43ccaba..9d379d183e 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp @@ -22,6 +22,7 @@ * SOFTWARE. */ +#if defined(__aarch64__) #include #include @@ -172,3 +173,4 @@ void a64_u8_nhwc_max_2x2_s1_output2x2_depthfirst_impl( } // namespace pooling } // namespace arm_conv +#endif // defined(__aarch64__) -- cgit v1.2.1