From 9d9ad33df8c06184c23bd9ef25f95fdfe1846ad5 Mon Sep 17 00:00:00 2001 From: Motti Gondabi Date: Sun, 23 Jan 2022 12:42:24 +0200 Subject: SCons build system refactoring (phase #2). * Add kernel selection at build time * Modify filelist.json to allow files separation as part of our kernel decoupling process. Issues to address after this change will be merged: (1) Remove SVE/SVE2 defines from already decoupled kernels (2) Adapt the new file list structure (filelist.json) resolves COMPMID-4996 and COMPMID-5048 Change-Id: I8c17a9d6b150bbc7d8c1f2ed38060be82b6aa904 Signed-off-by: Motti Gondabi Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/7006 Tested-by: Arm Jenkins Reviewed-by: SiCong Li Comments-Addressed: Arm Jenkins --- SConscript | 145 ++++--- SConstruct | 4 +- filedefs.json | 3 +- filelist.json | 424 +++++++++++---------- src/cpu/kernels/add/generic/sve/fp16.cpp | 2 +- src/cpu/kernels/add/generic/sve/fp32.cpp | 4 +- src/cpu/kernels/add/generic/sve/impl.cpp | 4 +- src/cpu/kernels/add/generic/sve/impl.h | 4 +- src/cpu/kernels/add/generic/sve/integer.cpp | 4 +- src/cpu/kernels/add/generic/sve2/qasymm8.cpp | 5 +- .../kernels/add/generic/sve2/qasymm8_signed.cpp | 4 +- src/cpu/kernels/add/generic/sve2/qsymm16.cpp | 4 +- 12 files changed, 324 insertions(+), 283 deletions(-) diff --git a/SConscript b/SConscript index 45e88184e4..8cc734d44b 100644 --- a/SConscript +++ b/SConscript @@ -1,3 +1,6 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + # Copyright (c) 2016-2021 Arm Limited. # # SPDX-License-Identifier: MIT @@ -45,48 +48,66 @@ def build_bootcode_objs(sources): Default(obj) return obj -# @brief Generates SVE/SVE2 shared object files for a specific V8 architechture. + + + +# @brief Create a list of object from a given file list. # -# @param sources The target source files -# @param arch_info A Tuple represents the architecture info -# such as the compiler flags and defines. +# @param arch_info A dictionary represents the architecture info such as the +# compiler flags and defines (filedefs.json). # -# @return A list of objects for the corresponding architecture. +# @param sources A list of files to build # -def build_multi_isa_objs(sources, arch_v8_info): +# @return A list of objects for the corresponding architecture. - # Get the march - arch_v8 = arch_v8_info[0] +def build_obj_list(arch_info, sources, static=False): - # Create a temp environment + # Clone environment tmp_env = arm_compute_env.Clone() - if 'cxxflags' in arch_v8_info[1] and len(arch_v8_info[1]['cxxflags']) > 0: - tmp_env.Append(CXXFLAGS = arch_v8_info[1]['cxxflags']) - if 'cppdefines' in arch_v8_info[1] and len(arch_v8_info[1]['cppdefines']) > 0: - tmp_env.Append(CPPDEFINES = arch_v8_info[1]['cppdefines']) + # Append architecture spec + if 'cxxflags' in arch_info and len(arch_info['cxxflags']) > 0: + tmp_env.Append(CXXFLAGS = arch_info['cxxflags']) - if 'sve' in arch_v8: - tmp_env.Append(CPPDEFINES = ['ENABLE_SVE', 'ARM_COMPUTE_ENABLE_SVE']) - if 'sve2' in arch_v8: - tmp_env.Append(CPPDEFINES = ['ARM_COMPUTE_ENABLE_SVE2']) + # Build and return objects + if static: + objs = tmp_env.StaticObject(sources) else: - tmp_env.Append(CPPDEFINES = ['ENABLE_NEON', 'ARM_COMPUTE_ENABLE_NEON']) - - # we must differentiate the file object names - # as we accumulate the set. - objs = [] - for src in sources: - objs += tmp_env.SharedObject('{}-{}.o'.format(src.replace('.cpp', ''), arch_v8), src) - + objs = tmp_env.SharedObject(sources) + tmp_env.Default(objs) return objs +# @brief Build multi-ISA files with the respective architecture. +# +# @return Two distinct lists: +# A list of static objects +# A list of shared objects + +def build_lib_objects(): + lib_static_objs = [] # static objects + lib_shared_objs = [] # shared objects + + arm_compute_env.Append(CPPDEFINES = ['ENABLE_NEON', 'ARM_COMPUTE_ENABLE_NEON', + 'ENABLE_SVE', 'ARM_COMPUTE_ENABLE_SVE', + 'ARM_COMPUTE_ENABLE_FP16', 'ARM_COMPUTE_ENABLE_BF16', + 'ARM_COMPUTE_ENABLE_I8MM', 'ARM_COMPUTE_ENABLE_SVEF32MM']) + + # Build all the common files for the base architecture + lib_static_objs += build_obj_list(filedefs["armv8.2-a"], lib_files, static=True) + lib_shared_objs += build_obj_list(filedefs["armv8.2-a"], lib_files, static=False) + + # Build the SVE specific files + lib_static_objs += build_obj_list(filedefs["armv8.2-a-sve"], lib_files_sve, static=True) + lib_shared_objs += build_obj_list(filedefs["armv8.2-a-sve"], lib_files_sve, static=False) + + # Build the SVE2 specific files + arm_compute_env.Append(CPPDEFINES = ['ARM_COMPUTE_ENABLE_SVE2']) + lib_static_objs += build_obj_list(filedefs["armv8.6-a-sve2"], lib_files_sve2, static=True) + lib_shared_objs += build_obj_list(filedefs["armv8.6-a-sve2"], lib_files_sve2, static=False) + + return lib_static_objs, lib_shared_objs -def build_objs(sources): - obj = arm_compute_env.SharedObject(sources) - Default(obj) - return obj def build_library(name, build_env, sources, static=False, libs=[]): @@ -214,6 +235,7 @@ def get_attrs_list(env, data_types, data_layouts): # Manage execution state attrs += ['estate32' if (env['estate'] == 'auto' and 'v7a' in env['arch']) or '32' in env['estate'] else 'estate64'] + return attrs @@ -461,6 +483,7 @@ if env['os'] != 'openbsd': # Load build definitions file with (open(Dir('#').path + '/filedefs.json')) as fd: filedefs = json.load(fd) + filedefs = filedefs['cpu']['arch'] with (open(Dir('#').path + '/filelist.json')) as fp: @@ -513,8 +536,9 @@ if env['opencl']: graph_files += Glob('src/graph/backends/CL/*.cpp') -multi_isa_objs_list = [] + lib_files_sve = [] +lib_files_sve2 = [] if env['neon']: # build winograd/depthwise sources for either v7a / v8a @@ -528,9 +552,12 @@ if env['neon']: lib_files += filelist['cpu']['common'] # Setup SIMD file list to include - simd = [] - if 'sve' in env['arch'] or env['multi_isa']: simd += ['sve'] - if 'sve' not in env['arch'] or env['multi_isa']: simd += ['neon'] + simd = ['neon'] + if env['multi_isa']: + simd += ['sve', 'sve2'] + else: + if 'sve' in env['arch']: simd += ['sve'] + if 'sve2' in env['arch']: simd += ['sve2'] # Get attributes if(use_custom_ops): @@ -544,9 +571,17 @@ if env['neon']: cpu_files = get_operator_backend_files(filelist, cpu_ops_to_build, 'cpu', simd, attrs) + # Shared among ALL CPU files lib_files += cpu_files.get('common', []) + + # Arm® Neon™ specific files lib_files += cpu_files.get('neon', []) - lib_files_sve += cpu_files.get('sve', []) + + # SVE files only + lib_files_sve = cpu_files.get('sve', []) + + # SVE2 files only + lib_files_sve2 = cpu_files.get('sve2', []) graph_files += Glob('src/graph/backends/NEON/*.cpp') @@ -562,45 +597,39 @@ if env['os'] == 'bare_metal': bootcode_o = build_bootcode_objs(bootcode_files) Export('bootcode_o') + +if (env['multi_isa']): + lib_static_objs, lib_shared_objs = build_lib_objects() + + # STATIC library build. -# -# Multi-ISA: We split the library into 2 static libs: arm_compute-static.a and arm_compute_multi_isa-static.a -# to avoid 'argument list too long' error which being thrown due to OS string length limitation at link time. -# -# For single arch build: No change and we produce a single arm_compute-static.a if (env['multi_isa']): - # Get v8 variants - arch_v8s = filedefs['cpu']['arch'] - for arch_v8_info in arch_v8s.items(): - multi_isa_objs_list += build_multi_isa_objs(lib_files_sve, arch_v8_info) + arm_compute_a = build_library('arm_compute-static', arm_compute_env, lib_static_objs, static=True) +else: + if 'sve2' in env['arch']: + lib_files += lib_files_sve + lib_files += lib_files_sve2 + elif 'sve' in env['arch']: + lib_files += lib_files_sve arm_compute_a = build_library('arm_compute-static', arm_compute_env, lib_files, static=True) - arm_compute_multi_isa_a = build_library('arm_compute_multi_isa-static', arm_compute_env, multi_isa_objs_list, static=True) - Export('arm_compute_multi_isa_a') -else: - arm_compute_a = build_library('arm_compute-static', arm_compute_env, lib_files + lib_files_sve, static=True) - arm_compute_multi_isa_a = None Export('arm_compute_a') # SHARED library build. -# -# Multi-ISA: we leverage the prior static build such that the resulting -# "libarm_compute.so" is combined from "arm_compute_multi_isa-static.a" and "lib_files" objects. -# -# For single arch build: No change. if env['os'] != 'bare_metal' and not env['standalone']: if (env['multi_isa']): - arm_compute_so = build_library('arm_compute', arm_compute_env, lib_files, static=False, libs=[arm_compute_multi_isa_a]) + + arm_compute_so = build_library('arm_compute', arm_compute_env, lib_shared_objs, static=False) else: - arm_compute_so = build_library('arm_compute', arm_compute_env, lib_files + lib_files_sve, static=False) + arm_compute_so = build_library('arm_compute', arm_compute_env, lib_files, static=False) Export('arm_compute_so') # Generate dummy core lib for backwards compatibility if env['os'] == 'macos': # macos static library archiver fails if given an empty list of files - arm_compute_core_a = build_library('arm_compute_core-static', arm_compute_env, [lib_files], static=True) + arm_compute_core_a = build_library('arm_compute_core-static', arm_compute_env, lib_files, static=True) else: arm_compute_core_a = build_library('arm_compute_core-static', arm_compute_env, [], static=True) @@ -615,7 +644,7 @@ arm_compute_graph_env = arm_compute_env.Clone() # Build graph libraries arm_compute_graph_env.Append(CXXFLAGS = ['-Wno-redundant-move', '-Wno-pessimizing-move']) -arm_compute_graph_a = build_library('arm_compute_graph-static', arm_compute_graph_env, graph_files, static=True, libs = [ arm_compute_a, arm_compute_multi_isa_a ]) +arm_compute_graph_a = build_library('arm_compute_graph-static', arm_compute_graph_env, graph_files, static=True, libs = [ arm_compute_a ]) Export('arm_compute_graph_a') if env['os'] != 'bare_metal' and not env['standalone']: @@ -624,7 +653,7 @@ if env['os'] != 'bare_metal' and not env['standalone']: Export('arm_compute_graph_so') if env['standalone']: - alias = arm_compute_env.Alias("arm_compute", [arm_compute_a, arm_compute_multi_isa_a]) + alias = arm_compute_env.Alias("arm_compute", [arm_compute_a]) else: alias = arm_compute_env.Alias("arm_compute", [arm_compute_a, arm_compute_so]) diff --git a/SConstruct b/SConstruct index ff53229282..bae197c783 100644 --- a/SConstruct +++ b/SConstruct @@ -249,12 +249,12 @@ if 'v7a' in env['estate'] and env['estate'] == '64': print("ERROR: armv7a architecture has only 32-bit execution state") Exit(1) +env.Append(CPPDEFINES = ['ENABLE_NEON', 'ARM_COMPUTE_ENABLE_NEON']) + if 'sve' in env['arch']: env.Append(CPPDEFINES = ['ENABLE_SVE', 'ARM_COMPUTE_ENABLE_SVE']) if 'sve2' in env['arch']: env.Append(CPPDEFINES = ['ARM_COMPUTE_ENABLE_SVE2']) -else: - env.Append(CPPDEFINES = ['ENABLE_NEON', 'ARM_COMPUTE_ENABLE_NEON']) # Add architecture specific flags prefix = "" diff --git a/filedefs.json b/filedefs.json index 0bc030e1d3..76dccfffee 100644 --- a/filedefs.json +++ b/filedefs.json @@ -34,8 +34,7 @@ "cxxflags": ["-march=armv8.6-a+sve2+fp16+dotprod"], "cppdefines": ["ARM_COMPUTE_ENABLE_FP16", "ARM_COMPUTE_ENABLE_BF16", "ARM_COMPUTE_ENABLE_I8MM"] - } } } -} +} \ No newline at end of file diff --git a/filelist.json b/filelist.json index 37c0f1db12..d6b6f47bb9 100644 --- a/filelist.json +++ b/filelist.json @@ -873,22 +873,24 @@ "common": [ "src/cpu/operators/CpuAdd.cpp", "src/cpu/kernels/CpuAddKernel.cpp", - "src/runtime/NEON/functions/NEArithmeticAddition.cpp", - "src/cpu/kernels/add/generic/neon/qasymm8.cpp", - "src/cpu/kernels/add/generic/neon/qasymm8_signed.cpp", - "src/cpu/kernels/add/generic/neon/qsymm16.cpp" + "src/runtime/NEON/functions/NEArithmeticAddition.cpp" ], "neon": { - "common": [ "src/cpu/kernels/add/generic/neon/impl.cpp" ], + "common": ["src/cpu/kernels/add/generic/neon/impl.cpp"], "fp32":["src/cpu/kernels/add/generic/neon/fp32.cpp"], "fp16":["src/cpu/kernels/add/generic/neon/fp16.cpp"], - "integer":["src/cpu/kernels/add/generic/neon/integer.cpp"] + "integer":["src/cpu/kernels/add/generic/neon/integer.cpp"], + "qasymm8": ["src/cpu/kernels/add/generic/neon/qasymm8.cpp"], + "qasymm8_signed": ["src/cpu/kernels/add/generic/neon/qasymm8_signed.cpp"], + "qsymm16": ["src/cpu/kernels/add/generic/neon/qsymm16.cpp"] }, "sve": { "common": [ "src/cpu/kernels/add/generic/sve/impl.cpp" ], - "fp32":["src/cpu/kernels/add/generic/sve/fp32.cpp"], - "fp16":["src/cpu/kernels/add/generic/sve/fp16.cpp"], "integer":["src/cpu/kernels/add/generic/sve/integer.cpp"], + "fp32":["src/cpu/kernels/add/generic/sve/fp32.cpp"], + "fp16":["src/cpu/kernels/add/generic/sve/fp16.cpp"] + }, + "sve2": { "qasymm8": [ "src/cpu/kernels/add/generic/sve2/qasymm8.cpp" ], "qasymm8_signed": [ "src/cpu/kernels/add/generic/sve2/qasymm8_signed.cpp" ], "qsymm16": [ "src/cpu/kernels/add/generic/sve2/qsymm16.cpp" ] @@ -1024,38 +1026,42 @@ "src/cpu/kernels/CpuCol2ImKernel.cpp", "src/cpu/kernels/CpuIm2ColKernel.cpp", "src/cpu/kernels/CpuWeightsReshapeKernel.cpp", - "src/core/NEON/kernels/convolution/common/padding.cpp", - "src/core/NEON/kernels/convolution/common/qasymm8.cpp", - "src/core/NEON/kernels/convolution/common/qsymm8.cpp", - "src/core/NEON/kernels/convolution/common/utils.cpp", - "src/core/NEON/kernels/convolution/winograd/padding.cpp", - "src/core/NEON/kernels/convolution/winograd/winograd.cpp", - "src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_1x8_fp32_fp32_integers.cpp", - "src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_4x4_fp16_fp16_integers.cpp", - "src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_4x4_fp32_fp32_integers.cpp", - "src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_6x6_fp16_fp16_integers.cpp", - "src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_6x6_fp32_fp32_integers.cpp", - "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_2_7_fp32_fp32_integers.cpp", - "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_2x2_3x3_fp32_fp32_integers.cpp", - "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_2x2_5x5_fp32_fp32_integers.cpp", - "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_4_5_fp32_fp32_integers.cpp", - "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_4x4_3x3_fp16_fp16_integers.cpp", - "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_4x4_3x3_fp32_fp32_integers.cpp", - "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_6_3_fp32_fp32_integers.cpp", - "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_2_7_fp32_fp32_integers.cpp", - "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_2x2_3x3_fp32_fp32_integers.cpp", - "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_2x2_5x5_fp32_fp32_integers.cpp", - "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_4_5_fp32_fp32_integers.cpp", - "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_4x4_3x3_fp16_fp16_integers.cpp", - "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_4x4_3x3_fp32_fp32_integers.cpp", - "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_6_3_fp32_fp32_integers.cpp", "src/runtime/NEON/functions/NEConvolutionLayer.cpp", "src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp", "src/runtime/NEON/functions/NEFFTConvolutionLayer.cpp", "src/runtime/NEON/functions/NEGEMMConv2d.cpp", "src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp", "src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp" - ] + ], + "neon": { + "common": [ + "src/core/NEON/kernels/convolution/common/padding.cpp", + "src/core/NEON/kernels/convolution/common/qasymm8.cpp", + "src/core/NEON/kernels/convolution/common/qsymm8.cpp", + "src/core/NEON/kernels/convolution/common/utils.cpp", + "src/core/NEON/kernels/convolution/winograd/padding.cpp", + "src/core/NEON/kernels/convolution/winograd/winograd.cpp", + "src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_1x8_fp32_fp32_integers.cpp", + "src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_4x4_fp16_fp16_integers.cpp", + "src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_4x4_fp32_fp32_integers.cpp", + "src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_6x6_fp16_fp16_integers.cpp", + "src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_6x6_fp32_fp32_integers.cpp", + "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_2_7_fp32_fp32_integers.cpp", + "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_2x2_3x3_fp32_fp32_integers.cpp", + "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_2x2_5x5_fp32_fp32_integers.cpp", + "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_4_5_fp32_fp32_integers.cpp", + "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_4x4_3x3_fp16_fp16_integers.cpp", + "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_4x4_3x3_fp32_fp32_integers.cpp", + "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_6_3_fp32_fp32_integers.cpp", + "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_2_7_fp32_fp32_integers.cpp", + "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_2x2_3x3_fp32_fp32_integers.cpp", + "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_2x2_5x5_fp32_fp32_integers.cpp", + "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_4_5_fp32_fp32_integers.cpp", + "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_4x4_3x3_fp16_fp16_integers.cpp", + "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_4x4_3x3_fp32_fp32_integers.cpp", + "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_6_3_fp32_fp32_integers.cpp" + ] + } } }, "Copy": { @@ -1079,7 +1085,7 @@ "fp32": [ "src/cpu/kernels/crop/generic/neon/fp32.cpp" ], "fp16": [ "src/cpu/kernels/crop/generic/neon/fp16.cpp" ], "integer": [ "src/cpu/kernels/crop/generic/neon/integer.cpp" ] - } + } } }, "Deconv2d": { @@ -1114,68 +1120,72 @@ "src/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp", "src/cpu/kernels/CpuDepthwiseConv2dNativeKernel.cpp", "src/cpu/kernels/internal/CpuDepthwiseConv2dAssemblyWrapperKernel.cpp", - "src/core/NEON/kernels/convolution/common/padding.cpp", - "src/core/NEON/kernels/convolution/common/qasymm8.cpp", - "src/core/NEON/kernels/convolution/common/qsymm8.cpp", - "src/core/NEON/kernels/convolution/common/utils.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp16.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/depthwise_u8q.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/depthwise_u8s8u8q.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/interleaves/8b_mla.cpp", - "src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/depthwise_s8q.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp32.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_generic_output9_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_generic_output9_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_generic_output9_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_generic_output9_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_generic_output9_mla_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/interleaves/a64_s8q_3x3_dot.cpp", - "src/core/NEON/kernels/arm_conv/depthwise/interleaves/a64_u8q_3x3_dot.cpp" + "src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp" ], + "neon": { + "common": [ + "src/core/NEON/kernels/convolution/common/padding.cpp", + "src/core/NEON/kernels/convolution/common/qasymm8.cpp", + "src/core/NEON/kernels/convolution/common/qsymm8.cpp", + "src/core/NEON/kernels/convolution/common/utils.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp16.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/depthwise_u8q.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/depthwise_u8s8u8q.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/interleaves/8b_mla.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/depthwise_s8q.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp32.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_generic_output9_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_generic_output9_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_generic_output9_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_generic_output9_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_generic_output9_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/interleaves/a64_s8q_3x3_dot.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/interleaves/a64_u8q_3x3_dot.cpp" + ] + }, "sve": { "common": [ "src/core/NEON/kernels/arm_conv/depthwise/interleaves/sve_8b_mla.cpp", @@ -1388,92 +1398,94 @@ "src/cpu/operators/CpuGemm.cpp", "src/cpu/operators/CpuGemmLowpOutputStage.cpp", "src/cpu/operators/CpuGemmLowpMatrixMultiplyCore.cpp", - "src/core/NEON/kernels/arm_gemm/gemm_bf16.cpp", - "src/core/NEON/kernels/arm_gemm/gemm_fp16.cpp", - "src/core/NEON/kernels/arm_gemm/gemm_fp32.cpp", - "src/core/NEON/kernels/arm_gemm/gemm_int16.cpp", - "src/core/NEON/kernels/arm_gemm/gemm_int8.cpp", - "src/core/NEON/kernels/arm_gemm/gemm_qint8.cpp", - "src/core/NEON/kernels/arm_gemm/gemm_quint8.cpp", - "src/core/NEON/kernels/arm_gemm/gemm_uint16.cpp", - "src/core/NEON/kernels/arm_gemm/gemm_uint8.cpp", - "src/core/NEON/kernels/arm_gemm/interleave_indirect.cpp", - "src/core/NEON/kernels/arm_gemm/mergeresults-fp16.cpp", - "src/core/NEON/kernels/arm_gemm/mergeresults.cpp", - "src/core/NEON/kernels/arm_gemm/misc.cpp", - "src/core/NEON/kernels/arm_gemm/quantized.cpp", - "src/core/NEON/kernels/arm_gemm/rowsum_indirect_s8.cpp", - "src/core/NEON/kernels/arm_gemm/rowsum_indirect_u8.cpp", - "src/core/NEON/kernels/arm_gemm/transform.cpp", "src/runtime/NEON/functions/NEGEMM.cpp", "src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp", - "src/runtime/NEON/functions/NEGEMMLowpOutputStage.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s16_8x12/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_4x4/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12/a55r1.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12/x1.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u16_8x12/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_4x4/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/a55r1.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/x1.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/a55r1.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/x1.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_bf16fp32_dot_6x16/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_bf16fp32_mmla_6x16/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16_mla_6x32/a55.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16_mla_6x32/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_4x24/a55.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_4x24/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_6x16/a55.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_6x16/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_8x4/a55.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_8x4/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32bf16fp32_mmla_4x24/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32bf16fp32_mmla_6x16/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_dot_4x16/a55.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_dot_4x16/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_mmla_4x16/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_dot_6x16/a55.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_dot_6x16/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_mmla_6x16/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_dot_6x16/a55.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_dot_6x16/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_mmla_6x16/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_dot_4x16/a55.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_dot_4x16/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_mmla_4x16/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_dot_6x16/a55.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_dot_6x16/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_mmla_6x16/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_dot_8x12/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_dot_8x12/x1.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_mmla_8x12/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_mmla_8x12/a510.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_s8s32_mmla_8x12/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_s8s32_mmla_8x12/a510.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_u8u32_mmla_8x12/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_u8u32_mmla_8x12/a510.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a53.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a55.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a55r1.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/x1.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x6/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_fp32_mla_6x4/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_fp32_mla_8x4/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_6x4/a55.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_6x4/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_8x4/a55.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_8x4/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_6x4/a55.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_6x4/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_8x4/a55.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_8x4/generic.cpp" + "src/runtime/NEON/functions/NEGEMMLowpOutputStage.cpp" ], "neon": { + "common": [ + "src/core/NEON/kernels/arm_gemm/gemm_bf16.cpp", + "src/core/NEON/kernels/arm_gemm/gemm_fp16.cpp", + "src/core/NEON/kernels/arm_gemm/gemm_fp32.cpp", + "src/core/NEON/kernels/arm_gemm/gemm_int16.cpp", + "src/core/NEON/kernels/arm_gemm/gemm_int8.cpp", + "src/core/NEON/kernels/arm_gemm/gemm_qint8.cpp", + "src/core/NEON/kernels/arm_gemm/gemm_quint8.cpp", + "src/core/NEON/kernels/arm_gemm/gemm_uint16.cpp", + "src/core/NEON/kernels/arm_gemm/gemm_uint8.cpp", + "src/core/NEON/kernels/arm_gemm/interleave_indirect.cpp", + "src/core/NEON/kernels/arm_gemm/mergeresults-fp16.cpp", + "src/core/NEON/kernels/arm_gemm/mergeresults.cpp", + "src/core/NEON/kernels/arm_gemm/misc.cpp", + "src/core/NEON/kernels/arm_gemm/quantized.cpp", + "src/core/NEON/kernels/arm_gemm/rowsum_indirect_s8.cpp", + "src/core/NEON/kernels/arm_gemm/rowsum_indirect_u8.cpp", + "src/core/NEON/kernels/arm_gemm/transform.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s16_8x12/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_4x4/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12/a55r1.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12/x1.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u16_8x12/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_4x4/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/a55r1.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/x1.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/a55r1.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/x1.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_bf16fp32_dot_6x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_bf16fp32_mmla_6x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16_mla_6x32/a55.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16_mla_6x32/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_4x24/a55.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_4x24/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_6x16/a55.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_6x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_8x4/a55.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_8x4/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32bf16fp32_mmla_4x24/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32bf16fp32_mmla_6x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_dot_4x16/a55.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_dot_4x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_mmla_4x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_dot_6x16/a55.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_dot_6x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_mmla_6x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_dot_6x16/a55.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_dot_6x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_mmla_6x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_dot_4x16/a55.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_dot_4x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_mmla_4x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_dot_6x16/a55.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_dot_6x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_mmla_6x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_dot_8x12/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_dot_8x12/x1.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_mmla_8x12/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_mmla_8x12/a510.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_s8s32_mmla_8x12/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_s8s32_mmla_8x12/a510.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_u8u32_mmla_8x12/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_u8u32_mmla_8x12/a510.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a53.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a55.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a55r1.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/x1.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x6/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_fp32_mla_6x4/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_fp32_mla_8x4/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_6x4/a55.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_6x4/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_8x4/a55.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_8x4/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_6x4/a55.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_6x4/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_8x4/a55.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_8x4/generic.cpp" + ], "estate32": [ "src/core/NEON/kernels/arm_gemm/kernels/a32_sgemm_8x6/a53.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a32_sgemm_8x6/a55r1.cpp", @@ -1677,36 +1689,38 @@ "src/cpu/operators/CpuPool2d.cpp", "src/cpu/kernels/CpuPool2dKernel.cpp", "src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/cpp_nhwc_1x1_stride_any_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/pooling/pooling_fp16.cpp", - "src/core/NEON/kernels/arm_conv/pooling/pooling_fp32.cpp", - "src/core/NEON/kernels/arm_conv/pooling/pooling_s8.cpp", - "src/core/NEON/kernels/arm_conv/pooling/pooling_s8q.cpp", - "src/core/NEON/kernels/arm_conv/pooling/pooling_u8.cpp", - "src/core/NEON/kernels/arm_conv/pooling/pooling_u8q.cpp", - "src/runtime/NEON/functions/NEPoolingLayer.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_avg_generic_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_max_generic_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_avg_generic_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_max_generic_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_avg_generic_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_max_generic_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8q_nhwc_avg_generic_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8q_nhwc_max_generic_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_avg_generic_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_max_generic_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8q_nhwc_avg_generic_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8q_nhwc_max_generic_depthfirst/generic.cpp", "src/cpu/kernels/pool2d/neon/qasymm8.cpp", - "src/cpu/kernels/pool2d/neon/qasymm8_signed.cpp" + "src/cpu/kernels/pool2d/neon/qasymm8_signed.cpp", + "src/runtime/NEON/functions/NEPoolingLayer.cpp" ], "neon": { + "common": [ + "src/core/NEON/kernels/arm_conv/pooling/kernels/cpp_nhwc_1x1_stride_any_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/pooling_fp16.cpp", + "src/core/NEON/kernels/arm_conv/pooling/pooling_fp32.cpp", + "src/core/NEON/kernels/arm_conv/pooling/pooling_s8.cpp", + "src/core/NEON/kernels/arm_conv/pooling/pooling_s8q.cpp", + "src/core/NEON/kernels/arm_conv/pooling/pooling_u8.cpp", + "src/core/NEON/kernels/arm_conv/pooling/pooling_u8q.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_avg_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_max_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_avg_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_max_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_avg_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_max_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8q_nhwc_avg_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8q_nhwc_max_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_avg_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_max_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8q_nhwc_avg_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8q_nhwc_max_generic_depthfirst/generic.cpp" + ], "nchw": [ "src/cpu/kernels/pool2d/neon/nchw/all.cpp" ], "fp16": [ "src/cpu/kernels/pool2d/neon/fp16.cpp" ], "fp32": [ "src/cpu/kernels/pool2d/neon/fp32.cpp" ] diff --git a/src/cpu/kernels/add/generic/sve/fp16.cpp b/src/cpu/kernels/add/generic/sve/fp16.cpp index 28f4d2ba8e..7dc3142d3d 100644 --- a/src/cpu/kernels/add/generic/sve/fp16.cpp +++ b/src/cpu/kernels/add/generic/sve/fp16.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Arm Limited. + * Copyright (c) 2021-2022 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/src/cpu/kernels/add/generic/sve/fp32.cpp b/src/cpu/kernels/add/generic/sve/fp32.cpp index 8f651b3ed2..5383b887ba 100644 --- a/src/cpu/kernels/add/generic/sve/fp32.cpp +++ b/src/cpu/kernels/add/generic/sve/fp32.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Arm Limited. + * Copyright (c) 2021-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -36,4 +36,4 @@ void add_fp32_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, const } } } // namespace arm_compute -#endif //ARM_COMPUTE_ENABLE_SVE +#endif //ARM_COMPUTE_ENABLE_SVE \ No newline at end of file diff --git a/src/cpu/kernels/add/generic/sve/impl.cpp b/src/cpu/kernels/add/generic/sve/impl.cpp index 52429bbe1e..615f346dff 100644 --- a/src/cpu/kernels/add/generic/sve/impl.cpp +++ b/src/cpu/kernels/add/generic/sve/impl.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Arm Limited. + * Copyright (c) 2021-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -133,4 +133,4 @@ template void add_same_sve(const ITensor *src0, const ITensor *src1, #endif /* (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) */ } // namespace cpu } // namespace arm_compute -#endif // ARM_COMPUTE_ENABLE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE \ No newline at end of file diff --git a/src/cpu/kernels/add/generic/sve/impl.h b/src/cpu/kernels/add/generic/sve/impl.h index 59f39e90c9..219dbc9b2e 100644 --- a/src/cpu/kernels/add/generic/sve/impl.h +++ b/src/cpu/kernels/add/generic/sve/impl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Arm Limited. + * Copyright (c) 2021-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -37,4 +37,4 @@ void add_same_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, const } // namespace cpu } // namespace arm_compute #endif // SRC_CORE_SVE_KERNELS_ADD_IMPL_H -#endif // ARM_COMPUTE_ENABLE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE \ No newline at end of file diff --git a/src/cpu/kernels/add/generic/sve/integer.cpp b/src/cpu/kernels/add/generic/sve/integer.cpp index d197717cf0..1d25d3463a 100644 --- a/src/cpu/kernels/add/generic/sve/integer.cpp +++ b/src/cpu/kernels/add/generic/sve/integer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Arm Limited. + * Copyright (c) 2021-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -46,4 +46,4 @@ void add_s32_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, const C } } } // namespace arm_compute -#endif //(ARM_COMPUTE_ENABLE_SVE) +#endif //(ARM_COMPUTE_ENABLE_SVE) \ No newline at end of file diff --git a/src/cpu/kernels/add/generic/sve2/qasymm8.cpp b/src/cpu/kernels/add/generic/sve2/qasymm8.cpp index c61089e937..5fe9b95a48 100644 --- a/src/cpu/kernels/add/generic/sve2/qasymm8.cpp +++ b/src/cpu/kernels/add/generic/sve2/qasymm8.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021 Arm Limited. + * Copyright (c) 2020-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,7 +22,6 @@ * SOFTWARE. */ #if defined(ARM_COMPUTE_ENABLE_SVE2) - #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Types.h" @@ -180,4 +179,4 @@ void add_qasymm8_sve2(const ITensor *src0, const ITensor *src1, ITensor *dst, co } } // namespace cpu } // namespace arm_compute -#endif //ARM_COMPUTE_ENABLE_SVE2 +#endif //ARM_COMPUTE_ENABLE_SVE2 \ No newline at end of file diff --git a/src/cpu/kernels/add/generic/sve2/qasymm8_signed.cpp b/src/cpu/kernels/add/generic/sve2/qasymm8_signed.cpp index 9ac138aaef..9135dfdcf6 100644 --- a/src/cpu/kernels/add/generic/sve2/qasymm8_signed.cpp +++ b/src/cpu/kernels/add/generic/sve2/qasymm8_signed.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021 Arm Limited. + * Copyright (c) 2020-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -178,4 +178,4 @@ void add_qasymm8_signed_sve2(const ITensor *src0, const ITensor *src1, ITensor * } } // namespace cpu } // namespace arm_compute -#endif //ARM_COMPUTE_ENABLE_SVE2 +#endif //(ARM_COMPUTE_ENABLE_SVE2) \ No newline at end of file diff --git a/src/cpu/kernels/add/generic/sve2/qsymm16.cpp b/src/cpu/kernels/add/generic/sve2/qsymm16.cpp index f148872c17..723d2a6c98 100644 --- a/src/cpu/kernels/add/generic/sve2/qsymm16.cpp +++ b/src/cpu/kernels/add/generic/sve2/qsymm16.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021 Arm Limited. + * Copyright (c) 2020-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -153,4 +153,4 @@ void add_qsymm16_sve2(const ITensor *src0, const ITensor *src1, ITensor *dst, co } } // namespace cpu } // namespace arm_compute -#endif //ARM_COMPUTE_ENABLE_SVE2 +#endif //ARM_COMPUTE_ENABLE_SVE2 \ No newline at end of file -- cgit v1.2.1