From 20fca524baf99402f742ce38c538f2fd07d5fff9 Mon Sep 17 00:00:00 2001 From: Michalis Spyrou Date: Mon, 7 Jun 2021 14:23:57 +0100 Subject: Create core library using high priority operators A smaller core library is created using a subset of the operators. Changed the structure of filelist.json in order to include more information about the kernels and make the selection easier. Resolves: COMPMID-4514 Change-Id: I079ca7d8e64346174eebdd13b834e1dd4dc36ca2 Signed-off-by: Michalis Spyrou Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5786 Tested-by: Arm Jenkins Reviewed-by: Georgios Pinitas Comments-Addressed: Arm Jenkins --- .gitignore | 1 + Android.bp | 2 +- SConscript | 187 +- SConstruct | 16 +- arm_compute/core/CPP/CPPTypes.h | 57 +- arm_compute/runtime/IScheduler.h | 2 - filelist.json | 2060 +++++++++++++++++--- src/common/cpuinfo/CpuInfo.cpp | 15 +- src/common/cpuinfo/CpuInfo.h | 16 +- src/common/cpuinfo/CpuIsaInfo.cpp | 24 +- src/common/cpuinfo/CpuIsaInfo.h | 6 +- src/common/cpuinfo/target/CpuInfoSveUtils.cpp | 40 - src/common/cpuinfo/target/CpuInfoSveUtils.h | 40 - src/core/CPP/CPPTypes.cpp | 41 +- src/core/NEON/SVEAsymm.h | 6 +- src/core/NEON/SVEAsymm.inl | 6 +- src/core/NEON/SVEMath.h | 4 +- src/core/NEON/SVEMath.inl | 56 +- src/core/NEON/SVESymm.h | 6 +- .../kernels/NEBatchNormalizationLayerKernel.cpp | 19 +- .../kernels/arm_conv/depthwise/depthwise_fp16.cpp | 23 +- .../kernels/arm_conv/depthwise/depthwise_fp32.cpp | 31 +- .../depthwise_implementation_constraints.hpp | 12 + .../kernels/arm_conv/depthwise/depthwise_s8q.cpp | 29 +- .../kernels/arm_conv/depthwise/depthwise_u8q.cpp | 26 +- .../arm_conv/depthwise/depthwise_u8s8u8q.cpp | 17 +- .../arm_conv/depthwise/interleaves/8b_mla.cpp | 4 +- .../arm_conv/depthwise/interleaves/list.hpp | 4 +- .../depthwise/interleaves/sve_s8q_3x3_dot.cpp | 4 +- .../depthwise/interleaves/sve_u8q_3x3_dot.cpp | 4 +- ...e_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp | 4 +- .../generic_direct.cpp | 4 +- 
.../generic_indirect.cpp | 4 +- ...e_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst.hpp | 4 +- .../generic_direct.cpp | 4 +- .../generic_indirect.cpp | 4 +- ...e_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst.hpp | 4 +- .../generic_direct.cpp | 4 +- .../generic_indirect.cpp | 4 +- ...e_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp | 4 +- .../generic_direct.cpp | 4 +- .../generic_indirect.cpp | 4 +- ...e_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp | 4 +- .../generic_direct.cpp | 4 +- .../generic_indirect.cpp | 4 +- ...e_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp | 4 +- .../generic.cpp | 4 +- .../generic_direct.cpp | 4 +- .../generic_indirect.cpp | 4 +- ...hwc_3x3_s1_output2x2_mla_depthfirst_strided.hpp | 4 +- .../generic.cpp | 4 +- ...e_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst.hpp | 4 +- .../generic_direct.cpp | 4 +- .../generic_indirect.cpp | 4 +- ...e_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst.hpp | 4 +- .../generic_direct.cpp | 4 +- .../generic_indirect.cpp | 4 +- ...e_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp | 4 +- .../generic_direct.cpp | 4 +- .../generic_indirect.cpp | 4 +- ...e_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp | 4 +- .../generic_direct.cpp | 4 +- .../generic_indirect.cpp | 4 +- ...ve_fp32_nhwc_generic_output9_mla_depthfirst.hpp | 4 +- .../generic.cpp | 4 +- ...s2_with_multiplier_output3x3_mla_depthfirst.hpp | 4 +- .../generic.cpp | 4 +- ...s1_with_multiplier_output2x4_mla_depthfirst.hpp | 4 +- .../generic.cpp | 4 +- ...ic_with_multiplier_output2x8_mla_depthfirst.hpp | 4 +- .../generic.cpp | 4 +- ...ve_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst.hpp | 4 +- .../generic.cpp | 4 +- ...ve_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp | 4 +- .../generic.cpp | 4 +- ...ve_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp | 4 +- .../generic.cpp | 4 +- ...ve_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp | 4 +- .../generic.cpp | 4 +- ...s2_with_multiplier_output2x4_dot_depthfirst.hpp | 4 +- .../generic.cpp | 4 +- ...s1_with_multiplier_output4x2_dot_depthfirst.hpp | 4 
+- .../generic.cpp | 4 +- ...e_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst.hpp | 4 +- .../generic.cpp | 4 +- ...ve_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst.hpp | 4 +- .../generic.cpp | 4 +- ...ve_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp | 4 +- .../generic.cpp | 4 +- ...ve_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp | 4 +- .../generic.cpp | 4 +- ...ve_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp | 4 +- .../generic.cpp | 4 +- ...s2_with_multiplier_output2x4_dot_depthfirst.hpp | 4 +- .../generic.cpp | 4 +- ...s1_with_multiplier_output4x2_dot_depthfirst.hpp | 4 +- .../generic.cpp | 4 +- ...8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp | 4 +- .../generic.cpp | 4 +- ...8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp | 4 +- .../generic.cpp | 4 +- ...8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp | 4 +- .../generic.cpp | 4 +- ...e_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst.hpp | 4 +- .../generic.cpp | 4 +- .../sve_fp16_nhwc_avg_generic_depthfirst.hpp | 4 +- .../generic.cpp | 4 +- ...e_fp16_nhwc_max_2x2_s1_output2x2_depthfirst.hpp | 4 +- .../generic.cpp | 4 +- .../sve_fp16_nhwc_max_generic_depthfirst.hpp | 4 +- .../generic.cpp | 4 +- ...e_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst.hpp | 4 +- .../generic.cpp | 4 +- .../sve_fp32_nhwc_avg_generic_depthfirst.hpp | 4 +- .../generic.cpp | 4 +- ...e_fp32_nhwc_max_2x2_s1_output2x2_depthfirst.hpp | 4 +- .../generic.cpp | 4 +- .../sve_fp32_nhwc_max_generic_depthfirst.hpp | 4 +- .../generic.cpp | 4 +- .../kernels/sve_s8_nhwc_avg_generic_depthfirst.hpp | 4 +- .../sve_s8_nhwc_avg_generic_depthfirst/generic.cpp | 4 +- ...sve_s8_nhwc_max_2x2_s1_output2x2_depthfirst.hpp | 4 +- .../generic.cpp | 4 +- .../kernels/sve_s8_nhwc_max_generic_depthfirst.hpp | 4 +- .../sve_s8_nhwc_max_generic_depthfirst/generic.cpp | 4 +- .../sve_s8q_nhwc_avg_generic_depthfirst.hpp | 4 +- .../generic.cpp | 4 +- .../sve_s8q_nhwc_max_generic_depthfirst.hpp | 4 +- .../generic.cpp | 4 +- .../kernels/sve_u8_nhwc_avg_generic_depthfirst.hpp | 4 +- 
.../sve_u8_nhwc_avg_generic_depthfirst/generic.cpp | 4 +- ...sve_u8_nhwc_max_2x2_s1_output2x2_depthfirst.hpp | 4 +- .../generic.cpp | 4 +- .../kernels/sve_u8_nhwc_max_generic_depthfirst.hpp | 4 +- .../sve_u8_nhwc_max_generic_depthfirst/generic.cpp | 4 +- .../sve_u8q_nhwc_avg_generic_depthfirst.hpp | 4 +- .../generic.cpp | 4 +- .../sve_u8q_nhwc_max_generic_depthfirst.hpp | 4 +- .../generic.cpp | 4 +- .../NEON/kernels/arm_conv/pooling/pooling_fp16.cpp | 32 +- .../NEON/kernels/arm_conv/pooling/pooling_fp32.cpp | 24 +- .../NEON/kernels/arm_conv/pooling/pooling_s8.cpp | 24 +- .../NEON/kernels/arm_conv/pooling/pooling_s8q.cpp | 12 +- .../NEON/kernels/arm_conv/pooling/pooling_u8.cpp | 24 +- .../NEON/kernels/arm_conv/pooling/pooling_u8q.cpp | 12 +- src/core/NEON/kernels/arm_gemm/gemm_bf16.cpp | 18 +- src/core/NEON/kernels/arm_gemm/gemm_fp16.cpp | 2 +- src/core/NEON/kernels/arm_gemm/gemm_fp32.cpp | 16 +- src/core/NEON/kernels/arm_gemm/gemm_int8.cpp | 14 +- src/core/NEON/kernels/arm_gemm/gemm_qint8.cpp | 22 +- src/core/NEON/kernels/arm_gemm/gemm_quint8.cpp | 16 +- src/core/NEON/kernels/arm_gemm/gemm_uint8.cpp | 10 +- .../NEON/kernels/arm_gemm/interleave_indirect.cpp | 16 +- .../kernels/sve_gemv_fp32_mla_8VL/generic.cpp | 2 +- .../kernels/sve_hybrid_bf16fp32_dot_6x4VL.hpp | 4 +- .../sve_hybrid_bf16fp32_dot_6x4VL/generic.cpp | 4 +- .../arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL.hpp | 4 +- .../kernels/sve_hybrid_fp16_mla_6x4VL/generic.cpp | 4 +- .../arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL.hpp | 4 +- .../kernels/sve_hybrid_fp32_mla_6x4VL/generic.cpp | 4 +- .../arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL.hpp | 4 +- .../kernels/sve_hybrid_fp32_mla_8x1VL/generic.cpp | 4 +- .../arm_gemm/kernels/sve_hybrid_s8qa_dot_4x4VL.hpp | 4 +- .../kernels/sve_hybrid_s8qa_dot_4x4VL/generic.cpp | 4 +- .../arm_gemm/kernels/sve_hybrid_s8qs_dot_6x4VL.hpp | 4 +- .../kernels/sve_hybrid_s8qs_dot_6x4VL/generic.cpp | 4 +- .../kernels/sve_hybrid_s8s32_dot_6x4VL.hpp | 4 +- 
.../kernels/sve_hybrid_s8s32_dot_6x4VL/generic.cpp | 4 +- .../arm_gemm/kernels/sve_hybrid_u8qa_dot_4x4VL.hpp | 4 +- .../kernels/sve_hybrid_u8qa_dot_4x4VL/generic.cpp | 4 +- .../kernels/sve_hybrid_u8u32_dot_6x4VL.hpp | 4 +- .../kernels/sve_hybrid_u8u32_dot_6x4VL/generic.cpp | 4 +- .../kernels/sve_interleaved_bf16fp32_dot_8x3VL.hpp | 4 +- .../sve_interleaved_bf16fp32_dot_8x3VL/generic.cpp | 4 +- .../sve_interleaved_bf16fp32_mmla_8x3VL.hpp | 4 +- .../generic.cpp | 4 +- .../kernels/sve_interleaved_fp16_mla_8x3VL.hpp | 4 +- .../sve_interleaved_fp16_mla_8x3VL/generic.cpp | 4 +- .../kernels/sve_interleaved_fp32_mla_8x3VL.hpp | 4 +- .../sve_interleaved_fp32_mla_8x3VL/generic.cpp | 4 +- .../kernels/sve_interleaved_fp32_mmla_8x3VL.hpp | 4 +- .../sve_interleaved_fp32_mmla_8x3VL/generic.cpp | 4 +- .../kernels/sve_interleaved_s8s32_dot_8x3VL.hpp | 4 +- .../sve_interleaved_s8s32_dot_8x3VL/generic.cpp | 4 +- .../kernels/sve_interleaved_s8s32_mmla_8x3VL.hpp | 4 +- .../sve_interleaved_s8s32_mmla_8x3VL/generic.cpp | 4 +- .../kernels/sve_interleaved_u8u32_dot_8x3VL.hpp | 4 +- .../sve_interleaved_u8u32_dot_8x3VL/generic.cpp | 4 +- .../kernels/sve_interleaved_u8u32_mmla_8x3VL.hpp | 4 +- .../sve_interleaved_u8u32_mmla_8x3VL/generic.cpp | 4 +- .../kernels/sve_smallK_hybrid_fp32_mla_8x1VL.hpp | 4 +- .../sve_smallK_hybrid_fp32_mla_8x1VL/generic.cpp | 4 +- .../kernels/sve_smallK_hybrid_s8s32_dot_8x1VL.hpp | 4 +- .../sve_smallK_hybrid_s8s32_dot_8x1VL/generic.cpp | 4 +- .../kernels/sve_smallK_hybrid_u8u32_dot_8x1VL.hpp | 4 +- .../sve_smallK_hybrid_u8u32_dot_8x1VL/generic.cpp | 4 +- .../NEON/kernels/arm_gemm/mergeresults-sve.cpp | 41 + src/core/NEON/kernels/arm_gemm/mergeresults.cpp | 6 +- src/core/NEON/kernels/arm_gemm/merges/list-sve.hpp | 28 + src/core/NEON/kernels/arm_gemm/merges/list.hpp | 6 +- .../arm_gemm/merges/sve_merge_fp16_3VLx8.hpp | 4 +- .../arm_gemm/merges/sve_merge_fp32_3VLx8.hpp | 4 +- .../arm_gemm/merges/sve_merge_s32_3VLx8.hpp | 4 +- 
.../arm_gemm/merges/sve_merge_u32_3VLx8.hpp | 4 +- src/core/NEON/kernels/arm_gemm/utils.hpp | 50 +- .../kernels/batchnormalization/impl/SVE/fp16.cpp | 2 +- .../kernels/batchnormalization/impl/SVE/fp32.cpp | 2 +- src/core/NEON/wrapper/svtraits.h | 4 +- src/core/NEON/wrapper/traits.h | 8 +- src/core/common/Registrars.h | 54 +- src/core/cpu/kernels/CpuActivationKernel.cpp | 30 +- src/core/cpu/kernels/CpuAddKernel.cpp | 120 +- src/core/cpu/kernels/CpuElementwiseKernel.cpp | 231 ++- src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp | 8 +- src/core/cpu/kernels/CpuScaleKernel.cpp | 29 +- src/core/cpu/kernels/CpuSoftmaxKernel.cpp | 49 +- src/core/cpu/kernels/activation/sve/qasymm8.cpp | 5 +- .../cpu/kernels/activation/sve/qasymm8_signed.cpp | 4 +- src/core/cpu/kernels/activation/sve/qsymm16.cpp | 4 +- src/core/cpu/kernels/add/sve/impl.h | 4 +- src/core/cpu/kernels/add/sve/list.h | 4 +- src/core/cpu/kernels/add/sve/qasymm8.cpp | 6 +- src/core/cpu/kernels/add/sve/qasymm8_signed.cpp | 6 +- src/core/cpu/kernels/add/sve/qsymm16.cpp | 6 +- .../cpu/kernels/elementwise/sve/elementwise_list.h | 4 +- .../elementwise/sve/elementwise_quantized_list.h | 4 +- .../elementwise/sve/elementwise_unary_list.h | 4 +- src/core/cpu/kernels/scale/sve/fp16.cpp | 4 +- src/core/cpu/kernels/scale/sve/fp32.cpp | 4 +- src/core/cpu/kernels/scale/sve/integer.cpp | 4 +- src/core/cpu/kernels/scale/sve/qasymm8.cpp | 4 +- src/core/cpu/kernels/scale/sve/qasymm8_signed.cpp | 4 +- src/core/cpu/kernels/softmax/impl/sve/impl.cpp | 4 +- src/core/cpu/kernels/softmax/impl/sve/list.h | 8 +- src/cpu/CpuContext.cpp | 67 +- src/cpu/CpuContext.h | 14 +- src/runtime/CPP/CPPScheduler.cpp | 2 +- src/runtime/CPP/SingleThreadScheduler.cpp | 6 +- src/runtime/IScheduler.cpp | 5 +- src/runtime/NEON/functions/NEFFT2D.cpp | 5 +- src/runtime/OMP/OMPScheduler.cpp | 4 +- tests/validation/cpu/unit/Context.cpp | 10 +- 242 files changed, 3121 insertions(+), 1365 deletions(-) delete mode 100644 
src/common/cpuinfo/target/CpuInfoSveUtils.cpp delete mode 100644 src/common/cpuinfo/target/CpuInfoSveUtils.h create mode 100644 src/core/NEON/kernels/arm_gemm/mergeresults-sve.cpp create mode 100644 src/core/NEON/kernels/arm_gemm/merges/list-sve.hpp diff --git a/.gitignore b/.gitignore index e917c499f0..2e7f887550 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,7 @@ build/ *.txt *.xml *.embed +*.diff # Generated Android bp file Generated_Android.bp diff --git a/Android.bp b/Android.bp index 7dc764de93..ce3420518e 100644 --- a/Android.bp +++ b/Android.bp @@ -64,7 +64,6 @@ cc_library_static { "src/common/cpuinfo/CpuInfo.cpp", "src/common/cpuinfo/CpuIsaInfo.cpp", "src/common/cpuinfo/CpuModel.cpp", - "src/common/cpuinfo/target/CpuInfoSveUtils.cpp", "src/common/utils/LegacySupport.cpp", "src/core/AccessWindowAutoPadding.cpp", "src/core/AccessWindowStatic.cpp", @@ -224,6 +223,7 @@ cc_library_static { "src/core/NEON/kernels/arm_gemm/gemm_uint16.cpp", "src/core/NEON/kernels/arm_gemm/gemm_uint8.cpp", "src/core/NEON/kernels/arm_gemm/interleave_indirect.cpp", + "src/core/NEON/kernels/arm_gemm/mergeresults-sve.cpp", "src/core/NEON/kernels/arm_gemm/mergeresults.cpp", "src/core/NEON/kernels/arm_gemm/misc.cpp", "src/core/NEON/kernels/arm_gemm/quantized.cpp", diff --git a/SConscript b/SConscript index 3e834e347c..2441d1870f 100644 --- a/SConscript +++ b/SConscript @@ -39,6 +39,7 @@ Import('vars') Import('install_lib') def build_bootcode_objs(sources): + arm_compute_env.Append(ASFLAGS = "-I bootcode/") obj = arm_compute_env.Object(sources) obj = install_lib(obj) @@ -46,6 +47,7 @@ def build_bootcode_objs(sources): return obj def build_sve_objs(sources): + tmp_env = arm_compute_env.Clone() tmp_env.Append(CXXFLAGS = "-march=armv8.2-a+sve+fp16") obj = tmp_env.SharedObject(sources) @@ -53,6 +55,13 @@ def build_sve_objs(sources): Default(obj) return obj +def build_objs(sources): + + obj = arm_compute_env.SharedObject(sources) + obj = install_lib(obj) + Default(obj) + return obj + def 
build_library(name, build_env, sources, static=False, libs=[]): if static: obj = build_env.StaticLibrary(name, source=sources, LIBS = arm_compute_env["LIBS"] + libs) @@ -146,6 +155,73 @@ def create_version_file(target, source, env): with open(target[0].get_path(), "w") as fd: fd.write(build_info) +def get_cpu_runtime_files(operator): + file_list = [] + operators = filelist['cpu']['operators'] + + if "operator" in operators[operator]["files"]: + file_list += operators[operator]["files"]["operator"] + return file_list + +def get_gpu_runtime_files(operator): + file_list = [] + operators = filelist['gpu']['operators'] + + if "operator" in operators[operator]["files"]: + file_list += operators[operator]["files"]["operator"] + return file_list + +def get_cpu_kernel_files(operator): + + file_list = [] + file_list_sve = [] + operators = filelist['cpu']['operators'] + + if env['estate'] == '64' and "neon" in operators[operator]['files'] and "estate64" in operators[operator]['files']['neon']: + file_list += operators[operator]['files']['neon']['estate64'] + if env['estate'] == '32' and "neon" in operators[operator]['files'] and "estate32" in operators[operator]['files']['neon']: + file_list += operators[operator]['files']['neon']['estate32'] + + if "kernel" in operators[operator]["files"]: + file_list += operators[operator]["files"]["kernel"] + + if ("neon" in operators[operator]["files"]): + if any(i in env['data_type_support'] for i in ['all', 'qasymm8']) and ("qasymm8" in operators[operator]["files"]["neon"]): + file_list += operators[operator]["files"]["neon"]["qasymm8"] + if any(i in env['data_type_support'] for i in ['all', 'qasymm8_signed']) and ("qasymm8_signed" in operators[operator]["files"]["neon"]): + file_list += operators[operator]["files"]["neon"]["qasymm8_signed"] + if any(i in env['data_type_support'] for i in ['all', 'qsymm16']) and ("qsymm16" in operators[operator]["files"]["neon"]): + file_list += operators[operator]["files"]["neon"]["qsymm16"] + if any(i 
in env['data_type_support'] for i in ['all', 'integer']) and ("integer" in operators[operator]["files"]["neon"]): + file_list += operators[operator]["files"]["neon"]["integer"] + + if (not "sve" in env['arch'] or env['fat_binary']) and ("neon" in operators[operator]["files"]): + if any(i in env['data_type_support'] for i in ['all', 'fp16']) and ("fp16" in operators[operator]["files"]["neon"]): + file_list += operators[operator]["files"]["neon"]["fp16"] + if any(i in env['data_type_support'] for i in ['all', 'fp32']) and ("fp32" in operators[operator]["files"]["neon"]): + file_list += operators[operator]["files"]["neon"]["fp32"] + if any(i in env['data_layout_support'] for i in ['all', 'nchw']) and ("nchw" in operators[operator]["files"]["neon"]): + file_list += operators[operator]['files']['neon']['nchw'] + if ("all" in operators[operator]["files"]["neon"]): + file_list += operators[operator]["files"]["neon"]["all"] + if ("sve" in env['arch'] or env['fat_binary']) and ("sve" in operators[operator]["files"]): + if any(i in env['data_type_support'] for i in ['all', 'fp16']) and ("fp16" in operators[operator]["files"]["sve"]): + file_list_sve += operators[operator]["files"]["sve"]["fp16"] + if any(i in env['data_type_support'] for i in ['all', 'fp32']) and ("fp32" in operators[operator]["files"]["sve"]): + file_list_sve += operators[operator]["files"]["sve"]["fp32"] + if any(i in env['data_type_support'] for i in ['all', 'qasymm8']) and ("qasymm8" in operators[operator]["files"]["sve"]): + file_list_sve += operators[operator]["files"]["sve"]["qasymm8"] + if any(i in env['data_type_support'] for i in ['all', 'qasymm8_signed']) and ("qasymm8_signed" in operators[operator]["files"]["sve"]): + file_list_sve += operators[operator]["files"]["sve"]["qasymm8_signed"] + if any(i in env['data_type_support'] for i in ['all', 'qsymm16']) and ("qsymm16" in operators[operator]["files"]["sve"]): + file_list_sve += operators[operator]["files"]["sve"]["qsymm16"] + if any(i in 
env['data_type_support'] for i in ['all', 'integer']) and ("integer" in operators[operator]["files"]["sve"]): + file_list_sve += operators[operator]["files"]["sve"]["integer"] + if ("all" in operators[operator]["files"]["sve"]): + file_list_sve += operators[operator]["files"]["sve"]["all"] + + return file_list, file_list_sve + arm_compute_env = env.Clone() version_file = arm_compute_env.Command("src/core/arm_compute_version.embed", "", action=create_version_file) arm_compute_env.AlwaysBuild(version_file) @@ -208,6 +284,11 @@ if env['opencl']: # Common backend files core_files += filelist['common'] +# Initialize high priority core files +core_files_hp = core_files +core_files_sve_hp = [] +core_files = [] + runtime_files += Glob('src/runtime/CPP/SingleThreadScheduler.cpp') graph_files = Glob('src/graph/*.cpp') @@ -220,16 +301,6 @@ if env['openmp']: runtime_files += Glob('src/runtime/OMP/OMPScheduler.cpp') if env['opencl']: - cl_kernel_hp_files = ['src/core/gpu/cl/kernels/gemm/ClGemmHelpers.cpp', - 'src/core/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeBifrost.cpp', - 'src/core/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeMidgard.cpp', - 'src/core/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeValhall.cpp', - 'src/core/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedBifrost.cpp', - 'src/core/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedValhall.cpp', - 'src/core/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyBifrost.cpp', - 'src/core/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyValhall.cpp', - ] - core_files += cl_kernel_hp_files core_files += Glob('src/core/CL/*.cpp') core_files += Glob('src/core/gpu/cl/*.cpp') @@ -238,26 +309,24 @@ if env['opencl']: runtime_files += Glob('src/runtime/CL/gemm/*.cpp') runtime_files += Glob('src/runtime/CL/tuners/*.cpp') runtime_files += Glob('src/runtime/gpu/cl/*.cpp') - runtime_files += Glob('src/runtime/gpu/cl/operators/*.cpp') runtime_files += 
Glob('src/runtime/CL/mlgo/*.cpp') runtime_files += Glob('src/runtime/CL/gemm_auto_heuristics/*.cpp') runtime_files += Glob('src/gpu/cl/*.cpp') graph_files += Glob('src/graph/backends/CL/*.cpp') - core_files += filelist['gpu']['core']['kernels']['high_priority'] + filelist['gpu']['core']['kernels']['all'] + operators = filelist['gpu']['operators'] + for operator in operators: + runtime_files += get_gpu_runtime_files(operator) + if "kernel" in operators[operator]["files"]: + core_files += operators[operator]["files"]["kernel"] sve_o = [] core_files_sve = [] if env['neon']: core_files += Glob('src/core/NEON/*.cpp') - core_files += Glob('src/core/NEON/kernels/*.cpp') - - core_files += Glob('src/core/NEON/kernels/arm_gemm/*.cpp') # build winograd/depthwise sources for either v7a / v8a - core_files += Glob('src/core/NEON/kernels/convolution/*/*.cpp') - core_files += Glob('src/core/NEON/kernels/convolution/winograd/*/*.cpp') arm_compute_env.Append(CPPPATH = ["src/core/NEON/kernels/convolution/common/", "src/core/NEON/kernels/convolution/winograd/", "src/core/NEON/kernels/convolution/depthwise/", @@ -267,64 +336,22 @@ if env['neon']: graph_files += Glob('src/graph/backends/NEON/*.cpp') - if env['estate'] == '32': - core_files += Glob('src/core/NEON/kernels/arm_gemm/kernels/a32_*/*.cpp') - - if env['estate'] == '64': - core_files += Glob('src/core/NEON/kernels/assembly/*.cpp') - core_files += Glob('src/core/NEON/kernels/arm_conv/depthwise/*.cpp') - core_files += Glob('src/core/NEON/kernels/arm_conv/depthwise/kernels/cpp_*/*.cpp') - core_files += Glob('src/core/NEON/kernels/arm_conv/depthwise/interleaves/8b_mla.cpp') - core_files += Glob('src/core/NEON/kernels/arm_conv/pooling/*.cpp') - core_files += Glob('src/core/NEON/kernels/arm_conv/pooling/kernels/cpp_*/*.cpp') - - core_files += Glob('src/core/NEON/kernels/arm_gemm/kernels/a64_*/*.cpp') - core_files += Glob('src/core/NEON/kernels/arm_conv/depthwise/interleaves/a64_*.cpp') - core_files += 
Glob('src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_*/*.cpp') - core_files += Glob('src/core/NEON/kernels/arm_conv/pooling/kernels/a64_*/*.cpp') - if "sve" in env['arch'] or env['fat_binary']: - core_files_sve += filelist['cpu']['core']['sve']['all'] - core_files_sve += Glob('src/core/NEON/kernels/arm_gemm/kernels/sve_*/*.cpp') - core_files += Glob('src/core/NEON/kernels/arm_conv/depthwise/interleaves/sve_*.cpp') - core_files += Glob('src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_*/*.cpp') - core_files_sve += Glob('src/core/NEON/kernels/arm_conv/pooling/kernels/sve_*/*.cpp') - - if any(i in env['data_layout_support'] for i in ['all', 'nchw']): - core_files += filelist['cpu']['core']['neon']['nchw'] - - if any(i in env['data_type_support'] for i in ['all', 'fp16']): - if not "sve" in env['arch'] or env['fat_binary']: - core_files += filelist['cpu']['core']['neon']['fp16'] - if "sve" in env['arch'] or env['fat_binary']: - core_files_sve += filelist['cpu']['core']['sve']['fp16'] - if any(i in env['data_type_support'] for i in ['all', 'fp32']): - if not "sve" in env['arch'] or env['fat_binary']: - core_files += filelist['cpu']['core']['neon']['fp32'] - if "sve" in env['arch'] or env['fat_binary']: - core_files_sve += filelist['cpu']['core']['sve']['fp32'] - if any(i in env['data_type_support'] for i in ['all', 'qasymm8']): - core_files += filelist['cpu']['core']['neon']['qasymm8'] - core_files_sve += filelist['cpu']['core']['sve']['qasymm8'] - if any(i in env['data_type_support'] for i in ['all', 'qasymm8_signed']): - core_files += filelist['cpu']['core']['neon']['qasymm8_signed'] - core_files_sve += filelist['cpu']['core']['sve']['qasymm8_signed'] - if any(i in env['data_type_support'] for i in ['all', 'qsymm16']): - core_files += filelist['cpu']['core']['neon']['qsymm16'] - core_files_sve += filelist['cpu']['core']['sve']['qsymm16'] - if any(i in env['data_type_support'] for i in ['all', 'integer']): - if not "sve" in env['arch'] or env['fat_binary']: - 
core_files += filelist['cpu']['core']['neon']['integer'] - if "sve" in env['arch'] or env['fat_binary']: - core_files_sve += filelist['cpu']['core']['sve']['integer'] - - core_files += Glob('src/core/cpu/kernels/*/*.cpp') - core_files += filelist['cpu']['core']['kernels']['high_priority'] + filelist['cpu']['core']['kernels']['all'] + # Load files based on user's options + operators = filelist['cpu']['operators'] + for operator in operators: + runtime_files += get_cpu_runtime_files(operator) + if operator in filelist['cpu']['high_priority']: + file_list, file_list_sve = get_cpu_kernel_files(operator) + core_files_hp += file_list + core_files_sve_hp += file_list_sve + else: + file_list, file_list_sve = get_cpu_kernel_files(operator) + core_files += file_list + core_files_sve += file_list_sve runtime_files += Glob('src/runtime/NEON/*.cpp') runtime_files += Glob('src/runtime/NEON/functions/*.cpp') - runtime_files += Glob('src/runtime/NEON/functions/assembly/*.cpp') - runtime_files += filelist['cpu']['runtime']['all'] + filelist['cpu']['runtime']['operators']['high_priority'] \ - + filelist['cpu']['runtime']['operators']['all'] + filelist['cpu']['runtime']['operators']['internal'] + runtime_files += filelist['cpu']['all'] bootcode_o = [] if env['os'] == 'bare_metal': @@ -332,19 +359,27 @@ if env['os'] == 'bare_metal': bootcode_o = build_bootcode_objs(bootcode_files) Export('bootcode_o') +high_priority_o = build_objs(core_files_hp) +high_priority_sve_o = [] if (env['fat_binary']): sve_o = build_sve_objs(core_files_sve) - arm_compute_core_a = build_library('arm_compute_core-static', arm_compute_env, core_files + sve_o, static=True) + high_priority_sve_o = build_sve_objs(core_files_sve_hp) + arm_compute_core_a = build_library('arm_compute_core-static', arm_compute_env, core_files + high_priority_o + sve_o + high_priority_sve_o, static=True) else: - arm_compute_core_a = build_library('arm_compute_core-static', arm_compute_env, core_files + core_files_sve, static=True) + 
high_priority_o += build_objs(core_files_sve_hp) + arm_compute_core_a = build_library('arm_compute_core-static', arm_compute_env, core_files + core_files_sve + high_priority_o, static=True) +arm_compute_core_hp_a = build_library('arm_compute_core_hp-static', arm_compute_env, high_priority_o + high_priority_sve_o, static=True) Export('arm_compute_core_a') +Export('arm_compute_core_hp_a') if env['os'] != 'bare_metal' and not env['standalone']: if (env['fat_binary']): - arm_compute_core_so = build_library('arm_compute_core', arm_compute_env, core_files + sve_o, static=False) + arm_compute_core_so = build_library('arm_compute_core', arm_compute_env, core_files + high_priority_o + sve_o + high_priority_sve_o, static=False) else: - arm_compute_core_so = build_library('arm_compute_core', arm_compute_env, core_files + core_files_sve, static=False) + arm_compute_core_so = build_library('arm_compute_core', arm_compute_env, core_files + core_files_sve + high_priority_o, static=False) + arm_compute_core_so_hp = build_library('arm_compute_core_hp', arm_compute_env, high_priority_o + high_priority_sve_o, static=False) Export('arm_compute_core_so') + Export('arm_compute_core_so_hp') arm_compute_a = build_library('arm_compute-static', arm_compute_env, runtime_files, static=True, libs = [ arm_compute_core_a ]) Export('arm_compute_a') diff --git a/SConstruct b/SConstruct index f800d9d105..db6e3e0529 100644 --- a/SConstruct +++ b/SConstruct @@ -211,7 +211,7 @@ if 'v7a' in env['arch']: elif 'v8' in env['arch']: if 'sve2' in env['arch']: env.Append(CXXFLAGS = ['-march=armv8.2-a+sve2+fp16+dotprod']) - env.Append(CPPDEFINES = ['SVE2']) + env.Append(CPPDEFINES = ['ARM_COMPUTE_ENABLE_SVE2']) elif 'sve' in env['arch']: env.Append(CXXFLAGS = ['-march=armv8.2-a+sve+fp16+dotprod']) elif 'armv8r64' in env['arch']: @@ -221,10 +221,10 @@ elif 'v8' in env['arch']: else: env.Append(CXXFLAGS = ['-march=armv8-a']) - if 'v8.6-a' in env['arch']: - env.Append(CPPDEFINES = ['MMLA_INT8', 'V8P6', 
'V8P6_BF', 'ARM_COMPUTE_FORCE_BF16']) + if 'v8.6-a' in env['arch'] or env['fat_binary']: + env.Append(CPPDEFINES = ['ARM_COMPUTE_ENABLE_I8MM', 'ARM_COMPUTE_ENABLE_BF16']) if "disable_mmla_fp" not in env['custom_options']: - env.Append(CPPDEFINES = ['MMLA_FP32']) + env.Append(CPPDEFINES = ['ARM_COMPUTE_ENABLE_SVEF32MM']) elif 'x86' in env['arch']: if env['estate'] == '32': env.Append(CCFLAGS = ['-m32']) @@ -257,9 +257,9 @@ if 'x86' not in env['arch']: prefix = "aarch64-tizen-linux-gnu-" if 'sve' in env['arch']: - env.Append(CXXFLAGS = ['-DENABLE_SVE']) + env.Append(CXXFLAGS = ['-DENABLE_SVE', '-DARM_COMPUTE_ENABLE_SVE']) else: - env.Append(CXXFLAGS = ['-DENABLE_NEON']) + env.Append(CXXFLAGS = ['-DENABLE_NEON', '-DARM_COMPUTE_ENABLE_NEON']) if env['build'] == 'native': prefix = "" @@ -308,8 +308,8 @@ if env['fat_binary']: if env['arch'] != 'armv8.2-a': print("Currently fat binary is only supported with armv8.2-a") Exit(1) - env.Append(CXXFLAGS = ['-DENABLE_SVE']) - env.Append(CXXFLAGS = ['-DENABLE_NEON']) + env.Append(CXXFLAGS = ['-DENABLE_SVE', '-DARM_COMPUTE_ENABLE_SVE']) + env.Append(CXXFLAGS = ['-DENABLE_NEON', '-DARM_COMPUTE_ENABLE_NEON']) if env['data_type_support']: if any(i in env['data_type_support'] for i in ['all', 'fp16']): diff --git a/arm_compute/core/CPP/CPPTypes.h b/arm_compute/core/CPP/CPPTypes.h index 11891937d1..4484271d63 100644 --- a/arm_compute/core/CPP/CPPTypes.h +++ b/arm_compute/core/CPP/CPPTypes.h @@ -56,18 +56,23 @@ enum class CPUModel class CPUInfo final { -public: - /** Constructor */ +protected: CPUInfo(); ~CPUInfo(); - /** Disable copy constructor and assignment operator to avoid copying the vector of CPUs each time - * CPUInfo is initialized once in the IScheduler and ThreadInfo will get a pointer to it. +public: + /** Access the KernelLibrary singleton. + * This method has been deprecated and will be removed in future releases + * @return The KernelLibrary instance. 
*/ - CPUInfo &operator=(const CPUInfo &cpuinfo) = delete; - CPUInfo(const CPUInfo &cpuinfo) = delete; - CPUInfo &operator=(CPUInfo &&cpuinfo) = default; - CPUInfo(CPUInfo &&cpuinfo) = default; + static CPUInfo &get(); + + /* Delete move and copy constructors and assignment operator + s */ + CPUInfo(CPUInfo const &) = delete; // Copy construct + CPUInfo(CPUInfo &&) = delete; // Move construct + CPUInfo &operator=(CPUInfo const &) = delete; // Copy assign + CPUInfo &operator=(CPUInfo &&) = delete; // Move assign /** Checks if the cpu model supports fp16. * @@ -79,16 +84,41 @@ public: * @return true of the cpu supports bf16, false otherwise */ bool has_bf16() const; + /** Checks if the cpu model supports bf16. + * + * @return true of the cpu supports bf16, false otherwise + */ + bool has_svebf16() const; /** Checks if the cpu model supports dot product. * * @return true of the cpu supports dot product, false otherwise */ bool has_dotprod() const; + /** Checks if the cpu model supports floating-point matrix multiplication. + * + * @return true of the cpu supports floating-point matrix multiplication, false otherwise + */ + bool has_svef32mm() const; + /** Checks if the cpu model supports integer matrix multiplication. + * + * @return true of the cpu supports integer matrix multiplication, false otherwise + */ + bool has_i8mm() const; + /** Checks if the cpu model supports integer matrix multiplication. + * + * @return true of the cpu supports integer matrix multiplication, false otherwise + */ + bool has_svei8mm() const; /** Checks if the cpu model supports sve. * * @return true of the cpu supports sve, false otherwise */ bool has_sve() const; + /** Checks if the cpu model supports sve2. + * + * @return true of the cpu supports sve2, false otherwise + */ + bool has_sve2() const; /** Gets the cpu model for a given cpuid. 
* * @param[in] cpuid the id of the cpu core to be retrieved, @@ -111,17 +141,6 @@ public: * @return the size of the L1 cache */ unsigned int get_L2_cache_size() const; - /** Set fp16 support - * - * @param[in] fp16 whether the cpu supports fp16. - */ - void set_fp16(const bool fp16); - /** Set dot product support - * - * @param[in] dotprod whether the cpu supports dot product. - */ - void set_dotprod(const bool dotprod); - /** Return the maximum number of CPUs present * * @return Number of CPUs diff --git a/arm_compute/runtime/IScheduler.h b/arm_compute/runtime/IScheduler.h index 417c62cc9c..3759fee8a8 100644 --- a/arm_compute/runtime/IScheduler.h +++ b/arm_compute/runtime/IScheduler.h @@ -215,8 +215,6 @@ protected: */ void schedule_common(ICPPKernel *kernel, const Hints &hints, const Window &window, ITensorPack &tensors); - CPUInfo _cpu_info{}; - private: unsigned int _num_threads_hint = {}; }; diff --git a/filelist.json b/filelist.json index e30426bd19..0e17944e1d 100644 --- a/filelist.json +++ b/filelist.json @@ -1,297 +1,1775 @@ { - "common" : [ - "src/common/cpuinfo/target/CpuInfoSveUtils.cpp", - "src/common/cpuinfo/CpuInfo.cpp", - "src/common/cpuinfo/CpuModel.cpp", - "src/common/cpuinfo/CpuIsaInfo.cpp", - "src/common/utils/LegacySupport.cpp", - "src/common/AllocatorWrapper.cpp", - "src/common/ITensorV2.cpp", - "src/common/TensorPack.cpp" + "common": [ + "src/common/cpuinfo/CpuInfo.cpp", + "src/common/cpuinfo/CpuModel.cpp", + "src/common/cpuinfo/CpuIsaInfo.cpp", + "src/common/utils/LegacySupport.cpp", + "src/common/AllocatorWrapper.cpp", + "src/common/ITensorV2.cpp", + "src/common/TensorPack.cpp" + ], + "c_api": { + "cpu": [ + "src/c/AclContext.cpp", + "src/c/AclQueue.cpp", + "src/c/AclTensor.cpp", + "src/c/AclTensorPack.cpp", + "src/c/AclVersion.cpp" ], - "c_api" : - { - "cpu": [ - "src/c/AclContext.cpp", - "src/c/AclQueue.cpp", - "src/c/AclTensor.cpp", - "src/c/AclTensorPack.cpp", - "src/c/AclVersion.cpp" + "gpu": [ + "src/c/cl/AclOpenClExt.cpp" + ] + }, + 
"gpu": { + "high_priority": [ + "Activation", + "DepthwiseConv2d", + "DirectConv2d", + "Permute", + "Pool2d", + "Reshape" + ], + "operators": { + "Activation": { + "files": { + "operator": [ + "src/runtime/gpu/cl/operators/ClActivation.cpp" + ], + "kernel": [ + "src/core/gpu/cl/kernels/ClActivationKernel.cpp" + ] + } + }, + "Add": { + "files": { + "operator": [ + "src/runtime/gpu/cl/operators/ClAdd.cpp" + ] + } + }, + "Cast": { + "files": { + "operator": [ + "src/runtime/gpu/cl/operators/ClCast.cpp" + ], + "kernel": [ + "src/core/gpu/cl/kernels/ClCastKernel.cpp" + ] + } + }, + "Concatenate": { + "files": { + "operator": [ + "src/runtime/gpu/cl/operators/ClConcatenate.cpp" + ], + "kernel": [ + "src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.cpp", + "src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.cpp", + "src/core/gpu/cl/kernels/ClWidthConcatenateKernel.cpp", + "src/core/gpu/cl/kernels/ClHeightConcatenateKernel.cpp", + "src/core/gpu/cl/kernels/ClDepthConcatenateKernel.cpp", + "src/core/gpu/cl/kernels/ClBatchConcatenateKernel.cpp" + ] + } + }, + "DirectConv2d": { + "files": { + "operator": [ + "src/runtime/gpu/cl/operators/ClDirectConv2d.cpp" + ], + "kernel": [ + "src/core/gpu/cl/kernels/ClDirectConv2dKernel.cpp" + ] + } + }, + "ConvertFullyConnectedWeights": { + "files": { + "operator": [ + "src/runtime/gpu/cl/operators/ClConvertFullyConnectedWeights.cpp" + ], + "kernel": [ + "src/core/gpu/cl/kernels/ClConvertFullyConnectedWeightsKernel.cpp" + ] + } + }, + "Permute": { + "files": { + "operator": [ + "src/runtime/gpu/cl/operators/ClPermute.cpp" + ], + "kernel": [ + "src/core/gpu/cl/kernels/ClPermuteKernel.cpp" + ] + } + }, + "Pool2d": { + "files": { + "operator": [ + "src/runtime/gpu/cl/operators/ClPool2d.cpp" + ], + "kernel": [ + "src/core/gpu/cl/kernels/ClPool2dKernel.cpp" + ] + } + }, + "PRelu": { + "files": { + "operator": [ + "src/runtime/gpu/cl/operators/ClPRelu.cpp" + ] + } + }, + "Reshape": { + "files": { + "operator": [ + 
"src/runtime/gpu/cl/operators/ClReshape.cpp" + ], + "kernel": [ + "src/core/gpu/cl/kernels/ClReshapeKernel.cpp" + ] + } + }, + "Copy": { + "files": { + "operator": [ + "src/runtime/gpu/cl/operators/ClCopy.cpp" + ], + "kernel": [ + "src/core/gpu/cl/kernels/ClCopyKernel.cpp" + ] + } + }, + "Crop": { + "files": { + "operator": [ + "src/runtime/gpu/cl/operators/ClCrop.cpp" + ], + "kernel": [ + "src/core/gpu/cl/kernels/ClCropKernel.cpp" + ] + } + }, + "Dequantize": { + "files": { + "operator": [ + "src/runtime/gpu/cl/operators/ClDequantize.cpp" + ], + "kernel": [ + "src/core/gpu/cl/kernels/ClDequantizeKernel.cpp" + ] + } + }, + "Elementwise": { + "files": { + "operator": [ + "src/runtime/gpu/cl/operators/ClElementwiseOperations.cpp" + ], + "kernel": [ + "src/core/gpu/cl/kernels/ClElementwiseKernel.cpp" + ] + } + }, + "ElementwiseUnary": { + "files": { + "operator": [ + "src/runtime/gpu/cl/operators/ClElementwiseUnary.cpp" + ], + "kernel": [ + "src/core/gpu/cl/kernels/ClElementwiseUnaryKernel.cpp" + ] + } + }, + "Fill": { + "files": { + "operator": [ + "src/runtime/gpu/cl/operators/ClFill.cpp" + ], + "kernel": [ + "src/core/gpu/cl/kernels/ClFillKernel.cpp" + ] + } + }, + "Flatten": { + "files": { + "operator": [ + "src/runtime/gpu/cl/operators/ClFlatten.cpp" + ] + } + }, + "Floor": { + "files": { + "operator": [ + "src/runtime/gpu/cl/operators/ClFloor.cpp" + ], + "kernel": [ + "src/core/gpu/cl/kernels/ClFloorKernel.cpp" + ] + } + }, + "GEMM": { + "files": { + "operator": [ + "src/runtime/gpu/cl/operators/ClGemm.cpp" + ], + "kernel": [ + "src/core/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.cpp", + "src/core/gpu/cl/kernels/gemm/ClGemmHelpers.cpp", + "src/core/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeBifrost.cpp", + "src/core/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeMidgard.cpp", + "src/core/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeValhall.cpp", + "src/core/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.cpp", + 
"src/core/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedBifrost.cpp", + "src/core/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedValhall.cpp", + "src/core/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.cpp", + "src/core/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyBifrost.cpp", + "src/core/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyValhall.cpp", + "src/core/gpu/cl/kernels/ClGemmMatrixMultiplyKernel.cpp", + "src/core/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.cpp", + "src/core/gpu/cl/kernels/ClGemmReshapeLhsMatrixKernel.cpp" + ] + } + }, + "Mul": { + "files": { + "operator": [ + "src/runtime/gpu/cl/operators/ClMul.cpp" + ], + "kernel": [ + "src/core/gpu/cl/kernels/ClMulKernel.cpp" + ] + } + }, + "Quantize": { + "files": { + "operator": [ + "src/runtime/gpu/cl/operators/ClQuantize.cpp" + ], + "kernel": [ + "src/core/gpu/cl/kernels/ClQuantizeKernel.cpp" + ] + } + }, + "Scale": { + "files": { + "operator": [ + "src/runtime/gpu/cl/operators/ClScale.cpp" + ], + "kernel": [ + "src/core/gpu/cl/kernels/ClScaleKernel.cpp" + ] + } + }, + "Softmax": { + "files": { + "operator": [ + "src/runtime/gpu/cl/operators/ClSoftmax.cpp" + ], + "kernel": [ + "src/core/gpu/cl/kernels/ClSoftmaxKernel.cpp" + ] + } + }, + "Sub": { + "files": { + "operator": [ + "src/runtime/gpu/cl/operators/ClSub.cpp" + ] + } + }, + "Transpose": { + "files": { + "operator": [ + "src/runtime/gpu/cl/operators/ClTranspose.cpp" + ], + "kernel": [ + "src/core/gpu/cl/kernels/ClTransposeKernel.cpp" + ] + } + }, + "GenerateProposals": { + "files": { + "kernel": [ + "src/core/CL/kernels/CLGenerateProposalsLayerKernel.cpp" + ] + } + }, + "ArgMinMax": { + "files": { + "kernel": [ + "src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp" + ] + } + }, + "BatchNormalization": { + "files": { + "kernel": [ + "src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp" + ] + } + }, + "BatchToSpace": { + "files": { + "kernel": [ + 
"src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp" + ] + } + }, + "Bitwise": { + "files": { + "kernel": [ + "src/core/CL/kernels/CLBitwiseKernel.cpp" + ] + } + }, + "BoundingBoxTransform": { + "files": { + "kernel": [ + "src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp" + ] + } + }, + "ChannelShuffleLayer": { + "files": { + "kernel": [ + "src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp" + ] + } + }, + "GEMMConv2d": { + "files": { + "kernel": [ + "src/core/CL/kernels/CLCol2ImKernel.cpp", + "src/core/CL/kernels/CLIm2ColKernel.cpp" + ] + } + }, + "Comparison": { + "files": { + "kernel": [ + "src/core/CL/kernels/CLComparisonKernel.cpp" + ] + } + }, + "DeconvolutionLayerUpsample": { + "files": { + "kernel": [ + "src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp" + ] + } + }, + "DeconvolutionReshapeOutput": { + "files": { + "kernel": [ + "src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.cpp" + ] + } + }, + "DepthToSpace": { + "files": { + "kernel": [ + "src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp" + ] + } + }, + "DepthwiseConvolutionLayer3x3NCHW": { + "files": { + "kernel": [ + "src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp" + ] + } + }, + "DepthwiseConvolutionLayer3x3NHWC": { + "files": { + "kernel": [ + "src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp" + ] + } + }, + "DepthwiseConvolutionLayerNative": { + "files": { + "kernel": [ + "src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp" + ] + } + }, + "FFTDigitReverse": { + "files": { + "kernel": [ + "src/core/CL/kernels/CLFFTDigitReverseKernel.cpp" + ] + } + }, + "FFTRadixStage": { + "files": { + "kernel": [ + "src/core/CL/kernels/CLFFTRadixStageKernel.cpp" + ] + } + }, + "FFTScale": { + "files": { + "kernel": [ + "src/core/CL/kernels/CLFFTScaleKernel.cpp" + ] + } + }, + "FillBorder": { + "files": { + "kernel": [ + "src/core/CL/kernels/CLFillBorderKernel.cpp" + ] + } + }, + "FuseBatchNormalization": { + "files": { + "kernel": [ + 
"src/core/CL/kernels/CLFuseBatchNormalizationKernel.cpp" + ] + } + }, + "Gather": { + "files": { + "kernel": [ + "src/core/CL/kernels/CLGatherKernel.cpp" + ] + } + }, + "GEMMLowpMatrixMultiplyNative": { + "files": { + "kernel": [ + "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.cpp" + ] + } + }, + "GEMMLowpMatrixMultiplyReshaped": { + "files": { + "kernel": [ + "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.cpp" + ] + } + }, + "GEMMLowpMatrixMultiplyReshapedOnlyRHS": { + "files": { + "kernel": [ + "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp" + ] + } + }, + "GEMMLowpOffsetContribution": { + "files": { + "kernel": [ + "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp" + ] + } + }, + "GEMMLowpOffsetContributionOutputStage": { + "files": { + "kernel": [ + "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.cpp" + ] + } + }, + "GEMMLowpQuantizeDownInt32ScaleByFixedPoint": { + "files": { + "kernel": [ + "src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.cpp" + ] + } + }, + "GEMMLowpQuantizeDownInt32ScaleByFloat": { + "files": { + "kernel": [ + "src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.cpp" + ] + } + }, + "GEMMLowpQuantizeDownInt32Scale": { + "files": { + "kernel": [ + "src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.cpp" + ] + } + }, + "GEMMLowpReduction": { + "files": { + "kernel": [ + "src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp" + ] + } + }, + "InstanceNormalization": { + "files": { + "kernel": [ + "src/core/CL/kernels/CLInstanceNormalizationLayerKernel.cpp" + ] + } + }, + "L2Normalize": { + "files": { + "kernel": [ + "src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp" + ] + } + }, + "LogicalNot": { + "files": { + "operator": [ + "src/runtime/gpu/cl/operators/ClLogicalNot.cpp" + ] + } + }, + "MaxUnpooling": { + "files": { + "kernel": [ + "src/core/CL/kernels/CLMaxUnpoolingLayerKernel.cpp" + ] + } + }, + "MeanStdDevNormalization": { 
+ "files": { + "kernel": [ + "src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp" + ] + } + }, + "MinMax": { + "files": { + "kernel": [ + "src/core/CL/kernels/CLMinMaxLayerKernel.cpp" + ] + } + }, + "Normalization": { + "files": { + "kernel": [ + "src/core/CL/kernels/CLNormalizationLayerKernel.cpp" + ] + } + }, + "NormalizePlanarYUV": { + "files": { + "kernel": [ + "src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp" + ] + } + }, + "Pad": { + "files": { + "kernel": [ + "src/core/CL/kernels/CLPadLayerKernel.cpp" + ] + } + }, + "PriorBox": { + "files": { + "kernel": [ + "src/core/CL/kernels/CLPriorBoxLayerKernel.cpp" + ] + } + }, + "QLSTMLayerNormalization": { + "files": { + "kernel": [ + "src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.cpp" + ] + } + }, + "Range": { + "files": { + "kernel": [ + "src/core/CL/kernels/CLRangeKernel.cpp" + ] + } + }, + "ReductionOperation": { + "files": { + "kernel": [ + "src/core/CL/kernels/CLReductionOperationKernel.cpp" + ] + } + }, + "Remap": { + "files": { + "kernel": [ + "src/core/CL/kernels/CLRemapKernel.cpp" + ] + } + }, + "Reorg": { + "files": { + "kernel": [ + "src/core/CL/kernels/CLReorgLayerKernel.cpp" + ] + } + }, + "Reverse": { + "files": { + "kernel": [ + "src/core/CL/kernels/CLReverseKernel.cpp" + ] + } + }, + "ROIAlign": { + "files": { + "kernel": [ + "src/core/CL/kernels/CLROIAlignLayerKernel.cpp" + ] + } + }, + "ROIPooling": { + "files": { + "kernel": [ + "src/core/CL/kernels/CLROIPoolingLayerKernel.cpp" + ] + } + }, + "Select": { + "files": { + "kernel": [ + "src/core/CL/kernels/CLSelectKernel.cpp" + ] + } + }, + "SpaceToBatch": { + "files": { + "kernel": [ + "src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp" + ] + } + }, + "SpaceToDepth": { + "files": { + "kernel": [ + "src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp" + ] + } + }, + "Stack": { + "files": { + "kernel": [ + "src/core/CL/kernels/CLStackLayerKernel.cpp" + ] + } + }, + "StridedSlice": { + "files": { + "kernel": [ + 
"src/core/CL/kernels/CLStridedSliceKernel.cpp" + ] + } + }, + "Tile": { + "files": { + "kernel": [ + "src/core/CL/kernels/CLTileKernel.cpp" + ] + } + }, + "WeightsReshape": { + "files": { + "kernel": [ + "src/core/CL/kernels/CLWeightsReshapeKernel.cpp" + ] + } + }, + "WinogradConv2d": { + "files": { + "operator": [ + "src/runtime/gpu/cl/operators/ClWinogradConv2d.cpp" + ], + "kernel": [ + "src/core/gpu/cl/kernels/ClWinogradFilterTransformKernel.cpp", + "src/core/gpu/cl/kernels/ClWinogradInputTransformKernel.cpp", + "src/core/gpu/cl/kernels/ClWinogradOutputTransformKernel.cpp" + ] + } + } + } + }, + "cpu": { + "all": [ + "src/cpu/CpuContext.cpp", + "src/cpu/CpuQueue.cpp", + "src/cpu/CpuTensor.cpp" + ], + "high_priority": [ + "Activation", + "DepthwiseConv2d", + "DirectConv2d", + "Permute", + "Pool2d", + "Reshape" + ], + "operators": { + "Activation": { + "files": { + "operator": [ + "src/runtime/cpu/operators/CpuActivation.cpp" + ], + "kernel": [ + "src/core/cpu/kernels/CpuActivationKernel.cpp" + ], + "sve": { + "fp32": [ + "src/core/cpu/kernels/activation/sve/fp32.cpp" + ], + "fp16": [ + "src/core/cpu/kernels/activation/sve/fp16.cpp" + ], + "qsymm16": [ + "src/core/cpu/kernels/activation/sve/qsymm16.cpp" + ], + "qasymm8": [ + "src/core/cpu/kernels/activation/sve/qasymm8.cpp" + ], + "qasymm8_signed": [ + "src/core/cpu/kernels/activation/sve/qasymm8_signed.cpp" + ] + }, + "neon": { + "fp32": [ + "src/core/cpu/kernels/activation/neon/fp32.cpp" + ], + "fp16": [ + "src/core/cpu/kernels/activation/neon/fp16.cpp" + ], + "qsymm16": [ + "src/core/cpu/kernels/activation/neon/qsymm16.cpp" + ], + "qasymm8": [ + "src/core/cpu/kernels/activation/neon/qasymm8.cpp" + ], + "qasymm8_signed": [ + "src/core/cpu/kernels/activation/neon/qasymm8_signed.cpp" + ] + } + } + }, + "Add": { + "files": { + "operator": [ + "src/runtime/cpu/operators/CpuAdd.cpp" + ], + "kernel": [ + "src/core/cpu/kernels/CpuAddKernel.cpp" + ], + "sve": { + "all": [ + "src/core/cpu/kernels/add/sve/impl.cpp" + ], + 
"qsymm16": [ + "src/core/cpu/kernels/add/sve/qsymm16.cpp" + ], + "qasymm8": [ + "src/core/cpu/kernels/add/sve/qasymm8.cpp" + ], + "qasymm8_signed": [ + "src/core/cpu/kernels/add/sve/qasymm8_signed.cpp" + ], + "integer": [ + "src/core/cpu/kernels/add/sve/integer.cpp" + ] + }, + "neon": { + "qsymm16": [ + "src/core/cpu/kernels/add/neon/qsymm16.cpp" + ], + "qasymm8": [ + "src/core/cpu/kernels/add/neon/qasymm8.cpp" + ], + "qasymm8_signed": [ + "src/core/cpu/kernels/add/neon/qasymm8_signed.cpp" + ], + "integer": [ + "src/core/cpu/kernels/add/neon/integer.cpp" + ] + } + } + }, + "BatchNorm": { + "files": { + "kernel": [ + "src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp" + ], + "sve": { + "fp32": [ + "src/core/NEON/kernels/batchnormalization/impl/SVE/fp32.cpp" + ], + "fp16": [ + "src/core/NEON/kernels/batchnormalization/impl/SVE/fp16.cpp" + ] + }, + "neon": { + "fp32": [ + "src/core/NEON/kernels/batchnormalization/impl/NEON/fp32.cpp" + ], + "fp16": [ + "src/core/NEON/kernels/batchnormalization/impl/NEON/fp16.cpp" + ] + } + } + }, + "BatchToSpace": { + "files": { + "kernel": [ + "src/core/NEON/kernels/NEBatchToSpaceLayerKernel.cpp" + ] + } + }, + "BitwiseAnd": { + "files": { + "kernel": [ + "src/core/NEON/kernels/NEBitwiseAndKernel.cpp" + ] + } + }, + "BitwiseNot": { + "files": { + "kernel": [ + "src/core/NEON/kernels/NEBitwiseNotKernel.cpp" + ] + } + }, + "BitwiseOr": { + "files": { + "kernel": [ + "src/core/NEON/kernels/NEBitwiseOrKernel.cpp" + ] + } + }, + "BitwiseXor": { + "files": { + "kernel": [ + "src/core/NEON/kernels/NEBitwiseXorKernel.cpp" + ] + } + }, + "BoundingBoxTransform": { + "files": { + "kernel": [ + "src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp" + ] + } + }, + "ChannelShuffleLayer": { + "files": { + "kernel": [ + "src/core/NEON/kernels/NEChannelShuffleLayerKernel.cpp" + ] + } + }, + "Col2Im": { + "files": { + "kernel": [ + "src/core/NEON/kernels/NECol2ImKernel.cpp" + ] + } + }, + "Cast": { + "files": { + "operator": [ + 
"src/runtime/cpu/operators/CpuCast.cpp" + ], + "kernel": [ + "src/core/cpu/kernels/CpuCastKernel.cpp" + ] + } + }, + "Concatenate": { + "files": { + "operator": [ + "src/runtime/cpu/operators/CpuConcatenate.cpp" + ], + "kernel": [ + "src/core/cpu/kernels/CpuConcatenateWidthKernel.cpp", + "src/core/cpu/kernels/CpuConcatenateBatchKernel.cpp", + "src/core/cpu/kernels/CpuConcatenateDepthKernel.cpp", + "src/core/cpu/kernels/CpuConcatenateHeightKernel.cpp" + ] + } + }, + "ConvertFullyConnectedWeights": { + "files": { + "operator": [ + "src/runtime/cpu/operators/CpuConvertFullyConnectedWeights.cpp" + ], + "kernel": [ + "src/core/cpu/kernels/CpuConvertFullyConnectedWeightsKernel.cpp" + ] + } + }, + "ConvertQuantizedSignedness": { + "files": { + "kernel": [ + "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.cpp" + ] + } + }, + "Copy": { + "files": { + "operator": [ + "src/runtime/cpu/operators/CpuCopy.cpp" + ], + "kernel": [ + "src/core/cpu/kernels/CpuCopyKernel.cpp" + ] + } + }, + "Crop": { + "files": { + "kernel": [ + "src/core/NEON/kernels/NECropKernel.cpp" + ] + } + }, + "DepthwiseConv2d": { + "deps": [ + "Activation", + "Permute" + ], + "files": { + "operator": [ + "src/runtime/cpu/operators/CpuDepthwiseConv2d.cpp", + "src/runtime/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp", + "src/core/cpu/kernels/internal/CpuDepthwiseConv2dAssemblyWrapperKernel.cpp" + ], + "kernel": [ + "src/core/NEON/kernels/convolution/common/padding.cpp", + "src/core/NEON/kernels/convolution/common/qasymm8.cpp", + "src/core/NEON/kernels/convolution/common/qsymm8.cpp", + "src/core/NEON/kernels/convolution/common/utils.cpp", + "src/core/cpu/kernels/CpuDepthwiseConv2dNativeKernel.cpp" + ], + "sve": { + "all": [ + "src/core/NEON/kernels/arm_conv/depthwise/interleaves/sve_s8q_3x3_dot.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/interleaves/sve_u8q_3x3_dot.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp", 
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst_strided/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp", + 
"src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_generic_output9_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp", + 
"src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp" + ] + }, + "neon": { + "estate64": [ + "src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp16.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp32.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/depthwise_s8q.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/depthwise_u8q.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/depthwise_u8s8u8q.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/interleaves/8b_mla.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/interleaves/a64_s8q_3x3_dot.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/interleaves/a64_u8q_3x3_dot.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp", + 
"src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_generic_output9_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp", + 
"src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_generic_output9_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_generic_output9_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp", + 
"src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_generic_output9_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_generic_output9_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp" + ] + } + } + }, + "DepthToSpaceLayer": { + "files": { + "kernel": [ + "src/core/NEON/kernels/NEDepthToSpaceLayerKernel.cpp" + ] + } + }, + "Dequantize": { + "files": { + "operator": [ + "src/runtime/cpu/operators/CpuDequantize.cpp" + ], + "kernel": [ + "src/core/cpu/kernels/CpuDequantizeKernel.cpp" + ] + } + }, + 
"DirectConv2d": { + "deps": [ + "Activation", + "FillBorder" + ], + "files": { + "operator": [ + "src/runtime/cpu/operators/CpuDirectConv2d.cpp" + ], + "kernel": [ + "src/core/cpu/kernels/CpuDirectConv2dKernel.cpp", + "src/core/cpu/kernels/CpuDirectConv2dOutputStageKernel.cpp" + ] + } + }, + "Elementwise": { + "files": { + "operator": [ + "src/runtime/cpu/operators/CpuElementwise.cpp" + ], + "kernel": [ + "src/core/cpu/kernels/CpuElementwiseKernel.cpp" + ], + "sve": { + "all": [ + "src/core/cpu/kernels/elementwise/sve/elementwise.cpp" + ] + } + } + }, + "ElementwiseUnary": { + "files": { + "operator": [ + "src/runtime/cpu/operators/CpuElementwiseUnary.cpp" + ], + "kernel": [ + "src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp" + ], + "sve": { + "all": [ + "src/core/cpu/kernels/elementwise/sve/elementwise_unary.cpp" + ] + } + } + }, + "FFT1D": { + "files": { + "kernel": [ + "src/core/NEON/kernels/NEFFTDigitReverseKernel.cpp", + "src/core/NEON/kernels/NEFFTRadixStageKernel.cpp", + "src/core/NEON/kernels/NEFFTScaleKernel.cpp" + ] + } + }, + "FillBorder": { + "files": { + "kernel": [ + "src/core/NEON/kernels/NEFillBorderKernel.cpp" + ] + } + }, + "Flatten": { + "deps": [ + "Reshape" + ], + "files": { + "operator": [ + "src/runtime/cpu/operators/CpuFlatten.cpp" + ] + } + }, + "Fill": { + "files": { + "operator": [ + "src/runtime/cpu/operators/CpuFill.cpp" + ], + "kernel": [ + "src/core/cpu/kernels/CpuFillKernel.cpp" + ] + } + }, + "Floor": { + "files": { + "operator": [ + "src/runtime/cpu/operators/CpuFloor.cpp" + ], + "kernel": [ + "src/core/cpu/kernels/CpuFloorKernel.cpp" + ], + "neon": { + "fp32": [ + "src/core/cpu/kernels/floor/neon/fp32.cpp" + ], + "fp16": [ + "src/core/cpu/kernels/floor/neon/fp16.cpp" + ] + } + } + }, + "FuseBatchNormalization": { + "files": { + "kernel": [ + "src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp" + ] + } + }, + "GEMM": { + "files": { + "kernel": [ + "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp", +
"src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp", + "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp", + "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp" + ] + } + }, + "GEMMLowp": { + "deps": [ + "GemmAssemblyDispatch" + ], + "files": { + "operator" : ["src/runtime/cpu/operators/CpuGemmLowpOutputStage.cpp"], + "kernel": [ + "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp", + "src/core/cpu/kernels/CpuGemmLowpQuantizeDownInt32ScaleKernel.cpp", + "src/core/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp", + "src/core/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp", + "src/core/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp", + "src/core/NEON/kernels/NEGEMMLowpReductionKernel.cpp", + "src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.cpp", + "src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.cpp" + ] + } + }, + "GEMMConvolution": { + "deps": [ + "Activation", + "Col2Im", + "Reshape", + "Im2Col", + "GEMMLowpOffsetContributionOutputStage", + "ConvertQuantizedSignedness" + ], + "files": { + "kernel": [ + "src/core/NEON/kernels/NEWeightsReshapeKernel.cpp" + ] + } + }, + "GemmAssemblyDispatch": { + "files": { + "operator": [ + "src/runtime/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp" + ], + "kernel": [ + "src/core/NEON/kernels/arm_gemm/gemm_fp16.cpp", + "src/core/NEON/kernels/arm_gemm/gemm_bf16.cpp", + "src/core/NEON/kernels/arm_gemm/gemm_fp32.cpp", + "src/core/NEON/kernels/arm_gemm/gemm_int16.cpp", + "src/core/NEON/kernels/arm_gemm/gemm_int8.cpp", + "src/core/NEON/kernels/arm_gemm/gemm_qint8.cpp", + "src/core/NEON/kernels/arm_gemm/gemm_quint8.cpp", + "src/core/NEON/kernels/arm_gemm/gemm_uint16.cpp", + "src/core/NEON/kernels/arm_gemm/gemm_uint8.cpp", + "src/core/NEON/kernels/arm_gemm/mergeresults.cpp", + "src/core/NEON/kernels/arm_gemm/interleave_indirect.cpp", + "src/core/NEON/kernels/arm_gemm/misc.cpp", + 
"src/core/NEON/kernels/arm_gemm/quantized.cpp", + "src/core/NEON/kernels/arm_gemm/rowsum_indirect_s8.cpp", + "src/core/NEON/kernels/arm_gemm/rowsum_indirect_u8.cpp" + ], + "neon": { + "estate32": [ + "src/core/NEON/kernels/arm_gemm/kernels/a32_sgemm_8x6/a53.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a32_sgemm_8x6/a55r1.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a32_sgemm_8x6/generic.cpp" + ], + "estate64": [ + "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s16_8x12/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_4x4/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12/a55r1.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12/x1.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u16_8x12/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_4x4/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/a55r1.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/x1.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_gemv_fp32_mla_32/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/a55r1.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/x1.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_bf16fp32_dot_6x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16_mla_6x32/a55.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16_mla_6x32/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_6x16/a55.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_6x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_8x4/a55.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_8x4/generic.cpp", + 
"src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_dot_4x16/a55.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_dot_4x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_dot_6x16/a55.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_dot_6x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_dot_6x16/a55.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_dot_6x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_dot_4x16/a55.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_dot_4x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_dot_6x16/a55.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_dot_6x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_dot_8x12/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_mmla_8x12/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_s8s32_mmla_8x12/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_u8u32_mmla_8x12/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a53.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a55.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a55r1.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/x1.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x6/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemv_pretransposed/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_fp32_mla_6x4/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_fp32_mla_8x4/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_6x4/a55.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_6x4/generic.cpp", + 
"src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_8x4/a55.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_8x4/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_6x4/a55.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_6x4/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_8x4/a55.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_8x4/generic.cpp" + ] + }, + "sve": { + "all": [ + "src/core/NEON/kernels/arm_gemm/mergeresults-sve.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_gemv_fp32_mla_8VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_dot_6x4VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_dot_4x4VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_dot_6x4VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_6x4VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_dot_4x4VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_6x4VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_dot_8x3VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_mmla_8x3VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_8x3VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_8x3VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mmla_8x3VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_8x3VL/generic.cpp", + 
"src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_mmla_8x3VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_mmla_8x3VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_fp32_mla_8x1VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_s8s32_dot_8x1VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_u8u32_dot_8x1VL/generic.cpp" + ] + } + } + }, + "GemmDirectConv2d": { + "deps": [ + "Activation", + "GemmAssemblyDispatch", + "Permute" + ], + "files": { + "operator": [ + "src/runtime/cpu/operators/CpuGemmDirectConv2d.cpp" + ] + } + }, + "Mul": { + "files": { + "operator": [ + "src/runtime/cpu/operators/CpuMul.cpp" + ], + "kernel": [ + "src/core/cpu/kernels/CpuMulKernel.cpp" + ] + } + }, + "Quantize": { + "files": { + "operator": [ + "src/runtime/cpu/operators/CpuQuantize.cpp" + ], + "kernel": [ + "src/core/cpu/kernels/CpuQuantizeKernel.cpp" + ] + } + }, + "Reshape": { + "files": { + "operator": [ + "src/runtime/cpu/operators/CpuReshape.cpp" + ], + "kernel": [ + "src/core/cpu/kernels/CpuReshapeKernel.cpp" + ] + } + }, + "Gather": { + "files": { + "kernel": [ + "src/core/NEON/kernels/NEGatherKernel.cpp" + ] + } + }, + "GenerateProposalsLayer": { + "files": { + "kernel": [ + "src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp" + ] + } + }, + "Im2Col": { + "files": { + "kernel": [ + "src/core/NEON/kernels/NEIm2ColKernel.cpp" + ] + } + }, + "InstanceNormalization": { + "files": { + "kernel": [ + "src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp" + ] + } + }, + "L2Normalize": { + "deps": [ + "Reduction" + ], + "files": { + "kernel": [ + "src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp" + ] + } + }, + "Logical": { + "files": { + "kernel": [ + "src/core/NEON/kernels/NELogicalKernel.cpp" + ] + } + }, + "MaxUnpooling": { + "files": { + "kernel": [ + 
"src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.cpp" + ] + } + }, + "MeanStdDevNormalization": { + "files": { + "kernel": [ + "src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp" + ] + } + }, + "MinMax": { + "files": { + "kernel": [ + "src/core/NEON/kernels/NEMinMaxLayerKernel.cpp" + ] + } + }, + "Normalization": { + "deps": [ + "PixelWiseMultiplication" + ], + "files": { + "kernel": [ + "src/core/NEON/kernels/NENormalizationLayerKernel.cpp" + ] + } + }, + "Pad": { + "files": { + "kernel": [ + "src/core/NEON/kernels/NEPadLayerKernel.cpp" + ] + } + }, + "Permute": { + "files": { + "operator": [ + "src/runtime/cpu/operators/CpuPermute.cpp" + ], + "kernel": [ + "src/core/cpu/kernels/CpuPermuteKernel.cpp" + ] + } + }, + "Pool2d": { + "files": { + "operator": [ + "src/runtime/cpu/operators/CpuPool2d.cpp" + ], + "kernel": [ + "src/core/cpu/kernels/CpuPool2dKernel.cpp", + "src/core/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/cpp_nhwc_1x1_stride_any_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/pooling_fp16.cpp", + "src/core/NEON/kernels/arm_conv/pooling/pooling_fp32.cpp", + "src/core/NEON/kernels/arm_conv/pooling/pooling_s8.cpp", + "src/core/NEON/kernels/arm_conv/pooling/pooling_s8q.cpp", + "src/core/NEON/kernels/arm_conv/pooling/pooling_u8.cpp", + "src/core/NEON/kernels/arm_conv/pooling/pooling_u8q.cpp" + ], + "neon": { + "nchw": [ + "src/core/cpu/kernels/pooling/neon/nchw/all.cpp" + ], + "fp32": [ + "src/core/cpu/kernels/pooling/neon/fp32.cpp" + ], + "fp16": [ + "src/core/cpu/kernels/pooling/neon/fp16.cpp" + ], + "qasymm8": [ + "src/core/cpu/kernels/pooling/neon/qasymm8.cpp" + ], + "qasymm8_signed": [ + "src/core/cpu/kernels/pooling/neon/qasymm8_signed.cpp" + ], + "estate64": [ + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp", + 
"src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_avg_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_max_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_avg_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_max_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_avg_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_max_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8q_nhwc_avg_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8q_nhwc_max_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_avg_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_max_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8q_nhwc_avg_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8q_nhwc_max_generic_depthfirst/generic.cpp" + ] + }, + "sve": { + "all": [ + "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_avg_generic_depthfirst/generic.cpp", + 
"src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_max_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_avg_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_max_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_avg_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_max_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8q_nhwc_avg_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8q_nhwc_max_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_avg_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_max_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_avg_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_max_generic_depthfirst/generic.cpp" + ] + } + } + }, + "PriorBox": { + "files": { + "kernel": [ + "src/core/NEON/kernels/NEPriorBoxLayerKernel.cpp" + ] + } + }, + "QLSTMLayerNormalization": { + "files": { + "kernel": [ + "src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.cpp" + ] + } + }, + "Range": { + "files": { + "kernel": [ + "src/core/NEON/kernels/NERangeKernel.cpp" + ] + } + }, + 
"ReductionOperation": { + "files": { + "kernel": [ + "src/core/NEON/kernels/NEReductionOperationKernel.cpp" + ] + } + }, + "Remap": { + "files": { + "kernel": [ + "src/core/NEON/kernels/NERemapKernel.cpp" + ] + } + }, + "Reorg": { + "files": { + "kernel": [ + "src/core/NEON/kernels/NEReorgLayerKernel.cpp" + ] + } + }, + "Reverse": { + "files": { + "kernel": [ + "src/core/NEON/kernels/NEReverseKernel.cpp" + ] + } + }, + "ROIAlign": { + "files": { + "kernel": [ + "src/core/NEON/kernels/NEROIAlignLayerKernel.cpp" + ] + } + }, + "ROIPooling": { + "files": { + "kernel": [ + "src/core/NEON/kernels/NEROIPoolingLayerKernel.cpp" + ] + } + }, + "Select": { + "files": { + "kernel": [ + "src/core/NEON/kernels/NESelectKernel.cpp" + ] + } + }, + "SpaceToBatch": { + "files": { + "kernel": [ + "src/core/NEON/kernels/NESpaceToBatchLayerKernel.cpp" + ] + } + }, + "SpaceToDepth": { + "files": { + "kernel": [ + "src/core/NEON/kernels/NESpaceToDepthLayerKernel.cpp" + ] + } + }, + "Stack": { + "files": { + "kernel": [ + "src/core/NEON/kernels/NEStackLayerKernel.cpp" + ] + } + }, + "StridedSlice": { + "files": { + "kernel": [ + "src/core/NEON/kernels/NEStridedSliceKernel.cpp" + ] + } + }, + "Scale": { + "files": { + "operator": [ + "src/runtime/cpu/operators/CpuScale.cpp" + ], + "kernel": [ + "src/core/cpu/kernels/CpuScaleKernel.cpp" + ], + "sve": { + "fp32": [ + "src/core/cpu/kernels/scale/sve/fp32.cpp" + ], + "fp16": [ + "src/core/cpu/kernels/scale/sve/fp16.cpp" + ], + "qasymm8": [ + "src/core/cpu/kernels/scale/sve/qasymm8.cpp" + ], + "qasymm8_signed": [ + "src/core/cpu/kernels/scale/sve/qasymm8_signed.cpp" + ], + "integer": [ + "src/core/cpu/kernels/scale/sve/integer.cpp" + ] + }, + "neon": { + "fp16": [ + "src/core/cpu/kernels/scale/neon/fp16.cpp" + ], + "qasymm8": [ + "src/core/cpu/kernels/scale/neon/qasymm8.cpp" + ], + "qasymm8_signed": [ + "src/core/cpu/kernels/scale/neon/qasymm8_signed.cpp" + ], + "integer": [ + "src/core/cpu/kernels/scale/neon/integer.cpp" + ] + } + } + }, + 
"Softmax": { + "deps": [ + "Permute" + ], + "files": { + "operator": [ + "src/runtime/cpu/operators/CpuSoftmax.cpp" + ], + "kernel": [ + "src/core/cpu/kernels/CpuSoftmaxKernel.cpp" + ], + "sve": { + "all": [ + "src/core/cpu/kernels/softmax/impl/sve/impl.cpp" + ] + } + } + }, + "Sub": { + "files": { + "operator": [ + "src/runtime/cpu/operators/CpuSub.cpp" + ], + "kernel": [ + "src/core/cpu/kernels/CpuSubKernel.cpp" + ], + "neon": { + "qsymm16": [ + "src/core/cpu/kernels/sub/neon/qsymm16.cpp" + ], + "qasymm8": [ + "src/core/cpu/kernels/sub/neon/qasymm8.cpp" + ], + "qasymm8_signed": [ + "src/core/cpu/kernels/sub/neon/qasymm8_signed.cpp" + ], + "integer": [ + "src/core/cpu/kernels/sub/neon/integer.cpp" + ] + } + } + }, + "Transpose": { + "files": { + "operator": [ + "src/runtime/cpu/operators/CpuTranspose.cpp" + ], + "kernel": [ + "src/core/cpu/kernels/CpuTransposeKernel.cpp" + ] + } + }, + "Tile": { + "files": { + "kernel": [ + "src/core/NEON/kernels/NETileKernel.cpp" + ] + } + }, + "WinogradConvolution": { + "deps": [ + "Activation", + "Permute" ], - "gpu": [ - "src/c/cl/AclOpenClExt.cpp" - ] - }, - - "gpu" : - { - "core" : - { - "kernels" : - { - "high_priority" : [ - "src/core/gpu/cl/kernels/ClActivationKernel.cpp", - "src/core/gpu/cl/kernels/ClDirectConv2dKernel.cpp", - "src/core/gpu/cl/kernels/ClPermuteKernel.cpp", - "src/core/gpu/cl/kernels/ClPool2dKernel.cpp", - "src/core/gpu/cl/kernels/ClReshapeKernel.cpp" - ], - "all" : [ - "src/core/gpu/cl/kernels/ClBatchConcatenateKernel.cpp", - "src/core/gpu/cl/kernels/ClCastKernel.cpp", - "src/core/gpu/cl/kernels/ClConvertFullyConnectedWeightsKernel.cpp", - "src/core/gpu/cl/kernels/ClCopyKernel.cpp", - "src/core/gpu/cl/kernels/ClCropKernel.cpp", - "src/core/gpu/cl/kernels/ClDepthConcatenateKernel.cpp", - "src/core/gpu/cl/kernels/ClDequantizeKernel.cpp", - "src/core/gpu/cl/kernels/ClElementwiseKernel.cpp", - "src/core/gpu/cl/kernels/ClElementwiseUnaryKernel.cpp", - "src/core/gpu/cl/kernels/ClFillKernel.cpp", - 
"src/core/gpu/cl/kernels/ClFloorKernel.cpp", - "src/core/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.cpp", - "src/core/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.cpp", - "src/core/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.cpp", - "src/core/gpu/cl/kernels/ClGemmMatrixMultiplyKernel.cpp", - "src/core/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.cpp", - "src/core/gpu/cl/kernels/ClGemmReshapeLhsMatrixKernel.cpp", - "src/core/gpu/cl/kernels/ClHeightConcatenateKernel.cpp", - "src/core/gpu/cl/kernels/ClMulKernel.cpp", - "src/core/gpu/cl/kernels/ClQuantizeKernel.cpp", - "src/core/gpu/cl/kernels/ClScaleKernel.cpp", - "src/core/gpu/cl/kernels/ClSoftmaxKernel.cpp", - "src/core/gpu/cl/kernels/ClTransposeKernel.cpp", - "src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.cpp", - "src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.cpp", - "src/core/gpu/cl/kernels/ClWidthConcatenateKernel.cpp", - "src/core/gpu/cl/kernels/ClWinogradFilterTransformKernel.cpp", - "src/core/gpu/cl/kernels/ClWinogradInputTransformKernel.cpp", - "src/core/gpu/cl/kernels/ClWinogradOutputTransformKernel.cpp", - "src/core/CL/kernels/CLGenerateProposalsLayerKernel.cpp", - "src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp", - "src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp", - "src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp", - "src/core/CL/kernels/CLBitwiseKernel.cpp", - "src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp", - "src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp", - "src/core/CL/kernels/CLCol2ImKernel.cpp", - "src/core/CL/kernels/CLComparisonKernel.cpp", - "src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp", - "src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.cpp", - "src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp", - "src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp", - "src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp", - "src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp", - 
"src/core/CL/kernels/CLFFTDigitReverseKernel.cpp", - "src/core/CL/kernels/CLFFTRadixStageKernel.cpp", - "src/core/CL/kernels/CLFFTScaleKernel.cpp", - "src/core/CL/kernels/CLFillBorderKernel.cpp", - "src/core/CL/kernels/CLFuseBatchNormalizationKernel.cpp", - "src/core/CL/kernels/CLGatherKernel.cpp", - "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.cpp", - "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.cpp", - "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp", - "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp", - "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.cpp", - "src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.cpp", - "src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.cpp", - "src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.cpp", - "src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp", - "src/core/CL/kernels/CLIm2ColKernel.cpp", - "src/core/CL/kernels/CLInstanceNormalizationLayerKernel.cpp", - "src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp", - "src/core/CL/kernels/CLMaxUnpoolingLayerKernel.cpp", - "src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp", - "src/core/CL/kernels/CLMinMaxLayerKernel.cpp", - "src/core/CL/kernels/CLNormalizationLayerKernel.cpp", - "src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp", - "src/core/CL/kernels/CLPadLayerKernel.cpp", - "src/core/CL/kernels/CLPriorBoxLayerKernel.cpp", - "src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.cpp", - "src/core/CL/kernels/CLRangeKernel.cpp", - "src/core/CL/kernels/CLReductionOperationKernel.cpp", - "src/core/CL/kernels/CLRemapKernel.cpp", - "src/core/CL/kernels/CLReorgLayerKernel.cpp", - "src/core/CL/kernels/CLReverseKernel.cpp", - "src/core/CL/kernels/CLROIAlignLayerKernel.cpp", - "src/core/CL/kernels/CLROIPoolingLayerKernel.cpp", - "src/core/CL/kernels/CLSelectKernel.cpp", - "src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp", - 
"src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp", - "src/core/CL/kernels/CLStackLayerKernel.cpp", - "src/core/CL/kernels/CLStridedSliceKernel.cpp", - "src/core/CL/kernels/CLTileKernel.cpp", - "src/core/CL/kernels/CLWeightsReshapeKernel.cpp" - ] - } - } - }, - "cpu" : - { - "runtime" : - { - "all" : [ - "src/cpu/CpuContext.cpp", - "src/cpu/CpuQueue.cpp", - "src/cpu/CpuTensor.cpp" - ], - "operators" : - { - "high_priority" : [ - "src/runtime/cpu/operators/CpuActivation.cpp", - "src/runtime/cpu/operators/CpuDepthwiseConv2d.cpp", - "src/runtime/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp", - "src/runtime/cpu/operators/CpuDirectConv2d.cpp", - "src/runtime/cpu/operators/CpuPermute.cpp", - "src/runtime/cpu/operators/CpuPool2d.cpp" - ], - "internal" : [ - "src/runtime/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp" - ], - "all" : [ - "src/runtime/cpu/operators/CpuAdd.cpp", - "src/runtime/cpu/operators/CpuCast.cpp", - "src/runtime/cpu/operators/CpuConcatenate.cpp", - "src/runtime/cpu/operators/CpuConvertFullyConnectedWeights.cpp", - "src/runtime/cpu/operators/CpuCopy.cpp", - "src/runtime/cpu/operators/CpuDequantize.cpp", - "src/runtime/cpu/operators/CpuElementwise.cpp", - "src/runtime/cpu/operators/CpuElementwiseUnary.cpp", - "src/runtime/cpu/operators/CpuFill.cpp", - "src/runtime/cpu/operators/CpuFlatten.cpp", - "src/runtime/cpu/operators/CpuFloor.cpp", - "src/runtime/cpu/operators/CpuGemmDirectConv2d.cpp", - "src/runtime/cpu/operators/CpuGemmLowpOutputStage.cpp", - "src/runtime/cpu/operators/CpuMul.cpp", - "src/runtime/cpu/operators/CpuQuantize.cpp", - "src/runtime/cpu/operators/CpuReshape.cpp", - "src/runtime/cpu/operators/CpuScale.cpp", - "src/runtime/cpu/operators/CpuSoftmax.cpp", - "src/runtime/cpu/operators/CpuSub.cpp", - "src/runtime/cpu/operators/CpuTranspose.cpp" - ] - } - }, - "core" : - { - "kernels" : - { - "high_priority" : [ - "src/core/cpu/kernels/CpuActivationKernel.cpp", - "src/core/cpu/kernels/CpuDepthwiseConv2dNativeKernel.cpp", - 
"src/core/cpu/kernels/CpuDirectConv2dKernel.cpp", - "src/core/cpu/kernels/CpuDirectConv2dOutputStageKernel.cpp", - "src/core/cpu/kernels/CpuPermuteKernel.cpp", - "src/core/cpu/kernels/CpuPool2dKernel.cpp", - "src/core/cpu/kernels/CpuReshapeKernel.cpp" - ], - "all" : [ - "src/core/cpu/kernels/CpuAddKernel.cpp", - "src/core/cpu/kernels/CpuCastKernel.cpp", - "src/core/cpu/kernels/CpuConcatenateBatchKernel.cpp", - "src/core/cpu/kernels/CpuConcatenateDepthKernel.cpp", - "src/core/cpu/kernels/CpuConcatenateHeightKernel.cpp", - "src/core/cpu/kernels/CpuConcatenateWidthKernel.cpp", - "src/core/cpu/kernels/CpuConvertFullyConnectedWeightsKernel.cpp", - "src/core/cpu/kernels/CpuCopyKernel.cpp", - "src/core/cpu/kernels/CpuDequantizeKernel.cpp", - "src/core/cpu/kernels/CpuElementwiseKernel.cpp", - "src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp", - "src/core/cpu/kernels/CpuFillKernel.cpp", - "src/core/cpu/kernels/CpuFloorKernel.cpp", - "src/core/cpu/kernels/CpuGemmLowpQuantizeDownInt32ScaleKernel.cpp", - "src/core/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp", - "src/core/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp", - "src/core/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp", - "src/core/cpu/kernels/CpuMulKernel.cpp", - "src/core/cpu/kernels/CpuQuantizeKernel.cpp", - "src/core/cpu/kernels/CpuScaleKernel.cpp", - "src/core/cpu/kernels/CpuSoftmaxKernel.cpp", - "src/core/cpu/kernels/CpuSubKernel.cpp", - "src/core/cpu/kernels/CpuTransposeKernel.cpp" - ] - }, - - "sve" : - { - "all" : [ - "src/core/cpu/kernels/add/sve/impl.cpp", - "src/core/cpu/kernels/softmax/impl/sve/impl.cpp", - "src/core/cpu/kernels/elementwise/sve/elementwise.cpp", - "src/core/cpu/kernels/elementwise/sve/elementwise_unary.cpp" - ], - "fp32" : [ - "src/core/cpu/kernels/activation/sve/fp32.cpp", - "src/core/cpu/kernels/scale/sve/fp32.cpp", - "src/core/NEON/kernels/batchnormalization/impl/SVE/fp32.cpp" - ], - "fp16" : [ - 
"src/core/cpu/kernels/activation/sve/fp16.cpp", - "src/core/cpu/kernels/scale/sve/fp16.cpp", - "src/core/NEON/kernels/batchnormalization/impl/SVE/fp16.cpp" - ], - "qsymm16" : [ - "src/core/cpu/kernels/activation/sve/qsymm16.cpp", - "src/core/cpu/kernels/add/sve/qsymm16.cpp" - ], - "qasymm8" : [ - "src/core/cpu/kernels/activation/sve/qasymm8.cpp", - "src/core/cpu/kernels/add/sve/qasymm8.cpp", - "src/core/cpu/kernels/scale/sve/qasymm8.cpp" - ], - "qasymm8_signed" : [ - "src/core/cpu/kernels/activation/sve/qasymm8_signed.cpp", - "src/core/cpu/kernels/add/sve/qasymm8_signed.cpp", - "src/core/cpu/kernels/scale/sve/qasymm8_signed.cpp" - ], - "integer" : [ - "src/core/cpu/kernels/add/sve/integer.cpp", - "src/core/cpu/kernels/scale/sve/integer.cpp" - ] - }, - - "neon": - { - "nchw" : [ - "src/core/cpu/kernels/pooling/neon/nchw/all.cpp" - ], - "fp32" : [ - "src/core/cpu/kernels/activation/neon/fp32.cpp", - "src/core/cpu/kernels/floor/neon/fp32.cpp", - "src/core/cpu/kernels/pooling/neon/fp32.cpp", - "src/core/NEON/kernels/batchnormalization/impl/NEON/fp32.cpp" - ], - "fp16" : [ - "src/core/cpu/kernels/activation/neon/fp16.cpp", - "src/core/cpu/kernels/floor/neon/fp16.cpp", - "src/core/cpu/kernels/pooling/neon/fp16.cpp", - "src/core/cpu/kernels/scale/neon/fp16.cpp", - "src/core/NEON/kernels/batchnormalization/impl/NEON/fp16.cpp" - ], - "qsymm16" : [ - "src/core/cpu/kernels/activation/neon/qsymm16.cpp", - "src/core/cpu/kernels/add/neon/qsymm16.cpp", - "src/core/cpu/kernels/sub/neon/qsymm16.cpp" - - ], - "qasymm8" : [ - "src/core/cpu/kernels/activation/neon/qasymm8.cpp", - "src/core/cpu/kernels/add/neon/qasymm8.cpp", - "src/core/cpu/kernels/pooling/neon/qasymm8.cpp", - "src/core/cpu/kernels/scale/neon/qasymm8.cpp", - "src/core/cpu/kernels/sub/neon/qasymm8.cpp" - ], - "qasymm8_signed" : [ - "src/core/cpu/kernels/activation/neon/qasymm8_signed.cpp", - "src/core/cpu/kernels/add/neon/qasymm8_signed.cpp", - "src/core/cpu/kernels/pooling/neon/qasymm8_signed.cpp", - 
"src/core/cpu/kernels/scale/neon/qasymm8_signed.cpp", - "src/core/cpu/kernels/sub/neon/qasymm8_signed.cpp" - ], - "integer" : [ - "src/core/cpu/kernels/sub/neon/integer.cpp", - "src/core/cpu/kernels/add/neon/integer.cpp" - ] - } + "files": { + "kernel": [ + "src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp", + "src/core/NEON/kernels/convolution/winograd/padding.cpp", + "src/core/NEON/kernels/convolution/winograd/winograd.cpp", + "src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_1x8_fp32_fp32_integers.cpp", + "src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_4x4_fp16_fp16_integers.cpp", + "src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_4x4_fp32_fp32_integers.cpp", + "src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_6x6_fp16_fp16_integers.cpp", + "src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_6x6_fp32_fp32_integers.cpp", + "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_2_7_fp32_fp32_integers.cpp", + "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_2x2_3x3_fp32_fp32_integers.cpp", + "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_2x2_5x5_fp32_fp32_integers.cpp", + "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_4_5_fp32_fp32_integers.cpp", + "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_4x4_3x3_fp16_fp16_integers.cpp", + "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_4x4_3x3_fp32_fp32_integers.cpp", + "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_6_3_fp32_fp32_integers.cpp", + "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_2_7_fp32_fp32_integers.cpp", + "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_2x2_3x3_fp32_fp32_integers.cpp", + "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_2x2_5x5_fp32_fp32_integers.cpp", + 
"src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_4_5_fp32_fp32_integers.cpp", + "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_4x4_3x3_fp16_fp16_integers.cpp", + "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_4x4_3x3_fp32_fp32_integers.cpp", + "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_6_3_fp32_fp32_integers.cpp" + ] } + } } + } } \ No newline at end of file diff --git a/src/common/cpuinfo/CpuInfo.cpp b/src/common/cpuinfo/CpuInfo.cpp index 436e7ea803..32504acc44 100644 --- a/src/common/cpuinfo/CpuInfo.cpp +++ b/src/common/cpuinfo/CpuInfo.cpp @@ -25,7 +25,6 @@ #include "arm_compute/core/Error.h" #include "arm_compute/core/Log.h" -#include "src/common/cpuinfo/target/CpuInfoSveUtils.h" #include "support/StringSupport.h" #include "support/ToolchainSupport.h" @@ -260,6 +259,20 @@ int get_max_cpus() return max_cpus; } #endif /* !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) */ + +#if defined(BARE_METAL) && defined(__aarch64__) +uint64_t get_sve_feature_reg() +{ + uint64_t svefr0 = 0; + __asm __volatile( + ".inst 0xd5380483 // mrs x3, ID_AA64ZFR0_EL1\n" + "MOV %0, X3" + : "=r"(svefr0) + : + : "x3"); + return svefr0; +} +#endif /* defined(BARE_METAL) && defined(__aarch64__) */ } // namespace CpuInfo::CpuInfo(CpuIsaInfo isa, std::vector cpus) diff --git a/src/common/cpuinfo/CpuInfo.h b/src/common/cpuinfo/CpuInfo.h index f3056d2faf..c04c9f4ec8 100644 --- a/src/common/cpuinfo/CpuInfo.h +++ b/src/common/cpuinfo/CpuInfo.h @@ -79,17 +79,25 @@ public: { return _isa.bf16; } + bool has_svebf16() const + { + return _isa.svebf16; + } bool has_dotprod() const { return _isa.dot; } - bool has_immla() const + bool has_i8mm() const + { + return _isa.i8mm; + } + bool has_svei8mm() const { - return _isa.immla; + return _isa.svei8mm; } - bool has_fmmla() const + bool has_svef32mm() const { - return _isa.fmmla; + return _isa.svef32mm; } CpuModel 
cpu_model(uint32_t cpuid) const; diff --git a/src/common/cpuinfo/CpuIsaInfo.cpp b/src/common/cpuinfo/CpuIsaInfo.cpp index d99f9aec29..14466ef4e7 100644 --- a/src/common/cpuinfo/CpuIsaInfo.cpp +++ b/src/common/cpuinfo/CpuIsaInfo.cpp @@ -90,6 +90,10 @@ void decode_hwcaps(CpuIsaInfo &isa, const uint32_t hwcaps, const uint32_t hwcaps { isa.bf16 = true; } + if(hwcaps2 & ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVEBF16) + { + isa.svebf16 = true; + } // Instruction extensions if(hwcaps & ARM_COMPUTE_CPU_FEATURE_HWCAP_ASIMDDP) @@ -98,11 +102,15 @@ void decode_hwcaps(CpuIsaInfo &isa, const uint32_t hwcaps, const uint32_t hwcaps } if(hwcaps2 & ARM_COMPUTE_CPU_FEATURE_HWCAP2_I8MM) { - isa.immla = true; + isa.i8mm = true; + } + if(hwcaps2 & ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVEI8MM) + { + isa.svei8mm = true; } if(hwcaps2 & ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVEF32MM) { - isa.fmmla = true; + isa.svef32mm = true; } } #else /* defined(__aarch64__) */ @@ -133,6 +141,10 @@ void decode_regs(CpuIsaInfo &isa, const uint64_t isar0, const uint64_t isar1, co { isa.bf16 = true; } + if((svefr0 >> 20) & 0xf) + { + isa.svebf16 = true; + } // Instruction extensions if((isar0 >> 44) & 0xf) @@ -141,11 +153,15 @@ void decode_regs(CpuIsaInfo &isa, const uint64_t isar0, const uint64_t isar1, co } if((isar1 >> 48) & 0xf) { - isa.immla = true; + isa.i8mm = true; + } + if((svefr0 >> 44) & 0xf) + { + isa.svei8mm = true; } if((svefr0 >> 52) & 0xf) { - isa.fmmla = true; + isa.svef32mm = true; } } diff --git a/src/common/cpuinfo/CpuIsaInfo.h b/src/common/cpuinfo/CpuIsaInfo.h index 1125f766dd..a2aace1b80 100644 --- a/src/common/cpuinfo/CpuIsaInfo.h +++ b/src/common/cpuinfo/CpuIsaInfo.h @@ -44,11 +44,13 @@ struct CpuIsaInfo /* Data-type extensions support */ bool fp16{ false }; bool bf16{ false }; + bool svebf16{ false }; /* Instruction support */ bool dot{ false }; - bool immla{ false }; - bool fmmla{ false }; + bool i8mm{ false }; + bool svei8mm{ false }; + bool svef32mm{ false }; }; /** Identify ISA related information 
through system information diff --git a/src/common/cpuinfo/target/CpuInfoSveUtils.cpp b/src/common/cpuinfo/target/CpuInfoSveUtils.cpp deleted file mode 100644 index 750a1b01d1..0000000000 --- a/src/common/cpuinfo/target/CpuInfoSveUtils.cpp +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "src/common/cpuinfo/target/CpuInfoSveUtils.h" - -namespace arm_compute -{ -namespace cpuinfo -{ -uint64_t get_sve_feature_reg() -{ - uint64_t reg = 0; -#if defined(ENABLE_SVE) - __asm __volatile("MRS %0, ID_AA64ZFR0_EL1" - : "=r"(reg)); -#endif /* defined(DENABLE_SVE) */ - return reg; -} -} // namespace cpuinfo -} // namespace arm_compute diff --git a/src/common/cpuinfo/target/CpuInfoSveUtils.h b/src/common/cpuinfo/target/CpuInfoSveUtils.h deleted file mode 100644 index 73862b131c..0000000000 --- a/src/common/cpuinfo/target/CpuInfoSveUtils.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef SRC_COMMON_CPUINFO_TARGET_CPUINFO_SVE_UTILS_H -#define SRC_COMMON_CPUINFO_TARGET_CPUINFO_SVE_UTILS_H - -#include - -namespace arm_compute -{ -namespace cpuinfo -{ -/** Returns the contents of the SVE feature register (ID_AA64ZFR0_EL1) - * - * @return uint64_t The value of the register - */ -uint64_t get_sve_feature_reg(); -} // namespace cpuinfo -} // namespace arm_compute -#endif /* SRC_COMMON_CPUINFO_CPUISAINFO_H */ diff --git a/src/core/CPP/CPPTypes.cpp b/src/core/CPP/CPPTypes.cpp index edcb9cb1ba..44cd000ada 100644 --- a/src/core/CPP/CPPTypes.cpp +++ b/src/core/CPP/CPPTypes.cpp @@ -36,6 +36,12 @@ struct CPUInfo::Impl unsigned int L2_cache_size = 262144; }; +CPUInfo &CPUInfo::get() +{ + static CPUInfo _cpuinfo; + return _cpuinfo; +} + CPUInfo::CPUInfo() : _impl(std::make_unique()) { @@ -49,11 +55,6 @@ unsigned int CPUInfo::get_cpu_num() const return _impl->info.num_cpus(); } -bool CPUInfo::has_sve() const -{ - return _impl->info.has_sve(); -} - bool CPUInfo::has_fp16() const { return _impl->info.has_fp16(); @@ -64,11 +65,41 @@ bool CPUInfo::has_bf16() const return _impl->info.has_bf16(); } +bool CPUInfo::has_svebf16() const +{ + return _impl->info.has_svebf16(); +} + bool CPUInfo::has_dotprod() const { return _impl->info.has_dotprod(); } +bool CPUInfo::has_svef32mm() const +{ + return _impl->info.has_svef32mm(); +} + +bool CPUInfo::has_i8mm() const +{ + return _impl->info.has_i8mm(); +} + +bool CPUInfo::has_svei8mm() const +{ + return _impl->info.has_svei8mm(); +} + +bool CPUInfo::has_sve() const +{ + return _impl->info.has_sve(); +} + +bool CPUInfo::has_sve2() const +{ + return _impl->info.has_sve2(); +} + CPUModel CPUInfo::get_cpu_model() const { return _impl->info.cpu_model(); diff --git a/src/core/NEON/SVEAsymm.h b/src/core/NEON/SVEAsymm.h index 4b0ecd9eea..40b8e64b67 100644 --- a/src/core/NEON/SVEAsymm.h +++ b/src/core/NEON/SVEAsymm.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020 Arm Limited. + * Copyright (c) 2020-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,7 +24,7 @@ #ifndef ARM_COMPUTE_SVEASYMM_H #define ARM_COMPUTE_SVEASYMM_H -#if defined(__ARM_FEATURE_SVE2) +#if defined(ARM_COMPUTE_ENABLE_SVE2) #include "src/core/NEON/SVEMath.h" #include @@ -258,5 +258,5 @@ inline svuint16x2_t svquantize_qasymm16_z(svbool_t pg, const svfloat32x4_t qv, c } } // namespace arm_compute #include "src/core/NEON/SVEAsymm.inl" -#endif /* defined(__ARM_FEATURE_SVE2) */ +#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */ #endif // ARM_COMPUTE_NEASYMM_H diff --git a/src/core/NEON/SVEAsymm.inl b/src/core/NEON/SVEAsymm.inl index edf5733c36..e85cacd721 100644 --- a/src/core/NEON/SVEAsymm.inl +++ b/src/core/NEON/SVEAsymm.inl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020 Arm Limited. + * Copyright (c) 2020-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -23,7 +23,7 @@ */ namespace arm_compute { -#if defined(__ARM_FEATURE_SVE2) +#if defined(ARM_COMPUTE_ENABLE_SVE2) inline svuint8_t svmla_qasymm8_z(svbool_t pg, svuint8_t vd, svfloat32_t vs, svfloat32_t vo) { // Convert uint8 vectors to uint16 vectors @@ -101,5 +101,5 @@ inline svint8_t svmla_qasymm8_signed_z(svbool_t pg, svint8_t vd, svfloat32_t vs, const auto res = svqxtnt_s16(svqxtnb_s16(vd_low_s16), vd_high_s16); return res; } -#endif /* (__ARM_FEATURE_SVE2) */ +#endif /* (ARM_COMPUTE_ENABLE_SVE2) */ } // namespace arm_compute diff --git a/src/core/NEON/SVEMath.h b/src/core/NEON/SVEMath.h index dde75e8088..5ada7ae0ff 100644 --- a/src/core/NEON/SVEMath.h +++ b/src/core/NEON/SVEMath.h @@ -24,7 +24,7 @@ #ifndef ARM_COMPUTE_SVEMATH_H #define ARM_COMPUTE_SVEMATH_H -#if defined(ENABLE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) #include "src/core/NEON/wrapper/intrinsics/svcvt.h" #include "src/core/NEON/wrapper/intrinsics/svdup_n.h" #include "src/core/NEON/wrapper/intrinsics/svreinterpret.h" @@ -185,5 +185,5 @@ int_vec_type convert_float_to_int(const svfloat32_t &in_0, const svfloat32_t &in } // namespace arm_compute #include "src/core/NEON/SVEMath.inl" -#endif /* 
defined(ENABLE_SVE) */ +#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */ #endif /* ARM_COMPUTE_SVEMATH_H */ \ No newline at end of file diff --git a/src/core/NEON/SVEMath.inl b/src/core/NEON/SVEMath.inl index 7625e5be34..5ebef5ad6a 100644 --- a/src/core/NEON/SVEMath.inl +++ b/src/core/NEON/SVEMath.inl @@ -24,7 +24,7 @@ #include #include -#if defined(__ARM_FEATURE_SVE) && defined(ENABLE_SVE) +#if defined(__ARM_FEATURE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE) #ifndef M_PI #define M_PI (3.14159265358979323846) @@ -117,22 +117,22 @@ inline svfloat32_t svexp_f32_z(svbool_t pg, svfloat32_t x) inline svfloat16_t svexp_f16_z(svbool_t pg, svfloat16_t x) { auto bottom = svcvt_f32_z(pg, x); -#if defined(__ARM_FEATURE_SVE2) +#if defined(ARM_COMPUTE_ENABLE_SVE2) auto top = svcvtlt_f32_x(pg, x); auto pg_top = pg; -#else /* defined(__ARM_FEATURE_SVE2) */ +#else /* defined(ARM_COMPUTE_ENABLE_SVE2) */ auto pg_top = svptrue_b16(); auto top = svcvt_f32_z(pg_top, svreinterpret_f16(svrevh_z(svptrue_b16(), svreinterpret_u32(x)))); -#endif /* defined(__ARM_FEATURE_SVE2) */ +#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */ bottom = svexp_f32_z(pg, bottom); top = svexp_f32_z(pg_top, top); -#if defined(__ARM_FEATURE_SVE2) +#if defined(ARM_COMPUTE_ENABLE_SVE2) return svcvtnt_f16_m(svcvt_f16_z(pg, bottom), pg_top, top); -#else /* defined(__ARM_FEATURE_SVE2) */ +#else /* defined(ARM_COMPUTE_ENABLE_SVE2) */ return svtrn1(svcvt_f16_z(pg, bottom), svcvt_f16_z(pg_top, top)); -#endif /* defined(__ARM_FEATURE_SVE2) */ +#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */ } inline svfloat32_t svtanh_f32_z(svbool_t pg, svfloat32_t val) @@ -196,22 +196,22 @@ inline svfloat32_t svlog_f32_z(svbool_t pg, svfloat32_t x) inline svfloat16_t svlog_f16_z(svbool_t pg, svfloat16_t x) { auto bottom = svcvt_f32_z(pg, x); -#if defined(__ARM_FEATURE_SVE2) +#if defined(ARM_COMPUTE_ENABLE_SVE2) auto top = svcvtlt_f32_x(pg, x); auto pg_top = pg; -#else /* defined(__ARM_FEATURE_SVE2) */ +#else /* defined(ARM_COMPUTE_ENABLE_SVE2) */ 
auto pg_top = svptrue_b16(); auto top = svcvt_f32_z(pg_top, svreinterpret_f16(svrevh_z(svptrue_b16(), svreinterpret_u32(x)))); -#endif /* defined(__ARM_FEATURE_SVE2) */ +#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */ bottom = svlog_f32_z(pg, bottom); top = svlog_f32_z(pg_top, top); -#if defined(__ARM_FEATURE_SVE2) +#if defined(ARM_COMPUTE_ENABLE_SVE2) return svcvtnt_f16_m(svcvt_f16_z(pg, bottom), pg_top, top); -#else /* defined(__ARM_FEATURE_SVE2) */ +#else /* defined(ARM_COMPUTE_ENABLE_SVE2) */ return svtrn1(svcvt_f16_z(pg, bottom), svcvt_f16_z(pg_top, top)); -#endif /* defined(__ARM_FEATURE_SVE2) */ +#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */ } inline svfloat32_t svsin_f32_z(svbool_t pg, svfloat32_t val) @@ -269,22 +269,22 @@ inline svfloat32_t svsin_f32_z(svbool_t pg, svfloat32_t val) inline svfloat16_t svsin_f16_z(svbool_t pg, svfloat16_t val) { auto bottom = svcvt_f32_z(pg, val); -#if defined(__ARM_FEATURE_SVE2) +#if defined(ARM_COMPUTE_ENABLE_SVE2) auto top = svcvtlt_f32_x(pg, val); auto pg_top = pg; -#else /* defined(__ARM_FEATURE_SVE2) */ +#else /* defined(ARM_COMPUTE_ENABLE_SVE2) */ auto pg_top = svptrue_b16(); auto top = svcvt_f32_z(pg_top, svreinterpret_f16(svrevh_z(svptrue_b16(), svreinterpret_u32(val)))); -#endif /* defined(__ARM_FEATURE_SVE2) */ +#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */ bottom = svsin_f32_z(pg, bottom); top = svsin_f32_z(pg_top, top); -#if defined(__ARM_FEATURE_SVE2) +#if defined(ARM_COMPUTE_ENABLE_SVE2) return svcvtnt_f16_m(svcvt_f16_z(pg, bottom), pg_top, top); -#else /* defined(__ARM_FEATURE_SVE2) */ +#else /* defined(ARM_COMPUTE_ENABLE_SVE2) */ return svtrn1(svcvt_f16_z(pg, bottom), svcvt_f16_z(pg_top, top)); -#endif /* defined(__ARM_FEATURE_SVE2) */ +#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */ } inline svfloat32_t svpow_f32_z(svbool_t pg, svfloat32_t a, svfloat32_t b) @@ -297,27 +297,27 @@ inline svfloat16_t svpow_f16_z(svbool_t pg, svfloat16_t a, svfloat16_t b) auto a_bottom = svcvt_f32_z(pg, a); auto b_bottom = 
svcvt_f32_z(pg, b); -#if defined(__ARM_FEATURE_SVE2) +#if defined(ARM_COMPUTE_ENABLE_SVE2) auto pg_top = pg; auto a_top = svcvtlt_f32_x(pg, a); auto b_top = svcvtlt_f32_x(pg, b); -#else /* defined(__ARM_FEATURE_SVE2) */ +#else /* defined(ARM_COMPUTE_ENABLE_SVE2) */ auto pg_top = svptrue_b16(); auto a_top = svcvt_f32_z(pg_top, svreinterpret_f16(svrevh_z(svptrue_b16(), svreinterpret_u32(a)))); auto b_top = svcvt_f32_z(pg_top, svreinterpret_f16(svrevh_z(svptrue_b16(), svreinterpret_u32(b)))); -#endif /* defined(__ARM_FEATURE_SVE2) */ +#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */ auto res_bottom = svpow_f32_z(pg, a_bottom, b_bottom); auto res_top = svpow_f32_z(pg_top, a_top, b_top); -#if defined(__ARM_FEATURE_SVE2) +#if defined(ARM_COMPUTE_ENABLE_SVE2) return svcvtnt_f16_m(svcvt_f16_z(pg, res_bottom), pg_top, res_top); -#else /* defined(__ARM_FEATURE_SVE2) */ +#else /* defined(ARM_COMPUTE_ENABLE_SVE2) */ return svtrn1(svcvt_f16_z(pg, res_bottom), svcvt_f16_z(pg_top, res_top)); -#endif /* defined(__ARM_FEATURE_SVE2) */ +#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */ } -#if defined(__ARM_FEATURE_SVE2) +#if defined(ARM_COMPUTE_ENABLE_SVE2) template <> inline svuint8_t convert_float_to_int(const svfloat32_t &in_0, const svfloat32_t &in_1, const svfloat32_t &in_2, const svfloat32_t &in_3) { @@ -385,7 +385,7 @@ inline svint8_t convert_float_to_int(const svfloat32_t &in_0, const sv return out; } -#endif /* defined(__ARM_FEATURE_SVE2) */ +#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */ } // namespace arm_compute -#endif /* defined(ENABLE_SVE) */ +#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */ diff --git a/src/core/NEON/SVESymm.h b/src/core/NEON/SVESymm.h index 30e1e172a3..c71d273b67 100644 --- a/src/core/NEON/SVESymm.h +++ b/src/core/NEON/SVESymm.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020 Arm Limited. + * Copyright (c) 2020-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -26,7 +26,7 @@ #include "arm_compute/core/utils/quantization/AsymmHelpers.h" -#if defined(__ARM_FEATURE_SVE2) +#if defined(ARM_COMPUTE_ENABLE_SVE2) #include "src/core/NEON/SVEMath.h" #include @@ -123,5 +123,5 @@ inline svint16x2_t svquantize_qsymm16_z(svbool_t pg, const svfloat32x4_t qv, con } } // namespace arm_compute -#endif /* defined(__ARM_FEATURE_SVE2) */ +#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */ #endif // ARM_COMPUTE_NESYMM_H \ No newline at end of file diff --git a/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp b/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp index 92000bb2f6..46551553c9 100644 --- a/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp @@ -48,7 +48,8 @@ namespace { struct BatchNormalizationSelectorData { - DataType dt; + DataType dt; + const CPUInfo &ci; }; using BatchNormalizationSelectorPtr = std::add_pointer::type; using BatchNormalizationKernelPtr = std::add_pointerdata_type() }); + const auto *uk = get_implementation(BatchNormalizationSelectorData{ input->data_type(), CPUInfo::get() }); ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr); if(act_info.enabled()) @@ -387,7 +388,7 @@ void NEBatchNormalizationLayerKernel::run(const Window &window, const ThreadInfo } else { - const auto *uk = get_implementation(BatchNormalizationSelectorData{ _input->info()->data_type() }); + const auto *uk = get_implementation(BatchNormalizationSelectorData{ _input->info()->data_type(), CPUInfo::get() }); uk->ukernel(_input, _output, _mean, _var, _beta, _gamma, _epsilon, _act_info, window); } } diff --git a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp16.cpp b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp16.cpp index fdb36fc1d1..6ba7c78e97 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp16.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp16.cpp @@ -33,13 +33,13 
@@ #include "depthwise_implementation_constraints.hpp" #if defined(__aarch64__) -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) #include "kernels/sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst.hpp" #include "kernels/sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst.hpp" #include "kernels/sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp" #include "kernels/sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp" #include "kernels/sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp" -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) #include "kernels/a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst.hpp" #include "kernels/a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst.hpp" #include "kernels/a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp" @@ -83,12 +83,13 @@ namespace static const DepthwiseImplementation<__fp16, __fp16> depthwise_fp16_methods[] = { #if defined(__aarch64__) -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) { DepthwiseMethod::DEPTHFIRST, "sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst", constraint(is_supported, - has_no_channel_multiplier), + has_no_channel_multiplier, + cpu_has_sve), cycle_estimate, [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * { return new DepthwiseDepthfirst(args); @@ -98,7 +99,8 @@ static const DepthwiseImplementation<__fp16, __fp16> depthwise_fp16_methods[] = DepthwiseMethod::DEPTHFIRST, "sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst", constraint(is_supported, - has_no_channel_multiplier), + has_no_channel_multiplier, + cpu_has_sve), cycle_estimate, [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * { return new DepthwiseDepthfirst(args); @@ -108,7 +110,8 @@ static const DepthwiseImplementation<__fp16, __fp16> depthwise_fp16_methods[] = DepthwiseMethod::DEPTHFIRST, "sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst", constraint(is_supported, - has_no_channel_multiplier), + 
has_no_channel_multiplier, + cpu_has_sve), cycle_estimate, [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * { return new DepthwiseDepthfirst(args); @@ -118,7 +121,8 @@ static const DepthwiseImplementation<__fp16, __fp16> depthwise_fp16_methods[] = DepthwiseMethod::DEPTHFIRST, "sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst", constraint(is_supported, - has_no_channel_multiplier), + has_no_channel_multiplier, + cpu_has_sve), cycle_estimate, [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * { return new DepthwiseDepthfirst(args); @@ -128,13 +132,14 @@ static const DepthwiseImplementation<__fp16, __fp16> depthwise_fp16_methods[] = DepthwiseMethod::DEPTHFIRST, "sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst", constraint(is_supported, - has_no_channel_multiplier), + has_no_channel_multiplier, + cpu_has_sve), cycle_estimate, [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * { return new DepthwiseDepthfirst(args); }, }, -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) { DepthwiseMethod::DEPTHFIRST, diff --git a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp32.cpp b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp32.cpp index aea750a475..ac43df979c 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp32.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp32.cpp @@ -33,7 +33,7 @@ #include "depthwise_implementation_constraints.hpp" #if defined(__aarch64__) -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) #include "kernels/sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst.hpp" #include "kernels/sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst.hpp" #include "kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp" @@ -43,7 +43,7 @@ #include 
"kernels/sve_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst.hpp" #include "kernels/sve_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst.hpp" #include "kernels/sve_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst.hpp" -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) #include "kernels/a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst.hpp" #include "kernels/a64_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst.hpp" #include "kernels/a64_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp" @@ -85,12 +85,13 @@ namespace static const DepthwiseImplementation depthwise_fp32_methods[] = { #if defined(__aarch64__) -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) { DepthwiseMethod::DEPTHFIRST, "sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst", constraint(is_supported, - has_no_channel_multiplier), + has_no_channel_multiplier, + cpu_has_sve), cycle_estimate, [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon * { return new DepthwiseDepthfirst(args); @@ -100,7 +101,8 @@ static const DepthwiseImplementation depthwise_fp32_methods[] = { DepthwiseMethod::DEPTHFIRST, "sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst", constraint(is_supported, - has_no_channel_multiplier), + has_no_channel_multiplier, + cpu_has_sve), cycle_estimate, [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon * { return new DepthwiseDepthfirst(args); @@ -110,7 +112,8 @@ static const DepthwiseImplementation depthwise_fp32_methods[] = { DepthwiseMethod::DEPTHFIRST, "sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst", constraint(is_supported, - has_no_channel_multiplier), + has_no_channel_multiplier, + cpu_has_sve), cycle_estimate, [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon * { return new DepthwiseDepthfirst(args); @@ -120,7 +123,8 @@ static const DepthwiseImplementation depthwise_fp32_methods[] = { DepthwiseMethod::DEPTHFIRST, 
"sve_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst", constraint(is_supported, - has_no_channel_multiplier), + has_no_channel_multiplier, + cpu_has_sve), cycle_estimate, [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon * { return new DepthwiseDepthfirst(args); @@ -130,7 +134,8 @@ static const DepthwiseImplementation depthwise_fp32_methods[] = { DepthwiseMethod::DEPTHFIRST, "sve_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst", constraint(is_supported, - has_no_channel_multiplier), + has_no_channel_multiplier, + cpu_has_sve), cycle_estimate, [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon * { return new DepthwiseDepthfirst(args); @@ -139,7 +144,7 @@ static const DepthwiseImplementation depthwise_fp32_methods[] = { { DepthwiseMethod::DEPTHFIRST, "sve_fp32_nhwc_generic_output3x3_mla_depthfirst", - constraint(has_no_channel_multiplier), + constraint(has_no_channel_multiplier, cpu_has_sve), not_preferred, [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon * { return new DepthwiseDepthfirstGeneric(args); @@ -148,7 +153,7 @@ static const DepthwiseImplementation depthwise_fp32_methods[] = { { DepthwiseMethod::DEPTHFIRST, "sve_fp32_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst", - constraint(is_supported), + constraint(is_supported, cpu_has_sve), not_preferred_if_no_multiplier, [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon * { return new DepthwiseDepthfirstWithMultiplier(args); @@ -157,7 +162,7 @@ static const DepthwiseImplementation depthwise_fp32_methods[] = { { DepthwiseMethod::DEPTHFIRST, "sve_fp32_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst", - constraint(is_supported), + constraint(is_supported, cpu_has_sve), not_preferred_if_no_multiplier, [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon * { return new DepthwiseDepthfirstWithMultiplier(args); @@ -166,13 +171,13 @@ static const DepthwiseImplementation depthwise_fp32_methods[] = { { DepthwiseMethod::DEPTHFIRST, 
"sve_fp32_nhwc_generic_with_multiplier_output2x8_mla_depthfirst", - nullptr, + constraint(cpu_has_sve), not_preferred_if_no_multiplier, [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon * { return new DepthwiseDepthfirstGenericWithMultiplier(args); }, }, -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) { DepthwiseMethod::DEPTHFIRST, "a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst", diff --git a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_implementation_constraints.hpp b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_implementation_constraints.hpp index b4814bef92..6526d001b3 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_implementation_constraints.hpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_implementation_constraints.hpp @@ -85,6 +85,18 @@ bool cpu_has_dot_product(const DepthwiseArgs &args, const void *) return args.cpu_info->has_dotprod(); } +bool cpu_has_sve(const DepthwiseArgs &args, const void *) __attribute__ ((unused)); +bool cpu_has_sve(const DepthwiseArgs &args, const void *) +{ + return args.cpu_info->has_sve(); +} + +bool cpu_has_sve2(const DepthwiseArgs &args, const void *) __attribute__ ((unused)); +bool cpu_has_sve2(const DepthwiseArgs &args, const void *) +{ + return args.cpu_info->has_sve2(); +} + bool has_no_channel_multiplier(const DepthwiseArgs &args, const void *) __attribute__ ((unused)); bool has_no_channel_multiplier(const DepthwiseArgs &args, const void *) { diff --git a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_s8q.cpp b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_s8q.cpp index 40370fe59e..f38912d257 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_s8q.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_s8q.cpp @@ -33,7 +33,7 @@ #include "depthwise_implementation_constraints.hpp" #if defined(__aarch64__) -#if defined(__ARM_FEATURE_SVE) && defined(SVE2) +#if defined(ARM_COMPUTE_ENABLE_SVE) && 
defined(ARM_COMPUTE_ENABLE_SVE2) #include "kernels/sve_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst.hpp" #include "kernels/sve_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst.hpp" #include "kernels/sve_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp" @@ -41,7 +41,7 @@ #include "kernels/sve_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp" #include "kernels/sve_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst.hpp" #include "kernels/sve_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst.hpp" -#endif // defined(__ARM_FEATURE_SVE) && defined(SVE2) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) #include "kernels/a64_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst.hpp" #include "kernels/a64_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst.hpp" #include "kernels/a64_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp" @@ -73,14 +73,15 @@ bool qp_weights_are_symmetric(const DepthwiseArgs &, const void *_qp) static const DepthwiseImplementation depthwise_s8q_methods[] = { #if defined(__aarch64__) -#if defined(__ARM_FEATURE_SVE) && defined(SVE2) +#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) { DepthwiseMethod::DEPTHFIRST, "sve_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst", constraint(is_supported, has_no_channel_multiplier, qp_has_no_left_shift, - qp_weights_are_symmetric), + qp_weights_are_symmetric, + cpu_has_sve2), nullptr, [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon * { return new DepthwiseDepthfirstQuantized(args, qp); @@ -91,7 +92,8 @@ static const DepthwiseImplementation depth "sve_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst", constraint(is_supported, has_no_channel_multiplier, - qp_has_no_left_shift), + qp_has_no_left_shift, + cpu_has_sve2), nullptr, [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon * { return new DepthwiseDepthfirstQuantized(args, qp); @@ -102,7 +104,8 @@ static const DepthwiseImplementation depth "sve_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst", 
constraint(is_supported, has_no_channel_multiplier, - qp_has_no_left_shift), + qp_has_no_left_shift, + cpu_has_sve2), nullptr, [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon * { return new DepthwiseDepthfirstQuantized(args, qp); @@ -113,7 +116,8 @@ static const DepthwiseImplementation depth "sve_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst", constraint(is_supported, has_no_channel_multiplier, - qp_has_no_left_shift), + qp_has_no_left_shift, + cpu_has_sve2), nullptr, [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon * { return new DepthwiseDepthfirstQuantized(args, qp); @@ -124,7 +128,8 @@ static const DepthwiseImplementation depth "sve_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst", constraint(is_supported, has_no_channel_multiplier, - qp_has_no_left_shift), + qp_has_no_left_shift, + cpu_has_sve2), nullptr, [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon * { return new DepthwiseDepthfirstQuantized(args, qp); @@ -134,7 +139,8 @@ static const DepthwiseImplementation depth DepthwiseMethod::DEPTHFIRST, "sve_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst", constraint(is_supported, - qp_has_no_left_shift), + qp_has_no_left_shift, + cpu_has_sve2), nullptr, [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon * { return new DepthwiseDepthfirstWithMultiplierQuantized(args, qp); @@ -144,13 +150,14 @@ static const DepthwiseImplementation depth DepthwiseMethod::DEPTHFIRST, "sve_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst", constraint(is_supported, - qp_has_no_left_shift), + qp_has_no_left_shift, + cpu_has_sve2), nullptr, [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon * { return new DepthwiseDepthfirstWithMultiplierQuantized(args, qp); }, }, -#endif // defined(__ARM_FEATURE_SVE) && defined(SVE2) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) { DepthwiseMethod::DEPTHFIRST, 
"a64_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst", diff --git a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_u8q.cpp b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_u8q.cpp index 3e190d242a..67713c5bcc 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_u8q.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_u8q.cpp @@ -33,14 +33,14 @@ #include "depthwise_implementation_constraints.hpp" #if defined(__aarch64__) -#if defined(__ARM_FEATURE_SVE) && defined(SVE2) +#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) #include "kernels/sve_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst.hpp" #include "kernels/sve_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp" #include "kernels/sve_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp" #include "kernels/sve_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp" #include "kernels/sve_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst.hpp" #include "kernels/sve_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst.hpp" -#endif // defined(__ARM_FEATURE_SVE) && defined(SVE2) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) #include "kernels/a64_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst.hpp" #include "kernels/a64_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp" #include "kernels/a64_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp" @@ -60,13 +60,14 @@ namespace depthwise { static const DepthwiseImplementation depthwise_u8q_methods[] = { #if defined(__aarch64__) -#if defined(__ARM_FEATURE_SVE) && defined(SVE2) +#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) { DepthwiseMethod::DEPTHFIRST, "sve_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst", constraint(is_supported, has_no_channel_multiplier, - qp_has_no_left_shift), + qp_has_no_left_shift, + cpu_has_sve2), nullptr, [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon * { return new DepthwiseDepthfirstQuantized(args, qp); @@ -77,7 +78,8 @@ static const 
DepthwiseImplementation de "sve_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst", constraint(is_supported, has_no_channel_multiplier, - qp_has_no_left_shift), + qp_has_no_left_shift, + cpu_has_sve2), nullptr, [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon * { return new DepthwiseDepthfirstQuantized(args, qp); @@ -88,7 +90,8 @@ static const DepthwiseImplementation de "sve_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst", constraint(is_supported, has_no_channel_multiplier, - qp_has_no_left_shift), + qp_has_no_left_shift, + cpu_has_sve2), nullptr, [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon * { return new DepthwiseDepthfirstQuantized(args, qp); @@ -99,7 +102,8 @@ static const DepthwiseImplementation de "sve_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst", constraint(is_supported, has_no_channel_multiplier, - qp_has_no_left_shift), + qp_has_no_left_shift, + cpu_has_sve2), nullptr, [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon * { return new DepthwiseDepthfirstQuantized(args, qp); @@ -109,7 +113,8 @@ static const DepthwiseImplementation de DepthwiseMethod::DEPTHFIRST, "sve_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst", constraint(is_supported, - qp_has_no_left_shift), + qp_has_no_left_shift, + cpu_has_sve2), nullptr, [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon * { return new DepthwiseDepthfirstWithMultiplierQuantized(args, qp); @@ -119,13 +124,14 @@ static const DepthwiseImplementation de DepthwiseMethod::DEPTHFIRST, "sve_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst", constraint(is_supported, - qp_has_no_left_shift), + qp_has_no_left_shift, + cpu_has_sve2), nullptr, [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon * { return new DepthwiseDepthfirstWithMultiplierQuantized(args, qp); }, }, -#endif // defined(__ARM_FEATURE_SVE) && defined(SVE2) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) && 
defined(ARM_COMPUTE_ENABLE_SVE2) { DepthwiseMethod::DEPTHFIRST, "a64_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst", diff --git a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_u8s8u8q.cpp b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_u8s8u8q.cpp index 537a7c5e01..af4426b69f 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_u8s8u8q.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_u8s8u8q.cpp @@ -33,11 +33,11 @@ #include "depthwise_implementation_constraints.hpp" #if defined(__aarch64__) -#if defined(__ARM_FEATURE_SVE) && defined(SVE2) +#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) #include "kernels/sve_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp" #include "kernels/sve_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp" #include "kernels/sve_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp" -#endif // defined(__ARM_FEATURE_SVE) && defined(SVE2) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) #include "kernels/a64_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp" #include "kernels/a64_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp" #include "kernels/a64_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp" @@ -54,13 +54,14 @@ namespace depthwise { static const DepthwiseImplementation depthwise_u8q_methods[] = { #if defined(__aarch64__) -#if defined(__ARM_FEATURE_SVE) && defined(SVE2) +#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) { DepthwiseMethod::DEPTHFIRST, "sve_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst", constraint(is_supported, has_no_channel_multiplier, - qp_has_no_left_shift), + qp_has_no_left_shift, + cpu_has_sve2), nullptr, [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon * { return new DepthwiseDepthfirstQuantized(args, qp); @@ -71,7 +72,8 @@ static const DepthwiseImplementation dep "sve_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst", constraint(is_supported, has_no_channel_multiplier, - qp_has_no_left_shift), + 
qp_has_no_left_shift, + cpu_has_sve2), nullptr, [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon * { return new DepthwiseDepthfirstQuantized(args, qp); @@ -82,13 +84,14 @@ static const DepthwiseImplementation dep "sve_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst", constraint(is_supported, has_no_channel_multiplier, - qp_has_no_left_shift), + qp_has_no_left_shift, + cpu_has_sve2), nullptr, [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon * { return new DepthwiseDepthfirstQuantized(args, qp); }, }, -#endif // defined(__ARM_FEATURE_SVE) && defined(SVE2) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) { DepthwiseMethod::DEPTHFIRST, "a64_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst", diff --git a/src/core/NEON/kernels/arm_conv/depthwise/interleaves/8b_mla.cpp b/src/core/NEON/kernels/arm_conv/depthwise/interleaves/8b_mla.cpp index 6c5ef23684..04b904275c 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/interleaves/8b_mla.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/interleaves/8b_mla.cpp @@ -110,14 +110,14 @@ void interleave_ ## ARCH ## _ ## TYPENAME ## _ ## KERN_ROWS ## x ## KERN_COLS # namespace arm_conv { namespace depthwise { -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) ADD_IMPLEMENTATION(sve, s8q, int8_t, SVE, 2, 3, 3) ADD_IMPLEMENTATION(sve, s8q, int8_t, SVE, 2, 5, 5) ADD_IMPLEMENTATION(sve, u8q, uint8_t, SVE, 2, 3, 3) ADD_IMPLEMENTATION(sve, u8q, uint8_t, SVE, 2, 5, 5) -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) ADD_IMPLEMENTATION(a64, s8q, int8_t, None, 2, 3, 3) ADD_IMPLEMENTATION(a64, s8q, int8_t, None, 2, 5, 5) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/interleaves/list.hpp b/src/core/NEON/kernels/arm_conv/depthwise/interleaves/list.hpp index 41f0495acf..cb49a243af 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/interleaves/list.hpp +++ 
b/src/core/NEON/kernels/arm_conv/depthwise/interleaves/list.hpp @@ -27,7 +27,7 @@ namespace arm_conv { namespace depthwise { -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) class interleave_sve_u8q_3x3_dot { @@ -71,7 +71,7 @@ class interleave_sve_s8q_5x5_mla static size_t get_packed_size(const DepthwiseArgs &); }; -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) class interleave_a64_u8q_3x3_dot { diff --git a/src/core/NEON/kernels/arm_conv/depthwise/interleaves/sve_s8q_3x3_dot.cpp b/src/core/NEON/kernels/arm_conv/depthwise/interleaves/sve_s8q_3x3_dot.cpp index ea0c35b7ce..dfb6457ed9 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/interleaves/sve_s8q_3x3_dot.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/interleaves/sve_s8q_3x3_dot.cpp @@ -22,7 +22,7 @@ * SOFTWARE. */ -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) #include "arm_gemm.hpp" #include "src/core/NEON/kernels/arm_gemm/utils.hpp" @@ -133,4 +133,4 @@ void interleave_sve_s8q_3x3_dot::pack_parameters(unsigned int n_channels, void * } // namespace depthwise } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/interleaves/sve_u8q_3x3_dot.cpp b/src/core/NEON/kernels/arm_conv/depthwise/interleaves/sve_u8q_3x3_dot.cpp index edd32a43f5..6c16bdc2fb 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/interleaves/sve_u8q_3x3_dot.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/interleaves/sve_u8q_3x3_dot.cpp @@ -22,7 +22,7 @@ * SOFTWARE. 
*/ -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) #include "arm_gemm.hpp" #include "src/core/NEON/kernels/arm_gemm/utils.hpp" @@ -133,4 +133,4 @@ void interleave_sve_u8q_3x3_dot::pack_parameters(unsigned int n_channels, void * } // namespace depthwise } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp index c444472c68..b8e59306d5 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp @@ -28,7 +28,7 @@ #pragma once -#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS) +#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS) namespace arm_conv { namespace depthwise { @@ -69,4 +69,4 @@ struct sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst } // namespace depthwise } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp index b788c705e5..a4c1a40100 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp @@ -25,7 +25,7 @@ #include #include -#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS) +#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS) namespace arm_conv { 
namespace depthwise { @@ -321,4 +321,4 @@ void sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst_direct_impl( } // namespace depthwise } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp index d8f905b33a..a845e7c0c6 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp @@ -25,7 +25,7 @@ #include #include -#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS) +#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS) namespace arm_conv { namespace depthwise { @@ -281,4 +281,4 @@ void sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst_indirect_impl( } // namespace depthwise } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst.hpp index f5d31e63f8..e1f23aae66 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst.hpp @@ -28,7 +28,7 @@ #pragma once -#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS) +#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS) namespace arm_conv { namespace depthwise { @@ -69,4 +69,4 @@ struct 
sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst } // namespace depthwise } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp index aebf0bf7ac..0708f578a8 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp @@ -25,7 +25,7 @@ #include #include -#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS) +#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS) namespace arm_conv { namespace depthwise { @@ -475,4 +475,4 @@ void sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst_direct_impl( } // namespace depthwise } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp index 65ecb6d218..770576c5da 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp @@ -25,7 +25,7 @@ #include #include -#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS) +#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS) namespace arm_conv { namespace depthwise { @@ -492,4 +492,4 @@ void 
sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst_indirect_impl( } // namespace depthwise } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst.hpp index f976842b7a..7d035f0571 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst.hpp @@ -28,7 +28,7 @@ #pragma once -#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS) +#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS) namespace arm_conv { namespace depthwise { @@ -69,4 +69,4 @@ struct sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst } // namespace depthwise } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp index 8f0fce7e96..93e1908df7 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp @@ -25,7 +25,7 @@ #include #include -#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS) +#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS) namespace arm_conv { namespace depthwise { @@ -685,4 +685,4 @@ void sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst_direct_impl( } // namespace depthwise } // 
namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp index 8148353f1a..8eaf0a46d6 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp @@ -25,7 +25,7 @@ #include #include -#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS) +#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS) namespace arm_conv { namespace depthwise { @@ -743,4 +743,4 @@ void sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst_indirect_impl( } // namespace depthwise } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp index 98f50f8436..a6a4afb3b5 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp @@ -28,7 +28,7 @@ #pragma once -#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS) +#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS) namespace arm_conv { namespace depthwise { @@ -69,4 +69,4 @@ struct sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst } // namespace depthwise } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) && 
defined(__ARM_FP16_ARGS) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp index e620604a16..2238bf08cd 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp @@ -25,7 +25,7 @@ #include #include -#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS) +#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS) namespace arm_conv { namespace depthwise { @@ -342,4 +342,4 @@ void sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst_direct_impl( } // namespace depthwise } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp index 3ed743e3ed..0d5d4176aa 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp @@ -25,7 +25,7 @@ #include #include -#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS) +#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS) namespace arm_conv { namespace depthwise { @@ -342,4 +342,4 @@ void sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst_indirect_impl( } // namespace depthwise } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) && 
defined(__ARM_FP16_ARGS) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp index 20f3ee0329..236f9bf43a 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp @@ -28,7 +28,7 @@ #pragma once -#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS) +#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS) namespace arm_conv { namespace depthwise { @@ -69,4 +69,4 @@ struct sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst } // namespace depthwise } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp index f1ee5c53ce..6b1564e6c9 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp @@ -25,7 +25,7 @@ #include #include -#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS) +#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS) namespace arm_conv { namespace depthwise { @@ -528,4 +528,4 @@ void sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst_direct_impl( } // namespace depthwise } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) && 
defined(__ARM_FP16_ARGS) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp index caa15a9816..be128b4aff 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp @@ -25,7 +25,7 @@ #include #include -#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS) +#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS) namespace arm_conv { namespace depthwise { @@ -556,4 +556,4 @@ void sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst_indirect_impl( } // namespace depthwise } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp index 74716ddf1f..05e82d4e76 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp @@ -28,7 +28,7 @@ #pragma once -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) namespace arm_conv { namespace depthwise { @@ -69,4 +69,4 @@ struct sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst } // namespace depthwise } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp 
b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp index d443855758..eddcffc196 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp @@ -25,7 +25,7 @@ #include #include -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) namespace arm_conv { namespace depthwise { @@ -252,4 +252,4 @@ void sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst_impl( } // namespace depthwise } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp index d899255e84..eb632eb4fe 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp @@ -25,7 +25,7 @@ #include #include -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) namespace arm_conv { namespace depthwise { @@ -361,4 +361,4 @@ void sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst_direct_impl( } // namespace depthwise } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp index e8a1539437..fb41ca0754 100644 --- 
a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp @@ -25,7 +25,7 @@ #include #include -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) namespace arm_conv { namespace depthwise { @@ -315,4 +315,4 @@ void sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst_indirect_impl( } // namespace depthwise } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst_strided.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst_strided.hpp index 173fc631d8..65cb735bde 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst_strided.hpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst_strided.hpp @@ -26,7 +26,7 @@ #pragma once -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) namespace arm_conv { namespace depthwise { @@ -63,4 +63,4 @@ struct sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst_strided } // namespace depthwise } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst_strided/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst_strided/generic.cpp index cecc192c49..97c4d88119 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst_strided/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst_strided/generic.cpp @@ -25,7 +25,7 @@ #include 
#include -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) namespace arm_conv { namespace depthwise { @@ -244,4 +244,4 @@ void sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst_strided_impl( } // namespace depthwise } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst.hpp index 5ec78aa05f..ef5f4187f9 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst.hpp @@ -28,7 +28,7 @@ #pragma once -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) namespace arm_conv { namespace depthwise { @@ -69,4 +69,4 @@ struct sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst } // namespace depthwise } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp index 4d0bd311cc..6bc333be41 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp @@ -25,7 +25,7 @@ #include #include -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) namespace arm_conv { namespace depthwise { @@ -535,4 +535,4 @@ void sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst_direct_impl( } // namespace depthwise } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // 
defined(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp index 7c6fb306b7..3877ae2f03 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp @@ -25,7 +25,7 @@ #include #include -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) namespace arm_conv { namespace depthwise { @@ -544,4 +544,4 @@ void sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst_indirect_impl( } // namespace depthwise } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst.hpp index a9823e3917..fc9588cd58 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst.hpp @@ -28,7 +28,7 @@ #pragma once -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) namespace arm_conv { namespace depthwise { @@ -69,4 +69,4 @@ struct sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst } // namespace depthwise } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp index 4c24ad9c15..7df8e481c0 
100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp @@ -25,7 +25,7 @@ #include #include -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) namespace arm_conv { namespace depthwise { @@ -685,4 +685,4 @@ void sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst_direct_impl( } // namespace depthwise } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp index ac0c4ec4e3..22e12a7b9a 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp @@ -25,7 +25,7 @@ #include #include -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) namespace arm_conv { namespace depthwise { @@ -817,4 +817,4 @@ void sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst_indirect_impl( } // namespace depthwise } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp index f5b6a4f8ff..2119c06965 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp @@ -28,7 +28,7 @@ #pragma once -#if 
defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) namespace arm_conv { namespace depthwise { @@ -69,4 +69,4 @@ struct sve_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst } // namespace depthwise } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp index ad53872630..78e67e1be1 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp @@ -25,7 +25,7 @@ #include #include -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) namespace arm_conv { namespace depthwise { @@ -402,4 +402,4 @@ void sve_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst_direct_impl( } // namespace depthwise } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp index 06b3575d4b..8555cfea7c 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp @@ -25,7 +25,7 @@ #include #include -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) namespace arm_conv { namespace depthwise { @@ -394,4 +394,4 @@ void sve_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst_indirect_impl( } // namespace depthwise } // namespace arm_conv 
-#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp index d49f7fdceb..6f1f187818 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp @@ -28,7 +28,7 @@ #pragma once -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) namespace arm_conv { namespace depthwise { @@ -69,4 +69,4 @@ struct sve_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst } // namespace depthwise } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp index f751186dce..edafe82770 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp @@ -25,7 +25,7 @@ #include #include -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) namespace arm_conv { namespace depthwise { @@ -528,4 +528,4 @@ void sve_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst_direct_impl( } // namespace depthwise } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp 
b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp index 6e35ee86c5..1bfe7eb09c 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp @@ -25,7 +25,7 @@ #include #include -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) namespace arm_conv { namespace depthwise { @@ -630,4 +630,4 @@ void sve_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst_indirect_impl( } // namespace depthwise } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_generic_output9_mla_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_generic_output9_mla_depthfirst.hpp index dd2c519e3a..bd071d370c 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_generic_output9_mla_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_generic_output9_mla_depthfirst.hpp @@ -28,7 +28,7 @@ #pragma once -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) namespace arm_conv { namespace depthwise { @@ -56,4 +56,4 @@ struct sve_fp32_nhwc_generic_output9_mla_depthfirst } // namespace depthwise } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_generic_output9_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_generic_output9_mla_depthfirst/generic.cpp index 370218e1d4..eac77516c2 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_generic_output9_mla_depthfirst/generic.cpp +++ 
b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_generic_output9_mla_depthfirst/generic.cpp @@ -25,7 +25,7 @@ #include #include -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) namespace arm_conv { namespace depthwise { @@ -163,4 +163,4 @@ void sve_fp32_nhwc_generic_output9_mla_depthfirst_impl( } // namespace depthwise } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst.hpp index 5cf3314c65..563f0fc59f 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst.hpp @@ -28,7 +28,7 @@ #pragma once -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) namespace arm_conv { namespace depthwise { @@ -67,4 +67,4 @@ struct sve_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst } // namespace depthwise } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst/generic.cpp index ce640a207d..395b112460 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst/generic.cpp @@ -25,7 
+25,7 @@ #include #include -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) namespace arm_conv { namespace depthwise { @@ -256,4 +256,4 @@ void sve_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst_imp } // namespace depthwise } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst.hpp index 3c2f77156d..e9378c2a12 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst.hpp @@ -28,7 +28,7 @@ #pragma once -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) namespace arm_conv { namespace depthwise { @@ -67,4 +67,4 @@ struct sve_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst } // namespace depthwise } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst/generic.cpp index 453b00c0db..e7193d625f 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst/generic.cpp @@ -25,7 +25,7 @@ #include #include -#if defined(__ARM_FEATURE_SVE) +#if 
defined(ARM_COMPUTE_ENABLE_SVE) namespace arm_conv { namespace depthwise { @@ -389,4 +389,4 @@ void sve_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst_imp } // namespace depthwise } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst.hpp index 7a4bd1dd1e..6849e562bc 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst.hpp @@ -28,7 +28,7 @@ #pragma once -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) namespace arm_conv { namespace depthwise { @@ -59,4 +59,4 @@ struct sve_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst } // namespace depthwise } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp index 0124370067..b23cec8593 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp @@ -25,7 +25,7 @@ #include #include -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) namespace arm_conv { namespace depthwise { @@ 
-451,4 +451,4 @@ void sve_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst_im } // namespace depthwise } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst.hpp index 295e1f6450..39974fde88 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst.hpp @@ -29,7 +29,7 @@ #pragma once -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) namespace arm_conv { namespace depthwise { @@ -72,4 +72,4 @@ struct sve_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst } // namespace depthwise } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp index 90f924a8ed..8e9e5f4aeb 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp @@ -22,7 +22,7 @@ * SOFTWARE. 
*/ -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) #include "arm_gemm.hpp" #include @@ -454,4 +454,4 @@ void sve_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst_impl(const int8_t *const *cons } // namespace depthwise } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp index 7dd241a8cf..f788829572 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp @@ -29,7 +29,7 @@ #pragma once -#if defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2) +#if defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) namespace arm_conv { namespace depthwise { @@ -72,4 +72,4 @@ struct sve_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst } // namespace depthwise } // namespace arm_conv -#endif // defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2) +#endif // defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp index 8bf5badfaf..87387960f1 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp @@ -27,7 +27,7 @@ #include #include -#if defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2) +#if defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && 
defined(ARM_COMPUTE_ENABLE_SVE2) namespace arm_conv { namespace depthwise { @@ -415,4 +415,4 @@ void sve_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst_impl( } // namespace depthwise } // namespace arm_conv -#endif // defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2) +#endif // defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp index 89507ef9ea..5c2b4f6f53 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp @@ -29,7 +29,7 @@ #pragma once -#if defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2) +#if defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) namespace arm_conv { namespace depthwise { @@ -72,4 +72,4 @@ struct sve_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst } // namespace depthwise } // namespace arm_conv -#endif // defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2) +#endif // defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp index b773ca1fe6..b4a1026aaa 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp @@ -27,7 +27,7 @@ #include #include -#if defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2) +#if 
defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) namespace arm_conv { namespace depthwise { @@ -456,4 +456,4 @@ void sve_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst_impl( } // namespace depthwise } // namespace arm_conv -#endif // defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2) +#endif // defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp index 54ac1c2e0b..948c5ad2e7 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp @@ -29,7 +29,7 @@ #pragma once -#if defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2) +#if defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) namespace arm_conv { namespace depthwise { @@ -72,4 +72,4 @@ struct sve_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst } // namespace depthwise } // namespace arm_conv -#endif // defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2) +#endif // defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp index c02bb584e5..565c145f92 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp @@ -27,7 +27,7 @@ #include #include -#if defined(__aarch64__) && 
defined(__ARM_FEATURE_SVE) && defined(SVE2) +#if defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) namespace arm_conv { namespace depthwise { @@ -657,4 +657,4 @@ void sve_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst_impl( } // namespace depthwise } // namespace arm_conv -#endif // defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2) +#endif // defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst.hpp index 7ab83e8659..176c4f878e 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst.hpp @@ -28,7 +28,7 @@ #pragma once -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) namespace arm_conv { namespace depthwise { @@ -67,4 +67,4 @@ struct sve_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst } // namespace depthwise } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp index f531912e72..ea7acf5b6e 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp +++ 
b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp @@ -27,7 +27,7 @@ #include #include -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) namespace arm_conv { namespace depthwise { @@ -350,4 +350,4 @@ void sve_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst_impl } // namespace depthwise } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst.hpp index 2c33bdcd3a..10eee34d62 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst.hpp @@ -28,7 +28,7 @@ #pragma once -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) namespace arm_conv { namespace depthwise { @@ -67,4 +67,4 @@ struct sve_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst } // namespace depthwise } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp index ffa2c6a7bc..6bc5935348 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp +++ 
b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp @@ -27,7 +27,7 @@ #include #include -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) namespace arm_conv { namespace depthwise { @@ -425,4 +425,4 @@ void sve_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst_impl } // namespace depthwise } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst.hpp index 4098f6f660..b5c6e983ae 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst.hpp @@ -29,7 +29,7 @@ #pragma once -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) namespace arm_conv { namespace depthwise { @@ -72,4 +72,4 @@ struct sve_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst } // namespace depthwise } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp index 3345449fe1..095c1de8f2 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp @@ -22,7 +22,7 @@ * SOFTWARE. 
*/ -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) #include "arm_gemm.hpp" #include @@ -385,4 +385,4 @@ void sve_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst_impl(const int8_t *const *con } // namespace depthwise } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst.hpp index 72b26a50a0..a087e801dc 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst.hpp @@ -29,7 +29,7 @@ #pragma once -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) namespace arm_conv { namespace depthwise { @@ -72,4 +72,4 @@ struct sve_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst } // namespace depthwise } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp index ca6af57171..0d4b9e6687 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp @@ -22,7 +22,7 @@ * SOFTWARE. 
*/ -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) #include "arm_gemm.hpp" #include @@ -454,4 +454,4 @@ void sve_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst_impl(const uint8_t *const *con } // namespace depthwise } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp index 6174dd0e9f..c501c67a5b 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp @@ -29,7 +29,7 @@ #pragma once -#if defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2) +#if defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) namespace arm_conv { namespace depthwise { @@ -72,4 +72,4 @@ struct sve_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst } // namespace depthwise } // namespace arm_conv -#endif // defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2) +#endif // defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp index 2ec7f6e7ea..40220ad84e 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp @@ -27,7 +27,7 @@ #include #include -#if defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2) +#if defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && 
defined(ARM_COMPUTE_ENABLE_SVE2) namespace arm_conv { namespace depthwise { @@ -415,4 +415,4 @@ void sve_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst_impl( } // namespace depthwise } // namespace arm_conv -#endif // defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2) +#endif // defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp index 1f470f78aa..981864270d 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp @@ -29,7 +29,7 @@ #pragma once -#if defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2) +#if defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) namespace arm_conv { namespace depthwise { @@ -72,4 +72,4 @@ struct sve_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst } // namespace depthwise } // namespace arm_conv -#endif // defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2) +#endif // defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp index bc8f0ac1d9..39ab3534f5 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp @@ -27,7 +27,7 @@ #include #include -#if defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2) +#if 
defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) namespace arm_conv { namespace depthwise { @@ -456,4 +456,4 @@ void sve_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst_impl( } // namespace depthwise } // namespace arm_conv -#endif // defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2) +#endif // defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp index f025b08a29..b1b16c55d3 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp @@ -29,7 +29,7 @@ #pragma once -#if defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2) +#if defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) namespace arm_conv { namespace depthwise { @@ -72,4 +72,4 @@ struct sve_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst } // namespace depthwise } // namespace arm_conv -#endif // defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2) +#endif // defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp index 95423186b8..7f4272672c 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp @@ -27,7 +27,7 @@ #include #include -#if defined(__aarch64__) && 
defined(__ARM_FEATURE_SVE) && defined(SVE2) +#if defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) namespace arm_conv { namespace depthwise { @@ -657,4 +657,4 @@ void sve_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst_impl( } // namespace depthwise } // namespace arm_conv -#endif // defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2) +#endif // defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst.hpp index 9226a96662..dbf70c3f8e 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst.hpp @@ -28,7 +28,7 @@ #pragma once -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) namespace arm_conv { namespace depthwise { @@ -67,4 +67,4 @@ struct sve_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst } // namespace depthwise } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp index bb9931c20f..1c8b8f9d19 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp +++ 
b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp @@ -27,7 +27,7 @@ #include #include -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) namespace arm_conv { namespace depthwise { @@ -350,4 +350,4 @@ void sve_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst_impl } // namespace depthwise } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst.hpp index 3023ed16e5..90fefdcda3 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst.hpp @@ -28,7 +28,7 @@ #pragma once -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) namespace arm_conv { namespace depthwise { @@ -67,4 +67,4 @@ struct sve_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst } // namespace depthwise } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp index fc1e23e897..0085bbc6bc 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp +++ 
b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp @@ -27,7 +27,7 @@ #include #include -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) namespace arm_conv { namespace depthwise { @@ -425,4 +425,4 @@ void sve_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst_impl } // namespace depthwise } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp index 361f48bfbe..8ab2e5ba2a 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp @@ -29,7 +29,7 @@ #pragma once -#if defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2) +#if defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) namespace arm_conv { namespace depthwise { @@ -72,4 +72,4 @@ struct sve_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst } // namespace depthwise } // namespace arm_conv -#endif // defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2) +#endif // defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp index 4fc8999ea1..4b9be8f3e3 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp +++ 
b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp @@ -27,7 +27,7 @@ #include #include -#if defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2) +#if defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) namespace arm_conv { namespace depthwise { @@ -415,4 +415,4 @@ void sve_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst_impl( } // namespace depthwise } // namespace arm_conv -#endif // defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2) +#endif // defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp index dc33a3fe3f..f652e48e42 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp @@ -29,7 +29,7 @@ #pragma once -#if defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2) +#if defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) namespace arm_conv { namespace depthwise { @@ -72,4 +72,4 @@ struct sve_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst } // namespace depthwise } // namespace arm_conv -#endif // defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2) +#endif // defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp index 63960f08e1..400e62d248 100644 --- 
a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp @@ -27,7 +27,7 @@ #include #include -#if defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2) +#if defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) namespace arm_conv { namespace depthwise { @@ -456,4 +456,4 @@ void sve_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst_impl( } // namespace depthwise } // namespace arm_conv -#endif // defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2) +#endif // defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp index 906ef36c8f..f07ea13a03 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp @@ -29,7 +29,7 @@ #pragma once -#if defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2) +#if defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) namespace arm_conv { namespace depthwise { @@ -72,4 +72,4 @@ struct sve_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst } // namespace depthwise } // namespace arm_conv -#endif // defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2) +#endif // defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp 
b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp index 6c321efa29..29582da0f6 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp @@ -27,7 +27,7 @@ #include #include -#if defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2) +#if defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) namespace arm_conv { namespace depthwise { @@ -657,4 +657,4 @@ void sve_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst_impl( } // namespace depthwise } // namespace arm_conv -#endif // defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2) +#endif // defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst.hpp index 8c7a497376..0167d78eb7 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst.hpp @@ -24,7 +24,7 @@ #pragma once -#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS) +#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS) namespace arm_conv { namespace pooling { @@ -57,4 +57,4 @@ struct sve_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst } // namespace pooling } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS) diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp 
b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp index 3c1858633b..a1a530b94e 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp @@ -26,7 +26,7 @@ #include #include -#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS) +#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS) namespace arm_conv { namespace pooling { @@ -204,4 +204,4 @@ void sve_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst_impl( } // namespace pooling } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS) diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_avg_generic_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_avg_generic_depthfirst.hpp index 391d47cf41..02f2ce87a9 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_avg_generic_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_avg_generic_depthfirst.hpp @@ -26,7 +26,7 @@ #pragma once -#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS) +#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS) namespace arm_conv { namespace pooling { @@ -51,4 +51,4 @@ struct sve_fp16_nhwc_avg_generic_depthfirst } // namespace pooling } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS) diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_avg_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_avg_generic_depthfirst/generic.cpp index 84a6acf80d..310df11e68 100644 --- 
a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_avg_generic_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_avg_generic_depthfirst/generic.cpp @@ -24,7 +24,7 @@ #include -#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS) +#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS) namespace arm_conv { namespace pooling { @@ -228,4 +228,4 @@ void sve_fp16_nhwc_avg_generic_depthfirst_impl( } // namespace pooling } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS) diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_max_2x2_s1_output2x2_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_max_2x2_s1_output2x2_depthfirst.hpp index 5fb297eb49..5e4327d6b7 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_max_2x2_s1_output2x2_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_max_2x2_s1_output2x2_depthfirst.hpp @@ -24,7 +24,7 @@ #pragma once -#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS) +#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS) namespace arm_conv { namespace pooling { @@ -57,4 +57,4 @@ struct sve_fp16_nhwc_max_2x2_s1_output2x2_depthfirst } // namespace pooling } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS) diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp index f6e23215b8..9abd0f5c1c 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp +++ 
b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp @@ -26,7 +26,7 @@ #include #include -#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS) +#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS) namespace arm_conv { namespace pooling { @@ -143,4 +143,4 @@ void sve_fp16_nhwc_max_2x2_s1_output2x2_depthfirst_impl( } // namespace pooling } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS) diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_max_generic_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_max_generic_depthfirst.hpp index 1c17c27619..44cdea31da 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_max_generic_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_max_generic_depthfirst.hpp @@ -26,7 +26,7 @@ #pragma once -#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS) +#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS) namespace arm_conv { namespace pooling { @@ -51,4 +51,4 @@ struct sve_fp16_nhwc_max_generic_depthfirst } // namespace pooling } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS) diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_max_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_max_generic_depthfirst/generic.cpp index 58ab915605..fae1f014e7 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_max_generic_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_max_generic_depthfirst/generic.cpp @@ -24,7 +24,7 @@ #include -#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS) +#if defined(ARM_COMPUTE_ENABLE_SVE) 
&& defined(__ARM_FP16_ARGS) namespace arm_conv { namespace pooling { @@ -220,4 +220,4 @@ void sve_fp16_nhwc_max_generic_depthfirst_impl( } // namespace pooling } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS) diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst.hpp index 9cbdb8a58d..55d2a47655 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst.hpp @@ -24,7 +24,7 @@ #pragma once -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) namespace arm_conv { namespace pooling { @@ -57,4 +57,4 @@ struct sve_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst } // namespace pooling } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp index 50f5da4c3d..6cad63ee88 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp @@ -26,7 +26,7 @@ #include #include -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) namespace arm_conv { namespace pooling { @@ -204,4 +204,4 @@ void sve_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst_impl( } // namespace pooling } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) diff --git 
a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_avg_generic_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_avg_generic_depthfirst.hpp index 0daa046a02..0fcdcb23dc 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_avg_generic_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_avg_generic_depthfirst.hpp @@ -26,7 +26,7 @@ #pragma once -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) namespace arm_conv { namespace pooling { @@ -51,4 +51,4 @@ struct sve_fp32_nhwc_avg_generic_depthfirst } // namespace pooling } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_avg_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_avg_generic_depthfirst/generic.cpp index c2f5745adc..3e02570a4f 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_avg_generic_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_avg_generic_depthfirst/generic.cpp @@ -24,7 +24,7 @@ #include -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) namespace arm_conv { namespace pooling { @@ -228,4 +228,4 @@ void sve_fp32_nhwc_avg_generic_depthfirst_impl( } // namespace pooling } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_max_2x2_s1_output2x2_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_max_2x2_s1_output2x2_depthfirst.hpp index 086f49e957..b2c6912565 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_max_2x2_s1_output2x2_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_max_2x2_s1_output2x2_depthfirst.hpp @@ -24,7 +24,7 @@ #pragma once 
-#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) namespace arm_conv { namespace pooling { @@ -57,4 +57,4 @@ struct sve_fp32_nhwc_max_2x2_s1_output2x2_depthfirst } // namespace pooling } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp index 250cc24226..786e477050 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp @@ -26,7 +26,7 @@ #include #include -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) namespace arm_conv { namespace pooling { @@ -143,4 +143,4 @@ void sve_fp32_nhwc_max_2x2_s1_output2x2_depthfirst_impl( } // namespace pooling } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_max_generic_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_max_generic_depthfirst.hpp index 17e3e5f0ba..5f65b7f340 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_max_generic_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_max_generic_depthfirst.hpp @@ -26,7 +26,7 @@ #pragma once -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) namespace arm_conv { namespace pooling { @@ -51,4 +51,4 @@ struct sve_fp32_nhwc_max_generic_depthfirst } // namespace pooling } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_max_generic_depthfirst/generic.cpp 
b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_max_generic_depthfirst/generic.cpp index 8166379ce4..a2f4398465 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_max_generic_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_max_generic_depthfirst/generic.cpp @@ -24,7 +24,7 @@ #include -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) namespace arm_conv { namespace pooling { @@ -220,4 +220,4 @@ void sve_fp32_nhwc_max_generic_depthfirst_impl( } // namespace pooling } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_avg_generic_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_avg_generic_depthfirst.hpp index 2ae38b5b2f..06582fe5ce 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_avg_generic_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_avg_generic_depthfirst.hpp @@ -26,7 +26,7 @@ #pragma once -#if defined(__ARM_FEATURE_SVE) && defined(SVE2) +#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) namespace arm_conv { namespace pooling { @@ -51,4 +51,4 @@ struct sve_s8_nhwc_avg_generic_depthfirst } // namespace pooling } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) && defined(SVE2) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_avg_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_avg_generic_depthfirst/generic.cpp index 2ea5b90561..3581095e8b 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_avg_generic_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_avg_generic_depthfirst/generic.cpp @@ -27,7 +27,7 @@ #include -#if 
defined(__ARM_FEATURE_SVE) && defined(SVE2) +#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) namespace arm_conv { namespace pooling { @@ -413,4 +413,4 @@ void sve_s8_nhwc_avg_generic_depthfirst_impl( } // namespace pooling } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) && defined(SVE2) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_max_2x2_s1_output2x2_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_max_2x2_s1_output2x2_depthfirst.hpp index 071e79c93d..46132f2864 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_max_2x2_s1_output2x2_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_max_2x2_s1_output2x2_depthfirst.hpp @@ -24,7 +24,7 @@ #pragma once -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) namespace arm_conv { namespace pooling { @@ -57,4 +57,4 @@ struct sve_s8_nhwc_max_2x2_s1_output2x2_depthfirst } // namespace pooling } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp index bdf3f53292..beabe7b099 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp @@ -26,7 +26,7 @@ #include #include -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) namespace arm_conv { namespace pooling { @@ -143,4 +143,4 @@ void sve_s8_nhwc_max_2x2_s1_output2x2_depthfirst_impl( } // namespace pooling } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // 
defined(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_max_generic_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_max_generic_depthfirst.hpp index 428902ad61..168cbf53c1 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_max_generic_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_max_generic_depthfirst.hpp @@ -26,7 +26,7 @@ #pragma once -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) namespace arm_conv { namespace pooling { @@ -51,4 +51,4 @@ struct sve_s8_nhwc_max_generic_depthfirst } // namespace pooling } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_max_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_max_generic_depthfirst/generic.cpp index 3e88c8729c..11195f59ed 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_max_generic_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_max_generic_depthfirst/generic.cpp @@ -24,7 +24,7 @@ #include -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) namespace arm_conv { namespace pooling { @@ -220,4 +220,4 @@ void sve_s8_nhwc_max_generic_depthfirst_impl( } // namespace pooling } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8q_nhwc_avg_generic_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8q_nhwc_avg_generic_depthfirst.hpp index 1242eaf530..637940e957 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8q_nhwc_avg_generic_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8q_nhwc_avg_generic_depthfirst.hpp @@ -26,7 +26,7 @@ #pragma once -#if 
defined(__ARM_FEATURE_SVE) && defined(SVE2) +#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) namespace arm_conv { namespace pooling { @@ -51,4 +51,4 @@ struct sve_s8q_nhwc_avg_generic_depthfirst } // namespace pooling } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) && defined(SVE2) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8q_nhwc_avg_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8q_nhwc_avg_generic_depthfirst/generic.cpp index 928eb412b5..75be96e283 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8q_nhwc_avg_generic_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8q_nhwc_avg_generic_depthfirst/generic.cpp @@ -28,7 +28,7 @@ #include -#if defined(__ARM_FEATURE_SVE) && defined(SVE2) +#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) namespace arm_conv { namespace pooling { @@ -454,4 +454,4 @@ void sve_s8q_nhwc_avg_generic_depthfirst_impl( } // namespace pooling } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) && defined(SVE2) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8q_nhwc_max_generic_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8q_nhwc_max_generic_depthfirst.hpp index 84aa0d3d6b..5aced30e52 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8q_nhwc_max_generic_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8q_nhwc_max_generic_depthfirst.hpp @@ -26,7 +26,7 @@ #pragma once -#if defined(__ARM_FEATURE_SVE) && defined(SVE2) +#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) namespace arm_conv { namespace pooling { @@ -51,4 +51,4 @@ struct sve_s8q_nhwc_max_generic_depthfirst } // namespace pooling } // namespace arm_conv 
-#endif // defined(__ARM_FEATURE_SVE) && defined(SVE2) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8q_nhwc_max_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8q_nhwc_max_generic_depthfirst/generic.cpp index 3717f8cb30..7f00d46d9d 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8q_nhwc_max_generic_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8q_nhwc_max_generic_depthfirst/generic.cpp @@ -25,7 +25,7 @@ #include "pooling.hpp" #include -#if defined(__ARM_FEATURE_SVE) && defined(SVE2) +#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) namespace arm_conv { namespace pooling { @@ -383,4 +383,4 @@ void sve_s8q_nhwc_max_generic_depthfirst_impl( } // namespace pooling } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) && defined(SVE2) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_avg_generic_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_avg_generic_depthfirst.hpp index 299e55c9be..a2bfec746b 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_avg_generic_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_avg_generic_depthfirst.hpp @@ -26,7 +26,7 @@ #pragma once -#if defined(__ARM_FEATURE_SVE) && defined(SVE2) +#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) namespace arm_conv { namespace pooling { @@ -51,4 +51,4 @@ struct sve_u8_nhwc_avg_generic_depthfirst } // namespace pooling } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) && defined(SVE2) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_avg_generic_depthfirst/generic.cpp 
b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_avg_generic_depthfirst/generic.cpp index 51a69a42be..4c72461dd7 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_avg_generic_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_avg_generic_depthfirst/generic.cpp @@ -27,7 +27,7 @@ #include -#if defined(__ARM_FEATURE_SVE) && defined(SVE2) +#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) namespace arm_conv { namespace pooling { @@ -413,4 +413,4 @@ void sve_u8_nhwc_avg_generic_depthfirst_impl( } // namespace pooling } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) && defined(SVE2) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_max_2x2_s1_output2x2_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_max_2x2_s1_output2x2_depthfirst.hpp index 06df1515ad..11f485ceea 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_max_2x2_s1_output2x2_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_max_2x2_s1_output2x2_depthfirst.hpp @@ -24,7 +24,7 @@ #pragma once -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) namespace arm_conv { namespace pooling { @@ -57,4 +57,4 @@ struct sve_u8_nhwc_max_2x2_s1_output2x2_depthfirst } // namespace pooling } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp index e921f345d5..92779d0d99 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp +++ 
b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp @@ -26,7 +26,7 @@ #include #include -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) namespace arm_conv { namespace pooling { @@ -143,4 +143,4 @@ void sve_u8_nhwc_max_2x2_s1_output2x2_depthfirst_impl( } // namespace pooling } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_max_generic_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_max_generic_depthfirst.hpp index 59cd4b9c78..92be064053 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_max_generic_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_max_generic_depthfirst.hpp @@ -26,7 +26,7 @@ #pragma once -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) namespace arm_conv { namespace pooling { @@ -51,4 +51,4 @@ struct sve_u8_nhwc_max_generic_depthfirst } // namespace pooling } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_max_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_max_generic_depthfirst/generic.cpp index 164847480b..de81d1c54c 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_max_generic_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_max_generic_depthfirst/generic.cpp @@ -24,7 +24,7 @@ #include -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) namespace arm_conv { namespace pooling { @@ -220,4 +220,4 @@ void sve_u8_nhwc_max_generic_depthfirst_impl( } // namespace pooling } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) diff --git 
a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_avg_generic_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_avg_generic_depthfirst.hpp index f6fc1a58c1..91a9925e14 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_avg_generic_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_avg_generic_depthfirst.hpp @@ -26,7 +26,7 @@ #pragma once -#if defined(__ARM_FEATURE_SVE) && defined(SVE2) +#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) namespace arm_conv { namespace pooling { @@ -51,4 +51,4 @@ struct sve_u8q_nhwc_avg_generic_depthfirst } // namespace pooling } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) && defined(SVE2) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_avg_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_avg_generic_depthfirst/generic.cpp index 373848ad2b..abf911c9d3 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_avg_generic_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_avg_generic_depthfirst/generic.cpp @@ -28,7 +28,7 @@ #include -#if defined(__ARM_FEATURE_SVE) && defined(SVE2) +#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) namespace arm_conv { namespace pooling { @@ -483,4 +483,4 @@ void sve_u8q_nhwc_avg_generic_depthfirst_impl( } // namespace pooling } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) && defined(SVE2) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_max_generic_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_max_generic_depthfirst.hpp index c3c0edd0d5..0d04ae5978 100644 --- 
a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_max_generic_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_max_generic_depthfirst.hpp @@ -26,7 +26,7 @@ #pragma once -#if defined(__ARM_FEATURE_SVE) && defined(SVE2) +#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) namespace arm_conv { namespace pooling { @@ -51,4 +51,4 @@ struct sve_u8q_nhwc_max_generic_depthfirst } // namespace pooling } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) && defined(SVE2) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_max_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_max_generic_depthfirst/generic.cpp index c1c1d29613..b632af9118 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_max_generic_depthfirst/generic.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_max_generic_depthfirst/generic.cpp @@ -25,7 +25,7 @@ #include "pooling.hpp" #include -#if defined(__ARM_FEATURE_SVE) && defined(SVE2) +#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) namespace arm_conv { namespace pooling { @@ -413,4 +413,4 @@ void sve_u8q_nhwc_max_generic_depthfirst_impl( } // namespace pooling } // namespace arm_conv -#endif // defined(__ARM_FEATURE_SVE) && defined(SVE2) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) diff --git a/src/core/NEON/kernels/arm_conv/pooling/pooling_fp16.cpp b/src/core/NEON/kernels/arm_conv/pooling/pooling_fp16.cpp index 094c6aa301..42f23a158e 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/pooling_fp16.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/pooling_fp16.cpp @@ -33,12 +33,12 @@ #include "kernels/cpp_nhwc_1x1_stride_any_depthfirst.hpp" #if defined(__aarch64__) -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) #include 
"kernels/sve_fp16_nhwc_max_2x2_s1_output2x2_depthfirst.hpp" #include "kernels/sve_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst.hpp" #include "kernels/sve_fp16_nhwc_avg_generic_depthfirst.hpp" #include "kernels/sve_fp16_nhwc_max_generic_depthfirst.hpp" -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) #include "kernels/a64_fp16_nhwc_max_2x2_s1_output2x2_depthfirst.hpp" #include "kernels/a64_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst.hpp" #include "kernels/a64_fp16_nhwc_avg_generic_depthfirst.hpp" @@ -74,11 +74,13 @@ static const PoolingImplementation<__fp16, __fp16> pooling_fp16_methods[] = { }, }, #if defined(__aarch64__) -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) { PoolingMethod::DEPTHFIRST, "sve_fp16_nhwc_max_2x2_s1_output2x2_depthfirst", - is_supported, + [] (const PoolingArgs &args, const Nothing &unused) -> bool { + return args.cpu_info->has_sve() && is_supported(args, unused); + }, nullptr, [] (const PoolingArgs &args, const Nothing &) -> PoolingCommon<__fp16, __fp16> * { return new PoolingDepthfirst(args); @@ -87,7 +89,9 @@ static const PoolingImplementation<__fp16, __fp16> pooling_fp16_methods[] = { { PoolingMethod::DEPTHFIRST, "sve_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst", - is_supported, + [] (const PoolingArgs &args, const Nothing &unused) -> bool { + return args.cpu_info->has_sve() && is_supported(args, unused); + }, nullptr, [] (const PoolingArgs &args, const Nothing &) -> PoolingCommon<__fp16, __fp16> * { return new PoolingDepthfirst(args); @@ -96,7 +100,7 @@ static const PoolingImplementation<__fp16, __fp16> pooling_fp16_methods[] = { { PoolingMethod::DEPTHFIRST, "sve_fp16_nhwc_avg_generic_depthfirst", - [] (const PoolingArgs &args, const Nothing &) -> bool { return args.pool_type == PoolingType::AVERAGE; }, + [] (const PoolingArgs &args, const Nothing &) -> bool { return args.cpu_info->has_sve() && args.pool_type == PoolingType::AVERAGE; }, nullptr, [] (const PoolingArgs &args, const Nothing &) 
-> PoolingCommon<__fp16, __fp16> * { return new PoolingDepthfirstGeneric(args); @@ -105,18 +109,20 @@ static const PoolingImplementation<__fp16, __fp16> pooling_fp16_methods[] = { { PoolingMethod::DEPTHFIRST, "sve_fp16_nhwc_max_generic_depthfirst", - [] (const PoolingArgs &args, const Nothing &) -> bool { return args.pool_type == PoolingType::MAX; }, + [] (const PoolingArgs &args, const Nothing &) -> bool { return args.cpu_info->has_sve() && args.pool_type == PoolingType::MAX; }, nullptr, [] (const PoolingArgs &args, const Nothing &) -> PoolingCommon<__fp16, __fp16> * { return new PoolingDepthfirstGeneric(args); }, }, -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) { PoolingMethod::DEPTHFIRST, "a64_fp16_nhwc_max_2x2_s1_output2x2_depthfirst", - is_supported, + [] (const PoolingArgs &args, const Nothing &unused) -> bool { + return args.cpu_info->has_fp16() && is_supported(args, unused); + }, nullptr, [] (const PoolingArgs &args, const Nothing &) -> PoolingCommon<__fp16, __fp16> * { return new PoolingDepthfirst(args); @@ -125,7 +131,9 @@ static const PoolingImplementation<__fp16, __fp16> pooling_fp16_methods[] = { { PoolingMethod::DEPTHFIRST, "a64_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst", - is_supported, + [] (const PoolingArgs &args, const Nothing &unused) -> bool { + return args.cpu_info->has_fp16() && is_supported(args, unused); + }, nullptr, [] (const PoolingArgs &args, const Nothing &) -> PoolingCommon<__fp16, __fp16> * { return new PoolingDepthfirst(args); @@ -134,7 +142,7 @@ static const PoolingImplementation<__fp16, __fp16> pooling_fp16_methods[] = { { PoolingMethod::DEPTHFIRST, "a64_fp16_nhwc_avg_generic_depthfirst", - [] (const PoolingArgs &args, const Nothing &) -> bool { return args.pool_type == PoolingType::AVERAGE; }, + [] (const PoolingArgs &args, const Nothing &) -> bool { return args.cpu_info->has_fp16() && args.pool_type == PoolingType::AVERAGE; }, nullptr, [] (const 
PoolingArgs &args, const Nothing &) -> PoolingCommon<__fp16, __fp16> * { return new PoolingDepthfirstGeneric(args); @@ -143,7 +151,7 @@ static const PoolingImplementation<__fp16, __fp16> pooling_fp16_methods[] = { { PoolingMethod::DEPTHFIRST, "a64_fp16_nhwc_max_generic_depthfirst", - [] (const PoolingArgs &args, const Nothing &) -> bool { return args.pool_type == PoolingType::MAX; }, + [] (const PoolingArgs &args, const Nothing &) -> bool { return args.cpu_info->has_fp16() && args.pool_type == PoolingType::MAX; }, nullptr, [] (const PoolingArgs &args, const Nothing &) -> PoolingCommon<__fp16, __fp16> * { return new PoolingDepthfirstGeneric(args); diff --git a/src/core/NEON/kernels/arm_conv/pooling/pooling_fp32.cpp b/src/core/NEON/kernels/arm_conv/pooling/pooling_fp32.cpp index 002115d78c..1905e1e9d6 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/pooling_fp32.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/pooling_fp32.cpp @@ -30,12 +30,12 @@ #include "kernels/cpp_nhwc_1x1_stride_any_depthfirst.hpp" #if defined(__aarch64__) -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) #include "kernels/sve_fp32_nhwc_max_2x2_s1_output2x2_depthfirst.hpp" #include "kernels/sve_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst.hpp" #include "kernels/sve_fp32_nhwc_avg_generic_depthfirst.hpp" #include "kernels/sve_fp32_nhwc_max_generic_depthfirst.hpp" -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) #include "kernels/a64_fp32_nhwc_max_2x2_s1_output2x2_depthfirst.hpp" #include "kernels/a64_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst.hpp" #include "kernels/a64_fp32_nhwc_avg_generic_depthfirst.hpp" @@ -71,11 +71,13 @@ static const PoolingImplementation pooling_fp32_methods[] = { }, }, #if defined(__aarch64__) -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) { PoolingMethod::DEPTHFIRST, "sve_fp32_nhwc_max_2x2_s1_output2x2_depthfirst", - is_supported, + [] (const PoolingArgs &args, const Nothing &unused) -> bool { + return 
args.cpu_info->has_sve() && is_supported(args, unused); + }, nullptr, [] (const PoolingArgs &args, const Nothing &) -> PoolingCommon * { return new PoolingDepthfirst(args); @@ -84,7 +86,9 @@ static const PoolingImplementation pooling_fp32_methods[] = { { PoolingMethod::DEPTHFIRST, "sve_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst", - is_supported, + [] (const PoolingArgs &args, const Nothing &unused) -> bool { + return args.cpu_info->has_sve() && is_supported(args, unused); + }, nullptr, [] (const PoolingArgs &args, const Nothing &) -> PoolingCommon * { return new PoolingDepthfirst(args); @@ -93,7 +97,9 @@ static const PoolingImplementation pooling_fp32_methods[] = { { PoolingMethod::DEPTHFIRST, "sve_fp32_nhwc_avg_generic_depthfirst", - [] (const PoolingArgs &args, const Nothing &) -> bool { return args.pool_type == PoolingType::AVERAGE; }, + [] (const PoolingArgs &args, const Nothing &) -> bool { + return args.cpu_info->has_sve() && args.pool_type == PoolingType::AVERAGE; + }, nullptr, [] (const PoolingArgs &args, const Nothing &) -> PoolingCommon * { return new PoolingDepthfirstGeneric(args); @@ -102,13 +108,15 @@ static const PoolingImplementation pooling_fp32_methods[] = { { PoolingMethod::DEPTHFIRST, "sve_fp32_nhwc_max_generic_depthfirst", - [] (const PoolingArgs &args, const Nothing &) -> bool { return args.pool_type == PoolingType::MAX; }, + [] (const PoolingArgs &args, const Nothing &) -> bool { + return args.cpu_info->has_sve() && args.pool_type == PoolingType::MAX; + }, nullptr, [] (const PoolingArgs &args, const Nothing &) -> PoolingCommon * { return new PoolingDepthfirstGeneric(args); }, }, -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) { PoolingMethod::DEPTHFIRST, "a64_fp32_nhwc_max_2x2_s1_output2x2_depthfirst", diff --git a/src/core/NEON/kernels/arm_conv/pooling/pooling_s8.cpp b/src/core/NEON/kernels/arm_conv/pooling/pooling_s8.cpp index 490fc0d863..1cad674e6e 100644 --- 
a/src/core/NEON/kernels/arm_conv/pooling/pooling_s8.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/pooling_s8.cpp @@ -30,13 +30,13 @@ #include "kernels/cpp_nhwc_1x1_stride_any_depthfirst.hpp" #if defined(__aarch64__) -#if defined(__ARM_FEATURE_SVE) -#if defined(SVE2) +#if defined(ARM_COMPUTE_ENABLE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE2) #include "kernels/sve_s8_nhwc_avg_generic_depthfirst.hpp" -#endif // defined(SVE2) +#endif // defined(ARM_COMPUTE_ENABLE_SVE2) #include "kernels/sve_s8_nhwc_max_2x2_s1_output2x2_depthfirst.hpp" #include "kernels/sve_s8_nhwc_max_generic_depthfirst.hpp" -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) #include "kernels/a64_s8_nhwc_max_2x2_s1_output2x2_depthfirst.hpp" #include "kernels/a64_s8_nhwc_avg_generic_depthfirst.hpp" #include "kernels/a64_s8_nhwc_max_generic_depthfirst.hpp" @@ -73,22 +73,24 @@ static const PoolingImplementation pooling_s8_methods[] = { }, }, #if defined(__aarch64__) -#if defined(__ARM_FEATURE_SVE) -#if defined(SVE2) +#if defined(ARM_COMPUTE_ENABLE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE2) { PoolingMethod::DEPTHFIRST, "sve_s8_nhwc_avg_generic_depthfirst", - [] (const PoolingArgs &args, const Nothing &) -> bool { return args.pool_type == PoolingType::AVERAGE; }, + [] (const PoolingArgs &args, const Nothing &) -> bool { return args.cpu_info->has_sve2() && args.pool_type == PoolingType::AVERAGE; }, nullptr, [] (const PoolingArgs &args, const Nothing &) -> PoolingCommon * { return new PoolingDepthfirstGeneric(args); }, }, -#endif // defined(SVE2) +#endif // defined(ARM_COMPUTE_ENABLE_SVE2) { PoolingMethod::DEPTHFIRST, "sve_s8_nhwc_max_2x2_s1_output2x2_depthfirst", - is_supported, + [] (const PoolingArgs &args, const Nothing &unused) -> bool { + return args.cpu_info->has_sve() && is_supported(args, unused); + }, nullptr, [] (const PoolingArgs &args, const Nothing &) -> PoolingCommon * { return new PoolingDepthfirst(args); @@ -97,13 +99,13 @@ static const PoolingImplementation 
pooling_s8_methods[] = { { PoolingMethod::DEPTHFIRST, "sve_s8_nhwc_max_generic_depthfirst", - [] (const PoolingArgs &args, const Nothing &) -> bool { return args.pool_type == PoolingType::MAX; }, + [] (const PoolingArgs &args, const Nothing &) -> bool { return args.cpu_info->has_sve() && args.pool_type == PoolingType::MAX; }, nullptr, [] (const PoolingArgs &args, const Nothing &) -> PoolingCommon * { return new PoolingDepthfirstGeneric(args); }, }, -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) { PoolingMethod::DEPTHFIRST, "a64_s8_nhwc_max_2x2_s1_output2x2_depthfirst", diff --git a/src/core/NEON/kernels/arm_conv/pooling/pooling_s8q.cpp b/src/core/NEON/kernels/arm_conv/pooling/pooling_s8q.cpp index fd4e045035..bfc4dc0f15 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/pooling_s8q.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/pooling_s8q.cpp @@ -28,10 +28,10 @@ #include "pooling_depthfirst_generic_quantized.hpp" #if defined(__aarch64__) -#if defined(__ARM_FEATURE_SVE) && defined(SVE2) +#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) #include "kernels/sve_s8q_nhwc_avg_generic_depthfirst.hpp" #include "kernels/sve_s8q_nhwc_max_generic_depthfirst.hpp" -#endif // defined(__ARM_FEATURE_SVE) && defined(SVE2) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) #include "kernels/a64_s8q_nhwc_avg_generic_depthfirst.hpp" #include "kernels/a64_s8q_nhwc_max_generic_depthfirst.hpp" #endif // defined(__aarch64__) @@ -43,12 +43,12 @@ namespace pooling { static const PoolingImplementation pooling_u8_methods[] = { #if defined(__aarch64__) -#if defined(__ARM_FEATURE_SVE) && defined(SVE2) +#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) { PoolingMethod::DEPTHFIRST, "sve_s8q_nhwc_avg_generic_depthfirst", [] (const PoolingArgs &args, const Requantize32 &) -> bool { - return args.pool_type == PoolingType::AVERAGE; + return args.cpu_info->has_sve2() && args.pool_type == 
PoolingType::AVERAGE; }, nullptr, [] (const PoolingArgs &args, const Requantize32 &rq) -> PoolingCommon * { @@ -58,13 +58,13 @@ static const PoolingImplementation pooling_u8_meth { PoolingMethod::DEPTHFIRST, "sve_s8q_nhwc_max_generic_depthfirst", - [] (const PoolingArgs &args, const Requantize32 &) -> bool { return args.pool_type == PoolingType::MAX; }, + [] (const PoolingArgs &args, const Requantize32 &) -> bool { return args.cpu_info->has_sve2() && args.pool_type == PoolingType::MAX; }, nullptr, [] (const PoolingArgs &args, const Requantize32 &rq) -> PoolingCommon * { return new PoolingDepthfirstGenericQuantized(args, rq); }, }, -#endif // defined(__ARM_FEATURE_SVE) && defined(SVE2) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) { PoolingMethod::DEPTHFIRST, "a64_s8q_nhwc_avg_generic_depthfirst", diff --git a/src/core/NEON/kernels/arm_conv/pooling/pooling_u8.cpp b/src/core/NEON/kernels/arm_conv/pooling/pooling_u8.cpp index 052354922e..f6ea98002c 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/pooling_u8.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/pooling_u8.cpp @@ -30,13 +30,13 @@ #include "kernels/cpp_nhwc_1x1_stride_any_depthfirst.hpp" #if defined(__aarch64__) -#if defined(__ARM_FEATURE_SVE) -#if defined(SVE2) +#if defined(ARM_COMPUTE_ENABLE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE2) #include "kernels/sve_u8_nhwc_avg_generic_depthfirst.hpp" -#endif // defined(SVE2) +#endif // defined(ARM_COMPUTE_ENABLE_SVE2) #include "kernels/sve_u8_nhwc_max_2x2_s1_output2x2_depthfirst.hpp" #include "kernels/sve_u8_nhwc_max_generic_depthfirst.hpp" -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) #include "kernels/a64_u8_nhwc_max_2x2_s1_output2x2_depthfirst.hpp" #include "kernels/a64_u8_nhwc_avg_generic_depthfirst.hpp" #include "kernels/a64_u8_nhwc_max_generic_depthfirst.hpp" @@ -73,8 +73,8 @@ static const PoolingImplementation pooling_u8_methods[] = { }, }, #if defined(__aarch64__) -#if 
defined(__ARM_FEATURE_SVE) -#if defined(SVE2) +#if defined(ARM_COMPUTE_ENABLE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE2) { PoolingMethod::DEPTHFIRST, "sve_u8_nhwc_avg_generic_depthfirst", @@ -82,7 +82,7 @@ static const PoolingImplementation pooling_u8_methods[] = { // This kernel can only be used when there is either no padding, or we don't care // about the value of the padding. Otherwise, we would need to pass in the zero-point // for the quantization regime. - return (args.exclude_padding || + return args.cpu_info->has_sve2() && (args.exclude_padding || (args.padding.top == 0 && args.padding.bottom == 0 && args.padding.left == 0 && args.padding.right == 0) ) && args.pool_type == PoolingType::AVERAGE; @@ -92,11 +92,13 @@ static const PoolingImplementation pooling_u8_methods[] = { return new PoolingDepthfirstGeneric(args); }, }, -#endif // defined(SVE2) +#endif // defined(ARM_COMPUTE_ENABLE_SVE2) { PoolingMethod::DEPTHFIRST, "sve_u8_nhwc_max_2x2_s1_output2x2_depthfirst", - is_supported, + [] (const PoolingArgs &args, const Nothing &unused) -> bool { + return args.cpu_info->has_sve() && is_supported(args, unused); + }, nullptr, [] (const PoolingArgs &args, const Nothing &) -> PoolingCommon * { return new PoolingDepthfirst(args); @@ -105,13 +107,13 @@ static const PoolingImplementation pooling_u8_methods[] = { { PoolingMethod::DEPTHFIRST, "sve_u8_nhwc_max_generic_depthfirst", - [] (const PoolingArgs &args, const Nothing &) -> bool { return args.pool_type == PoolingType::MAX; }, + [] (const PoolingArgs &args, const Nothing &) -> bool { return args.cpu_info->has_sve() && args.pool_type == PoolingType::MAX; }, nullptr, [] (const PoolingArgs &args, const Nothing &) -> PoolingCommon * { return new PoolingDepthfirstGeneric(args); }, }, -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) { PoolingMethod::DEPTHFIRST, "a64_u8_nhwc_max_2x2_s1_output2x2_depthfirst", diff --git a/src/core/NEON/kernels/arm_conv/pooling/pooling_u8q.cpp 
b/src/core/NEON/kernels/arm_conv/pooling/pooling_u8q.cpp index 41303fb418..647e319c82 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/pooling_u8q.cpp +++ b/src/core/NEON/kernels/arm_conv/pooling/pooling_u8q.cpp @@ -28,10 +28,10 @@ #include "pooling_depthfirst_generic_quantized.hpp" #if defined(__aarch64__) -#if defined(__ARM_FEATURE_SVE) && defined(SVE2) +#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) #include "kernels/sve_u8q_nhwc_avg_generic_depthfirst.hpp" #include "kernels/sve_u8q_nhwc_max_generic_depthfirst.hpp" -#endif // defined(__ARM_FEATURE_SVE) && defined(SVE2) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) #include "kernels/a64_u8q_nhwc_avg_generic_depthfirst.hpp" #include "kernels/a64_u8q_nhwc_max_generic_depthfirst.hpp" #endif // defined(__aarch64__) @@ -43,12 +43,12 @@ namespace pooling { static const PoolingImplementation pooling_u8_methods[] = { #if defined(__aarch64__) -#if defined(__ARM_FEATURE_SVE) && defined(SVE2) +#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) { PoolingMethod::DEPTHFIRST, "sve_u8q_nhwc_avg_generic_depthfirst", [] (const PoolingArgs &args, const Requantize32 &) -> bool { - return args.pool_type == PoolingType::AVERAGE; + return args.cpu_info->has_sve2() && args.pool_type == PoolingType::AVERAGE; }, nullptr, [] (const PoolingArgs &args, const Requantize32 &rq) -> PoolingCommon * { @@ -58,13 +58,13 @@ static const PoolingImplementation pooling_u8_me { PoolingMethod::DEPTHFIRST, "sve_u8q_nhwc_max_generic_depthfirst", - [] (const PoolingArgs &args, const Requantize32 &) -> bool { return args.pool_type == PoolingType::MAX; }, + [] (const PoolingArgs &args, const Requantize32 &) -> bool { return args.cpu_info->has_sve2() && args.pool_type == PoolingType::MAX; }, nullptr, [] (const PoolingArgs &args, const Requantize32 &rq) -> PoolingCommon * { return new PoolingDepthfirstGenericQuantized(args, rq); }, }, -#endif // defined(__ARM_FEATURE_SVE) && 
defined(SVE2) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) { PoolingMethod::DEPTHFIRST, "a64_u8q_nhwc_avg_generic_depthfirst", diff --git a/src/core/NEON/kernels/arm_gemm/gemm_bf16.cpp b/src/core/NEON/kernels/arm_gemm/gemm_bf16.cpp index d8134c4bb5..8244523696 100644 --- a/src/core/NEON/kernels/arm_gemm/gemm_bf16.cpp +++ b/src/core/NEON/kernels/arm_gemm/gemm_bf16.cpp @@ -44,26 +44,26 @@ namespace arm_gemm { static const GemmImplementation gemm_bf16_methods[] = { -#ifdef V8P6_BF -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_BF16 +#ifdef ARM_COMPUTE_ENABLE_SVE { // gemm_bf16_interleaved GemmMethod::GEMM_INTERLEAVED, "sve_interleaved_bf16fp32_mmla_8x3VL", - [](const GemmArgs &args) { return args._ci->has_sve() && (args._Ksize>4); }, + [](const GemmArgs &args) { return args._ci->has_svebf16() && (args._Ksize>4); }, [](const GemmArgs &args) { return args._ci->get_cpu_model() != CPUModel::KLEIN; }, [](const GemmArgs &args) { return new GemmInterleaved(args); } }, { GemmMethod::GEMM_HYBRID, "sve_hybrid_bf16fp32_dot_6x4VL", - [](const GemmArgs &args) { return args._ci->has_sve(); }, + [](const GemmArgs &args) { return args._ci->has_svebf16(); }, [](const GemmArgs &args) { return args._ci->get_cpu_model() != CPUModel::KLEIN && ((args._Ksize <= 128) && (args._Nsize <= 128)); }, [](const GemmArgs &args) { return new GemmHybridIndirect(args); } }, { // gemm_bf16_interleaved GemmMethod::GEMM_INTERLEAVED, "sve_interleaved_bf16fp32_dot_8x3VL", - [](const GemmArgs &args) { return args._ci->has_sve() && (args._Ksize>2); }, + [](const GemmArgs &args) { return args._ci->has_svebf16() && (args._Ksize>2); }, [](const GemmArgs &args) { return args._ci->get_cpu_model() != CPUModel::KLEIN; }, [](const GemmArgs &args) { return new GemmInterleaved(args); } }, @@ -71,25 +71,25 @@ static const GemmImplementation gemm_bf16_methods[] = { // gemm_bf16_interleaved GemmMethod::GEMM_INTERLEAVED, "a64_interleaved_bf16fp32_mmla_8x12", - [](const GemmArgs 
&args) { return (args._Ksize>4); }, + [](const GemmArgs &args) { return args._ci->has_bf16() && (args._Ksize>4); }, nullptr, [](const GemmArgs &args) { return new GemmInterleaved(args); } }, { GemmMethod::GEMM_HYBRID, "a64_hybrid_bf16fp32_dot_6x16", - nullptr, + [](const GemmArgs &args) { return args._ci->has_bf16(); }, nullptr, [](const GemmArgs &args) { return new GemmHybridIndirect(args); } }, { // gemm_bf16_interleaved GemmMethod::GEMM_INTERLEAVED, "a64_interleaved_bf16fp32_dot_8x12", - [](const GemmArgs &args) { return (args._Ksize>2); }, + [](const GemmArgs &args) { return args._ci->has_bf16() && (args._Ksize>2); }, nullptr, [](const GemmArgs &args) { return new GemmInterleaved(args); } }, -#endif // V8P6_BF +#endif // ARM_COMPUTE_ENABLE_BF16 #ifdef __aarch64__ { GemmMethod::GEMM_INTERLEAVED, diff --git a/src/core/NEON/kernels/arm_gemm/gemm_fp16.cpp b/src/core/NEON/kernels/arm_gemm/gemm_fp16.cpp index 8e355c8f2c..b41d8dd097 100644 --- a/src/core/NEON/kernels/arm_gemm/gemm_fp16.cpp +++ b/src/core/NEON/kernels/arm_gemm/gemm_fp16.cpp @@ -43,7 +43,7 @@ namespace arm_gemm { static const GemmImplementation<__fp16, __fp16> gemm_fp16_methods[] = { -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) { GemmMethod::GEMM_HYBRID, "sve_hybrid_fp16_mla_6x4VL", diff --git a/src/core/NEON/kernels/arm_gemm/gemm_fp32.cpp b/src/core/NEON/kernels/arm_gemm/gemm_fp32.cpp index d94814fb4c..1632e301ac 100644 --- a/src/core/NEON/kernels/arm_gemm/gemm_fp32.cpp +++ b/src/core/NEON/kernels/arm_gemm/gemm_fp32.cpp @@ -59,7 +59,7 @@ static const GemmImplementation gemm_fp32_methods[] = [](const GemmArgs &args) { return new GemvBatched(args); } }, #ifdef __aarch64__ -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE { GemmMethod::GEMM_HYBRID, "sve_gemv_fp32_mla_8VL", @@ -77,17 +77,17 @@ static const GemmImplementation gemm_fp32_methods[] = }, // MMLA next due to higher throughput (SVE only) -#if defined(__ARM_FEATURE_SVE) && defined(MMLA_FP32) +#if 
defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVEF32MM) { GemmMethod::GEMM_INTERLEAVED, "sve_interleaved_fp32_mmla_8x3VL", - [](const GemmArgs &args) { return args._ci->has_sve() && (args._Ksize>4); }, + [](const GemmArgs &args) { return args._ci->has_svef32mm() && (args._Ksize>4); }, [](const GemmArgs &args) { return args._ci->get_cpu_model() != CPUModel::KLEIN; }, [](const GemmArgs &args) { return new GemmInterleaved(args); } }, -#endif // __ARM_FEATURE_SVE && MMLA_FP32 +#endif // ARM_COMPUTE_ENABLE_SVE && ARM_COMPUTE_ENABLE_SVEF32MM -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE // SVE smallk / hybrid methods { GemmMethod::GEMM_HYBRID, @@ -110,7 +110,7 @@ static const GemmImplementation gemm_fp32_methods[] = [](const GemmArgs &args) { return args._ci->get_cpu_model() != CPUModel::KLEIN && (((args._Ksize <= 256) && (args._Nsize <= 256)) || ((args._nmulti > 1) && ((args._Msize / args._maxthreads) < 8))); }, [](const GemmArgs &args) { return new GemmHybridIndirect(args); } }, -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE // Cortex-A35 specific kernel - use for any problem on A35, and never in any other cases. 
{ GemmMethod::GEMM_INTERLEAVED, @@ -148,7 +148,7 @@ GemmImplementation::with_estimate( [](const GemmArgs &args) { return GemmHybridIndirect::estimate_cycles(args, cls_a64_hybrid_fp32_mla_6x16::get_performance_parameters(args._ci)); }, [](const GemmArgs &args) { return new GemmHybridIndirect(args); } ), -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE { GemmMethod::GEMM_INTERLEAVED, "sve_interleaved_fp32_mla_8x3VL", @@ -156,7 +156,7 @@ GemmImplementation::with_estimate( [](const GemmArgs &args) { return args._ci->get_cpu_model() != CPUModel::KLEIN; }, [](const GemmArgs &args) { return new GemmInterleaved(args); } }, -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE GemmImplementation::with_estimate( GemmMethod::GEMM_INTERLEAVED, "a64_sgemm_8x12", diff --git a/src/core/NEON/kernels/arm_gemm/gemm_int8.cpp b/src/core/NEON/kernels/arm_gemm/gemm_int8.cpp index 60cf82f9c6..bfb3ca901f 100644 --- a/src/core/NEON/kernels/arm_gemm/gemm_int8.cpp +++ b/src/core/NEON/kernels/arm_gemm/gemm_int8.cpp @@ -46,16 +46,16 @@ namespace arm_gemm { static const GemmImplementation gemm_s8_methods[] = { -#ifdef __ARM_FEATURE_SVE -#ifdef MMLA_INT8 +#ifdef ARM_COMPUTE_ENABLE_SVE +#ifdef ARM_COMPUTE_ENABLE_I8MM { GemmMethod::GEMM_INTERLEAVED, "sve_interleaved_s8s32_mmla_8x3VL", - [](const GemmArgs &args) { return args._ci->has_sve() && (args._Ksize>8); }, + [](const GemmArgs &args) { return args._ci->has_svei8mm() && (args._Ksize>8); }, [](const GemmArgs &args) { return args._ci->get_cpu_model() != CPUModel::KLEIN; }, [](const GemmArgs &args) { return new GemmInterleaved(args); } }, -#endif +#endif // ARM_COMPUTE_ENABLE_I8MM { GemmMethod::GEMM_HYBRID, "sve_smallK_hybrid_s8s32_dot_8x1VL", @@ -78,15 +78,15 @@ static const GemmImplementation gemm_s8_methods[] = { [](const GemmArgs &args) { return new GemmInterleaved(args); } }, #endif // SVE -#ifdef MMLA_INT8 +#ifdef ARM_COMPUTE_ENABLE_I8MM { GemmMethod::GEMM_INTERLEAVED, "a64_interleaved_s8s32_mmla_8x12", - [](const GemmArgs 
&args) { return (args._Ksize>8); }, + [](const GemmArgs &args) { return args._ci->has_i8mm() && (args._Ksize>8); }, nullptr, [](const GemmArgs &args) { return new GemmInterleaved(args); } }, -#endif +#endif // ARM_COMPUTE_ENABLE_I8MM { GemmMethod::GEMM_HYBRID, "a64_smallK_hybrid_s8s32_dot_8x4", diff --git a/src/core/NEON/kernels/arm_gemm/gemm_qint8.cpp b/src/core/NEON/kernels/arm_gemm/gemm_qint8.cpp index 094b6fdff4..985567f6f3 100644 --- a/src/core/NEON/kernels/arm_gemm/gemm_qint8.cpp +++ b/src/core/NEON/kernels/arm_gemm/gemm_qint8.cpp @@ -53,16 +53,16 @@ namespace arm_gemm { static const GemmImplementation gemm_qint8_methods[] = { -#ifdef __ARM_FEATURE_SVE -#ifdef MMLA_INT8 +#ifdef ARM_COMPUTE_ENABLE_SVE +#ifdef ARM_COMPUTE_ENABLE_I8MM { GemmMethod::GEMM_INTERLEAVED, "sve_interleaved_s8s32_mmla_8x3VL", - [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_sve() && (args._Ksize>8); }, + [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_svei8mm() && (args._Ksize>8); }, [](const GemmArgs &args, const Requantize32 &) { return args._ci->get_cpu_model() != CPUModel::KLEIN; }, [](const GemmArgs &args, const Requantize32 &qp) { return new GemmInterleavedQuantized(args, qp); } }, -#endif +#endif // ARM_COMPUTE_ENABLE_I8MM { GemmMethod::GEMM_HYBRID_QUANTIZED, "sve_smallK_hybrid_s8s32_dot_8x1VL", @@ -70,22 +70,22 @@ static const GemmImplementation gemm_qint8_methods [](const GemmArgs &args, const Requantize32 &) { return args._ci->get_cpu_model() != CPUModel::KLEIN; }, [](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridQuantized(args, qp); } }, -#ifdef SVE2 +#ifdef ARM_COMPUTE_ENABLE_SVE2 { GemmMethod::GEMM_HYBRID, "sve_hybrid_s8qs_dot_6x4VL", - [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_sve() && quant_hybrid_symmetric(qp); }, + [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_sve2() && quant_hybrid_symmetric(qp); }, [](const GemmArgs &args, const Requantize32 
&) { return args._ci->get_cpu_model() != CPUModel::KLEIN; }, [](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridIndirect(args, qp); } }, { GemmMethod::GEMM_HYBRID, "sve_hybrid_s8qa_dot_4x4VL", - [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_sve() && quant_hybrid_asymmetric(qp); }, + [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_sve2() && quant_hybrid_asymmetric(qp); }, [](const GemmArgs &args, const Requantize32 &) { return args._ci->get_cpu_model() != CPUModel::KLEIN; }, [](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridIndirect(args, qp); } }, -#endif +#endif // ARM_COMPUTE_ENABLE_SVE2 { GemmMethod::GEMM_HYBRID, "sve_hybrid_s8s32_dot_6x4VL", @@ -101,15 +101,15 @@ static const GemmImplementation gemm_qint8_methods [](const GemmArgs &args, const Requantize32 &qp) { return new GemmInterleavedQuantized(args, qp); } }, #endif // SVE -#ifdef MMLA_INT8 +#ifdef ARM_COMPUTE_ENABLE_I8MM { GemmMethod::GEMM_INTERLEAVED, "a64_interleaved_s8s32_mmla_8x12", - [](const GemmArgs &args, const Requantize32 &) { return (args._Ksize>8); }, + [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_i8mm() && (args._Ksize>8); }, nullptr, [](const GemmArgs &args, const Requantize32 &qp) { return new GemmInterleavedQuantized(args, qp); } }, -#endif +#endif // ARM_COMPUTE_ENABLE_I8MM { GemmMethod::GEMM_HYBRID_QUANTIZED, "a64_smallK_hybrid_s8s32_dot_8x4", diff --git a/src/core/NEON/kernels/arm_gemm/gemm_quint8.cpp b/src/core/NEON/kernels/arm_gemm/gemm_quint8.cpp index be27b3a117..f3f2f335fd 100644 --- a/src/core/NEON/kernels/arm_gemm/gemm_quint8.cpp +++ b/src/core/NEON/kernels/arm_gemm/gemm_quint8.cpp @@ -50,12 +50,12 @@ namespace arm_gemm { static const GemmImplementation gemm_quint8_methods[] = { -#ifdef __ARM_FEATURE_SVE -#ifdef MMLA_INT8 +#ifdef ARM_COMPUTE_ENABLE_SVE +#ifdef ARM_COMPUTE_ENABLE_I8MM { GemmMethod::GEMM_INTERLEAVED, "sve_interleaved_u8u32_mmla_8x3VL", - [](const 
GemmArgs &args, const Requantize32 &) { return args._ci->has_sve() && (args._Ksize>8); }, + [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_svei8mm() && (args._Ksize>8); }, [](const GemmArgs &args, const Requantize32 &) { return args._ci->get_cpu_model() != CPUModel::KLEIN; }, [](const GemmArgs &args, const Requantize32 &qp) { return new GemmInterleavedQuantized(args, qp); } }, @@ -67,15 +67,15 @@ static const GemmImplementation gemm_quint8_meth [](const GemmArgs &args, const Requantize32 &) { return args._ci->get_cpu_model() != CPUModel::KLEIN; }, [](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridQuantized(args, qp); } }, -#ifdef SVE2 // Requantizing kernels include some SVE2 only instructions (SQRDMULH, SRSHL) +#ifdef ARM_COMPUTE_ENABLE_SVE2 // Requantizing kernels include some SVE2 only instructions (SQRDMULH, SRSHL) { GemmMethod::GEMM_HYBRID, "sve_hybrid_u8qa_dot_4x4VL", - [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_sve() && quant_hybrid_asymmetric(qp); }, + [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_sve2() && quant_hybrid_asymmetric(qp); }, [](const GemmArgs &args, const Requantize32 &) { return args._ci->get_cpu_model() != CPUModel::KLEIN; }, [](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridIndirect(args, qp); } }, -#endif +#endif // ARM_COMPUTE_ENABLE_SVE2 { GemmMethod::GEMM_HYBRID, "sve_hybrid_u8u32_dot_6x4VL", @@ -91,11 +91,11 @@ static const GemmImplementation gemm_quint8_meth [](const GemmArgs &args, const Requantize32 &qp) { return new GemmInterleavedQuantized(args, qp); } }, #endif -#ifdef MMLA_INT8 +#ifdef ARM_COMPUTE_ENABLE_I8MM { GemmMethod::GEMM_INTERLEAVED, "a64_interleaved_u8u32_mmla_8x12", - [](const GemmArgs &args, const Requantize32 &) { return (args._Ksize>8); }, + [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_i8mm() && (args._Ksize>8); }, [](const GemmArgs &args, const Requantize32 &) { return 
args._ci->get_cpu_model() != CPUModel::KLEIN; }, [](const GemmArgs &args, const Requantize32 &qp) { return new GemmInterleavedQuantized(args, qp); } }, diff --git a/src/core/NEON/kernels/arm_gemm/gemm_uint8.cpp b/src/core/NEON/kernels/arm_gemm/gemm_uint8.cpp index 4de3d2b18a..4c05fd1b73 100644 --- a/src/core/NEON/kernels/arm_gemm/gemm_uint8.cpp +++ b/src/core/NEON/kernels/arm_gemm/gemm_uint8.cpp @@ -46,12 +46,12 @@ namespace arm_gemm { static const GemmImplementation gemm_u8_methods[] = { -#ifdef __ARM_FEATURE_SVE -#ifdef MMLA_INT8 +#ifdef ARM_COMPUTE_ENABLE_SVE +#ifdef ARM_COMPUTE_ENABLE_I8MM { GemmMethod::GEMM_INTERLEAVED, "sve_interleaved_u8u32_mmla_8x3VL", - [](const GemmArgs &args) { return args._ci->has_sve() && (args._Ksize>8); }, + [](const GemmArgs &args) { return args._ci->has_svei8mm() && (args._Ksize>8); }, [](const GemmArgs &args) { return args._ci->get_cpu_model() != CPUModel::KLEIN; }, [](const GemmArgs &args) { return new GemmInterleaved(args); } }, @@ -78,11 +78,11 @@ static const GemmImplementation gemm_u8_methods[] = { [](const GemmArgs &args) { return new GemmInterleaved(args); } }, #endif -#ifdef MMLA_INT8 +#ifdef ARM_COMPUTE_ENABLE_I8MM { GemmMethod::GEMM_INTERLEAVED, "a64_interleaved_u8u32_mmla_8x12", - [](const GemmArgs &args) { return (args._Ksize>8); }, + [](const GemmArgs &args) { return args._ci->has_i8mm() && (args._Ksize>8); }, nullptr, [](const GemmArgs &args) { return new GemmInterleaved(args); } }, diff --git a/src/core/NEON/kernels/arm_gemm/interleave_indirect.cpp b/src/core/NEON/kernels/arm_gemm/interleave_indirect.cpp index 0d56b46e19..a6b1269927 100644 --- a/src/core/NEON/kernels/arm_gemm/interleave_indirect.cpp +++ b/src/core/NEON/kernels/arm_gemm/interleave_indirect.cpp @@ -320,12 +320,12 @@ template void IndirectInterleave<8, 1, VLType::None>(float *, const float * cons template void ConvolutionInterleave<8, 1, VLType::None>(float *, const float *, size_t, const convolver &, unsigned int, unsigned int, unsigned int, unsigned 
int, unsigned int, bool, int32_t); template void Interleave<8, 1, VLType::None>(float *, const float *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); -#if defined(__ARM_FEATURE_SVE) && defined(MMLA_FP32) +#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVEF32MM) /* FMMLA */ template void IndirectInterleave<8, 2, VLType::None>(float *, const float * const * const *, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); template void ConvolutionInterleave<8, 2, VLType::None>(float *, const float *, size_t, const convolver &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); template void Interleave<8, 2, VLType::None>(float *, const float *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); -#endif // SVE && MMLA_FP32 +#endif // ARM_COMPUTE_ENABLE_SVE && ARM_COMPUTE_ENABLE_SVEF32MM /* FP16 */ #if defined(FP16_KERNELS) || defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) @@ -340,7 +340,7 @@ template void Interleave<8, 1, VLType::None>(float *, const __fp16 *, size_t, un /* BF16 */ /* Arm® Neon™/SVE BFDOT */ -#ifdef V8P6_BF +#ifdef ARM_COMPUTE_ENABLE_BF16 template void IndirectInterleave<8, 2, VLType::None>(bfloat16 *, const bfloat16 * const * const *, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); template void ConvolutionInterleave<8, 2, VLType::None>(bfloat16 *, const bfloat16 *, size_t, const convolver &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); template void Interleave<8, 2, VLType::None>(bfloat16 *, const bfloat16 *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); @@ -348,7 +348,7 @@ template void Interleave<8, 2, VLType::None>(bfloat16 *, const bfloat16 *, size_ template void IndirectInterleave<8, 4, VLType::None>(bfloat16 *, const bfloat16 * const * const *, unsigned int, 
unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); template void ConvolutionInterleave<8, 4, VLType::None>(bfloat16 *, const bfloat16 *, size_t, const convolver &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); template void Interleave<8, 4, VLType::None>(bfloat16 *, const bfloat16 *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); -#endif // V8P6_BF +#endif // ARM_COMPUTE_ENABLE_BF16 /* Arm® Neon™/SVE using FP32 kernel */ template void IndirectInterleave<8, 1, VLType::None>(float *, const bfloat16 * const * const *, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); @@ -375,12 +375,12 @@ template void IndirectInterleave<8, 4, VLType::None>(int8_t *, const int8_t * co template void ConvolutionInterleave<8, 4, VLType::None>(int8_t *, const int8_t *, size_t, const convolver &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); template void Interleave<8, 4, VLType::None>(int8_t *, const int8_t *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); -#ifdef MMLA_INT8 +#ifdef ARM_COMPUTE_ENABLE_I8MM /* MMLA SMMLA (height 8, block 8) */ template void IndirectInterleave<8, 8, VLType::None>(int8_t *, const int8_t * const * const *, unsigned int, unsigned int, unsigned int y0, unsigned int ymax, unsigned int k0, unsigned int kmax, bool, int32_t); template void ConvolutionInterleave<8, 8, VLType::None>(int8_t *, const int8_t *, size_t, const convolver &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); template void Interleave<8, 8, VLType::None>(int8_t *, const int8_t *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); -#endif // MMLA_INT8 +#endif // ARM_COMPUTE_ENABLE_I8MM /* Arm® Neon™ SDOT (height 8, block 1) */ template void IndirectInterleave<8, 1, VLType::None>(int16_t *, const int8_t * 
const * const *, unsigned int, unsigned int, unsigned int y0, unsigned int ymax, unsigned int k0, unsigned int kmax, bool, int32_t); @@ -397,12 +397,12 @@ template void IndirectInterleave<8, 4, VLType::None>(uint8_t *, const uint8_t * template void ConvolutionInterleave<8, 4, VLType::None>(uint8_t *, const uint8_t *, size_t, const convolver &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); template void Interleave<8, 4, VLType::None>(uint8_t *, const uint8_t *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); -#ifdef MMLA_INT8 +#ifdef ARM_COMPUTE_ENABLE_I8MM /* MMLA SMMLA (height 8, block 8) */ template void IndirectInterleave<8, 8, VLType::None>(uint8_t *, const uint8_t * const * const *, unsigned int, unsigned int, unsigned int y0, unsigned int ymax, unsigned int k0, unsigned int kmax, bool, int32_t); template void ConvolutionInterleave<8, 8, VLType::None>(uint8_t *, const uint8_t *, size_t, const convolver &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); template void Interleave<8, 8, VLType::None>(uint8_t *, const uint8_t *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); -#endif // MMLA_INT8 +#endif // ARM_COMPUTE_ENABLE_I8MM /* Arm® Neon™ 16-bit (height 8, block 1) */ template void IndirectInterleave<8, 1, VLType::None>(uint16_t *, const uint8_t * const * const *, unsigned int, unsigned int, unsigned int y0, unsigned int ymax, unsigned int k0, unsigned int kmax, bool, int32_t); diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_gemv_fp32_mla_8VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_gemv_fp32_mla_8VL/generic.cpp index c62e31936c..78387de90c 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_gemv_fp32_mla_8VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_gemv_fp32_mla_8VL/generic.cpp @@ -21,7 +21,7 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
DEALINGS * IN THE SOFTWARE. */ -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "arm_gemm.hpp" #include "../../utils.hpp" diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_dot_6x4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_dot_6x4VL.hpp index 066bff4602..7b0282fa32 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_dot_6x4VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_dot_6x4VL.hpp @@ -22,7 +22,7 @@ * IN THE SOFTWARE. */ #pragma once -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "../std_transforms_sve.hpp" #include "../bfloat.hpp" @@ -81,4 +81,4 @@ public: } // namespace arm_gemm #undef ARGLIST -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_dot_6x4VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_dot_6x4VL/generic.cpp index 1233a98531..34a657f64f 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_dot_6x4VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_dot_6x4VL/generic.cpp @@ -21,7 +21,7 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "arm_gemm.hpp" #include "../../utils.hpp" @@ -2153,4 +2153,4 @@ void sve_hybrid_bf16fp32_dot_6x4VL ( } } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL.hpp index 5c8563952f..f98ccdc7d3 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL.hpp @@ -22,7 +22,7 @@ * IN THE SOFTWARE. 
*/ #pragma once -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "../std_transforms_sve.hpp" @@ -80,4 +80,4 @@ public: } // namespace arm_gemm #undef ARGLIST -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL/generic.cpp index 7cc03bbfb5..c151179a1f 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL/generic.cpp @@ -21,7 +21,7 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "arm_gemm.hpp" #include "../../utils.hpp" @@ -3094,4 +3094,4 @@ void sve_hybrid_fp16_mla_6x4VL ( } } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL.hpp index b696e73637..4c0a3a11e0 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL.hpp @@ -22,7 +22,7 @@ * IN THE SOFTWARE. 
*/ #pragma once -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "../std_transforms_sve.hpp" @@ -80,4 +80,4 @@ public: } // namespace arm_gemm #undef ARGLIST -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL/generic.cpp index dee9a107ff..25d65826b9 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL/generic.cpp @@ -21,7 +21,7 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "arm_gemm.hpp" #include "../../utils.hpp" @@ -2152,4 +2152,4 @@ void sve_hybrid_fp32_mla_6x4VL ( } } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL.hpp index 2273d97d5f..87f063d224 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL.hpp @@ -22,7 +22,7 @@ * IN THE SOFTWARE. 
*/ #pragma once -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "../std_transforms_sve.hpp" @@ -80,4 +80,4 @@ public: } // namespace arm_gemm #undef ARGLIST -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL/generic.cpp index 863325f7f5..943e0ac148 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL/generic.cpp @@ -21,7 +21,7 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "arm_gemm.hpp" #include "../../utils.hpp" @@ -1616,4 +1616,4 @@ void sve_hybrid_fp32_mla_8x1VL ( } } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_dot_4x4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_dot_4x4VL.hpp index bc93ced25b..c278b3fc6b 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_dot_4x4VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_dot_4x4VL.hpp @@ -22,7 +22,7 @@ * IN THE SOFTWARE. 
*/ #pragma once -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "../std_transforms_sve.hpp" @@ -80,4 +80,4 @@ public: } // namespace arm_gemm #undef ARGLIST -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_dot_4x4VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_dot_4x4VL/generic.cpp index 50b9ba524d..8a7465ba6b 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_dot_4x4VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_dot_4x4VL/generic.cpp @@ -21,7 +21,7 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "arm_gemm.hpp" #include "../../utils.hpp" @@ -1529,4 +1529,4 @@ void sve_hybrid_s8qa_dot_4x4VL ( } } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_dot_6x4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_dot_6x4VL.hpp index 61927236ad..57056b4c2a 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_dot_6x4VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_dot_6x4VL.hpp @@ -22,7 +22,7 @@ * IN THE SOFTWARE. 
*/ #pragma once -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "../std_transforms_sve.hpp" @@ -80,4 +80,4 @@ public: } // namespace arm_gemm #undef ARGLIST -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_dot_6x4VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_dot_6x4VL/generic.cpp index f901a814f9..0328c107e2 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_dot_6x4VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_dot_6x4VL/generic.cpp @@ -21,7 +21,7 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "arm_gemm.hpp" #include "../../utils.hpp" @@ -2665,4 +2665,4 @@ void sve_hybrid_s8qs_dot_6x4VL ( } } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_6x4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_6x4VL.hpp index b2c376196f..37258978d3 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_6x4VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_6x4VL.hpp @@ -22,7 +22,7 @@ * IN THE SOFTWARE. 
*/ #pragma once -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "../std_transforms_sve.hpp" @@ -80,4 +80,4 @@ public: } // namespace arm_gemm #undef ARGLIST -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_6x4VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_6x4VL/generic.cpp index 8862b3665a..9cddee941e 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_6x4VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_6x4VL/generic.cpp @@ -21,7 +21,7 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "arm_gemm.hpp" #include "../../utils.hpp" @@ -1819,4 +1819,4 @@ void sve_hybrid_s8s32_dot_6x4VL ( } } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_dot_4x4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_dot_4x4VL.hpp index cfb8adfc87..3de8d178cd 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_dot_4x4VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_dot_4x4VL.hpp @@ -22,7 +22,7 @@ * IN THE SOFTWARE. 
*/ #pragma once -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "../std_transforms_sve.hpp" @@ -80,4 +80,4 @@ public: } // namespace arm_gemm #undef ARGLIST -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_dot_4x4VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_dot_4x4VL/generic.cpp index 373d82930b..0bfc28776f 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_dot_4x4VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_dot_4x4VL/generic.cpp @@ -21,7 +21,7 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "arm_gemm.hpp" #include "../../utils.hpp" @@ -1529,4 +1529,4 @@ void sve_hybrid_u8qa_dot_4x4VL ( } } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_6x4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_6x4VL.hpp index 4ea1d17c4e..a2883bfa30 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_6x4VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_6x4VL.hpp @@ -22,7 +22,7 @@ * IN THE SOFTWARE. 
*/ #pragma once -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "../std_transforms_sve.hpp" @@ -80,4 +80,4 @@ public: } // namespace arm_gemm #undef ARGLIST -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_6x4VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_6x4VL/generic.cpp index 97f6665d85..413bc65288 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_6x4VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_6x4VL/generic.cpp @@ -21,7 +21,7 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "arm_gemm.hpp" #include "../../utils.hpp" @@ -1819,4 +1819,4 @@ void sve_hybrid_u8u32_dot_6x4VL ( } } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_dot_8x3VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_dot_8x3VL.hpp index 12bb758b68..d717b745c9 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_dot_8x3VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_dot_8x3VL.hpp @@ -23,7 +23,7 @@ */ #pragma once -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "../bfloat.hpp" #include "../std_transforms_sve.hpp" @@ -69,4 +69,4 @@ public: } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_dot_8x3VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_dot_8x3VL/generic.cpp index adee900337..4f774b133f 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_dot_8x3VL/generic.cpp +++ 
b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_dot_8x3VL/generic.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "../../bfloat.hpp" #include "../../asmlib.hpp" @@ -326,4 +326,4 @@ void sve_interleaved_bf16fp32_dot_8x3VL(const bfloat16 *Apanel, const bfloat16 * } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_mmla_8x3VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_mmla_8x3VL.hpp index 2889dd7f0f..b7fc515341 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_mmla_8x3VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_mmla_8x3VL.hpp @@ -23,7 +23,7 @@ */ #pragma once -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "../bfloat.hpp" #include "../std_transforms_sve.hpp" @@ -69,4 +69,4 @@ public: } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_mmla_8x3VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_mmla_8x3VL/generic.cpp index e43404e608..c720942140 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_mmla_8x3VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_mmla_8x3VL/generic.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "../../bfloat.hpp" #include "../../asmlib.hpp" @@ -394,4 +394,4 @@ void sve_interleaved_bf16fp32_mmla_8x3VL(const bfloat16 *Apanel, const bfloat16 } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_8x3VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_8x3VL.hpp index eb946d9dfa..b797b8bec1 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_8x3VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_8x3VL.hpp @@ -23,7 +23,7 @@ */ #pragma once -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "../std_transforms_sve.hpp" @@ -69,4 +69,4 @@ public: } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_8x3VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_8x3VL/generic.cpp index 46b8770409..0f1937acc5 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_8x3VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_8x3VL/generic.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "../../asmlib.hpp" @@ -316,4 +316,4 @@ void sve_interleaved_fp16_mla_8x3VL(const __fp16 *Apanel, const __fp16 *Bpanel, } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_8x3VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_8x3VL.hpp index b84ba83b6a..f4bb809fe8 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_8x3VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_8x3VL.hpp @@ -23,7 +23,7 @@ */ #pragma once -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "../std_transforms_sve.hpp" @@ -69,4 +69,4 @@ public: } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_8x3VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_8x3VL/generic.cpp index 1e05a308b5..10feaa130b 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_8x3VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_8x3VL/generic.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "../../asmlib.hpp" @@ -325,4 +325,4 @@ void sve_interleaved_fp32_mla_8x3VL(const float *Apanel, const float *Bpanel, fl } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mmla_8x3VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mmla_8x3VL.hpp index 96216960ff..a355262fe2 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mmla_8x3VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mmla_8x3VL.hpp @@ -23,7 +23,7 @@ */ #pragma once -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "../std_transforms_sve.hpp" @@ -69,4 +69,4 @@ public: } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mmla_8x3VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mmla_8x3VL/generic.cpp index 39daf0ff20..a985a91b90 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mmla_8x3VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mmla_8x3VL/generic.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "../../asmlib.hpp" @@ -394,4 +394,4 @@ void sve_interleaved_fp32_mmla_8x3VL(const float *Apanel, const float *Bpanel, f } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_8x3VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_8x3VL.hpp index 3e16915cd4..aa6d9e7ec8 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_8x3VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_8x3VL.hpp @@ -23,7 +23,7 @@ */ #pragma once -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include #include "../std_transforms_sve.hpp" @@ -70,4 +70,4 @@ public: } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_8x3VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_8x3VL/generic.cpp index 674c2400bf..01c0f8cddc 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_8x3VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_8x3VL/generic.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include #include "../../asmlib.hpp" @@ -326,4 +326,4 @@ void sve_interleaved_s8s32_dot_8x3VL(const int8_t *Apanel, const int8_t *Bpanel, } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_mmla_8x3VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_mmla_8x3VL.hpp index 02b3451c54..671946b262 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_mmla_8x3VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_mmla_8x3VL.hpp @@ -23,7 +23,7 @@ */ #pragma once -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include #include "../std_transforms_sve.hpp" @@ -70,4 +70,4 @@ public: } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_mmla_8x3VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_mmla_8x3VL/generic.cpp index 578aa01732..9420210aae 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_mmla_8x3VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_mmla_8x3VL/generic.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include #include "../../asmlib.hpp" @@ -394,4 +394,4 @@ void sve_interleaved_s8s32_mmla_8x3VL(const int8_t *Apanel, const int8_t *Bpanel } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL.hpp index 832a224199..7d39485164 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL.hpp @@ -23,7 +23,7 @@ */ #pragma once -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include #include "../std_transforms_sve.hpp" @@ -70,4 +70,4 @@ public: } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL/generic.cpp index 891869c767..2139bab69d 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL/generic.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include #include "../../asmlib.hpp" @@ -326,4 +326,4 @@ void sve_interleaved_u8u32_dot_8x3VL(const uint8_t *Apanel, const uint8_t *Bpane } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_mmla_8x3VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_mmla_8x3VL.hpp index 4fdaab84bd..ca9cadd6d7 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_mmla_8x3VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_mmla_8x3VL.hpp @@ -23,7 +23,7 @@ */ #pragma once -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include #include "../std_transforms_sve.hpp" @@ -70,4 +70,4 @@ public: } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_mmla_8x3VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_mmla_8x3VL/generic.cpp index fa08a9d091..d42385789c 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_mmla_8x3VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_mmla_8x3VL/generic.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include #include "../../asmlib.hpp" @@ -394,4 +394,4 @@ void sve_interleaved_u8u32_mmla_8x3VL(const uint8_t *Apanel, const uint8_t *Bpan } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_fp32_mla_8x1VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_fp32_mla_8x1VL.hpp index 2097d76a54..ab225589e1 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_fp32_mla_8x1VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_fp32_mla_8x1VL.hpp @@ -23,7 +23,7 @@ */ #pragma once -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE @@ -85,4 +85,4 @@ public: } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_fp32_mla_8x1VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_fp32_mla_8x1VL/generic.cpp index e07cfa8218..cdad98c5f1 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_fp32_mla_8x1VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_fp32_mla_8x1VL/generic.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include @@ -18804,4 +18804,4 @@ void sve_smallK_hybrid_fp32_mla_8x1VL(const float *A, int lda, const float *B, f } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_s8s32_dot_8x1VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_s8s32_dot_8x1VL.hpp index e50c05ba39..e735567e95 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_s8s32_dot_8x1VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_s8s32_dot_8x1VL.hpp @@ -23,7 +23,7 @@ */ #pragma once -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include @@ -85,4 +85,4 @@ public: } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_s8s32_dot_8x1VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_s8s32_dot_8x1VL/generic.cpp index 98004e98a5..cd01411722 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_s8s32_dot_8x1VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_s8s32_dot_8x1VL/generic.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include @@ -8968,4 +8968,4 @@ void sve_smallK_hybrid_s8s32_dot_8x1VL(const int8_t *A, int lda, const int8_t *B } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_u8u32_dot_8x1VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_u8u32_dot_8x1VL.hpp index 60184be043..25dd10019d 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_u8u32_dot_8x1VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_u8u32_dot_8x1VL.hpp @@ -23,7 +23,7 @@ */ #pragma once -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include @@ -85,4 +85,4 @@ public: } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_u8u32_dot_8x1VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_u8u32_dot_8x1VL/generic.cpp index 6a8553216b..99a287b4f5 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_u8u32_dot_8x1VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_u8u32_dot_8x1VL/generic.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include @@ -8968,4 +8968,4 @@ void sve_smallK_hybrid_u8u32_dot_8x1VL(const uint8_t *A, int lda, const uint8_t } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/mergeresults-sve.cpp b/src/core/NEON/kernels/arm_gemm/mergeresults-sve.cpp new file mode 100644 index 0000000000..77d86b7dd8 --- /dev/null +++ b/src/core/NEON/kernels/arm_gemm/mergeresults-sve.cpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2021 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* As some of the merges need these headers, but are all included in the + * arm_gemm namespace, put these headers here. 
*/ +#include + +#include + +#include "arm_gemm.hpp" +#include "asmlib.hpp" +#include "utils.hpp" + +#include "mergeresults.hpp" + +namespace arm_gemm { + +#include "merges/list-sve.hpp" + +} // namespace arm_gemm \ No newline at end of file diff --git a/src/core/NEON/kernels/arm_gemm/mergeresults.cpp b/src/core/NEON/kernels/arm_gemm/mergeresults.cpp index 17566db375..bbfe8f23d9 100644 --- a/src/core/NEON/kernels/arm_gemm/mergeresults.cpp +++ b/src/core/NEON/kernels/arm_gemm/mergeresults.cpp @@ -37,9 +37,13 @@ namespace arm_gemm { template void MergeResults(Tout * out, const Tin * in, int ldc, int y0, int ymax, int x0, int xmax, const Tout *bias, Activation act, bool append) { + // NOTE: The following code is disabled to avoid calling get_vector_length(), so templated MergeResults will not + // be correct for SVE cases. This is OK as we have specialisations for all needed SVE cases anyway. + // // For SVE cases, multiply the width up by the vector length. // Use the *input* type to determine this, since this will be what the kernel operated on. - const int width = twidth * (sve ? get_vector_length() : 1); + // const int width = twidth * (sve ? get_vector_length() : 1); + const int width = twidth; const int full_y_blocks = (ymax - y0) / height; const int y_remainder = (ymax - y0) % height; diff --git a/src/core/NEON/kernels/arm_gemm/merges/list-sve.hpp b/src/core/NEON/kernels/arm_gemm/merges/list-sve.hpp new file mode 100644 index 0000000000..aded4b3b8c --- /dev/null +++ b/src/core/NEON/kernels/arm_gemm/merges/list-sve.hpp @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2021 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "sve_merge_fp16_3VLx8.hpp" +#include "sve_merge_fp32_3VLx8.hpp" +#include "sve_merge_s32_3VLx8.hpp" +#include "sve_merge_u32_3VLx8.hpp" \ No newline at end of file diff --git a/src/core/NEON/kernels/arm_gemm/merges/list.hpp b/src/core/NEON/kernels/arm_gemm/merges/list.hpp index 825c2fd020..dae874ef94 100644 --- a/src/core/NEON/kernels/arm_gemm/merges/list.hpp +++ b/src/core/NEON/kernels/arm_gemm/merges/list.hpp @@ -27,8 +27,4 @@ #include "a64_merge_s32_12x8.hpp" #include "a64_merge_s32_4x4.hpp" #include "a64_merge_u32_12x8.hpp" -#include "a64_merge_u32_4x4.hpp" -#include "sve_merge_fp16_3VLx8.hpp" -#include "sve_merge_fp32_3VLx8.hpp" -#include "sve_merge_s32_3VLx8.hpp" -#include "sve_merge_u32_3VLx8.hpp" +#include "a64_merge_u32_4x4.hpp" \ No newline at end of file diff --git a/src/core/NEON/kernels/arm_gemm/merges/sve_merge_fp16_3VLx8.hpp b/src/core/NEON/kernels/arm_gemm/merges/sve_merge_fp16_3VLx8.hpp index cf1d10329b..4da32b459c 100644 --- a/src/core/NEON/kernels/arm_gemm/merges/sve_merge_fp16_3VLx8.hpp +++ b/src/core/NEON/kernels/arm_gemm/merges/sve_merge_fp16_3VLx8.hpp @@ -23,7 +23,7 @@ */ #pragma once -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE template<> void MergeResults<3, 8, true>(__fp16 *out, const __fp16 *in, const int ldout, const int y0, const int ymax, const int x0, const int xmax, const __fp16 *bias, Activation act, bool append) @@ -1872,4 +1872,4 @@ void MergeResults<3, 8, true>(__fp16 *out, const __fp16 *in, const int ldout, co } } -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/merges/sve_merge_fp32_3VLx8.hpp b/src/core/NEON/kernels/arm_gemm/merges/sve_merge_fp32_3VLx8.hpp index b0d10c085d..5505f1efe4 100644 --- a/src/core/NEON/kernels/arm_gemm/merges/sve_merge_fp32_3VLx8.hpp +++ b/src/core/NEON/kernels/arm_gemm/merges/sve_merge_fp32_3VLx8.hpp @@ -23,7 +23,7 @@ */ #pragma once -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE template<> void 
MergeResults<3, 8, true>(float *out, const float *in, const int ldout, const int y0, const int ymax, const int x0, const int xmax, const float *bias, Activation act, bool append) @@ -1872,4 +1872,4 @@ void MergeResults<3, 8, true>(float *out, const float *in, const int ldout, cons } } -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/merges/sve_merge_s32_3VLx8.hpp b/src/core/NEON/kernels/arm_gemm/merges/sve_merge_s32_3VLx8.hpp index 34b6fe3ef5..c009881254 100644 --- a/src/core/NEON/kernels/arm_gemm/merges/sve_merge_s32_3VLx8.hpp +++ b/src/core/NEON/kernels/arm_gemm/merges/sve_merge_s32_3VLx8.hpp @@ -23,7 +23,7 @@ */ #pragma once -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE template<> void MergeResults<3, 8, true>(int32_t *out, const int32_t *in, const int ldout, const int y0, const int ymax, const int x0, const int xmax, const int32_t *bias, Activation , bool append) @@ -1394,4 +1394,4 @@ void MergeResults<3, 8, true>(int32_t *out, const int32_t *in, const int ldout, } } -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/merges/sve_merge_u32_3VLx8.hpp b/src/core/NEON/kernels/arm_gemm/merges/sve_merge_u32_3VLx8.hpp index c4b2bb56d6..e992f6722c 100644 --- a/src/core/NEON/kernels/arm_gemm/merges/sve_merge_u32_3VLx8.hpp +++ b/src/core/NEON/kernels/arm_gemm/merges/sve_merge_u32_3VLx8.hpp @@ -23,7 +23,7 @@ */ #pragma once -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE template<> void MergeResults<3, 8, true>(uint32_t *out, const uint32_t *in, const int ldout, const int y0, const int ymax, const int x0, const int xmax, const uint32_t *bias, Activation , bool append) @@ -1394,4 +1394,4 @@ void MergeResults<3, 8, true>(uint32_t *out, const uint32_t *in, const int ldout } } -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/utils.hpp b/src/core/NEON/kernels/arm_gemm/utils.hpp index 
1269ef62a6..e648ce2fb5 100644 --- a/src/core/NEON/kernels/arm_gemm/utils.hpp +++ b/src/core/NEON/kernels/arm_gemm/utils.hpp @@ -141,52 +141,36 @@ struct IndirectInputArg { }; namespace utils { -namespace { - -#ifdef __ARM_FEATURE_SVE -template -inline unsigned long get_vector_length_sz() { - unsigned long v; - - __asm ( - "cntb %0" - : "=r" (v) - ); - - return v / sz; -} - -#define VEC_LEN_SPEC(sz, opcode) template <> inline unsigned long get_vector_length_sz() { unsigned long v; __asm ( opcode " %0" : "=r" (v)); return v; } - -VEC_LEN_SPEC(8, "cntd") -VEC_LEN_SPEC(4, "cntw") -VEC_LEN_SPEC(2, "cnth") -VEC_LEN_SPEC(1, "cntb") -#endif - -} // anonymous namespace - template inline unsigned long get_vector_length() { -#ifdef __ARM_FEATURE_SVE - return get_vector_length_sz(); -#else +#if defined(ARM_COMPUTE_ENABLE_SVE) + uint64_t vl; + + __asm __volatile ( + ".inst 0x0420e3e0\n" // CNTB X0, ALL, MUL #1 + "mov %0, X0\n" + : "=r" (vl) + : + : "x0" + ); + + return vl / sizeof(T); +#else // !defined(ARM_COMPUTE_ENABLE_SVE) return 16 / sizeof(T); -#endif +#endif // defined(ARM_COMPUTE_ENABLE_SVE) } template inline unsigned long get_vector_length(VLType vl_type) { switch (vl_type) { -#ifdef __ARM_FEATURE_SVE +#if defined(ARM_COMPUTE_ENABLE_SVE) case VLType::SVE: - return get_vector_length_sz(); -#endif + return get_vector_length(); +#endif // defined(ARM_COMPUTE_ENABLE_SVE) default: return 16 / sizeof(T); } } - } // utils namespace } // arm_gemm namespace diff --git a/src/core/NEON/kernels/batchnormalization/impl/SVE/fp16.cpp b/src/core/NEON/kernels/batchnormalization/impl/SVE/fp16.cpp index a715b9d3ee..c7cfd7457d 100644 --- a/src/core/NEON/kernels/batchnormalization/impl/SVE/fp16.cpp +++ b/src/core/NEON/kernels/batchnormalization/impl/SVE/fp16.cpp @@ -29,7 +29,7 @@ #include #include -#if defined(ENABLE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) #include namespace arm_compute diff --git a/src/core/NEON/kernels/batchnormalization/impl/SVE/fp32.cpp 
b/src/core/NEON/kernels/batchnormalization/impl/SVE/fp32.cpp index 7cc570d8aa..b8a540158b 100644 --- a/src/core/NEON/kernels/batchnormalization/impl/SVE/fp32.cpp +++ b/src/core/NEON/kernels/batchnormalization/impl/SVE/fp32.cpp @@ -29,7 +29,7 @@ #include #include -#if defined(ENABLE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) #include namespace arm_compute diff --git a/src/core/NEON/wrapper/svtraits.h b/src/core/NEON/wrapper/svtraits.h index 8d2d660659..1d599a246c 100644 --- a/src/core/NEON/wrapper/svtraits.h +++ b/src/core/NEON/wrapper/svtraits.h @@ -23,7 +23,7 @@ */ #ifndef SRC_CORE_NEON_WRAPPER_SVTRAITS_H #define SRC_CORE_NEON_WRAPPER_SVTRAITS_H -#if defined(ENABLE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) #include "src/core/NEON/SVEMath.h" #include @@ -66,5 +66,5 @@ DEFINE_TYPES(bfloat16_t) } // namespace wrapper } // namespace arm_compute -#endif /* defined(ENABLE_SVE) */ +#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */ #endif /* #ifndef SRC_CORE_NEON_WRAPPER_SVTRAITS_H */ diff --git a/src/core/NEON/wrapper/traits.h b/src/core/NEON/wrapper/traits.h index 81685140f1..ebb64d9d76 100644 --- a/src/core/NEON/wrapper/traits.h +++ b/src/core/NEON/wrapper/traits.h @@ -26,9 +26,9 @@ #include -#if defined(ENABLE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) #include -#endif /* defined(ENABLE_SVE) */ +#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */ namespace arm_compute { @@ -116,13 +116,13 @@ template <> struct neon_bitvector{ using type = float #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -#if defined(ENABLE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) /** Create the appropriate SVE vector given its type */ template struct sve_vector; template <> struct sve_vector{ using scalar_type = uint8_t; using type = svuint8_t; }; template <> struct sve_vector{ using scalar_type = int8_t; using type = svint8_t; }; -#endif /* defined(ENABLE_SVE) */ +#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */ #endif /* DOXYGEN_SKIP_THIS */ diff --git a/src/core/common/Registrars.h 
b/src/core/common/Registrars.h index 44ddf9808d..65f6c7093d 100644 --- a/src/core/common/Registrars.h +++ b/src/core/common/Registrars.h @@ -26,17 +26,17 @@ #if defined(ENABLE_FP16_KERNELS) -#if defined(ENABLE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) #define REGISTER_FP16_SVE(func_name) &(func_name) -#else /* !defined(ENABLE_SVE) */ +#else /* !defined(ARM_COMPUTE_ENABLE_SVE) */ #define REGISTER_FP16_SVE(func_name) nullptr -#endif /* defined(ENABLE_SVE) */ +#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */ -#if defined(ENABLE_NEON) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +#if defined(ARM_COMPUTE_ENABLE_NEON) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) #define REGISTER_FP16_NEON(func_name) &(func_name) -#else /* !defined(ENABLE_NEON) */ +#else /* !defined(ARM_COMPUTE_ENABLE_NEON) */ #define REGISTER_FP16_NEON(func_name) nullptr -#endif /* defined(ENABLE_NEON) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */ +#endif /* defined(ARM_COMPUTE_ENABLE_NEON) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */ #else /* !defined(ENABLE_FP16_KERNELS) */ #define REGISTER_FP16_NEON(func_name) nullptr @@ -45,17 +45,17 @@ #if defined(ENABLE_FP32_KERNELS) -#if defined(ENABLE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) #define REGISTER_FP32_SVE(func_name) &(func_name) -#else /* !defined(ENABLE_SVE) */ +#else /* !defined(ARM_COMPUTE_ENABLE_SVE) */ #define REGISTER_FP32_SVE(func_name) nullptr -#endif /* defined(ENABLE_SVE) */ +#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */ -#if defined(ENABLE_NEON) +#if defined(ARM_COMPUTE_ENABLE_NEON) #define REGISTER_FP32_NEON(func_name) &(func_name) -#else /* !defined(ENABLE_NEON) */ +#else /* !defined(ARM_COMPUTE_ENABLE_NEON) */ #define REGISTER_FP32_NEON(func_name) nullptr -#endif /* defined(ENABLE_NEON) */ +#endif /* defined(ARM_COMPUTE_ENABLE_NEON) */ #else /* defined(ENABLE_FP32_KERNELS) */ #define REGISTER_FP32_NEON(func_name) nullptr @@ -66,11 +66,11 @@ #define REGISTER_QASYMM8_SIGNED_NEON(func_name) &(func_name) -#if 
defined(ENABLE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) #define REGISTER_QASYMM8_SIGNED_SVE(func_name) &(func_name) -#else /* !defined(ENABLE_SVE) */ +#else /* !defined(ARM_COMPUTE_ENABLE_SVE) */ #define REGISTER_QASYMM8_SIGNED_SVE(func_name) nullptr -#endif /* defined(ENABLE_SVE) */ +#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */ #else /* defined(ENABLE_QASYMM8_SIGNED_KERNELS) */ #define REGISTER_QASYMM8_SIGNED_NEON(func_name) nullptr @@ -80,11 +80,11 @@ #if defined(ENABLE_QASYMM8_KERNELS) #define REGISTER_QASYMM8_NEON(func_name) &(func_name) -#if defined(ENABLE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) #define REGISTER_QASYMM8_SVE(func_name) &(func_name) -#else /* !defined(ENABLE_SVE) */ +#else /* !defined(ARM_COMPUTE_ENABLE_SVE) */ #define REGISTER_QASYMM8_SVE(func_name) nullptr -#endif /* defined(ENABLE_SVE) */ +#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */ #else /* defined(ENABLE_QASYMM8_KERNELS) */ #define REGISTER_QASYMM8_NEON(func_name) nullptr @@ -95,11 +95,11 @@ #define REGISTER_QSYMM16_NEON(func_name) &(func_name) -#if defined(ENABLE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) #define REGISTER_QSYMM16_SVE(func_name) &(func_name) -#else /* !defined(ENABLE_SVE) */ +#else /* !defined(ARM_COMPUTE_ENABLE_SVE) */ #define REGISTER_QSYMM16_SVE(func_name) nullptr -#endif /* defined(ENABLE_SVE) */ +#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */ #else /* defined(ENABLE_QSYMM16_KERNELS) */ #define REGISTER_QSYMM16_NEON(func_name) nullptr @@ -108,17 +108,17 @@ #if defined(ENABLE_INTEGER_KERNELS) -#if defined(ENABLE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) #define REGISTER_INTEGER_SVE(func_name) &(func_name) -#else /* !defined(ENABLE_SVE) */ +#else /* !defined(ARM_COMPUTE_ENABLE_SVE) */ #define REGISTER_INTEGER_SVE(func_name) nullptr -#endif /* defined(ENABLE_SVE) */ +#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */ -#if defined(ENABLE_NEON) +#if defined(ARM_COMPUTE_ENABLE_NEON) #define REGISTER_INTEGER_NEON(func_name) &(func_name) -#else /* !defined(ENABLE_NEON) */ +#else /* 
!defined(ARM_COMPUTE_ENABLE_NEON) */ #define REGISTER_INTEGER_NEON(func_name) nullptr -#endif /* defined(ENABLE_NEON) */ +#endif /* defined(ARM_COMPUTE_ENABLE_NEON) */ #else /* defined(ENABLE_INTEGER_KERNELS) */ #define REGISTER_INTEGER_NEON(func_name) nullptr diff --git a/src/core/cpu/kernels/CpuActivationKernel.cpp b/src/core/cpu/kernels/CpuActivationKernel.cpp index 8a57a3b529..24642f1efb 100644 --- a/src/core/cpu/kernels/CpuActivationKernel.cpp +++ b/src/core/cpu/kernels/CpuActivationKernel.cpp @@ -45,7 +45,8 @@ namespace { struct ActivationSelectorData { - DataType dt; + DataType dt; + const CPUInfo &ci; }; using ActivationSelectorPtr = std::add_pointer::type; @@ -60,19 +61,19 @@ struct ActivationKernel static const ActivationKernel available_kernels[] = { -#if defined(ENABLE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) { "fp16_sve_activation", - [](const ActivationSelectorData & data) { return data.dt == DataType::F16; }, + [](const ActivationSelectorData & data) { return data.dt == DataType::F16 && data.ci.has_sve(); }, REGISTER_FP16_SVE(arm_compute::cpu::fp16_sve_activation) }, { "fp32_sve_activation", - [](const ActivationSelectorData & data) { return data.dt == DataType::F32; }, + [](const ActivationSelectorData & data) { return data.dt == DataType::F32 && data.ci.has_sve(); }, REGISTER_FP32_SVE(arm_compute::cpu::fp32_sve_activation) }, -#endif /* defined(ENABLE_SVE) */ -#if defined(ENABLE_NEON) +#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */ +#if defined(ARM_COMPUTE_ENABLE_NEON) { "fp16_neon_activation", [](const ActivationSelectorData & data) { return data.dt == DataType::F16; }, @@ -83,24 +84,24 @@ static const ActivationKernel available_kernels[] = [](const ActivationSelectorData & data) { return data.dt == DataType::F32; }, REGISTER_FP32_NEON(arm_compute::cpu::fp32_neon_activation) }, -#endif /* defined(ENABLE_NEON) */ -#if defined(__ARM_FEATURE_SVE2) +#endif /* defined(ARM_COMPUTE_ENABLE_NEON) */ +#if defined(ARM_COMPUTE_ENABLE_SVE2) { 
"qasymm8_sve_activation", - [](const ActivationSelectorData & data) { return data.dt == DataType::QASYMM8; }, + [](const ActivationSelectorData & data) { return data.dt == DataType::QASYMM8 && data.ci.has_sve2(); }, REGISTER_QASYMM8_SVE(arm_compute::cpu::qasymm8_sve_activation) }, { "qasymm8_signed_sve_activation", - [](const ActivationSelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED; }, + [](const ActivationSelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED && data.ci.has_sve2(); }, REGISTER_QASYMM8_SIGNED_SVE(arm_compute::cpu::qasymm8_signed_sve_activation) }, { "qsymm16_sve_activation", - [](const ActivationSelectorData & data) { return data.dt == DataType::QSYMM16; }, + [](const ActivationSelectorData & data) { return data.dt == DataType::QSYMM16 && data.ci.has_sve2(); }, REGISTER_QSYMM16_SVE(arm_compute::cpu::qsymm16_sve_activation) }, -#else /* !defined(__ARM_FEATURE_SVE2) */ +#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */ { "qasymm8_neon_activation", [](const ActivationSelectorData & data) { return data.dt == DataType::QASYMM8; }, @@ -116,7 +117,6 @@ static const ActivationKernel available_kernels[] = [](const ActivationSelectorData & data) { return data.dt == DataType::QSYMM16; }, REGISTER_QSYMM16_NEON(arm_compute::cpu::qsymm16_neon_activation) }, -#endif /* defined(__ARM_FEATURE_SVE2) */ }; const ActivationKernel *get_implementation(const ActivationSelectorData &data) @@ -155,7 +155,7 @@ Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst, const ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(src); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8_SIGNED, DataType::QASYMM8, DataType::QSYMM16, DataType::F16, DataType::F32); - const auto *uk = get_implementation(ActivationSelectorData{ src->data_type() }); + const auto *uk = get_implementation(ActivationSelectorData{ src->data_type(), CPUInfo::get() }); ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr); const 
DataType data_type = src->data_type(); @@ -243,7 +243,7 @@ void CpuActivationKernel::run_op(ITensorPack &tensors, const Window &window, con const ITensor *src = tensors.get_const_tensor(TensorType::ACL_SRC); ITensor *dst = tensors.get_tensor(TensorType::ACL_DST); - const auto *uk = get_implementation(ActivationSelectorData{ src->info()->data_type() }); + const auto *uk = get_implementation(ActivationSelectorData{ src->info()->data_type(), CPUInfo::get() }); uk->ukernel(src, dst, _act_info, window); } diff --git a/src/core/cpu/kernels/CpuAddKernel.cpp b/src/core/cpu/kernels/CpuAddKernel.cpp index 7afdceae38..8d74b4027b 100644 --- a/src/core/cpu/kernels/CpuAddKernel.cpp +++ b/src/core/cpu/kernels/CpuAddKernel.cpp @@ -45,9 +45,15 @@ namespace { struct AddSelectorData { - DataType dt1; - DataType dt2; - DataType dt3; + /* Data types for all ITensorInfos: + dt1 -> src0 + dt2 -> src1 + dt3 -> dst + */ + DataType dt1; + DataType dt2; + DataType dt3; + const CPUInfo &ci; }; using AddSelectorPtr = std::add_pointer::type; @@ -61,49 +67,99 @@ struct AddKernel static const AddKernel available_kernels[] = { -#if defined(ENABLE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE2) + { + "add_qasymm8_sve", + [](const AddSelectorData & data) + { + return ((data.dt1 == data.dt2) && (data.dt1 == DataType::QASYMM8)) && data.ci.has_sve(); + }, + REGISTER_QASYMM8_SVE(arm_compute::cpu::add_qasymm8_sve) + }, + { + "add_qasymm8_signed_sve", + [](const AddSelectorData & data) + { + return ((data.dt1 == data.dt2) && (data.dt1 == DataType::QASYMM8_SIGNED)) && data.ci.has_sve(); + }, + REGISTER_QASYMM8_SIGNED_SVE(arm_compute::cpu::add_qasymm8_signed_sve) + }, + { + "add_qsymm16_sve", + [](const AddSelectorData & data) + { + return ((data.dt1 == data.dt2) && (data.dt1 == DataType::QSYMM16)) && data.ci.has_sve(); + }, + REGISTER_QSYMM16_SVE(arm_compute::cpu::add_qsymm16_sve) + }, +#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */ +#if defined(ARM_COMPUTE_ENABLE_SVE) { "add_same_sve", - [](const
AddSelectorData & data) { return ((data.dt1 == data.dt2) && (data.dt1 == DataType::F32)); }, + [](const AddSelectorData & data) + { + return ((data.dt1 == data.dt2) && (data.dt1 == DataType::F32)) && data.ci.has_sve(); + }, REGISTER_FP32_SVE(arm_compute::cpu::add_same_sve) }, { "add_same_sve", - [](const AddSelectorData & data) { return ((data.dt1 == data.dt2) && (data.dt1 == DataType::F16)); }, + [](const AddSelectorData & data) + { + return ((data.dt1 == data.dt2) && (data.dt1 == DataType::F16)) && data.ci.has_sve(); + }, REGISTER_FP16_SVE(arm_compute::cpu::add_same_sve) }, { "add_same_sve", - [](const AddSelectorData & data) { return ((data.dt1 == data.dt2) && (data.dt1 == data.dt3) && (data.dt1 == DataType::U8)); }, + [](const AddSelectorData & data) + { + return ((data.dt1 == data.dt2) && (data.dt1 == data.dt3) && (data.dt1 == DataType::U8)) && data.ci.has_sve(); + }, REGISTER_INTEGER_SVE(arm_compute::cpu::add_same_sve) }, { "add_same_sve", - [](const AddSelectorData & data) { return ((data.dt1 == data.dt2) && (data.dt1 == data.dt3) && (data.dt1 == DataType::S16)); }, + [](const AddSelectorData & data) + { + return ((data.dt1 == data.dt2) && (data.dt1 == data.dt3) && (data.dt1 == DataType::S16)) && data.ci.has_sve(); + }, REGISTER_INTEGER_SVE(arm_compute::cpu::add_same_sve) }, { "add_same_sve", - [](const AddSelectorData & data) { return ((data.dt1 == data.dt2) && (data.dt1 == data.dt3) && (data.dt1 == DataType::S32)); }, + [](const AddSelectorData & data) + { + return ((data.dt1 == data.dt2) && (data.dt1 == data.dt3) && (data.dt1 == DataType::S32)) && data.ci.has_sve(); + }, REGISTER_INTEGER_SVE(arm_compute::cpu::add_same_sve) }, { "add_u8_s16_s16_sve", - [](const AddSelectorData & data) { return ((data.dt1 == DataType::U8) && (data.dt2 == DataType::S16)); }, + [](const AddSelectorData & data) + { + return ((data.dt1 == DataType::U8) && (data.dt2 == DataType::S16)) && data.ci.has_sve(); + }, REGISTER_INTEGER_SVE(arm_compute::cpu::add_u8_s16_s16_sve) }, { 
"add_s16_u8_s16_sve", - [](const AddSelectorData & data) { return ((data.dt1 == DataType::S16) && (data.dt2 == DataType::U8)); }, + [](const AddSelectorData & data) + { + return ((data.dt1 == DataType::S16) && (data.dt2 == DataType::U8)) && data.ci.has_sve(); + }, REGISTER_INTEGER_SVE(arm_compute::cpu::add_s16_u8_s16_sve) }, { "add_u8_u8_s16_sve", - [](const AddSelectorData & data) { return ((data.dt1 == data.dt2) && (data.dt3 == DataType::S16)); }, + [](const AddSelectorData & data) + { + return ((data.dt1 == data.dt2) && (data.dt3 == DataType::S16)) && data.ci.has_sve(); + }, REGISTER_INTEGER_SVE(arm_compute::cpu::add_u8_u8_s16_sve) }, -#endif /* defined(ENABLE_SVE) */ -#if defined(ENABLE_NEON) +#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */ +#if defined(ARM_COMPUTE_ENABLE_NEON) { "add_same_neon", [](const AddSelectorData & data) { return ((data.dt1 == data.dt2) && (data.dt1 == DataType::F32)); }, @@ -112,7 +168,10 @@ static const AddKernel available_kernels[] = #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) { "add_same_neon", - [](const AddSelectorData & data) { return ((data.dt1 == data.dt2) && (data.dt1 == DataType::F16)); }, + [](const AddSelectorData & data) + { + return ((data.dt1 == data.dt2) && (data.dt1 == DataType::F16)) && data.ci.has_fp16(); + }, REGISTER_FP16_NEON(arm_compute::cpu::add_same_neon) }, #endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */ @@ -146,24 +205,8 @@ static const AddKernel available_kernels[] = [](const AddSelectorData & data) { return ((data.dt1 == data.dt2) && (data.dt3 == DataType::S16)); }, REGISTER_INTEGER_NEON(arm_compute::cpu::add_u8_u8_s16_neon) }, -#endif /* defined(ENABLE_NEON) */ -#if defined(__ARM_FEATURE_SVE2) - { - "add_qasymm8_sve", - [](const AddSelectorData & data) { return ((data.dt1 == data.dt2) && (data.dt1 == DataType::QASYMM8)); }, - REGISTER_QASYMM8_SVE(arm_compute::cpu::add_qasymm8_sve) - }, - { - "add_qasymm8_signed_sve", - [](const AddSelectorData & data) { return ((data.dt1 == data.dt2) && 
(data.dt1 == DataType::QASYMM8_SIGNED)); }, - REGISTER_QASYMM8_SIGNED_SVE(arm_compute::cpu::add_qasymm8_signed_sve) - }, - { - "add_qsymm16_sve", - [](const AddSelectorData & data) { return ((data.dt1 == data.dt2) && (data.dt1 == DataType::QSYMM16)); }, - REGISTER_QSYMM16_SVE(arm_compute::cpu::add_qsymm16_sve) - }, -#else /* !defined(__ARM_FEATURE_SVE2) */ +#endif /* defined(ARM_COMPUTE_ENABLE_NEON) */ +#if defined(ARM_COMPUTE_ENABLE_NEON) || defined(ARM_COMPUTE_ENABLE_SVE) { "add_qasymm8_neon", [](const AddSelectorData & data) { return ((data.dt1 == data.dt2) && (data.dt1 == DataType::QASYMM8)); }, @@ -179,8 +222,7 @@ static const AddKernel available_kernels[] = [](const AddSelectorData & data) { return ((data.dt1 == data.dt2) && (data.dt1 == DataType::QSYMM16)); }, REGISTER_QSYMM16_NEON(arm_compute::cpu::add_qsymm16_neon) }, -#endif /* defined(ENABLE_NEON) */ - +#endif /* defined(ARM_COMPUTE_ENABLE_NEON) || defined(ARM_COMPUTE_ENABLE_SVE) */ }; /** Micro-kernel selector @@ -189,11 +231,11 @@ static const AddKernel available_kernels[] = * * @return A matching micro-kernel else nullptr */ -const AddKernel *get_implementation(DataType dt1, DataType dt2, DataType dt3) +const AddKernel *get_implementation(const CPUInfo &cpuinfo, DataType dt1, DataType dt2, DataType dt3) { for(const auto &uk : available_kernels) { - if(uk.is_selected({ dt1, dt2, dt3 })) + if(uk.is_selected({ dt1, dt2, dt3, cpuinfo })) { return &uk; } @@ -241,7 +283,7 @@ Status validate_arguments(const ITensorInfo &src0, const ITensorInfo &src1, cons "Wrong shape for dst"); } - const auto *uk = get_implementation(src0.data_type(), src1.data_type(), dst.data_type()); + const auto *uk = get_implementation(CPUInfo::get(), src0.data_type(), src1.data_type(), dst.data_type()); ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr); return Status{}; @@ -327,7 +369,7 @@ void CpuAddKernel::run_op(ITensorPack &tensors, const Window &window, const Thre const ITensor *src1 = 
tensors.get_const_tensor(TensorType::ACL_SRC_1); ITensor *dst = tensors.get_tensor(TensorType::ACL_DST); - const auto *uk = get_implementation(src0->info()->data_type(), src1->info()->data_type(), dst->info()->data_type()); + const auto *uk = get_implementation(CPUInfo::get(), src0->info()->data_type(), src1->info()->data_type(), dst->info()->data_type()); ARM_COMPUTE_ERROR_ON(uk == nullptr || uk->ukernel == nullptr); uk->ukernel(src0, src1, dst, _policy, window); diff --git a/src/core/cpu/kernels/CpuElementwiseKernel.cpp b/src/core/cpu/kernels/CpuElementwiseKernel.cpp index 643a870540..dc0c5b210d 100644 --- a/src/core/cpu/kernels/CpuElementwiseKernel.cpp +++ b/src/core/cpu/kernels/CpuElementwiseKernel.cpp @@ -43,7 +43,13 @@ namespace kernels { namespace { -using ElementwiseSelector = std::add_pointer::type; +struct ElementwiseSelectorData +{ + DataType dt; + const CPUInfo &ci; +}; + +using ElementwiseSelector = std::add_pointer::type; using UKernelType = CpuElementwiseKernel::ElementwiseFunction; struct ElementwiseKernel { @@ -52,23 +58,6 @@ struct ElementwiseKernel UKernelType *ukernel; }; -template -inline bool is_selected(DataType data_type) -{ - return dt == data_type; -} - -template -static ElementwiseKernel generate_kernel(UKernelType *ukernel) -{ - std::string kernel_name("op_"); - kernel_name += string_from_data_type(input_data_type) + "_"; - kernel_name += string_from_data_type(input_data_type) + "_"; - kernel_name += string_from_data_type(output_data_type); - - return { kernel_name.c_str(), is_selected, ukernel }; -} - template std::function configure_arithm_func(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst) @@ -76,36 +65,85 @@ configure_arithm_func(const ITensorInfo *src0, const ITensorInfo *src1, ITensorI ARM_COMPUTE_UNUSED(src1, dst); static ElementwiseKernel kernels[] = { -#if defined(ENABLE_SVE) - generate_kernel(REGISTER_FP32_SVE((arm_compute::cpu::elementwise_arithmetic_op))), - 
generate_kernel(REGISTER_INTEGER_SVE((arm_compute::cpu::elementwise_arithmetic_op))), - generate_kernel(REGISTER_INTEGER_SVE((arm_compute::cpu::elementwise_arithmetic_op))), -#endif /* defined(ENABLE_SVE) */ -#if defined(ENABLE_NEON) - generate_kernel(REGISTER_FP32_NEON((arm_compute::cpu::elementwise_arithm_op>))), - generate_kernel(REGISTER_INTEGER_NEON((arm_compute::cpu::elementwise_arithm_op>))), -#endif /* defined(ENABLE_NEON) */ -#if defined(__ARM_FEATURE_SVE2) - generate_kernel(REGISTER_QASYMM8_SVE((arm_compute::cpu::elementwise_arithmetic_quantized_op))), - generate_kernel(REGISTER_QASYMM8_SIGNED_SVE((arm_compute::cpu::elementwise_arithmetic_quantized_op))), -#else /* !defined(__ARM_FEATURE_SVE2) */ - generate_kernel(REGISTER_QASYMM8_NEON((arm_compute::cpu::elementwise_arithm_op_quantized))), - generate_kernel(REGISTER_QASYMM8_SIGNED_NEON((arm_compute::cpu::elementwise_arithm_op_quantized_signed))), -#endif /* defined(__ARM_FEATURE_SVE2) */ -#if defined(ENABLE_SVE) - generate_kernel(REGISTER_FP16_SVE((arm_compute::cpu::elementwise_arithmetic_op))), -#endif /* defined(ENABLE_SVE) */ -#if defined(ENABLE_NEON) +#if defined(ARM_COMPUTE_ENABLE_SVE) + { + "sve_elementwise_fp32", + [](const ElementwiseSelectorData & data) { return data.dt == DataType::F32 && data.ci.has_sve(); }, + REGISTER_FP32_SVE((arm_compute::cpu::elementwise_arithmetic_op)) + }, + { + "sve_elementwise_s32", + [](const ElementwiseSelectorData & data) { return data.dt == DataType::S32 && data.ci.has_sve(); }, + REGISTER_INTEGER_SVE((arm_compute::cpu::elementwise_arithmetic_op)) + }, + { + "sve_elementwise_s16", + [](const ElementwiseSelectorData & data) { return data.dt == DataType::S16 && data.ci.has_sve(); }, + REGISTER_INTEGER_SVE((arm_compute::cpu::elementwise_arithmetic_op)) + }, +#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */ +#if defined(ARM_COMPUTE_ENABLE_NEON) + { + "neon_elementwise_f32", + [](const ElementwiseSelectorData & data) { return data.dt == DataType::F32; }, + 
REGISTER_FP32_NEON((arm_compute::cpu::elementwise_arithm_op>)) + }, + { + "neon_elementwise_s32", + [](const ElementwiseSelectorData & data) { return data.dt == DataType::S32; }, + REGISTER_INTEGER_NEON((arm_compute::cpu::elementwise_arithm_op>)) + }, +#endif /* defined(ARM_COMPUTE_ENABLE_NEON) */ +#if defined(ARM_COMPUTE_ENABLE_SVE2) + { + "sve2_elementwise_qu8", + [](const ElementwiseSelectorData & data) { return data.dt == DataType::QASYMM8 && data.ci.has_sve2(); }, + REGISTER_QASYMM8_SVE((arm_compute::cpu::elementwise_arithmetic_quantized_op)) + }, + { + "sve2_elementwise_qs8", + [](const ElementwiseSelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED && data.ci.has_sve2(); }, + REGISTER_QASYMM8_SIGNED_SVE((arm_compute::cpu::elementwise_arithmetic_quantized_op)) + }, +#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */ +#if defined(ARM_COMPUTE_ENABLE_NEON) || defined(ARM_COMPUTE_ENABLE_SVE) + { + "neon_elementwise_qu8", + [](const ElementwiseSelectorData & data) { return data.dt == DataType::QASYMM8; }, + REGISTER_QASYMM8_NEON((arm_compute::cpu::elementwise_arithm_op_quantized)) + }, + { + "neon_elementwise_qs8", + [](const ElementwiseSelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED; }, + REGISTER_QASYMM8_SIGNED_NEON((arm_compute::cpu::elementwise_arithm_op_quantized_signed)) + }, +#endif /* defined(ARM_COMPUTE_ENABLE_NEON) || defined(ARM_COMPUTE_ENABLE_SVE) */ +#if defined(ARM_COMPUTE_ENABLE_SVE) + { + "sve_elementwise_f16", + [](const ElementwiseSelectorData & data) { return data.dt == DataType::F16 && data.ci.has_sve(); }, + REGISTER_FP16_SVE((arm_compute::cpu::elementwise_arithmetic_op)) + }, +#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */ +#if defined(ARM_COMPUTE_ENABLE_NEON) #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) - generate_kernel(REGISTER_FP16_NEON((arm_compute::cpu::elementwise_arithm_op>))), + { + "neon_elementwise_f16", + [](const ElementwiseSelectorData & data) { return data.dt == DataType::F16 && data.ci.has_fp16(); 
}, + REGISTER_FP16_NEON((arm_compute::cpu::elementwise_arithm_op>)) + }, #endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */ - generate_kernel(REGISTER_INTEGER_NEON((arm_compute::cpu::elementwise_arithm_op>))), -#endif /* defined(ENABLE_NEON) */ + { + "neon_elementwise_s16", + [](const ElementwiseSelectorData & data) { return data.dt == DataType::S16; }, + REGISTER_INTEGER_NEON((arm_compute::cpu::elementwise_arithm_op>)) + }, +#endif /* defined(ARM_COMPUTE_ENABLE_NEON) */ }; for(const auto &uk : kernels) { - if(uk.is_selected(src0->data_type())) + if(uk.is_selected({ src0->data_type(), CPUInfo::get() })) { return uk.ukernel; } @@ -121,36 +159,93 @@ configure_comp_func(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInf ARM_COMPUTE_UNUSED(src1, dst); static ElementwiseKernel kernels[] = { -#if defined(ENABLE_SVE) - generate_kernel(REGISTER_INTEGER_SVE((arm_compute::cpu::elementwise_comparison_op))), - generate_kernel(REGISTER_FP32_SVE((arm_compute::cpu::elementwise_comparison_op))), - generate_kernel(REGISTER_INTEGER_SVE((arm_compute::cpu::elementwise_comparison_op))), - generate_kernel(REGISTER_INTEGER_SVE((arm_compute::cpu::elementwise_comparison_op))), -#endif /* defined(ENABLE_SVE) */ -#if defined(ENABLE_NEON) - generate_kernel(REGISTER_INTEGER_NEON((arm_compute::cpu::elementwise_comp_op_8))), - generate_kernel(REGISTER_FP32_NEON((arm_compute::cpu::elementwise_comp_op_32))), - generate_kernel(REGISTER_INTEGER_NEON((arm_compute::cpu::elementwise_comp_op_16))), - generate_kernel(REGISTER_INTEGER_NEON((arm_compute::cpu::elementwise_comp_op_32))), -#endif /* defined(ENABLE_NEON) */ -#if defined(__ARM_FEATURE_SVE2) - generate_kernel(REGISTER_QASYMM8_SIGNED_SVE((arm_compute::cpu::elementwise_comparison_quantized_op))), - generate_kernel(REGISTER_QASYMM8_SVE((arm_compute::cpu::elementwise_comparison_quantized_op))), -#else /* !defined(__ARM_FEATURE_SVE2) */ - 
generate_kernel(REGISTER_QASYMM8_SIGNED_NEON((arm_compute::cpu::elementwise_comp_op_quantized_signed))), - generate_kernel(REGISTER_QASYMM8_NEON((arm_compute::cpu::elementwise_comp_op_quantized))), -#endif /* defined(__ARM_FEATURE_SVE2) */ -#if defined(ENABLE_SVE) - generate_kernel(REGISTER_FP16_SVE((arm_compute::cpu::elementwise_comparison_op))), -#endif /* defined(ENABLE_SVE) */ -#if defined(ENABLE_NEON) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) - generate_kernel(REGISTER_FP16_NEON((arm_compute::cpu::elementwise_comp_op_16))), -#endif /* defined(ENABLE_NEON) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */ +#if defined(ARM_COMPUTE_ENABLE_SVE) + { + "sve_comparison_u8", + [](const ElementwiseSelectorData & data) { return data.dt == DataType::U8 && data.ci.has_sve(); }, + REGISTER_INTEGER_SVE((arm_compute::cpu::elementwise_comparison_op)) + }, + { + "sve_comparison_f32", + [](const ElementwiseSelectorData & data) { return data.dt == DataType::F32 && data.ci.has_sve(); }, + REGISTER_FP32_SVE((arm_compute::cpu::elementwise_comparison_op)) + }, + { + "sve_comparison_s16", + [](const ElementwiseSelectorData & data) { return data.dt == DataType::S16 && data.ci.has_sve(); }, + REGISTER_INTEGER_SVE((arm_compute::cpu::elementwise_comparison_op)) + }, + { + "sve_comparison_s32", + [](const ElementwiseSelectorData & data) { return data.dt == DataType::S32 && data.ci.has_sve(); }, + REGISTER_INTEGER_SVE((arm_compute::cpu::elementwise_comparison_op)) + }, +#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */ +#if defined(ARM_COMPUTE_ENABLE_NEON) + { + "neon_comparison_u8", + [](const ElementwiseSelectorData & data) { return data.dt == DataType::U8; }, + REGISTER_INTEGER_NEON((arm_compute::cpu::elementwise_comp_op_8)) + }, + { + "neon_comparison_f32", + [](const ElementwiseSelectorData & data) { return data.dt == DataType::F32; }, + REGISTER_FP32_NEON((arm_compute::cpu::elementwise_comp_op_32)) + }, + { + "neon_comparison_s16", + [](const ElementwiseSelectorData & data) { 
return data.dt == DataType::S16; }, + REGISTER_INTEGER_NEON((arm_compute::cpu::elementwise_comp_op_16)) + }, + { + "neon_comparison_s32", + [](const ElementwiseSelectorData & data) { return data.dt == DataType::S32; }, + REGISTER_INTEGER_NEON((arm_compute::cpu::elementwise_comp_op_32)) + }, +#endif /* defined(ARM_COMPUTE_ENABLE_NEON) */ +#if defined(ARM_COMPUTE_ENABLE_SVE2) + { + "sve_comparison_qu8", + [](const ElementwiseSelectorData & data) { return data.dt == DataType::QASYMM8 && data.ci.has_sve2(); }, + REGISTER_QASYMM8_SVE((arm_compute::cpu::elementwise_comparison_quantized_op)) + }, + { + "sve_comparison_qs8", + [](const ElementwiseSelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED && data.ci.has_sve2(); }, + REGISTER_QASYMM8_SIGNED_SVE((arm_compute::cpu::elementwise_comparison_quantized_op)) + }, +#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */ +#if defined(ARM_COMPUTE_ENABLE_NEON) || defined(ARM_COMPUTE_ENABLE_SVE) + { + "neon_comparison_qu8", + [](const ElementwiseSelectorData & data) { return data.dt == DataType::QASYMM8; }, + REGISTER_QASYMM8_NEON((arm_compute::cpu::elementwise_comp_op_quantized)) + }, + { + "neon_comparison_qs8", + [](const ElementwiseSelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED; }, + REGISTER_QASYMM8_SIGNED_NEON((arm_compute::cpu::elementwise_comp_op_quantized_signed)) + }, +#endif /* defined(ARM_COMPUTE_ENABLE_NEON) || defined(ARM_COMPUTE_ENABLE_SVE) */ +#if defined(ARM_COMPUTE_ENABLE_SVE) + { + "sve_comparison_f16", + [](const ElementwiseSelectorData & data) { return data.dt == DataType::F16 && data.ci.has_sve(); }, + REGISTER_FP16_SVE((arm_compute::cpu::elementwise_comparison_op)) + }, +#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */ +#if defined(ARM_COMPUTE_ENABLE_NEON) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + { + "neon_comparison_f16", + [](const ElementwiseSelectorData & data) { return data.dt == DataType::F16 && data.ci.has_fp16(); }, + 
REGISTER_FP16_NEON((arm_compute::cpu::elementwise_comp_op_16)) + }, +#endif /* defined(ARM_COMPUTE_ENABLE_NEON) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */ }; for(const auto &uk : kernels) { - if(uk.is_selected(src0->data_type())) + if(uk.is_selected({ src0->data_type(), CPUInfo::get() })) { return uk.ukernel; } diff --git a/src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp b/src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp index 2600a49b70..91fa75ebaf 100644 --- a/src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp +++ b/src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp @@ -54,7 +54,7 @@ struct ElementwiseUnaryKernel static const ElementwiseUnaryKernel available_kernels[] = { -#if defined(ENABLE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) { "fp32_sve_elementwise_unary", [](DataType dt) { return dt == DataType::F32; }, @@ -70,8 +70,8 @@ static const ElementwiseUnaryKernel available_kernels[] = [](DataType dt) { return dt == DataType::S32; }, REGISTER_INTEGER_SVE(arm_compute::cpu::elementwise_sve_op), }, -#endif // defined(ENABLE_SVE) -#if defined(ENABLE_NEON) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) +#if defined(ARM_COMPUTE_ENABLE_NEON) { "fp32_neon_elementwise_unary", [](DataType dt) { return dt == DataType::F32; }, @@ -89,7 +89,7 @@ static const ElementwiseUnaryKernel available_kernels[] = [](DataType dt) { return dt == DataType::S32; }, REGISTER_INTEGER_NEON(arm_compute::cpu::elementwise_op), }, -#endif // defined(ENABLE_NEON) +#endif // defined(ARM_COMPUTE_ENABLE_NEON) }; const ElementwiseUnaryKernel *get_implementation(DataType dt) diff --git a/src/core/cpu/kernels/CpuScaleKernel.cpp b/src/core/cpu/kernels/CpuScaleKernel.cpp index 29475fa63f..a072dbd896 100644 --- a/src/core/cpu/kernels/CpuScaleKernel.cpp +++ b/src/core/cpu/kernels/CpuScaleKernel.cpp @@ -50,7 +50,8 @@ namespace { struct ScaleSelectorData { - DataType dt; + DataType dt; + const CPUInfo &ci; }; using ScaleSelectorPtr = std::add_pointer::type; using ScaleKernelPtr = std::add_pointer) 
}, #endif /* !defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */ @@ -129,7 +130,7 @@ static const ScaleKernel available_kernels[] = [](const ScaleSelectorData & data) { return data.dt == DataType::S16; }, REGISTER_INTEGER_NEON(arm_compute::cpu::common_neon_scale) }, -#endif /* defined(ENABLE_NEON) */ +#endif /* defined(ARM_COMPUTE_ENABLE_NEON) */ }; /** Micro-kernel selector @@ -153,7 +154,7 @@ const ScaleKernel *get_implementation(const ScaleSelectorData &data) Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dx, const ITensorInfo *dy, const ITensorInfo *offsets, ITensorInfo *dst, const ScaleKernelInfo &info) { - const auto *uk = get_implementation(ScaleSelectorData{ src->data_type() }); + const auto *uk = get_implementation(ScaleSelectorData{ src->data_type(), CPUInfo::get() }); ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr); ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(dst); @@ -607,7 +608,7 @@ void CpuScaleKernel::run_op(ITensorPack &tensors, const Window &window, const Th } else { - const auto *uk = get_implementation(ScaleSelectorData{ src->info()->data_type() }); + const auto *uk = get_implementation(ScaleSelectorData{ src->info()->data_type(), CPUInfo::get() }); uk->ukernel(src, dst, offsets, dx, dy, _policy, _border_mode, _constant_border_value, _sampling_offset, _align_corners, window); } } diff --git a/src/core/cpu/kernels/CpuSoftmaxKernel.cpp b/src/core/cpu/kernels/CpuSoftmaxKernel.cpp index 8ea186b16a..1e00e12050 100644 --- a/src/core/cpu/kernels/CpuSoftmaxKernel.cpp +++ b/src/core/cpu/kernels/CpuSoftmaxKernel.cpp @@ -47,7 +47,8 @@ namespace { struct SoftmaxSelectorData { - DataType dt; + DataType dt; + const CPUInfo &ci; }; using SoftmaxSelectorPtr = std::add_pointer::type; using SoftmaxLogits1DMaxKernelPtr = std::add_pointer::type; @@ -69,20 +70,20 @@ struct SoftmaxLogits1DMaxKernel static const SoftmaxLogits1DKernel available_logits_1d_kernels[] = { -#if defined(ENABLE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) { 
"sve_softmax_logits_1d_float", - [](const SoftmaxSelectorData & data) { return (data.dt == DataType::F32); }, + [](const SoftmaxSelectorData & data) { return (data.dt == DataType::F32) && data.ci.has_sve(); }, REGISTER_FP32_SVE(arm_compute::cpu::sve_softmax_logits_1d_float) }, { "sve_softmax_logits_1d_float", - [](const SoftmaxSelectorData & data) { return (data.dt == DataType::F16); }, + [](const SoftmaxSelectorData & data) { return (data.dt == DataType::F16) && data.ci.has_sve(); }, REGISTER_FP16_SVE(arm_compute::cpu::sve_softmax_logits_1d_float) }, -#endif /* defined(ENABLE_SVE) */ +#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */ -#if defined(ENABLE_NEON) +#if defined(ARM_COMPUTE_ENABLE_NEON) { "neon_softmax_logits_1d_float", [](const SoftmaxSelectorData & data) { return (data.dt == DataType::F32); }, @@ -95,20 +96,20 @@ static const SoftmaxLogits1DKernel available_logits_1d_kernels[] = REGISTER_FP16_NEON(arm_compute::cpu::neon_softmax_logits_1d_float) }, #endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */ -#endif /* !defined(ENABLE_NEON) */ +#endif /* defined(ARM_COMPUTE_ENABLE_NEON) */ -#if defined(__ARM_FEATURE_SVE2) +#if defined(ARM_COMPUTE_ENABLE_SVE2) { "sve_softmax_logits_1d_quantized", - [](const SoftmaxSelectorData & data) { return (data.dt == DataType::QASYMM8); }, + [](const SoftmaxSelectorData & data) { return (data.dt == DataType::QASYMM8) && data.ci.has_sve2(); }, REGISTER_QASYMM8_SVE(arm_compute::cpu::sve_softmax_logits_1d_quantized) }, { "sve_softmax_logits_1d_quantized", - [](const SoftmaxSelectorData & data) { return (data.dt == DataType::QASYMM8_SIGNED); }, + [](const SoftmaxSelectorData & data) { return (data.dt == DataType::QASYMM8_SIGNED) && data.ci.has_sve2(); }, REGISTER_QASYMM8_SIGNED_SVE(arm_compute::cpu::sve_softmax_logits_1d_quantized) }, -#else /* !defined(__ARM_FEATURE_SVE2) */ +#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */ { "neon_softmax_logits_1d_quantized", [](const SoftmaxSelectorData & data) { return (data.dt == 
DataType::QASYMM8); }, @@ -119,35 +120,33 @@ static const SoftmaxLogits1DKernel available_logits_1d_kernels[] = [](const SoftmaxSelectorData & data) { return (data.dt == DataType::QASYMM8_SIGNED); }, REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::neon_softmax_logits_1d_quantized) }, -#endif /* defined(__ARM_FEATURE_SVE2) */ - }; static const SoftmaxLogits1DMaxKernel available_logits_1d_max_kernels[] = { -#if defined(ENABLE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) { "sve_logits_1d_max", - [](const SoftmaxSelectorData & data) { return (data.dt == DataType::F32); }, + [](const SoftmaxSelectorData & data) { return (data.dt == DataType::F32) && data.ci.has_sve(); }, REGISTER_FP32_SVE(arm_compute::cpu::sve_logits_1d_max) }, { "sve_logits_1d_max", - [](const SoftmaxSelectorData & data) { return (data.dt == DataType::F16); }, + [](const SoftmaxSelectorData & data) { return (data.dt == DataType::F16) && data.ci.has_sve(); }, REGISTER_FP16_SVE(arm_compute::cpu::sve_logits_1d_max) }, { "sve_logits_1d_max", - [](const SoftmaxSelectorData & data) { return (data.dt == DataType::QASYMM8); }, + [](const SoftmaxSelectorData & data) { return (data.dt == DataType::QASYMM8) && data.ci.has_sve(); }, REGISTER_QASYMM8_SVE(arm_compute::cpu::sve_logits_1d_max) }, { "sve_logits_1d_max", - [](const SoftmaxSelectorData & data) { return (data.dt == DataType::QASYMM8_SIGNED); }, + [](const SoftmaxSelectorData & data) { return (data.dt == DataType::QASYMM8_SIGNED) && data.ci.has_sve(); }, REGISTER_QASYMM8_SIGNED_SVE(arm_compute::cpu::sve_logits_1d_max) }, -#endif /* defined(ENABLE_SVE) */ -#if defined(ENABLE_NEON) +#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */ +#if defined(ARM_COMPUTE_ENABLE_NEON) { "neon_logits_1d_max", [](const SoftmaxSelectorData & data) { return (data.dt == DataType::F32); }, @@ -170,14 +169,14 @@ static const SoftmaxLogits1DMaxKernel available_logits_1d_max_kernels[] = [](const SoftmaxSelectorData & data) { return (data.dt == DataType::QASYMM8_SIGNED); }, 
REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::neon_logits_1d_max) }, -#endif /* defined(ENABLE_NEON) */ +#endif /* defined(ARM_COMPUTE_ENABLE_NEON) */ }; const SoftmaxLogits1DKernel *get_implementation_logits(const SoftmaxSelectorData &data) { for(const auto &uk : available_logits_1d_kernels) { - if(uk.is_selected({ data.dt })) + if(uk.is_selected({ data.dt, CPUInfo::get() })) { return &uk; } @@ -189,7 +188,7 @@ const SoftmaxLogits1DMaxKernel *get_implementation_logits_max(const SoftmaxSelec { for(const auto &uk : available_logits_1d_max_kernels) { - if(uk.is_selected({ data.dt })) + if(uk.is_selected({ data.dt, CPUInfo::get() })) { return &uk; } @@ -253,7 +252,7 @@ void CpuLogits1DMaxKernel::run_op(ITensorPack &tensors, const Window &window, co const auto src = tensors.get_const_tensor(TensorType::ACL_SRC); auto dst = tensors.get_tensor(TensorType::ACL_DST); - const auto *uk = get_implementation_logits_max(SoftmaxSelectorData{ src->info()->data_type() }); + const auto *uk = get_implementation_logits_max(SoftmaxSelectorData{ src->info()->data_type(), CPUInfo::get() }); uk->ukernel(src, dst, window); } @@ -364,7 +363,7 @@ void CpuLogits1DSoftmaxKernel::run_op(ITensorPack &tensors, const Window void *tmp_for_thread = tmp->buffer() + (info.thread_id * tmp_size_for_thread); - const auto *uk = get_implementation_logits(SoftmaxSelectorData{ src->info()->data_type() }); + const auto *uk = get_implementation_logits(SoftmaxSelectorData{ src->info()->data_type(), CPUInfo::get() }); uk->ukernel(src, max, tmp_for_thread, dst, _beta, IS_LOG, window); } diff --git a/src/core/cpu/kernels/activation/sve/qasymm8.cpp b/src/core/cpu/kernels/activation/sve/qasymm8.cpp index 228b4ae530..69fffd96c5 100644 --- a/src/core/cpu/kernels/activation/sve/qasymm8.cpp +++ b/src/core/cpu/kernels/activation/sve/qasymm8.cpp @@ -21,14 +21,13 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ - +#if defined(ARM_COMPUTE_ENABLE_SVE2) #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Window.h" #include #include -#if defined(__ARM_FEATURE_SVE2) #include "src/core/NEON/SVEAsymm.h" #include "src/core/NEON/SVEMath.h" #include @@ -251,4 +250,4 @@ void qasymm8_sve_activation(const ITensor *src, ITensor *dst, const ActivationLa } } // namespace cpu } // namespace arm_compute -#endif /* defined(__ARM_FEATURE_SVE2) */ \ No newline at end of file +#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */ \ No newline at end of file diff --git a/src/core/cpu/kernels/activation/sve/qasymm8_signed.cpp b/src/core/cpu/kernels/activation/sve/qasymm8_signed.cpp index 989f825eb9..53ee515ff9 100644 --- a/src/core/cpu/kernels/activation/sve/qasymm8_signed.cpp +++ b/src/core/cpu/kernels/activation/sve/qasymm8_signed.cpp @@ -28,7 +28,7 @@ #include #include -#if defined(__ARM_FEATURE_SVE2) +#if defined(ARM_COMPUTE_ENABLE_SVE2) #include "src/core/NEON/SVEAsymm.h" #include "src/core/NEON/SVEMath.h" #include @@ -250,4 +250,4 @@ void qasymm8_signed_sve_activation(const ITensor *src, ITensor *dst, const Activ } } // namespace cpu } // namespace arm_compute -#endif /* defined(__ARM_FEATURE_SVE2) */ +#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */ diff --git a/src/core/cpu/kernels/activation/sve/qsymm16.cpp b/src/core/cpu/kernels/activation/sve/qsymm16.cpp index 66974875da..ac549770a2 100644 --- a/src/core/cpu/kernels/activation/sve/qsymm16.cpp +++ b/src/core/cpu/kernels/activation/sve/qsymm16.cpp @@ -29,7 +29,7 @@ #include #include -#if defined(__ARM_FEATURE_SVE2) +#if defined(ARM_COMPUTE_ENABLE_SVE2) #include "src/core/NEON/SVEMath.h" #include "src/core/NEON/SVESymm.h" #include @@ -117,4 +117,4 @@ void qsymm16_sve_activation(const ITensor *src, ITensor *dst, const ActivationLa } } // namespace cpu } // namespace arm_compute -#endif /* defined(__ARM_FEATURE_SVE2) */ +#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */ diff --git a/src/core/cpu/kernels/add/sve/impl.h 
b/src/core/cpu/kernels/add/sve/impl.h index c38b1d47e0..32ff5d0496 100644 --- a/src/core/cpu/kernels/add/sve/impl.h +++ b/src/core/cpu/kernels/add/sve/impl.h @@ -24,7 +24,7 @@ #ifndef SRC_CORE_SVE_KERNELS_ADD_IMPL_H #define SRC_CORE_SVE_KERNELS_ADD_IMPL_H -#if defined(ENABLE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/Traits.h" @@ -36,5 +36,5 @@ template void add_same_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window); } // namespace cpu } // namespace arm_compute -#endif // defined(ENABLE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) #endif // SRC_CORE_SVE_KERNELS_ADD_IMPL_H \ No newline at end of file diff --git a/src/core/cpu/kernels/add/sve/list.h b/src/core/cpu/kernels/add/sve/list.h index aebb43bb60..9e439497c9 100644 --- a/src/core/cpu/kernels/add/sve/list.h +++ b/src/core/cpu/kernels/add/sve/list.h @@ -24,7 +24,7 @@ #ifndef SRC_CORE_SVE_KERNELS_ADD_LIST_H #define SRC_CORE_SVE_KERNELS_ADD_LIST_H -#if defined(ENABLE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/Traits.h" #include "src/core/NEON/SVEMath.h" @@ -50,5 +50,5 @@ DECLARE_ADD_KERNEL(add_u8_u8_s16_sve); } // namespace cpu } // namespace arm_compute -#endif // defined(ENABLE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) #endif // SRC_CORE_SVE_KERNELS_ADD_LIST_H \ No newline at end of file diff --git a/src/core/cpu/kernels/add/sve/qasymm8.cpp b/src/core/cpu/kernels/add/sve/qasymm8.cpp index f6d1485e61..888ad878ca 100644 --- a/src/core/cpu/kernels/add/sve/qasymm8.cpp +++ b/src/core/cpu/kernels/add/sve/qasymm8.cpp @@ -21,13 +21,13 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#if defined(__ARM_FEATURE_SVE2) +#if defined(ARM_COMPUTE_ENABLE_SVE2) #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/Traits.h" -#include "src/core/NEON/wrapper/intrinsics/intrinsics.h" #include "src/core/NEON/SVEMath.h" +#include "src/core/NEON/wrapper/intrinsics/intrinsics.h" #include namespace arm_compute @@ -179,4 +179,4 @@ void add_qasymm8_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, con } } // namespace cpu } // namespace arm_compute -#endif /* defined(__ARM_FEATURE_SVE2) */ \ No newline at end of file +#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */ \ No newline at end of file diff --git a/src/core/cpu/kernels/add/sve/qasymm8_signed.cpp b/src/core/cpu/kernels/add/sve/qasymm8_signed.cpp index 8102aa5c65..3b922c6c21 100644 --- a/src/core/cpu/kernels/add/sve/qasymm8_signed.cpp +++ b/src/core/cpu/kernels/add/sve/qasymm8_signed.cpp @@ -21,13 +21,13 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#if defined(__ARM_FEATURE_SVE2) +#if defined(ARM_COMPUTE_ENABLE_SVE2) #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/Traits.h" -#include "src/core/NEON/wrapper/intrinsics/intrinsics.h" #include "src/core/NEON/SVEMath.h" +#include "src/core/NEON/wrapper/intrinsics/intrinsics.h" #include namespace arm_compute @@ -178,4 +178,4 @@ void add_qasymm8_signed_sve(const ITensor *src0, const ITensor *src1, ITensor *d } } // namespace cpu } // namespace arm_compute -#endif /* defined(__ARM_FEATURE_SVE2) */ \ No newline at end of file +#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */ \ No newline at end of file diff --git a/src/core/cpu/kernels/add/sve/qsymm16.cpp b/src/core/cpu/kernels/add/sve/qsymm16.cpp index fb62257b0a..eef5d245d3 100644 --- a/src/core/cpu/kernels/add/sve/qsymm16.cpp +++ b/src/core/cpu/kernels/add/sve/qsymm16.cpp @@ -21,13 +21,13 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#if defined(__ARM_FEATURE_SVE2) +#if defined(ARM_COMPUTE_ENABLE_SVE2) #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/Traits.h" -#include "src/core/NEON/wrapper/intrinsics/intrinsics.h" #include "src/core/NEON/SVEMath.h" +#include "src/core/NEON/wrapper/intrinsics/intrinsics.h" #include namespace arm_compute @@ -153,4 +153,4 @@ void add_qsymm16_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, con } } // namespace cpu } // namespace arm_compute -#endif /* defined(__ARM_FEATURE_SVE2) */ \ No newline at end of file +#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */ \ No newline at end of file diff --git a/src/core/cpu/kernels/elementwise/sve/elementwise_list.h b/src/core/cpu/kernels/elementwise/sve/elementwise_list.h index a92a8648a8..fea38d2995 100644 --- a/src/core/cpu/kernels/elementwise/sve/elementwise_list.h +++ b/src/core/cpu/kernels/elementwise/sve/elementwise_list.h @@ -23,7 +23,7 @@ */ #ifndef SRC_CORE_SVE_KERNELS_ELEMENTWISE_LIST_H #define SRC_CORE_SVE_KERNELS_ELEMENTWISE_LIST_H -#if defined(ENABLE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Types.h" @@ -167,5 +167,5 @@ template void elementwise_sve_op(const ITensor *in, ITensor *out, const Window &window, ElementWiseUnary op); } // namespace cpu } // namespace arm_compute -#endif // defined(ENABLE_SVE) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) #endif // SRC_CORE_NEON_KERNELS_ELEMENTWISE_UNARY_LIST_H \ No newline at end of file diff --git a/src/core/cpu/kernels/scale/sve/fp16.cpp b/src/core/cpu/kernels/scale/sve/fp16.cpp index 5b9377c6e6..76e7735b8a 100644 --- a/src/core/cpu/kernels/scale/sve/fp16.cpp +++ b/src/core/cpu/kernels/scale/sve/fp16.cpp @@ -22,7 +22,7 @@ * SOFTWARE. 
*/ -#if defined(ENABLE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensorPack.h" #include "arm_compute/core/Window.h" @@ -173,4 +173,4 @@ void fp16_sve_scale(const ITensor *src, ITensor *dst, const ITensor *offsets, co } // namespace cpu } // namespace arm_compute -#endif // ENABLE_SVE \ No newline at end of file +#endif // ARM_COMPUTE_ENABLE_SVE \ No newline at end of file diff --git a/src/core/cpu/kernels/scale/sve/fp32.cpp b/src/core/cpu/kernels/scale/sve/fp32.cpp index 05fbedf20d..030e109cdf 100644 --- a/src/core/cpu/kernels/scale/sve/fp32.cpp +++ b/src/core/cpu/kernels/scale/sve/fp32.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#if defined(ENABLE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensorPack.h" #include "arm_compute/core/Window.h" @@ -171,4 +171,4 @@ void fp32_sve_scale(const ITensor *src, ITensor *dst, const ITensor *offsets, co } // namespace cpu } // namespace arm_compute -#endif // ENABLE_SVE \ No newline at end of file +#endif // ARM_COMPUTE_ENABLE_SVE \ No newline at end of file diff --git a/src/core/cpu/kernels/scale/sve/integer.cpp b/src/core/cpu/kernels/scale/sve/integer.cpp index d7e270c661..486c674612 100644 --- a/src/core/cpu/kernels/scale/sve/integer.cpp +++ b/src/core/cpu/kernels/scale/sve/integer.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#if defined(ENABLE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensorPack.h" #include "arm_compute/core/Window.h" @@ -297,4 +297,4 @@ void s16_sve_scale(const ITensor *src, ITensor *dst, const ITensor *offsets, con } // namespace cpu } // namespace arm_compute -#endif // ENABLE_SVE \ No newline at end of file +#endif // ARM_COMPUTE_ENABLE_SVE \ No newline at end of file diff --git a/src/core/cpu/kernels/scale/sve/qasymm8.cpp b/src/core/cpu/kernels/scale/sve/qasymm8.cpp index f747037938..c9122ad40b 100644 --- a/src/core/cpu/kernels/scale/sve/qasymm8.cpp +++ b/src/core/cpu/kernels/scale/sve/qasymm8.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#if defined(ENABLE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensorPack.h" #include "arm_compute/core/Window.h" @@ -204,4 +204,4 @@ void qasymm8_sve_scale(const ITensor *src, ITensor *dst, const ITensor *offsets, } // namespace cpu } // namespace arm_compute -#endif // defined(ENABLE_SVE) \ No newline at end of file +#endif // defined(ARM_COMPUTE_ENABLE_SVE) \ No newline at end of file diff --git a/src/core/cpu/kernels/scale/sve/qasymm8_signed.cpp b/src/core/cpu/kernels/scale/sve/qasymm8_signed.cpp index 584ec7a0da..0843e61fd4 100644 --- a/src/core/cpu/kernels/scale/sve/qasymm8_signed.cpp +++ b/src/core/cpu/kernels/scale/sve/qasymm8_signed.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#if defined(ENABLE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensorPack.h" #include "arm_compute/core/Window.h" @@ -204,4 +204,4 @@ void qasymm8_signed_sve_scale(const ITensor *src, ITensor *dst, const ITensor *o } // namespace cpu } // namespace arm_compute -#endif // ENABLE_SVE \ No newline at end of file +#endif // ARM_COMPUTE_ENABLE_SVE \ No newline at end of file diff --git a/src/core/cpu/kernels/softmax/impl/sve/impl.cpp b/src/core/cpu/kernels/softmax/impl/sve/impl.cpp index 4ed5a4fbea..7a577fd565 100644 --- a/src/core/cpu/kernels/softmax/impl/sve/impl.cpp +++ b/src/core/cpu/kernels/softmax/impl/sve/impl.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#if defined(ENABLE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Types.h" @@ -182,4 +182,4 @@ template void sve_softmax_logits_1d_float(const ITensor *in, const IT ITensor *out, const float beta, bool is_log, const Window &window); } // namespace cpu } // namespace arm_compute -#endif /* defined(ENABLE_SVE) */ +#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */ diff --git a/src/core/cpu/kernels/softmax/impl/sve/list.h b/src/core/cpu/kernels/softmax/impl/sve/list.h index 7ddb358b8e..b4e1e1b186 100644 --- a/src/core/cpu/kernels/softmax/impl/sve/list.h +++ b/src/core/cpu/kernels/softmax/impl/sve/list.h @@ -24,7 +24,7 @@ #ifndef SRC_CORE_SVE_KERNELS_SOFTMAX_LIST_H #define SRC_CORE_SVE_KERNELS_SOFTMAX_LIST_H -#if defined(ENABLE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/Traits.h" #include "src/core/NEON/SVEMath.h" @@ -42,7 +42,7 @@ template void sve_softmax_logits_1d_float(const ITensor *in, const ITensor *max, void *const tmp, ITensor *out, const float beta, bool is_log, const Window &window); -#if 
defined(__ARM_FEATURE_SVE2) +#if defined(ARM_COMPUTE_ENABLE_SVE2) template void sve_softmax_logits_1d_quantized(const ITensor *in, const ITensor *max, void *const tmp, ITensor *out, float beta, bool is_log, const Window &window) @@ -215,9 +215,9 @@ void sve_softmax_logits_1d_quantized(const ITensor *in, const ITensor *max, void }, in_it, max_it, out_it); } -#endif /* defined(__ARM_FEATURE_SVE2) */ +#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */ } // namespace cpu } // namespace arm_compute -#endif /* defined(ENABLE_SVE) */ +#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */ #endif /* SRC_CORE_SVE_KERNELS_SOFTMAX_LIST_H */ diff --git a/src/cpu/CpuContext.cpp b/src/cpu/CpuContext.cpp index 18fa2e7469..a1c6413c98 100644 --- a/src/cpu/CpuContext.cpp +++ b/src/cpu/CpuContext.cpp @@ -90,67 +90,66 @@ AllocatorWrapper populate_allocator(AclAllocator *external_allocator) return is_valid ? AllocatorWrapper(*external_allocator) : AllocatorWrapper(default_allocator); } -CpuCapabilities populate_capabilities_legacy(const CPUInfo &cpu_info) +cpuinfo::CpuIsaInfo populate_capabilities_legacy(const CPUInfo &cpu_info) { - CpuCapabilities caps; + cpuinfo::CpuIsaInfo isa_caps; // Extract SIMD extension - caps.neon = true; -#ifdef SVE2 - caps.sve2 = true; -#endif /* SVE2 */ + isa_caps.neon = true; + isa_caps.sve = cpu_info.has_sve(); + isa_caps.sve2 = cpu_info.has_sve2(); + // Extract data-type support - caps.fp16 = cpu_info.has_fp16(); -#ifdef V8P6_BF - caps.bf16 = true; -#endif /* V8P6_BF */ + isa_caps.fp16 = cpu_info.has_fp16(); + isa_caps.bf16 = cpu_info.has_bf16(); + isa_caps.svebf16 = cpu_info.has_svebf16(); // Extract ISA extensions - caps.dot = cpu_info.has_dotprod(); -#ifdef MMLA_FP32 - caps.mmla_fp = true; -#endif /* MMLA_FP32 */ -#ifdef MMLA_INT8 - caps.mmla_int8 = true; -#endif /* MMLA_INT8 */ + isa_caps.dot = cpu_info.has_dotprod(); + isa_caps.i8mm = cpu_info.has_i8mm(); + isa_caps.svei8mm = cpu_info.has_svei8mm(); + isa_caps.svef32mm = cpu_info.has_svef32mm(); - return caps; + 
return isa_caps; } -CpuCapabilities populate_capabilities_flags(AclTargetCapabilities external_caps) +cpuinfo::CpuIsaInfo populate_capabilities_flags(AclTargetCapabilities external_caps) { - CpuCapabilities caps; + cpuinfo::CpuIsaInfo isa_caps; // Extract SIMD extension - caps.neon = external_caps & AclCpuCapabilitiesNeon; - caps.sve = external_caps & AclCpuCapabilitiesSve; - caps.sve2 = external_caps & AclCpuCapabilitiesSve2; + isa_caps.neon = external_caps & AclCpuCapabilitiesNeon; + isa_caps.sve = external_caps & AclCpuCapabilitiesSve; + isa_caps.sve2 = external_caps & AclCpuCapabilitiesSve2; + // Extract data-type support - caps.fp16 = external_caps & AclCpuCapabilitiesFp16; - caps.bf16 = external_caps & AclCpuCapabilitiesBf16; + isa_caps.fp16 = external_caps & AclCpuCapabilitiesFp16; + isa_caps.bf16 = external_caps & AclCpuCapabilitiesBf16; + // Extract ISA extensions - caps.dot = external_caps & AclCpuCapabilitiesDot; - caps.mmla_fp = external_caps & AclCpuCapabilitiesMmlaFp; - caps.mmla_int8 = external_caps & AclCpuCapabilitiesMmlaInt8; + isa_caps.dot = external_caps & AclCpuCapabilitiesDot; + isa_caps.i8mm = external_caps & AclCpuCapabilitiesMmlaInt8; + isa_caps.svef32mm = external_caps & AclCpuCapabilitiesMmlaFp; - return caps; + return isa_caps; } CpuCapabilities populate_capabilities(AclTargetCapabilities external_caps, int32_t max_threads) { - // Extract legacy structure - CPUInfo cpu_info; - CpuCapabilities caps; + + // Extract legacy structure + cpuinfo::CpuIsaInfo isa_caps; if(external_caps != AclCpuCapabilitiesAuto) { - caps = populate_capabilities_flags(external_caps); + isa_caps = populate_capabilities_flags(external_caps); } else { - caps = populate_capabilities_legacy(cpu_info); + isa_caps = populate_capabilities_legacy(CPUInfo::get()); } + caps.cpu_info = cpuinfo::CpuInfo(isa_caps, {}); // Set max number of threads #if defined(BARE_METAL) diff --git a/src/cpu/CpuContext.h b/src/cpu/CpuContext.h index e909767a7b..9a59af39c1 100644 --- 
a/src/cpu/CpuContext.h +++ b/src/cpu/CpuContext.h @@ -26,6 +26,7 @@ #include "src/common/AllocatorWrapper.h" #include "src/common/IContext.h" +#include "src/common/cpuinfo/CpuInfo.h" namespace arm_compute { @@ -34,17 +35,8 @@ namespace cpu /** Structure that encodes the CPU capabilities to be used */ struct CpuCapabilities { - bool neon{ false }; - bool sve{ false }; - bool sve2{ false }; - - bool fp16{ false }; - bool bf16{ false }; - bool dot{ false }; - bool mmla_int8{ false }; - bool mmla_fp{ false }; - - int32_t max_threads{ -1 }; + cpuinfo::CpuInfo cpu_info{}; + int32_t max_threads{ -1 }; }; /** CPU context implementation class */ diff --git a/src/runtime/CPP/CPPScheduler.cpp b/src/runtime/CPP/CPPScheduler.cpp index f112d456c7..3d7f1f16b1 100644 --- a/src/runtime/CPP/CPPScheduler.cpp +++ b/src/runtime/CPP/CPPScheduler.cpp @@ -493,7 +493,7 @@ void CPPScheduler::run_workloads(std::vector &workloads) } ThreadFeeder feeder(num_threads_to_use, workloads.size()); ThreadInfo info; - info.cpu_info = &_cpu_info; + info.cpu_info = &cpu_info(); info.num_threads = num_threads_to_use; unsigned int t = 0; auto thread_it = _impl->_threads.begin(); diff --git a/src/runtime/CPP/SingleThreadScheduler.cpp b/src/runtime/CPP/SingleThreadScheduler.cpp index 70536b7ccc..5890553f6f 100644 --- a/src/runtime/CPP/SingleThreadScheduler.cpp +++ b/src/runtime/CPP/SingleThreadScheduler.cpp @@ -49,7 +49,7 @@ void SingleThreadScheduler::schedule(ICPPKernel *kernel, const Hints &hints) } ThreadInfo info; - info.cpu_info = &_cpu_info; + info.cpu_info = &cpu_info(); kernel->run(kernel->window(), info); } @@ -57,14 +57,14 @@ void SingleThreadScheduler::schedule_op(ICPPKernel *kernel, const Hints &hints, { ARM_COMPUTE_UNUSED(hints); ThreadInfo info; - info.cpu_info = &_cpu_info; + info.cpu_info = &cpu_info(); kernel->run_op(tensors, window, info); } void SingleThreadScheduler::run_workloads(std::vector &workloads) { ThreadInfo info; - info.cpu_info = &_cpu_info; + info.cpu_info = &cpu_info(); 
for(auto &wl : workloads) { wl(info); diff --git a/src/runtime/IScheduler.cpp b/src/runtime/IScheduler.cpp index df04fed401..004b8a46b6 100644 --- a/src/runtime/IScheduler.cpp +++ b/src/runtime/IScheduler.cpp @@ -32,7 +32,6 @@ namespace arm_compute { IScheduler::IScheduler() - : _cpu_info() { // Work out the best possible number of execution threads _num_threads_hint = cpuinfo::num_threads_hint(); @@ -40,7 +39,7 @@ IScheduler::IScheduler() CPUInfo &IScheduler::cpu_info() { - return _cpu_info; + return CPUInfo::get(); } void IScheduler::set_num_threads_with_affinity(unsigned int num_threads, BindFunc func) @@ -111,7 +110,7 @@ void IScheduler::schedule_common(ICPPKernel *kernel, const Hints &hints, const W if(!kernel->is_parallelisable() || num_threads == 1) { ThreadInfo info; - info.cpu_info = &_cpu_info; + info.cpu_info = &cpu_info(); if(tensors.empty()) { kernel->run(max_window, info); diff --git a/src/runtime/NEON/functions/NEFFT2D.cpp b/src/runtime/NEON/functions/NEFFT2D.cpp index 3b787cd523..5aaf587cdf 100644 --- a/src/runtime/NEON/functions/NEFFT2D.cpp +++ b/src/runtime/NEON/functions/NEFFT2D.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -26,9 +26,6 @@ #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/Scheduler.h" -#include "src/core/NEON/kernels/NEFFTDigitReverseKernel.h" -#include "src/core/NEON/kernels/NEFFTRadixStageKernel.h" -#include "src/core/NEON/kernels/NEFFTScaleKernel.h" namespace arm_compute { diff --git a/src/runtime/OMP/OMPScheduler.cpp b/src/runtime/OMP/OMPScheduler.cpp index ca763f907b..e9b0bf4426 100644 --- a/src/runtime/OMP/OMPScheduler.cpp +++ b/src/runtime/OMP/OMPScheduler.cpp @@ -66,7 +66,7 @@ void OMPScheduler::schedule_op(ICPPKernel *kernel, const Hints &hints, const Win if(!kernel->is_parallelisable() || num_threads == 1) { ThreadInfo info; - info.cpu_info = &_cpu_info; + info.cpu_info = &cpu_info(); kernel->run_op(tensors, max_window, info); } else @@ -96,7 +96,7 @@ void OMPScheduler::run_workloads(std::vector } ThreadInfo info; - info.cpu_info = &_cpu_info; + info.cpu_info = &cpu_info(); info.num_threads = num_threads; #pragma omp parallel firstprivate(info) num_threads(num_threads) { diff --git a/tests/validation/cpu/unit/Context.cpp b/tests/validation/cpu/unit/Context.cpp index 57ca866032..42247ba1da 100644 --- a/tests/validation/cpu/unit/Context.cpp +++ b/tests/validation/cpu/unit/Context.cpp @@ -94,13 +94,13 @@ TEST_CASE(CpuCapabilities, framework::DatasetMode::ALL) opts.copts.capabilities = AclCpuCapabilitiesDot | AclCpuCapabilitiesMmlaInt8 | AclCpuCapabilitiesSve2; arm_compute::cpu::CpuContext ctx(&opts.copts); - ARM_COMPUTE_ASSERT(ctx.capabilities().dot == true); - ARM_COMPUTE_ASSERT(ctx.capabilities().mmla_int8 == true); - ARM_COMPUTE_ASSERT(ctx.capabilities().sve2 == true); - ARM_COMPUTE_ASSERT(ctx.capabilities().fp16 == false); + ARM_COMPUTE_ASSERT(ctx.capabilities().cpu_info.has_dotprod() == true); + ARM_COMPUTE_ASSERT(ctx.capabilities().cpu_info.has_i8mm() == true); + ARM_COMPUTE_ASSERT(ctx.capabilities().cpu_info.has_sve2() == true); + 
ARM_COMPUTE_ASSERT(ctx.capabilities().cpu_info.has_fp16() == false); arm_compute::cpu::CpuContext ctx_legacy(nullptr); - ARM_COMPUTE_ASSERT(ctx_legacy.capabilities().neon == true); + ARM_COMPUTE_ASSERT(ctx_legacy.capabilities().cpu_info.has_neon() == true); } TEST_SUITE_END() // Context -- cgit v1.2.1