aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Michalis Spyrou <michalis.spyrou@arm.com> 2021-06-07 14:23:57 +0100
committer Georgios Pinitas <georgios.pinitas@arm.com> 2021-06-23 12:25:50 +0000
commit 20fca524baf99402f742ce38c538f2fd07d5fff9 (patch)
tree b63d98383d1ba22bb3ca59d393e4ab9d47a9c762
parent 1d359279e22874121def2ce4bfdb633d94ea5ade (diff)
download ComputeLibrary-20fca524baf99402f742ce38c538f2fd07d5fff9.tar.gz
Create core library using high priority operators
A smaller core library is created using a subset of the operators. Changed the structure of filelist.json in order to include more information about the kernels and make the selection easier.

Resolves: COMPMID-4514
Change-Id: I079ca7d8e64346174eebdd13b834e1dd4dc36ca2
Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5786
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r--.gitignore1
-rw-r--r--Android.bp2
-rw-r--r--SConscript187
-rw-r--r--SConstruct16
-rw-r--r--arm_compute/core/CPP/CPPTypes.h57
-rw-r--r--arm_compute/runtime/IScheduler.h2
-rw-r--r--filelist.json2060
-rw-r--r--src/common/cpuinfo/CpuInfo.cpp15
-rw-r--r--src/common/cpuinfo/CpuInfo.h16
-rw-r--r--src/common/cpuinfo/CpuIsaInfo.cpp24
-rw-r--r--src/common/cpuinfo/CpuIsaInfo.h6
-rw-r--r--src/core/CPP/CPPTypes.cpp41
-rw-r--r--src/core/NEON/SVEAsymm.h6
-rw-r--r--src/core/NEON/SVEAsymm.inl6
-rw-r--r--src/core/NEON/SVEMath.h4
-rw-r--r--src/core/NEON/SVEMath.inl56
-rw-r--r--src/core/NEON/SVESymm.h6
-rw-r--r--src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp19
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp16.cpp23
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp32.cpp31
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/depthwise_implementation_constraints.hpp12
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/depthwise_s8q.cpp29
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/depthwise_u8q.cpp26
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/depthwise_u8s8u8q.cpp17
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/interleaves/8b_mla.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/interleaves/list.hpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/interleaves/sve_s8q_3x3_dot.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/interleaves/sve_u8q_3x3_dot.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst.hpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst.hpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst_strided.hpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst_strided/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst.hpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst.hpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_generic_output9_mla_depthfirst.hpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_generic_output9_mla_depthfirst/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst.hpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst.hpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst.hpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst.hpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst.hpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst.hpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst.hpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst.hpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst.hpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst.hpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst.hpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_avg_generic_depthfirst.hpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_avg_generic_depthfirst/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_max_2x2_s1_output2x2_depthfirst.hpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_max_generic_depthfirst.hpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_max_generic_depthfirst/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst.hpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_avg_generic_depthfirst.hpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_avg_generic_depthfirst/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_max_2x2_s1_output2x2_depthfirst.hpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_max_generic_depthfirst.hpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_max_generic_depthfirst/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_avg_generic_depthfirst.hpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_avg_generic_depthfirst/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_max_2x2_s1_output2x2_depthfirst.hpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_max_generic_depthfirst.hpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_max_generic_depthfirst/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8q_nhwc_avg_generic_depthfirst.hpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8q_nhwc_avg_generic_depthfirst/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8q_nhwc_max_generic_depthfirst.hpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8q_nhwc_max_generic_depthfirst/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_avg_generic_depthfirst.hpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_avg_generic_depthfirst/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_max_2x2_s1_output2x2_depthfirst.hpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_max_generic_depthfirst.hpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_max_generic_depthfirst/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_avg_generic_depthfirst.hpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_avg_generic_depthfirst/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_max_generic_depthfirst.hpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_max_generic_depthfirst/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_conv/pooling/pooling_fp16.cpp32
-rw-r--r--src/core/NEON/kernels/arm_conv/pooling/pooling_fp32.cpp24
-rw-r--r--src/core/NEON/kernels/arm_conv/pooling/pooling_s8.cpp24
-rw-r--r--src/core/NEON/kernels/arm_conv/pooling/pooling_s8q.cpp12
-rw-r--r--src/core/NEON/kernels/arm_conv/pooling/pooling_u8.cpp24
-rw-r--r--src/core/NEON/kernels/arm_conv/pooling/pooling_u8q.cpp12
-rw-r--r--src/core/NEON/kernels/arm_gemm/gemm_bf16.cpp18
-rw-r--r--src/core/NEON/kernels/arm_gemm/gemm_fp16.cpp2
-rw-r--r--src/core/NEON/kernels/arm_gemm/gemm_fp32.cpp16
-rw-r--r--src/core/NEON/kernels/arm_gemm/gemm_int8.cpp14
-rw-r--r--src/core/NEON/kernels/arm_gemm/gemm_qint8.cpp22
-rw-r--r--src/core/NEON/kernels/arm_gemm/gemm_quint8.cpp16
-rw-r--r--src/core/NEON/kernels/arm_gemm/gemm_uint8.cpp10
-rw-r--r--src/core/NEON/kernels/arm_gemm/interleave_indirect.cpp16
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_gemv_fp32_mla_8VL/generic.cpp2
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_dot_6x4VL.hpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_dot_6x4VL/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL.hpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL.hpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL.hpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_dot_4x4VL.hpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_dot_4x4VL/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_dot_6x4VL.hpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_dot_6x4VL/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_6x4VL.hpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_6x4VL/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_dot_4x4VL.hpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_dot_4x4VL/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_6x4VL.hpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_6x4VL/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_dot_8x3VL.hpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_dot_8x3VL/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_mmla_8x3VL.hpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_mmla_8x3VL/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_8x3VL.hpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_8x3VL/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_8x3VL.hpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_8x3VL/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mmla_8x3VL.hpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mmla_8x3VL/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_8x3VL.hpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_8x3VL/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_mmla_8x3VL.hpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_mmla_8x3VL/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL.hpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_mmla_8x3VL.hpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_mmla_8x3VL/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_fp32_mla_8x1VL.hpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_fp32_mla_8x1VL/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_s8s32_dot_8x1VL.hpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_s8s32_dot_8x1VL/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_u8u32_dot_8x1VL.hpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_u8u32_dot_8x1VL/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/mergeresults-sve.cpp (renamed from src/common/cpuinfo/target/CpuInfoSveUtils.cpp)33
-rw-r--r--src/core/NEON/kernels/arm_gemm/mergeresults.cpp6
-rw-r--r--src/core/NEON/kernels/arm_gemm/merges/list-sve.hpp (renamed from src/common/cpuinfo/target/CpuInfoSveUtils.h)20
-rw-r--r--src/core/NEON/kernels/arm_gemm/merges/list.hpp6
-rw-r--r--src/core/NEON/kernels/arm_gemm/merges/sve_merge_fp16_3VLx8.hpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/merges/sve_merge_fp32_3VLx8.hpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/merges/sve_merge_s32_3VLx8.hpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/merges/sve_merge_u32_3VLx8.hpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/utils.hpp50
-rw-r--r--src/core/NEON/kernels/batchnormalization/impl/SVE/fp16.cpp2
-rw-r--r--src/core/NEON/kernels/batchnormalization/impl/SVE/fp32.cpp2
-rw-r--r--src/core/NEON/wrapper/svtraits.h4
-rw-r--r--src/core/NEON/wrapper/traits.h8
-rw-r--r--src/core/common/Registrars.h54
-rw-r--r--src/core/cpu/kernels/CpuActivationKernel.cpp30
-rw-r--r--src/core/cpu/kernels/CpuAddKernel.cpp120
-rw-r--r--src/core/cpu/kernels/CpuElementwiseKernel.cpp231
-rw-r--r--src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp8
-rw-r--r--src/core/cpu/kernels/CpuScaleKernel.cpp29
-rw-r--r--src/core/cpu/kernels/CpuSoftmaxKernel.cpp49
-rw-r--r--src/core/cpu/kernels/activation/sve/qasymm8.cpp5
-rw-r--r--src/core/cpu/kernels/activation/sve/qasymm8_signed.cpp4
-rw-r--r--src/core/cpu/kernels/activation/sve/qsymm16.cpp4
-rw-r--r--src/core/cpu/kernels/add/sve/impl.h4
-rw-r--r--src/core/cpu/kernels/add/sve/list.h4
-rw-r--r--src/core/cpu/kernels/add/sve/qasymm8.cpp6
-rw-r--r--src/core/cpu/kernels/add/sve/qasymm8_signed.cpp6
-rw-r--r--src/core/cpu/kernels/add/sve/qsymm16.cpp6
-rw-r--r--src/core/cpu/kernels/elementwise/sve/elementwise_list.h4
-rw-r--r--src/core/cpu/kernels/elementwise/sve/elementwise_quantized_list.h4
-rw-r--r--src/core/cpu/kernels/elementwise/sve/elementwise_unary_list.h4
-rw-r--r--src/core/cpu/kernels/scale/sve/fp16.cpp4
-rw-r--r--src/core/cpu/kernels/scale/sve/fp32.cpp4
-rw-r--r--src/core/cpu/kernels/scale/sve/integer.cpp4
-rw-r--r--src/core/cpu/kernels/scale/sve/qasymm8.cpp4
-rw-r--r--src/core/cpu/kernels/scale/sve/qasymm8_signed.cpp4
-rw-r--r--src/core/cpu/kernels/softmax/impl/sve/impl.cpp4
-rw-r--r--src/core/cpu/kernels/softmax/impl/sve/list.h8
-rw-r--r--src/cpu/CpuContext.cpp67
-rw-r--r--src/cpu/CpuContext.h14
-rw-r--r--src/runtime/CPP/CPPScheduler.cpp2
-rw-r--r--src/runtime/CPP/SingleThreadScheduler.cpp6
-rw-r--r--src/runtime/IScheduler.cpp5
-rw-r--r--src/runtime/NEON/functions/NEFFT2D.cpp5
-rw-r--r--src/runtime/OMP/OMPScheduler.cpp4
-rw-r--r--tests/validation/cpu/unit/Context.cpp10
240 files changed, 3073 insertions, 1317 deletions
diff --git a/.gitignore b/.gitignore
index e917c499f0..2e7f887550 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,6 +8,7 @@ build/
*.txt
*.xml
*.embed
+*.diff
# Generated Android bp file
Generated_Android.bp
diff --git a/Android.bp b/Android.bp
index 7dc764de93..ce3420518e 100644
--- a/Android.bp
+++ b/Android.bp
@@ -64,7 +64,6 @@ cc_library_static {
"src/common/cpuinfo/CpuInfo.cpp",
"src/common/cpuinfo/CpuIsaInfo.cpp",
"src/common/cpuinfo/CpuModel.cpp",
- "src/common/cpuinfo/target/CpuInfoSveUtils.cpp",
"src/common/utils/LegacySupport.cpp",
"src/core/AccessWindowAutoPadding.cpp",
"src/core/AccessWindowStatic.cpp",
@@ -224,6 +223,7 @@ cc_library_static {
"src/core/NEON/kernels/arm_gemm/gemm_uint16.cpp",
"src/core/NEON/kernels/arm_gemm/gemm_uint8.cpp",
"src/core/NEON/kernels/arm_gemm/interleave_indirect.cpp",
+ "src/core/NEON/kernels/arm_gemm/mergeresults-sve.cpp",
"src/core/NEON/kernels/arm_gemm/mergeresults.cpp",
"src/core/NEON/kernels/arm_gemm/misc.cpp",
"src/core/NEON/kernels/arm_gemm/quantized.cpp",
diff --git a/SConscript b/SConscript
index 3e834e347c..2441d1870f 100644
--- a/SConscript
+++ b/SConscript
@@ -39,6 +39,7 @@ Import('vars')
Import('install_lib')
def build_bootcode_objs(sources):
+
arm_compute_env.Append(ASFLAGS = "-I bootcode/")
obj = arm_compute_env.Object(sources)
obj = install_lib(obj)
@@ -46,6 +47,7 @@ def build_bootcode_objs(sources):
return obj
def build_sve_objs(sources):
+
tmp_env = arm_compute_env.Clone()
tmp_env.Append(CXXFLAGS = "-march=armv8.2-a+sve+fp16")
obj = tmp_env.SharedObject(sources)
@@ -53,6 +55,13 @@ def build_sve_objs(sources):
Default(obj)
return obj
+def build_objs(sources):
+
+ obj = arm_compute_env.SharedObject(sources)
+ obj = install_lib(obj)
+ Default(obj)
+ return obj
+
def build_library(name, build_env, sources, static=False, libs=[]):
if static:
obj = build_env.StaticLibrary(name, source=sources, LIBS = arm_compute_env["LIBS"] + libs)
@@ -146,6 +155,73 @@ def create_version_file(target, source, env):
with open(target[0].get_path(), "w") as fd:
fd.write(build_info)
+def get_cpu_runtime_files(operator):
+ file_list = []
+ operators = filelist['cpu']['operators']
+
+ if "operator" in operators[operator]["files"]:
+ file_list += operators[operator]["files"]["operator"]
+ return file_list
+
+def get_gpu_runtime_files(operator):
+ file_list = []
+ operators = filelist['gpu']['operators']
+
+ if "operator" in operators[operator]["files"]:
+ file_list += operators[operator]["files"]["operator"]
+ return file_list
+
+def get_cpu_kernel_files(operator):
+
+ file_list = []
+ file_list_sve = []
+ operators = filelist['cpu']['operators']
+
+ if env['estate'] == '64' and "neon" in operators[operator]['files'] and "estate64" in operators[operator]['files']['neon']:
+ file_list += operators[operator]['files']['neon']['estate64']
+ if env['estate'] == '32' and "neon" in operators[operator]['files'] and "estate32" in operators[operator]['files']['neon']:
+ file_list += operators[operator]['files']['neon']['estate32']
+
+ if "kernel" in operators[operator]["files"]:
+ file_list += operators[operator]["files"]["kernel"]
+
+ if ("neon" in operators[operator]["files"]):
+ if any(i in env['data_type_support'] for i in ['all', 'qasymm8']) and ("qasymm8" in operators[operator]["files"]["neon"]):
+ file_list += operators[operator]["files"]["neon"]["qasymm8"]
+ if any(i in env['data_type_support'] for i in ['all', 'qasymm8_signed']) and ("qasymm8_signed" in operators[operator]["files"]["neon"]):
+ file_list += operators[operator]["files"]["neon"]["qasymm8_signed"]
+ if any(i in env['data_type_support'] for i in ['all', 'qsymm16']) and ("qsymm16" in operators[operator]["files"]["neon"]):
+ file_list += operators[operator]["files"]["neon"]["qsymm16"]
+ if any(i in env['data_type_support'] for i in ['all', 'integer']) and ("integer" in operators[operator]["files"]["neon"]):
+ file_list += operators[operator]["files"]["neon"]["integer"]
+
+ if (not "sve" in env['arch'] or env['fat_binary']) and ("neon" in operators[operator]["files"]):
+ if any(i in env['data_type_support'] for i in ['all', 'fp16']) and ("fp16" in operators[operator]["files"]["neon"]):
+ file_list += operators[operator]["files"]["neon"]["fp16"]
+ if any(i in env['data_type_support'] for i in ['all', 'fp32']) and ("fp32" in operators[operator]["files"]["neon"]):
+ file_list += operators[operator]["files"]["neon"]["fp32"]
+ if any(i in env['data_layout_support'] for i in ['all', 'nchw']) and ("nchw" in operators[operator]["files"]["neon"]):
+ file_list += operators[operator]['files']['neon']['nchw']
+ if ("all" in operators[operator]["files"]["neon"]):
+ file_list += operators[operator]["files"]["neon"]["all"]
+ if ("sve" in env['arch'] or env['fat_binary']) and ("sve" in operators[operator]["files"]):
+ if any(i in env['data_type_support'] for i in ['all', 'fp16']) and ("fp16" in operators[operator]["files"]["sve"]):
+ file_list_sve += operators[operator]["files"]["sve"]["fp16"]
+ if any(i in env['data_type_support'] for i in ['all', 'fp32']) and ("fp32" in operators[operator]["files"]["sve"]):
+ file_list_sve += operators[operator]["files"]["sve"]["fp32"]
+ if any(i in env['data_type_support'] for i in ['all', 'qasymm8']) and ("qasymm8" in operators[operator]["files"]["sve"]):
+ file_list_sve += operators[operator]["files"]["sve"]["qasymm8"]
+ if any(i in env['data_type_support'] for i in ['all', 'qasymm8_signed']) and ("qasymm8_signed" in operators[operator]["files"]["sve"]):
+ file_list_sve += operators[operator]["files"]["sve"]["qasymm8_signed"]
+ if any(i in env['data_type_support'] for i in ['all', 'qsymm16']) and ("qsymm16" in operators[operator]["files"]["sve"]):
+ file_list_sve += operators[operator]["files"]["sve"]["qsymm16"]
+ if any(i in env['data_type_support'] for i in ['all', 'integer']) and ("integer" in operators[operator]["files"]["sve"]):
+ file_list_sve += operators[operator]["files"]["sve"]["integer"]
+ if ("all" in operators[operator]["files"]["sve"]):
+ file_list_sve += operators[operator]["files"]["sve"]["all"]
+
+ return file_list, file_list_sve
+
arm_compute_env = env.Clone()
version_file = arm_compute_env.Command("src/core/arm_compute_version.embed", "", action=create_version_file)
arm_compute_env.AlwaysBuild(version_file)
@@ -208,6 +284,11 @@ if env['opencl']:
# Common backend files
core_files += filelist['common']
+# Initialize high priority core files
+core_files_hp = core_files
+core_files_sve_hp = []
+core_files = []
+
runtime_files += Glob('src/runtime/CPP/SingleThreadScheduler.cpp')
graph_files = Glob('src/graph/*.cpp')
@@ -220,16 +301,6 @@ if env['openmp']:
runtime_files += Glob('src/runtime/OMP/OMPScheduler.cpp')
if env['opencl']:
- cl_kernel_hp_files = ['src/core/gpu/cl/kernels/gemm/ClGemmHelpers.cpp',
- 'src/core/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeBifrost.cpp',
- 'src/core/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeMidgard.cpp',
- 'src/core/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeValhall.cpp',
- 'src/core/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedBifrost.cpp',
- 'src/core/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedValhall.cpp',
- 'src/core/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyBifrost.cpp',
- 'src/core/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyValhall.cpp',
- ]
- core_files += cl_kernel_hp_files
core_files += Glob('src/core/CL/*.cpp')
core_files += Glob('src/core/gpu/cl/*.cpp')
@@ -238,26 +309,24 @@ if env['opencl']:
runtime_files += Glob('src/runtime/CL/gemm/*.cpp')
runtime_files += Glob('src/runtime/CL/tuners/*.cpp')
runtime_files += Glob('src/runtime/gpu/cl/*.cpp')
- runtime_files += Glob('src/runtime/gpu/cl/operators/*.cpp')
runtime_files += Glob('src/runtime/CL/mlgo/*.cpp')
runtime_files += Glob('src/runtime/CL/gemm_auto_heuristics/*.cpp')
runtime_files += Glob('src/gpu/cl/*.cpp')
graph_files += Glob('src/graph/backends/CL/*.cpp')
- core_files += filelist['gpu']['core']['kernels']['high_priority'] + filelist['gpu']['core']['kernels']['all']
+ operators = filelist['gpu']['operators']
+ for operator in operators:
+ runtime_files += get_gpu_runtime_files(operator)
+ if "kernel" in operators[operator]["files"]:
+ core_files += operators[operator]["files"]["kernel"]
sve_o = []
core_files_sve = []
if env['neon']:
core_files += Glob('src/core/NEON/*.cpp')
- core_files += Glob('src/core/NEON/kernels/*.cpp')
-
- core_files += Glob('src/core/NEON/kernels/arm_gemm/*.cpp')
# build winograd/depthwise sources for either v7a / v8a
- core_files += Glob('src/core/NEON/kernels/convolution/*/*.cpp')
- core_files += Glob('src/core/NEON/kernels/convolution/winograd/*/*.cpp')
arm_compute_env.Append(CPPPATH = ["src/core/NEON/kernels/convolution/common/",
"src/core/NEON/kernels/convolution/winograd/",
"src/core/NEON/kernels/convolution/depthwise/",
@@ -267,64 +336,22 @@ if env['neon']:
graph_files += Glob('src/graph/backends/NEON/*.cpp')
- if env['estate'] == '32':
- core_files += Glob('src/core/NEON/kernels/arm_gemm/kernels/a32_*/*.cpp')
-
- if env['estate'] == '64':
- core_files += Glob('src/core/NEON/kernels/assembly/*.cpp')
- core_files += Glob('src/core/NEON/kernels/arm_conv/depthwise/*.cpp')
- core_files += Glob('src/core/NEON/kernels/arm_conv/depthwise/kernels/cpp_*/*.cpp')
- core_files += Glob('src/core/NEON/kernels/arm_conv/depthwise/interleaves/8b_mla.cpp')
- core_files += Glob('src/core/NEON/kernels/arm_conv/pooling/*.cpp')
- core_files += Glob('src/core/NEON/kernels/arm_conv/pooling/kernels/cpp_*/*.cpp')
-
- core_files += Glob('src/core/NEON/kernels/arm_gemm/kernels/a64_*/*.cpp')
- core_files += Glob('src/core/NEON/kernels/arm_conv/depthwise/interleaves/a64_*.cpp')
- core_files += Glob('src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_*/*.cpp')
- core_files += Glob('src/core/NEON/kernels/arm_conv/pooling/kernels/a64_*/*.cpp')
- if "sve" in env['arch'] or env['fat_binary']:
- core_files_sve += filelist['cpu']['core']['sve']['all']
- core_files_sve += Glob('src/core/NEON/kernels/arm_gemm/kernels/sve_*/*.cpp')
- core_files += Glob('src/core/NEON/kernels/arm_conv/depthwise/interleaves/sve_*.cpp')
- core_files += Glob('src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_*/*.cpp')
- core_files_sve += Glob('src/core/NEON/kernels/arm_conv/pooling/kernels/sve_*/*.cpp')
-
- if any(i in env['data_layout_support'] for i in ['all', 'nchw']):
- core_files += filelist['cpu']['core']['neon']['nchw']
-
- if any(i in env['data_type_support'] for i in ['all', 'fp16']):
- if not "sve" in env['arch'] or env['fat_binary']:
- core_files += filelist['cpu']['core']['neon']['fp16']
- if "sve" in env['arch'] or env['fat_binary']:
- core_files_sve += filelist['cpu']['core']['sve']['fp16']
- if any(i in env['data_type_support'] for i in ['all', 'fp32']):
- if not "sve" in env['arch'] or env['fat_binary']:
- core_files += filelist['cpu']['core']['neon']['fp32']
- if "sve" in env['arch'] or env['fat_binary']:
- core_files_sve += filelist['cpu']['core']['sve']['fp32']
- if any(i in env['data_type_support'] for i in ['all', 'qasymm8']):
- core_files += filelist['cpu']['core']['neon']['qasymm8']
- core_files_sve += filelist['cpu']['core']['sve']['qasymm8']
- if any(i in env['data_type_support'] for i in ['all', 'qasymm8_signed']):
- core_files += filelist['cpu']['core']['neon']['qasymm8_signed']
- core_files_sve += filelist['cpu']['core']['sve']['qasymm8_signed']
- if any(i in env['data_type_support'] for i in ['all', 'qsymm16']):
- core_files += filelist['cpu']['core']['neon']['qsymm16']
- core_files_sve += filelist['cpu']['core']['sve']['qsymm16']
- if any(i in env['data_type_support'] for i in ['all', 'integer']):
- if not "sve" in env['arch'] or env['fat_binary']:
- core_files += filelist['cpu']['core']['neon']['integer']
- if "sve" in env['arch'] or env['fat_binary']:
- core_files_sve += filelist['cpu']['core']['sve']['integer']
-
- core_files += Glob('src/core/cpu/kernels/*/*.cpp')
- core_files += filelist['cpu']['core']['kernels']['high_priority'] + filelist['cpu']['core']['kernels']['all']
+ # Load files based on user's options
+ operators = filelist['cpu']['operators']
+ for operator in operators:
+ runtime_files += get_cpu_runtime_files(operator)
+ if operator in filelist['cpu']['high_priority']:
+ file_list, file_list_sve = get_cpu_kernel_files(operator)
+ core_files_hp += file_list
+ core_files_sve_hp += file_list_sve
+ else:
+ file_list, file_list_sve = get_cpu_kernel_files(operator)
+ core_files += file_list
+ core_files_sve += file_list_sve
runtime_files += Glob('src/runtime/NEON/*.cpp')
runtime_files += Glob('src/runtime/NEON/functions/*.cpp')
- runtime_files += Glob('src/runtime/NEON/functions/assembly/*.cpp')
- runtime_files += filelist['cpu']['runtime']['all'] + filelist['cpu']['runtime']['operators']['high_priority'] \
- + filelist['cpu']['runtime']['operators']['all'] + filelist['cpu']['runtime']['operators']['internal']
+ runtime_files += filelist['cpu']['all']
bootcode_o = []
if env['os'] == 'bare_metal':
@@ -332,19 +359,27 @@ if env['os'] == 'bare_metal':
bootcode_o = build_bootcode_objs(bootcode_files)
Export('bootcode_o')
+high_priority_o = build_objs(core_files_hp)
+high_priority_sve_o = []
if (env['fat_binary']):
sve_o = build_sve_objs(core_files_sve)
- arm_compute_core_a = build_library('arm_compute_core-static', arm_compute_env, core_files + sve_o, static=True)
+ high_priority_sve_o = build_sve_objs(core_files_sve_hp)
+ arm_compute_core_a = build_library('arm_compute_core-static', arm_compute_env, core_files + high_priority_o + sve_o + high_priority_sve_o, static=True)
else:
- arm_compute_core_a = build_library('arm_compute_core-static', arm_compute_env, core_files + core_files_sve, static=True)
+ high_priority_o += build_objs(core_files_sve_hp)
+ arm_compute_core_a = build_library('arm_compute_core-static', arm_compute_env, core_files + core_files_sve + high_priority_o, static=True)
+arm_compute_core_hp_a = build_library('arm_compute_core_hp-static', arm_compute_env, high_priority_o + high_priority_sve_o, static=True)
Export('arm_compute_core_a')
+Export('arm_compute_core_hp_a')
if env['os'] != 'bare_metal' and not env['standalone']:
if (env['fat_binary']):
- arm_compute_core_so = build_library('arm_compute_core', arm_compute_env, core_files + sve_o, static=False)
+ arm_compute_core_so = build_library('arm_compute_core', arm_compute_env, core_files + high_priority_o + sve_o + high_priority_sve_o, static=False)
else:
- arm_compute_core_so = build_library('arm_compute_core', arm_compute_env, core_files + core_files_sve, static=False)
+ arm_compute_core_so = build_library('arm_compute_core', arm_compute_env, core_files + core_files_sve + high_priority_o, static=False)
+ arm_compute_core_so_hp = build_library('arm_compute_core_hp', arm_compute_env, high_priority_o + high_priority_sve_o, static=False)
Export('arm_compute_core_so')
+ Export('arm_compute_core_so_hp')
arm_compute_a = build_library('arm_compute-static', arm_compute_env, runtime_files, static=True, libs = [ arm_compute_core_a ])
Export('arm_compute_a')
diff --git a/SConstruct b/SConstruct
index f800d9d105..db6e3e0529 100644
--- a/SConstruct
+++ b/SConstruct
@@ -211,7 +211,7 @@ if 'v7a' in env['arch']:
elif 'v8' in env['arch']:
if 'sve2' in env['arch']:
env.Append(CXXFLAGS = ['-march=armv8.2-a+sve2+fp16+dotprod'])
- env.Append(CPPDEFINES = ['SVE2'])
+ env.Append(CPPDEFINES = ['ARM_COMPUTE_ENABLE_SVE2'])
elif 'sve' in env['arch']:
env.Append(CXXFLAGS = ['-march=armv8.2-a+sve+fp16+dotprod'])
elif 'armv8r64' in env['arch']:
@@ -221,10 +221,10 @@ elif 'v8' in env['arch']:
else:
env.Append(CXXFLAGS = ['-march=armv8-a'])
- if 'v8.6-a' in env['arch']:
- env.Append(CPPDEFINES = ['MMLA_INT8', 'V8P6', 'V8P6_BF', 'ARM_COMPUTE_FORCE_BF16'])
+ if 'v8.6-a' in env['arch'] or env['fat_binary']:
+ env.Append(CPPDEFINES = ['ARM_COMPUTE_ENABLE_I8MM', 'ARM_COMPUTE_ENABLE_BF16'])
if "disable_mmla_fp" not in env['custom_options']:
- env.Append(CPPDEFINES = ['MMLA_FP32'])
+ env.Append(CPPDEFINES = ['ARM_COMPUTE_ENABLE_SVEF32MM'])
elif 'x86' in env['arch']:
if env['estate'] == '32':
env.Append(CCFLAGS = ['-m32'])
@@ -257,9 +257,9 @@ if 'x86' not in env['arch']:
prefix = "aarch64-tizen-linux-gnu-"
if 'sve' in env['arch']:
- env.Append(CXXFLAGS = ['-DENABLE_SVE'])
+ env.Append(CXXFLAGS = ['-DENABLE_SVE', '-DARM_COMPUTE_ENABLE_SVE'])
else:
- env.Append(CXXFLAGS = ['-DENABLE_NEON'])
+ env.Append(CXXFLAGS = ['-DENABLE_NEON', '-DARM_COMPUTE_ENABLE_NEON'])
if env['build'] == 'native':
prefix = ""
@@ -308,8 +308,8 @@ if env['fat_binary']:
if env['arch'] != 'armv8.2-a':
print("Currently fat binary is only supported with armv8.2-a")
Exit(1)
- env.Append(CXXFLAGS = ['-DENABLE_SVE'])
- env.Append(CXXFLAGS = ['-DENABLE_NEON'])
+ env.Append(CXXFLAGS = ['-DENABLE_SVE', '-DARM_COMPUTE_ENABLE_SVE'])
+ env.Append(CXXFLAGS = ['-DENABLE_NEON', '-DARM_COMPUTE_ENABLE_NEON'])
if env['data_type_support']:
if any(i in env['data_type_support'] for i in ['all', 'fp16']):
diff --git a/arm_compute/core/CPP/CPPTypes.h b/arm_compute/core/CPP/CPPTypes.h
index 11891937d1..4484271d63 100644
--- a/arm_compute/core/CPP/CPPTypes.h
+++ b/arm_compute/core/CPP/CPPTypes.h
@@ -56,18 +56,23 @@ enum class CPUModel
class CPUInfo final
{
-public:
- /** Constructor */
+protected:
CPUInfo();
~CPUInfo();
- /** Disable copy constructor and assignment operator to avoid copying the vector of CPUs each time
- * CPUInfo is initialized once in the IScheduler and ThreadInfo will get a pointer to it.
+public:
+ /** Access the CPUInfo singleton.
+ * This method has been deprecated and will be removed in future releases
+ * @return The CPUInfo instance.
*/
- CPUInfo &operator=(const CPUInfo &cpuinfo) = delete;
- CPUInfo(const CPUInfo &cpuinfo) = delete;
- CPUInfo &operator=(CPUInfo &&cpuinfo) = default;
- CPUInfo(CPUInfo &&cpuinfo) = default;
+ static CPUInfo &get();
+
+ /* Delete move and copy constructors and assignment operators
+ */
+ CPUInfo(CPUInfo const &) = delete; // Copy construct
+ CPUInfo(CPUInfo &&) = delete; // Move construct
+ CPUInfo &operator=(CPUInfo const &) = delete; // Copy assign
+ CPUInfo &operator=(CPUInfo &&) = delete; // Move assign
/** Checks if the cpu model supports fp16.
*
@@ -79,16 +84,41 @@ public:
* @return true of the cpu supports bf16, false otherwise
*/
bool has_bf16() const;
+ /** Checks if the cpu model supports SVE bf16.
+ *
+ * @return true if the cpu supports SVE bf16, false otherwise
+ */
+ bool has_svebf16() const;
/** Checks if the cpu model supports dot product.
*
* @return true of the cpu supports dot product, false otherwise
*/
bool has_dotprod() const;
+ /** Checks if the cpu model supports SVE fp32 floating-point matrix multiplication.
+ *
+ * @return true if the cpu supports SVE fp32 floating-point matrix multiplication, false otherwise
+ */
+ bool has_svef32mm() const;
+ /** Checks if the cpu model supports integer matrix multiplication.
+ *
+ * @return true if the cpu supports integer matrix multiplication, false otherwise
+ */
+ bool has_i8mm() const;
+ /** Checks if the cpu model supports SVE integer matrix multiplication.
+ *
+ * @return true if the cpu supports SVE integer matrix multiplication, false otherwise
+ */
+ bool has_svei8mm() const;
/** Checks if the cpu model supports sve.
*
* @return true of the cpu supports sve, false otherwise
*/
bool has_sve() const;
+ /** Checks if the cpu model supports sve2.
+ *
+ * @return true if the cpu supports sve2, false otherwise
+ */
+ bool has_sve2() const;
/** Gets the cpu model for a given cpuid.
*
* @param[in] cpuid the id of the cpu core to be retrieved,
@@ -111,17 +141,6 @@ public:
* @return the size of the L1 cache
*/
unsigned int get_L2_cache_size() const;
- /** Set fp16 support
- *
- * @param[in] fp16 whether the cpu supports fp16.
- */
- void set_fp16(const bool fp16);
- /** Set dot product support
- *
- * @param[in] dotprod whether the cpu supports dot product.
- */
- void set_dotprod(const bool dotprod);
-
/** Return the maximum number of CPUs present
*
* @return Number of CPUs
diff --git a/arm_compute/runtime/IScheduler.h b/arm_compute/runtime/IScheduler.h
index 417c62cc9c..3759fee8a8 100644
--- a/arm_compute/runtime/IScheduler.h
+++ b/arm_compute/runtime/IScheduler.h
@@ -215,8 +215,6 @@ protected:
*/
void schedule_common(ICPPKernel *kernel, const Hints &hints, const Window &window, ITensorPack &tensors);
- CPUInfo _cpu_info{};
-
private:
unsigned int _num_threads_hint = {};
};
diff --git a/filelist.json b/filelist.json
index e30426bd19..0e17944e1d 100644
--- a/filelist.json
+++ b/filelist.json
@@ -1,297 +1,1775 @@
{
- "common" : [
- "src/common/cpuinfo/target/CpuInfoSveUtils.cpp",
- "src/common/cpuinfo/CpuInfo.cpp",
- "src/common/cpuinfo/CpuModel.cpp",
- "src/common/cpuinfo/CpuIsaInfo.cpp",
- "src/common/utils/LegacySupport.cpp",
- "src/common/AllocatorWrapper.cpp",
- "src/common/ITensorV2.cpp",
- "src/common/TensorPack.cpp"
+ "common": [
+ "src/common/cpuinfo/CpuInfo.cpp",
+ "src/common/cpuinfo/CpuModel.cpp",
+ "src/common/cpuinfo/CpuIsaInfo.cpp",
+ "src/common/utils/LegacySupport.cpp",
+ "src/common/AllocatorWrapper.cpp",
+ "src/common/ITensorV2.cpp",
+ "src/common/TensorPack.cpp"
+ ],
+ "c_api": {
+ "cpu": [
+ "src/c/AclContext.cpp",
+ "src/c/AclQueue.cpp",
+ "src/c/AclTensor.cpp",
+ "src/c/AclTensorPack.cpp",
+ "src/c/AclVersion.cpp"
],
- "c_api" :
- {
- "cpu": [
- "src/c/AclContext.cpp",
- "src/c/AclQueue.cpp",
- "src/c/AclTensor.cpp",
- "src/c/AclTensorPack.cpp",
- "src/c/AclVersion.cpp"
+ "gpu": [
+ "src/c/cl/AclOpenClExt.cpp"
+ ]
+ },
+ "gpu": {
+ "high_priority": [
+ "Activation",
+ "DepthwiseConv2d",
+ "DirectConv2d",
+ "Permute",
+ "Pool2d",
+ "Reshape"
+ ],
+ "operators": {
+ "Activation": {
+ "files": {
+ "operator": [
+ "src/runtime/gpu/cl/operators/ClActivation.cpp"
+ ],
+ "kernel": [
+ "src/core/gpu/cl/kernels/ClActivationKernel.cpp"
+ ]
+ }
+ },
+ "Add": {
+ "files": {
+ "operator": [
+ "src/runtime/gpu/cl/operators/ClAdd.cpp"
+ ]
+ }
+ },
+ "Cast": {
+ "files": {
+ "operator": [
+ "src/runtime/gpu/cl/operators/ClCast.cpp"
+ ],
+ "kernel": [
+ "src/core/gpu/cl/kernels/ClCastKernel.cpp"
+ ]
+ }
+ },
+ "Concatenate": {
+ "files": {
+ "operator": [
+ "src/runtime/gpu/cl/operators/ClConcatenate.cpp"
+ ],
+ "kernel": [
+ "src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.cpp",
+ "src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.cpp",
+ "src/core/gpu/cl/kernels/ClWidthConcatenateKernel.cpp",
+ "src/core/gpu/cl/kernels/ClHeightConcatenateKernel.cpp",
+ "src/core/gpu/cl/kernels/ClDepthConcatenateKernel.cpp",
+ "src/core/gpu/cl/kernels/ClBatchConcatenateKernel.cpp"
+ ]
+ }
+ },
+ "DirectConv2d": {
+ "files": {
+ "operator": [
+ "src/runtime/gpu/cl/operators/ClDirectConv2d.cpp"
+ ],
+ "kernel": [
+ "src/core/gpu/cl/kernels/ClDirectConv2dKernel.cpp"
+ ]
+ }
+ },
+ "ConvertFullyConnectedWeights": {
+ "files": {
+ "operator": [
+ "src/runtime/gpu/cl/operators/ClConvertFullyConnectedWeights.cpp"
+ ],
+ "kernel": [
+ "src/core/gpu/cl/kernels/ClConvertFullyConnectedWeightsKernel.cpp"
+ ]
+ }
+ },
+ "Permute": {
+ "files": {
+ "operator": [
+ "src/runtime/gpu/cl/operators/ClPermute.cpp"
+ ],
+ "kernel": [
+ "src/core/gpu/cl/kernels/ClPermuteKernel.cpp"
+ ]
+ }
+ },
+ "Pool2d": {
+ "files": {
+ "operator": [
+ "src/runtime/gpu/cl/operators/ClPool2d.cpp"
+ ],
+ "kernel": [
+ "src/core/gpu/cl/kernels/ClPool2dKernel.cpp"
+ ]
+ }
+ },
+ "PRelu": {
+ "files": {
+ "operator": [
+ "src/runtime/gpu/cl/operators/ClPRelu.cpp"
+ ]
+ }
+ },
+ "Reshape": {
+ "files": {
+ "operator": [
+ "src/runtime/gpu/cl/operators/ClReshape.cpp"
+ ],
+ "kernel": [
+ "src/core/gpu/cl/kernels/ClReshapeKernel.cpp"
+ ]
+ }
+ },
+ "Copy": {
+ "files": {
+ "operator": [
+ "src/runtime/gpu/cl/operators/ClCopy.cpp"
+ ],
+ "kernel": [
+ "src/core/gpu/cl/kernels/ClCopyKernel.cpp"
+ ]
+ }
+ },
+ "Crop": {
+ "files": {
+ "operator": [
+ "src/runtime/gpu/cl/operators/ClCrop.cpp"
+ ],
+ "kernel": [
+ "src/core/gpu/cl/kernels/ClCropKernel.cpp"
+ ]
+ }
+ },
+ "Dequantize": {
+ "files": {
+ "operator": [
+ "src/runtime/gpu/cl/operators/ClDequantize.cpp"
+ ],
+ "kernel": [
+ "src/core/gpu/cl/kernels/ClDequantizeKernel.cpp"
+ ]
+ }
+ },
+ "Elementwise": {
+ "files": {
+ "operator": [
+ "src/runtime/gpu/cl/operators/ClElementwiseOperations.cpp"
+ ],
+ "kernel": [
+ "src/core/gpu/cl/kernels/ClElementwiseKernel.cpp"
+ ]
+ }
+ },
+ "ElementwiseUnary": {
+ "files": {
+ "operator": [
+ "src/runtime/gpu/cl/operators/ClElementwiseUnary.cpp"
+ ],
+ "kernel": [
+ "src/core/gpu/cl/kernels/ClElementwiseUnaryKernel.cpp"
+ ]
+ }
+ },
+ "Fill": {
+ "files": {
+ "operator": [
+ "src/runtime/gpu/cl/operators/ClFill.cpp"
+ ],
+ "kernel": [
+ "src/core/gpu/cl/kernels/ClFillKernel.cpp"
+ ]
+ }
+ },
+ "Flatten": {
+ "files": {
+ "operator": [
+ "src/runtime/gpu/cl/operators/ClFlatten.cpp"
+ ]
+ }
+ },
+ "Floor": {
+ "files": {
+ "operator": [
+ "src/runtime/gpu/cl/operators/ClFloor.cpp"
+ ],
+ "kernel": [
+ "src/core/gpu/cl/kernels/ClFloorKernel.cpp"
+ ]
+ }
+ },
+ "GEMM": {
+ "files": {
+ "operator": [
+ "src/runtime/gpu/cl/operators/ClGemm.cpp"
+ ],
+ "kernel": [
+ "src/core/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.cpp",
+ "src/core/gpu/cl/kernels/gemm/ClGemmHelpers.cpp",
+ "src/core/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeBifrost.cpp",
+ "src/core/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeMidgard.cpp",
+ "src/core/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeValhall.cpp",
+ "src/core/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.cpp",
+ "src/core/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedBifrost.cpp",
+ "src/core/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedValhall.cpp",
+ "src/core/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.cpp",
+ "src/core/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyBifrost.cpp",
+ "src/core/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyValhall.cpp",
+ "src/core/gpu/cl/kernels/ClGemmMatrixMultiplyKernel.cpp",
+ "src/core/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.cpp",
+ "src/core/gpu/cl/kernels/ClGemmReshapeLhsMatrixKernel.cpp"
+ ]
+ }
+ },
+ "Mul": {
+ "files": {
+ "operator": [
+ "src/runtime/gpu/cl/operators/ClMul.cpp"
+ ],
+ "kernel": [
+ "src/core/gpu/cl/kernels/ClMulKernel.cpp"
+ ]
+ }
+ },
+ "Quantize": {
+ "files": {
+ "operator": [
+ "src/runtime/gpu/cl/operators/ClQuantize.cpp"
+ ],
+ "kernel": [
+ "src/core/gpu/cl/kernels/ClQuantizeKernel.cpp"
+ ]
+ }
+ },
+ "Scale": {
+ "files": {
+ "operator": [
+ "src/runtime/gpu/cl/operators/ClScale.cpp"
+ ],
+ "kernel": [
+ "src/core/gpu/cl/kernels/ClScaleKernel.cpp"
+ ]
+ }
+ },
+ "Softmax": {
+ "files": {
+ "operator": [
+ "src/runtime/gpu/cl/operators/ClSoftmax.cpp"
+ ],
+ "kernel": [
+ "src/core/gpu/cl/kernels/ClSoftmaxKernel.cpp"
+ ]
+ }
+ },
+ "Sub": {
+ "files": {
+ "operator": [
+ "src/runtime/gpu/cl/operators/ClSub.cpp"
+ ]
+ }
+ },
+ "Transpose": {
+ "files": {
+ "operator": [
+ "src/runtime/gpu/cl/operators/ClTranspose.cpp"
+ ],
+ "kernel": [
+ "src/core/gpu/cl/kernels/ClTransposeKernel.cpp"
+ ]
+ }
+ },
+ "GenerateProposals": {
+ "files": {
+ "kernel": [
+ "src/core/CL/kernels/CLGenerateProposalsLayerKernel.cpp"
+ ]
+ }
+ },
+ "ArgMinMax": {
+ "files": {
+ "kernel": [
+ "src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp"
+ ]
+ }
+ },
+ "BatchNormalization": {
+ "files": {
+ "kernel": [
+ "src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp"
+ ]
+ }
+ },
+ "BatchToSpace": {
+ "files": {
+ "kernel": [
+ "src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp"
+ ]
+ }
+ },
+ "Bitwise": {
+ "files": {
+ "kernel": [
+ "src/core/CL/kernels/CLBitwiseKernel.cpp"
+ ]
+ }
+ },
+ "BoundingBoxTransform": {
+ "files": {
+ "kernel": [
+ "src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp"
+ ]
+ }
+ },
+ "ChannelShuffleLayer": {
+ "files": {
+ "kernel": [
+ "src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp"
+ ]
+ }
+ },
+ "GEMMConv2d": {
+ "files": {
+ "kernel": [
+ "src/core/CL/kernels/CLCol2ImKernel.cpp",
+ "src/core/CL/kernels/CLIm2ColKernel.cpp"
+ ]
+ }
+ },
+ "Comparison": {
+ "files": {
+ "kernel": [
+ "src/core/CL/kernels/CLComparisonKernel.cpp"
+ ]
+ }
+ },
+ "DeconvolutionLayerUpsample": {
+ "files": {
+ "kernel": [
+ "src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp"
+ ]
+ }
+ },
+ "DeconvolutionReshapeOutput": {
+ "files": {
+ "kernel": [
+ "src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.cpp"
+ ]
+ }
+ },
+ "DepthToSpace": {
+ "files": {
+ "kernel": [
+ "src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp"
+ ]
+ }
+ },
+ "DepthwiseConvolutionLayer3x3NCHW": {
+ "files": {
+ "kernel": [
+ "src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp"
+ ]
+ }
+ },
+ "DepthwiseConvolutionLayer3x3NHWC": {
+ "files": {
+ "kernel": [
+ "src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp"
+ ]
+ }
+ },
+ "DepthwiseConvolutionLayerNative": {
+ "files": {
+ "kernel": [
+ "src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp"
+ ]
+ }
+ },
+ "FFTDigitReverse": {
+ "files": {
+ "kernel": [
+ "src/core/CL/kernels/CLFFTDigitReverseKernel.cpp"
+ ]
+ }
+ },
+ "FFTRadixStage": {
+ "files": {
+ "kernel": [
+ "src/core/CL/kernels/CLFFTRadixStageKernel.cpp"
+ ]
+ }
+ },
+ "FFTScale": {
+ "files": {
+ "kernel": [
+ "src/core/CL/kernels/CLFFTScaleKernel.cpp"
+ ]
+ }
+ },
+ "FillBorder": {
+ "files": {
+ "kernel": [
+ "src/core/CL/kernels/CLFillBorderKernel.cpp"
+ ]
+ }
+ },
+ "FuseBatchNormalization": {
+ "files": {
+ "kernel": [
+ "src/core/CL/kernels/CLFuseBatchNormalizationKernel.cpp"
+ ]
+ }
+ },
+ "Gather": {
+ "files": {
+ "kernel": [
+ "src/core/CL/kernels/CLGatherKernel.cpp"
+ ]
+ }
+ },
+ "GEMMLowpMatrixMultiplyNative": {
+ "files": {
+ "kernel": [
+ "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.cpp"
+ ]
+ }
+ },
+ "GEMMLowpMatrixMultiplyReshaped": {
+ "files": {
+ "kernel": [
+ "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.cpp"
+ ]
+ }
+ },
+ "GEMMLowpMatrixMultiplyReshapedOnlyRHS": {
+ "files": {
+ "kernel": [
+ "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp"
+ ]
+ }
+ },
+ "GEMMLowpOffsetContribution": {
+ "files": {
+ "kernel": [
+ "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp"
+ ]
+ }
+ },
+ "GEMMLowpOffsetContributionOutputStage": {
+ "files": {
+ "kernel": [
+ "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.cpp"
+ ]
+ }
+ },
+ "GEMMLowpQuantizeDownInt32ScaleByFixedPoint": {
+ "files": {
+ "kernel": [
+ "src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.cpp"
+ ]
+ }
+ },
+ "GEMMLowpQuantizeDownInt32ScaleByFloat": {
+ "files": {
+ "kernel": [
+ "src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.cpp"
+ ]
+ }
+ },
+ "GEMMLowpQuantizeDownInt32Scale": {
+ "files": {
+ "kernel": [
+ "src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.cpp"
+ ]
+ }
+ },
+ "GEMMLowpReduction": {
+ "files": {
+ "kernel": [
+ "src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp"
+ ]
+ }
+ },
+ "InstanceNormalization": {
+ "files": {
+ "kernel": [
+ "src/core/CL/kernels/CLInstanceNormalizationLayerKernel.cpp"
+ ]
+ }
+ },
+ "L2Normalize": {
+ "files": {
+ "kernel": [
+ "src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp"
+ ]
+ }
+ },
+ "LogicalNot": {
+ "files": {
+ "operator": [
+ "src/runtime/gpu/cl/operators/ClLogicalNot.cpp"
+ ]
+ }
+ },
+ "MaxUnpooling": {
+ "files": {
+ "kernel": [
+ "src/core/CL/kernels/CLMaxUnpoolingLayerKernel.cpp"
+ ]
+ }
+ },
+ "MeanStdDevNormalization": {
+ "files": {
+ "kernel": [
+ "src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp"
+ ]
+ }
+ },
+ "MinMax": {
+ "files": {
+ "kernel": [
+ "src/core/CL/kernels/CLMinMaxLayerKernel.cpp"
+ ]
+ }
+ },
+ "Normalization": {
+ "files": {
+ "kernel": [
+ "src/core/CL/kernels/CLNormalizationLayerKernel.cpp"
+ ]
+ }
+ },
+ "NormalizePlanarYUV": {
+ "files": {
+ "kernel": [
+ "src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp"
+ ]
+ }
+ },
+ "Pad": {
+ "files": {
+ "kernel": [
+ "src/core/CL/kernels/CLPadLayerKernel.cpp"
+ ]
+ }
+ },
+ "PriorBox": {
+ "files": {
+ "kernel": [
+ "src/core/CL/kernels/CLPriorBoxLayerKernel.cpp"
+ ]
+ }
+ },
+ "QLSTMLayerNormalization": {
+ "files": {
+ "kernel": [
+ "src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.cpp"
+ ]
+ }
+ },
+ "Range": {
+ "files": {
+ "kernel": [
+ "src/core/CL/kernels/CLRangeKernel.cpp"
+ ]
+ }
+ },
+ "ReductionOperation": {
+ "files": {
+ "kernel": [
+ "src/core/CL/kernels/CLReductionOperationKernel.cpp"
+ ]
+ }
+ },
+ "Remap": {
+ "files": {
+ "kernel": [
+ "src/core/CL/kernels/CLRemapKernel.cpp"
+ ]
+ }
+ },
+ "Reorg": {
+ "files": {
+ "kernel": [
+ "src/core/CL/kernels/CLReorgLayerKernel.cpp"
+ ]
+ }
+ },
+ "Reverse": {
+ "files": {
+ "kernel": [
+ "src/core/CL/kernels/CLReverseKernel.cpp"
+ ]
+ }
+ },
+ "ROIAlign": {
+ "files": {
+ "kernel": [
+ "src/core/CL/kernels/CLROIAlignLayerKernel.cpp"
+ ]
+ }
+ },
+ "ROIPooling": {
+ "files": {
+ "kernel": [
+ "src/core/CL/kernels/CLROIPoolingLayerKernel.cpp"
+ ]
+ }
+ },
+ "Select": {
+ "files": {
+ "kernel": [
+ "src/core/CL/kernels/CLSelectKernel.cpp"
+ ]
+ }
+ },
+ "SpaceToBatch": {
+ "files": {
+ "kernel": [
+ "src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp"
+ ]
+ }
+ },
+ "SpaceToDepth": {
+ "files": {
+ "kernel": [
+ "src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp"
+ ]
+ }
+ },
+ "Stack": {
+ "files": {
+ "kernel": [
+ "src/core/CL/kernels/CLStackLayerKernel.cpp"
+ ]
+ }
+ },
+ "StridedSlice": {
+ "files": {
+ "kernel": [
+ "src/core/CL/kernels/CLStridedSliceKernel.cpp"
+ ]
+ }
+ },
+ "Tile": {
+ "files": {
+ "kernel": [
+ "src/core/CL/kernels/CLTileKernel.cpp"
+ ]
+ }
+ },
+ "WeightsReshape": {
+ "files": {
+ "kernel": [
+ "src/core/CL/kernels/CLWeightsReshapeKernel.cpp"
+ ]
+ }
+ },
+ "WinogradConv2d": {
+ "files": {
+ "operator": [
+ "src/runtime/gpu/cl/operators/ClWinogradConv2d.cpp"
+ ],
+ "kernel": [
+ "src/core/gpu/cl/kernels/ClWinogradFilterTransformKernel.cpp",
+ "src/core/gpu/cl/kernels/ClWinogradInputTransformKernel.cpp",
+ "src/core/gpu/cl/kernels/ClWinogradOutputTransformKernel.cpp"
+ ]
+ }
+ }
+ }
+ },
+ "cpu": {
+ "all": [
+ "src/cpu/CpuContext.cpp",
+ "src/cpu/CpuQueue.cpp",
+ "src/cpu/CpuTensor.cpp"
+ ],
+ "high_priority": [
+ "Activation",
+ "DepthwiseConv2d",
+ "DirectConv2d",
+ "Permute",
+ "Pool2d",
+ "Reshape"
+ ],
+ "operators": {
+ "Activation": {
+ "files": {
+ "operator": [
+ "src/runtime/cpu/operators/CpuActivation.cpp"
+ ],
+ "kernel": [
+ "src/core/cpu/kernels/CpuActivationKernel.cpp"
+ ],
+ "sve": {
+ "fp32": [
+ "src/core/cpu/kernels/activation/sve/fp32.cpp"
+ ],
+ "fp16": [
+ "src/core/cpu/kernels/activation/sve/fp16.cpp"
+ ],
+ "qsymm16": [
+ "src/core/cpu/kernels/activation/sve/qsymm16.cpp"
+ ],
+ "qasymm8": [
+ "src/core/cpu/kernels/activation/sve/qasymm8.cpp"
+ ],
+ "qasymm8_signed": [
+ "src/core/cpu/kernels/activation/sve/qasymm8_signed.cpp"
+ ]
+ },
+ "neon": {
+ "fp32": [
+ "src/core/cpu/kernels/activation/neon/fp32.cpp"
+ ],
+ "fp16": [
+ "src/core/cpu/kernels/activation/neon/fp16.cpp"
+ ],
+ "qsymm16": [
+ "src/core/cpu/kernels/activation/neon/qsymm16.cpp"
+ ],
+ "qasymm8": [
+ "src/core/cpu/kernels/activation/neon/qasymm8.cpp"
+ ],
+ "qasymm8_signed": [
+ "src/core/cpu/kernels/activation/neon/qasymm8_signed.cpp"
+ ]
+ }
+ }
+ },
+ "Add": {
+ "files": {
+ "operator": [
+ "src/runtime/cpu/operators/CpuAdd.cpp"
+ ],
+ "kernel": [
+ "src/core/cpu/kernels/CpuAddKernel.cpp"
+ ],
+ "sve": {
+ "all": [
+ "src/core/cpu/kernels/add/sve/impl.cpp"
+ ],
+ "qsymm16": [
+ "src/core/cpu/kernels/add/sve/qsymm16.cpp"
+ ],
+ "qasymm8": [
+ "src/core/cpu/kernels/add/sve/qasymm8.cpp"
+ ],
+ "qasymm8_signed": [
+ "src/core/cpu/kernels/add/sve/qasymm8_signed.cpp"
+ ],
+ "integer": [
+ "src/core/cpu/kernels/add/sve/integer.cpp"
+ ]
+ },
+ "neon": {
+ "qsymm16": [
+ "src/core/cpu/kernels/add/neon/qsymm16.cpp"
+ ],
+ "qasymm8": [
+ "src/core/cpu/kernels/add/neon/qasymm8.cpp"
+ ],
+ "qasymm8_signed": [
+ "src/core/cpu/kernels/add/neon/qasymm8_signed.cpp"
+ ],
+ "integer": [
+ "src/core/cpu/kernels/add/neon/integer.cpp"
+ ]
+ }
+ }
+ },
+ "BatchNorm": {
+ "files": {
+ "kernel": [
+ "src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp"
+ ],
+ "sve": {
+ "fp32": [
+ "src/core/NEON/kernels/batchnormalization/impl/SVE/fp32.cpp"
+ ],
+ "fp16": [
+ "src/core/NEON/kernels/batchnormalization/impl/SVE/fp16.cpp"
+ ]
+ },
+ "neon": {
+ "fp32": [
+ "src/core/NEON/kernels/batchnormalization/impl/NEON/fp32.cpp"
+ ],
+ "fp16": [
+ "src/core/NEON/kernels/batchnormalization/impl/NEON/fp16.cpp"
+ ]
+ }
+ }
+ },
+ "BatchToSpace": {
+ "files": {
+ "kernel": [
+ "src/core/NEON/kernels/NEBatchToSpaceLayerKernel.cpp"
+ ]
+ }
+ },
+ "BitwiseAnd": {
+ "files": {
+ "kernel": [
+ "src/core/NEON/kernels/NEBitwiseAndKernel.cpp"
+ ]
+ }
+ },
+ "BitwiseNot": {
+ "files": {
+ "kernel": [
+ "src/core/NEON/kernels/NEBitwiseNotKernel.cpp"
+ ]
+ }
+ },
+ "BitwiseOr": {
+ "files": {
+ "kernel": [
+ "src/core/NEON/kernels/NEBitwiseOrKernel.cpp"
+ ]
+ }
+ },
+ "BitwiseXor": {
+ "files": {
+ "kernel": [
+ "src/core/NEON/kernels/NEBitwiseXorKernel.cpp"
+ ]
+ }
+ },
+ "BoundingBoxTransform": {
+ "files": {
+ "kernel": [
+ "src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp"
+ ]
+ }
+ },
+ "ChannelShuffleLayer": {
+ "files": {
+ "kernel": [
+ "src/core/NEON/kernels/NEChannelShuffleLayerKernel.cpp"
+ ]
+ }
+ },
+ "Col2Im": {
+ "files": {
+ "kernel": [
+ "src/core/NEON/kernels/NECol2ImKernel.cpp"
+ ]
+ }
+ },
+ "Cast": {
+ "files": {
+ "operator": [
+ "src/runtime/cpu/operators/CpuCast.cpp"
+ ],
+ "kernel": [
+ "src/core/cpu/kernels/CpuCastKernel.cpp"
+ ]
+ }
+ },
+ "Concatenate": {
+ "files": {
+ "operator": [
+ "src/runtime/cpu/operators/CpuConcatenate.cpp"
+ ],
+ "kernel": [
+ "src/core/cpu/kernels/CpuConcatenateWidthKernel.cpp",
+ "src/core/cpu/kernels/CpuConcatenateBatchKernel.cpp",
+ "src/core/cpu/kernels/CpuConcatenateDepthKernel.cpp",
+ "src/core/cpu/kernels/CpuConcatenateHeightKernel.cpp"
+ ]
+ }
+ },
+ "ConvertFullyConnectedWeights": {
+ "files": {
+ "operator": [
+ "src/runtime/cpu/operators/CpuConvertFullyConnectedWeights.cpp"
+ ],
+ "kernel": [
+ "src/core/cpu/kernels/CpuConvertFullyConnectedWeightsKernel.cpp"
+ ]
+ }
+ },
+ "ConvertQuantizedSignedness": {
+ "files": {
+ "kernel": [
+ "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.cpp"
+ ]
+ }
+ },
+ "Copy": {
+ "files": {
+ "operator": [
+ "src/runtime/cpu/operators/CpuCopy.cpp"
+ ],
+ "kernel": [
+ "src/core/cpu/kernels/CpuCopyKernel.cpp"
+ ]
+ }
+ },
+ "Crop": {
+ "files": {
+ "kernel": [
+ "src/core/NEON/kernels/NECropKernel.cpp"
+ ]
+ }
+ },
+ "DepthwiseConv2d": {
+ "deps": [
+ "Activation",
+ "Permute"
+ ],
+ "files": {
+ "operator": [
+ "src/runtime/cpu/operators/CpuDepthwiseConv2d.cpp",
+ "src/runtime/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp",
+ "src/core/cpu/kernels/internal/CpuDepthwiseConv2dAssemblyWrapperKernel.cpp"
+ ],
+ "kernel": [
+ "src/core/NEON/kernels/convolution/common/padding.cpp",
+ "src/core/NEON/kernels/convolution/common/qasymm8.cpp",
+ "src/core/NEON/kernels/convolution/common/qsymm8.cpp",
+ "src/core/NEON/kernels/convolution/common/utils.cpp",
+ "src/core/cpu/kernels/CpuDepthwiseConv2dNativeKernel.cpp"
+ ],
+ "sve": {
+ "all": [
+ "src/core/NEON/kernels/arm_conv/depthwise/interleaves/sve_s8q_3x3_dot.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/interleaves/sve_u8q_3x3_dot.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst_strided/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_generic_output9_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp"
+ ]
+ },
+ "neon": {
+ "estate64": [
+ "src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp16.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp32.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/depthwise_s8q.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/depthwise_u8q.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/depthwise_u8s8u8q.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/interleaves/8b_mla.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/interleaves/a64_s8q_3x3_dot.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/interleaves/a64_u8q_3x3_dot.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_generic_output9_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_generic_output9_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_generic_output9_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_generic_output9_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_generic_output9_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp"
+ ]
+ }
+ }
+ },
+ "DepthToSpaceLayer": {
+ "files": {
+ "kernel": [
+ "src/core/NEON/kernels/NEDepthToSpaceLayerKernel.cpp"
+ ]
+ }
+ },
+ "Dequantize": {
+ "files": {
+ "operator": [
+ "src/runtime/cpu/operators/CpuDequantize.cpp"
+ ],
+ "kernel": [
+ "src/core/cpu/kernels/CpuDequantizeKernel.cpp"
+ ]
+ }
+ },
+ "DirectConv2d": {
+ "deps": [
+ "Activation",
+ "FillBorder"
+ ],
+ "files": {
+ "operator": [
+ "src/runtime/cpu/operators/CpuDirectConv2d.cpp"
+ ],
+ "kernel": [
+ "src/core/cpu/kernels/CpuDirectConv2dKernel.cpp",
+ "src/core/cpu/kernels/CpuDirectConv2dOutputStageKernel.cpp"
+ ]
+ }
+ },
+ "Elementwise": {
+ "files": {
+ "operator": [
+ "src/runtime/cpu/operators/CpuElementwise.cpp"
+ ],
+ "kernel": [
+ "src/core/cpu/kernels/CpuElementwiseKernel.cpp"
+ ],
+ "sve": {
+ "all": [
+ "src/core/cpu/kernels/elementwise/sve/elementwise.cpp"
+ ]
+ }
+ }
+ },
+ "ElementwiseUnary": {
+ "files": {
+ "operator": [
+ "src/runtime/cpu/operators/CpuElementwiseUnary.cpp"
+ ],
+ "kernel": [
+ "src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp"
+ ],
+ "sve": {
+ "all": [
+ "src/core/cpu/kernels/elementwise/sve/elementwise_unary.cpp"
+ ]
+ }
+ }
+ },
+ "FFT1D": {
+ "files": {
+ "kernel": [
+ "src/core/NEON/kernels/NEFFTDigitReverseKernel.cpp",
+ "src/core/NEON/kernels/NEFFTRadixStageKernel.cpp",
+ "src/core/NEON/kernels/NEFFTScaleKernel.cpp"
+ ]
+ }
+ },
+ "FillBorder": {
+ "files": {
+ "kernel": [
+ "src/core/NEON/kernels/NEFillBorderKernel.cpp"
+ ]
+ }
+ },
+ "Flatten": {
+ "deps": [
+ "Reshape"
+ ],
+ "files": {
+ "operator": [
+ "src/runtime/cpu/operators/CpuFlatten.cpp"
+ ]
+ }
+ },
+ "Fill": {
+ "files": {
+ "operator": [
+ "src/runtime/cpu/operators/CpuFill.cpp"
+ ],
+ "kernel": [
+ "src/core/cpu/kernels/CpuFillKernel.cpp"
+ ]
+ }
+ },
+ "Floor": {
+ "files": {
+ "operator": [
+ "src/runtime/cpu/operators/CpuFloor.cpp"
+ ],
+ "kernel": [
+ "src/core/cpu/kernels/CpuFloorKernel.cpp"
+ ],
+ "neon": {
+ "fp32": [
+ "src/core/cpu/kernels/floor/neon/fp32.cpp"
+ ],
+ "fp16": [
+ "src/core/cpu/kernels/floor/neon/fp16.cpp"
+ ]
+ }
+ }
+ },
+ "FuseBatchNormalization": {
+ "files": {
+ "kernel": [
+ "src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp"
+ ]
+ }
+ },
+ "GEMM": {
+ "files": {
+ "kernel": [
+ "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp",
+ "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp",
+ "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp",
+ "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp"
+ ]
+ }
+ },
+ "GEMMLowp": {
+ "deps": [
+ "GemmAssemblyDispatch"
+ ],
+ "files": {
+ "operator" : ["src/runtime/cpu/operators/CpuGemmLowpOutputStage.cpp"],
+ "kernel": [
+ "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp",
+ "src/core/cpu/kernels/CpuGemmLowpQuantizeDownInt32ScaleKernel.cpp",
+ "src/core/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp",
+ "src/core/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp",
+ "src/core/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp",
+ "src/core/NEON/kernels/NEGEMMLowpReductionKernel.cpp",
+ "src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.cpp",
+ "src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.cpp"
+ ]
+ }
+ },
+ "GEMMConvolution": {
+ "deps": [
+ "Activation",
+ "Col2Im",
+ "Reshape",
+ "Im2Col",
+ "GEMMLowpOffsetContributionOutputStage",
+ "ConvertQuantizedSignedness"
+ ],
+ "files": {
+ "kernel": [
+ "src/core/NEON/kernels/NEWeightsReshapeKernel.cpp"
+ ]
+ }
+ },
+ "GemmAssemblyDispatch": {
+ "files": {
+ "operator": [
+ "src/runtime/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp"
+ ],
+ "kernel": [
+ "src/core/NEON/kernels/arm_gemm/gemm_fp16.cpp",
+ "src/core/NEON/kernels/arm_gemm/gemm_bf16.cpp",
+ "src/core/NEON/kernels/arm_gemm/gemm_fp32.cpp",
+ "src/core/NEON/kernels/arm_gemm/gemm_int16.cpp",
+ "src/core/NEON/kernels/arm_gemm/gemm_int8.cpp",
+ "src/core/NEON/kernels/arm_gemm/gemm_qint8.cpp",
+ "src/core/NEON/kernels/arm_gemm/gemm_quint8.cpp",
+ "src/core/NEON/kernels/arm_gemm/gemm_uint16.cpp",
+ "src/core/NEON/kernels/arm_gemm/gemm_uint8.cpp",
+ "src/core/NEON/kernels/arm_gemm/mergeresults.cpp",
+ "src/core/NEON/kernels/arm_gemm/interleave_indirect.cpp",
+ "src/core/NEON/kernels/arm_gemm/misc.cpp",
+ "src/core/NEON/kernels/arm_gemm/quantized.cpp",
+ "src/core/NEON/kernels/arm_gemm/rowsum_indirect_s8.cpp",
+ "src/core/NEON/kernels/arm_gemm/rowsum_indirect_u8.cpp"
+ ],
+ "neon": {
+ "estate32": [
+ "src/core/NEON/kernels/arm_gemm/kernels/a32_sgemm_8x6/a53.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a32_sgemm_8x6/a55r1.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a32_sgemm_8x6/generic.cpp"
+ ],
+ "estate64": [
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s16_8x12/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_4x4/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12/a55r1.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12/x1.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u16_8x12/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_4x4/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/a55r1.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/x1.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_gemv_fp32_mla_32/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/a55r1.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/x1.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_bf16fp32_dot_6x16/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16_mla_6x32/a55.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16_mla_6x32/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_6x16/a55.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_6x16/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_8x4/a55.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_8x4/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_dot_4x16/a55.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_dot_4x16/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_dot_6x16/a55.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_dot_6x16/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_dot_6x16/a55.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_dot_6x16/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_dot_4x16/a55.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_dot_4x16/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_dot_6x16/a55.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_dot_6x16/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_dot_8x12/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_mmla_8x12/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_s8s32_mmla_8x12/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_u8u32_mmla_8x12/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a53.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a55.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a55r1.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/x1.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x6/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemv_pretransposed/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_fp32_mla_6x4/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_fp32_mla_8x4/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_6x4/a55.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_6x4/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_8x4/a55.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_8x4/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_6x4/a55.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_6x4/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_8x4/a55.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_8x4/generic.cpp"
+ ]
+ },
+ "sve": {
+ "all": [
+ "src/core/NEON/kernels/arm_gemm/mergeresults-sve.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/sve_gemv_fp32_mla_8VL/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_dot_6x4VL/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_dot_4x4VL/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_dot_6x4VL/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_6x4VL/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_dot_4x4VL/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_6x4VL/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_dot_8x3VL/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_mmla_8x3VL/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_8x3VL/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_8x3VL/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mmla_8x3VL/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_8x3VL/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_mmla_8x3VL/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_mmla_8x3VL/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_fp32_mla_8x1VL/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_s8s32_dot_8x1VL/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_u8u32_dot_8x1VL/generic.cpp"
+ ]
+ }
+ }
+ },
+ "GemmDirectConv2d": {
+ "deps": [
+ "Activation",
+ "GemmAssemblyDispatch",
+ "Permute"
+ ],
+ "files": {
+ "operator": [
+ "src/runtime/cpu/operators/CpuGemmDirectConv2d.cpp"
+ ]
+ }
+ },
+ "Mul": {
+ "files": {
+ "operator": [
+ "src/runtime/cpu/operators/CpuMul.cpp"
+ ],
+ "kernel": [
+ "src/core/cpu/kernels/CpuMulKernel.cpp"
+ ]
+ }
+ },
+ "Quantize": {
+ "files": {
+ "operator": [
+ "src/runtime/cpu/operators/CpuQuantize.cpp"
+ ],
+ "kernel": [
+ "src/core/cpu/kernels/CpuQuantizeKernel.cpp"
+ ]
+ }
+ },
+ "Reshape": {
+ "files": {
+ "operator": [
+ "src/runtime/cpu/operators/CpuReshape.cpp"
+ ],
+ "kernel": [
+ "src/core/cpu/kernels/CpuReshapeKernel.cpp"
+ ]
+ }
+ },
+ "Gather": {
+ "files": {
+ "kernel": [
+ "src/core/NEON/kernels/NEGatherKernel.cpp"
+ ]
+ }
+ },
+ "GenerateProposalsLayer": {
+ "files": {
+ "kernel": [
+ "src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp"
+ ]
+ }
+ },
+ "Im2Col": {
+ "files": {
+ "kernel": [
+ "src/core/NEON/kernels/NEIm2ColKernel.cpp"
+ ]
+ }
+ },
+ "InstanceNormalization": {
+ "files": {
+ "kernel": [
+ "src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp"
+ ]
+ }
+ },
+ "L2Normalize": {
+ "deps": [
+ "Reduction"
+ ],
+ "files": {
+ "kernel": [
+ "src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp"
+ ]
+ }
+ },
+ "Logical": {
+ "files": {
+ "kernel": [
+ "src/core/NEON/kernels/NELogicalKernel.cpp"
+ ]
+ }
+ },
+ "MaxUnpooling": {
+ "files": {
+ "kernel": [
+ "src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.cpp"
+ ]
+ }
+ },
+ "MeanStdDevNormalization": {
+ "files": {
+ "kernel": [
+ "src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp"
+ ]
+ }
+ },
+ "MinMax": {
+ "files": {
+ "kernel": [
+ "src/core/NEON/kernels/NEMinMaxLayerKernel.cpp"
+ ]
+ }
+ },
+ "Normalization": {
+ "deps": [
+ "PixelWiseMultiplication"
+ ],
+ "files": {
+ "kernel": [
+ "src/core/NEON/kernels/NENormalizationLayerKernel.cpp"
+ ]
+ }
+ },
+ "Pad": {
+ "files": {
+ "kernel": [
+ "src/core/NEON/kernels/NEPadLayerKernel.cpp"
+ ]
+ }
+ },
+ "Permute": {
+ "files": {
+ "operator": [
+ "src/runtime/cpu/operators/CpuPermute.cpp"
+ ],
+ "kernel": [
+ "src/core/cpu/kernels/CpuPermuteKernel.cpp"
+ ]
+ }
+ },
+ "Pool2d": {
+ "files": {
+ "operator": [
+ "src/runtime/cpu/operators/CpuPool2d.cpp"
+ ],
+ "kernel": [
+ "src/core/cpu/kernels/CpuPool2dKernel.cpp",
+ "src/core/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/cpp_nhwc_1x1_stride_any_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/pooling_fp16.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/pooling_fp32.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/pooling_s8.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/pooling_s8q.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/pooling_u8.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/pooling_u8q.cpp"
+ ],
+ "neon": {
+ "nchw": [
+ "src/core/cpu/kernels/pooling/neon/nchw/all.cpp"
+ ],
+ "fp32": [
+ "src/core/cpu/kernels/pooling/neon/fp32.cpp"
+ ],
+ "fp16": [
+ "src/core/cpu/kernels/pooling/neon/fp16.cpp"
+ ],
+ "qasymm8": [
+ "src/core/cpu/kernels/pooling/neon/qasymm8.cpp"
+ ],
+ "qasymm8_signed": [
+ "src/core/cpu/kernels/pooling/neon/qasymm8_signed.cpp"
+ ],
+ "estate64": [
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_avg_generic_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_max_generic_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_avg_generic_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_max_generic_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_avg_generic_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_max_generic_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8q_nhwc_avg_generic_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8q_nhwc_max_generic_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_avg_generic_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_max_generic_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8q_nhwc_avg_generic_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8q_nhwc_max_generic_depthfirst/generic.cpp"
+ ]
+ },
+ "sve": {
+ "all": [
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_avg_generic_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_max_generic_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_avg_generic_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_max_generic_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_avg_generic_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_max_generic_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8q_nhwc_avg_generic_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8q_nhwc_max_generic_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_avg_generic_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_max_generic_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_avg_generic_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_max_generic_depthfirst/generic.cpp"
+ ]
+ }
+ }
+ },
+ "PriorBox": {
+ "files": {
+ "kernel": [
+ "src/core/NEON/kernels/NEPriorBoxLayerKernel.cpp"
+ ]
+ }
+ },
+ "QLSTMLayerNormalization": {
+ "files": {
+ "kernel": [
+ "src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.cpp"
+ ]
+ }
+ },
+ "Range": {
+ "files": {
+ "kernel": [
+ "src/core/NEON/kernels/NERangeKernel.cpp"
+ ]
+ }
+ },
+ "ReductionOperation": {
+ "files": {
+ "kernel": [
+ "src/core/NEON/kernels/NEReductionOperationKernel.cpp"
+ ]
+ }
+ },
+ "Remap": {
+ "files": {
+ "kernel": [
+ "src/core/NEON/kernels/NERemapKernel.cpp"
+ ]
+ }
+ },
+ "Reorg": {
+ "files": {
+ "kernel": [
+ "src/core/NEON/kernels/NEReorgLayerKernel.cpp"
+ ]
+ }
+ },
+ "Reverse": {
+ "files": {
+ "kernel": [
+ "src/core/NEON/kernels/NEReverseKernel.cpp"
+ ]
+ }
+ },
+ "ROIAlign": {
+ "files": {
+ "kernel": [
+ "src/core/NEON/kernels/NEROIAlignLayerKernel.cpp"
+ ]
+ }
+ },
+ "ROIPooling": {
+ "files": {
+ "kernel": [
+ "src/core/NEON/kernels/NEROIPoolingLayerKernel.cpp"
+ ]
+ }
+ },
+ "Select": {
+ "files": {
+ "kernel": [
+ "src/core/NEON/kernels/NESelectKernel.cpp"
+ ]
+ }
+ },
+ "SpaceToBatch": {
+ "files": {
+ "kernel": [
+ "src/core/NEON/kernels/NESpaceToBatchLayerKernel.cpp"
+ ]
+ }
+ },
+ "SpaceToDepth": {
+ "files": {
+ "kernel": [
+ "src/core/NEON/kernels/NESpaceToDepthLayerKernel.cpp"
+ ]
+ }
+ },
+ "Stack": {
+ "files": {
+ "kernel": [
+ "src/core/NEON/kernels/NEStackLayerKernel.cpp"
+ ]
+ }
+ },
+ "StridedSlice": {
+ "files": {
+ "kernel": [
+ "src/core/NEON/kernels/NEStridedSliceKernel.cpp"
+ ]
+ }
+ },
+ "Scale": {
+ "files": {
+ "operator": [
+ "src/runtime/cpu/operators/CpuScale.cpp"
+ ],
+ "kernel": [
+ "src/core/cpu/kernels/CpuScaleKernel.cpp"
+ ],
+ "sve": {
+ "fp32": [
+ "src/core/cpu/kernels/scale/sve/fp32.cpp"
+ ],
+ "fp16": [
+ "src/core/cpu/kernels/scale/sve/fp16.cpp"
+ ],
+ "qasymm8": [
+ "src/core/cpu/kernels/scale/sve/qasymm8.cpp"
+ ],
+ "qasymm8_signed": [
+ "src/core/cpu/kernels/scale/sve/qasymm8_signed.cpp"
+ ],
+ "integer": [
+ "src/core/cpu/kernels/scale/sve/integer.cpp"
+ ]
+ },
+ "neon": {
+ "fp16": [
+ "src/core/cpu/kernels/scale/neon/fp16.cpp"
+ ],
+ "qasymm8": [
+ "src/core/cpu/kernels/scale/neon/qasymm8.cpp"
+ ],
+ "qasymm8_signed": [
+ "src/core/cpu/kernels/scale/neon/qasymm8_signed.cpp"
+ ],
+ "integer": [
+ "src/core/cpu/kernels/scale/neon/integer.cpp"
+ ]
+ }
+ }
+ },
+ "Softmax": {
+ "deps": [
+ "Permute"
+ ],
+ "files": {
+ "operator": [
+ "src/runtime/cpu/operators/CpuSoftmax.cpp"
+ ],
+ "kernel": [
+ "src/core/cpu/kernels/CpuSoftmaxKernel.cpp"
+ ],
+ "sve": {
+ "all": [
+ "src/core/cpu/kernels/softmax/impl/sve/impl.cpp"
+ ]
+ }
+ }
+ },
+ "Sub": {
+ "files": {
+ "operator": [
+ "src/runtime/cpu/operators/CpuSub.cpp"
+ ],
+ "kernel": [
+ "src/core/cpu/kernels/CpuSubKernel.cpp"
+ ],
+ "neon": {
+ "qsymm16": [
+ "src/core/cpu/kernels/sub/neon/qsymm16.cpp"
+ ],
+ "qasymm8": [
+ "src/core/cpu/kernels/sub/neon/qasymm8.cpp"
+ ],
+ "qasymm8_signed": [
+ "src/core/cpu/kernels/sub/neon/qasymm8_signed.cpp"
+ ],
+ "integer": [
+ "src/core/cpu/kernels/sub/neon/integer.cpp"
+ ]
+ }
+ }
+ },
+ "Transpose": {
+ "files": {
+ "operator": [
+ "src/runtime/cpu/operators/CpuTranspose.cpp"
+ ],
+ "kernel": [
+ "src/core/cpu/kernels/CpuTransposeKernel.cpp"
+ ]
+ }
+ },
+ "Tile": {
+ "files": {
+ "kernel": [
+ "src/core/NEON/kernels/NETileKernel.cpp"
+ ]
+ }
+ },
+ "WinogradConvolution": {
+ "deps": [
+ "Activation",
+ "Permute"
],
- "gpu": [
- "src/c/cl/AclOpenClExt.cpp"
- ]
- },
-
- "gpu" :
- {
- "core" :
- {
- "kernels" :
- {
- "high_priority" : [
- "src/core/gpu/cl/kernels/ClActivationKernel.cpp",
- "src/core/gpu/cl/kernels/ClDirectConv2dKernel.cpp",
- "src/core/gpu/cl/kernels/ClPermuteKernel.cpp",
- "src/core/gpu/cl/kernels/ClPool2dKernel.cpp",
- "src/core/gpu/cl/kernels/ClReshapeKernel.cpp"
- ],
- "all" : [
- "src/core/gpu/cl/kernels/ClBatchConcatenateKernel.cpp",
- "src/core/gpu/cl/kernels/ClCastKernel.cpp",
- "src/core/gpu/cl/kernels/ClConvertFullyConnectedWeightsKernel.cpp",
- "src/core/gpu/cl/kernels/ClCopyKernel.cpp",
- "src/core/gpu/cl/kernels/ClCropKernel.cpp",
- "src/core/gpu/cl/kernels/ClDepthConcatenateKernel.cpp",
- "src/core/gpu/cl/kernels/ClDequantizeKernel.cpp",
- "src/core/gpu/cl/kernels/ClElementwiseKernel.cpp",
- "src/core/gpu/cl/kernels/ClElementwiseUnaryKernel.cpp",
- "src/core/gpu/cl/kernels/ClFillKernel.cpp",
- "src/core/gpu/cl/kernels/ClFloorKernel.cpp",
- "src/core/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.cpp",
- "src/core/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.cpp",
- "src/core/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.cpp",
- "src/core/gpu/cl/kernels/ClGemmMatrixMultiplyKernel.cpp",
- "src/core/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.cpp",
- "src/core/gpu/cl/kernels/ClGemmReshapeLhsMatrixKernel.cpp",
- "src/core/gpu/cl/kernels/ClHeightConcatenateKernel.cpp",
- "src/core/gpu/cl/kernels/ClMulKernel.cpp",
- "src/core/gpu/cl/kernels/ClQuantizeKernel.cpp",
- "src/core/gpu/cl/kernels/ClScaleKernel.cpp",
- "src/core/gpu/cl/kernels/ClSoftmaxKernel.cpp",
- "src/core/gpu/cl/kernels/ClTransposeKernel.cpp",
- "src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.cpp",
- "src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.cpp",
- "src/core/gpu/cl/kernels/ClWidthConcatenateKernel.cpp",
- "src/core/gpu/cl/kernels/ClWinogradFilterTransformKernel.cpp",
- "src/core/gpu/cl/kernels/ClWinogradInputTransformKernel.cpp",
- "src/core/gpu/cl/kernels/ClWinogradOutputTransformKernel.cpp",
- "src/core/CL/kernels/CLGenerateProposalsLayerKernel.cpp",
- "src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp",
- "src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp",
- "src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp",
- "src/core/CL/kernels/CLBitwiseKernel.cpp",
- "src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp",
- "src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp",
- "src/core/CL/kernels/CLCol2ImKernel.cpp",
- "src/core/CL/kernels/CLComparisonKernel.cpp",
- "src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp",
- "src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.cpp",
- "src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp",
- "src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp",
- "src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp",
- "src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp",
- "src/core/CL/kernels/CLFFTDigitReverseKernel.cpp",
- "src/core/CL/kernels/CLFFTRadixStageKernel.cpp",
- "src/core/CL/kernels/CLFFTScaleKernel.cpp",
- "src/core/CL/kernels/CLFillBorderKernel.cpp",
- "src/core/CL/kernels/CLFuseBatchNormalizationKernel.cpp",
- "src/core/CL/kernels/CLGatherKernel.cpp",
- "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.cpp",
- "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.cpp",
- "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp",
- "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp",
- "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.cpp",
- "src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.cpp",
- "src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.cpp",
- "src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.cpp",
- "src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp",
- "src/core/CL/kernels/CLIm2ColKernel.cpp",
- "src/core/CL/kernels/CLInstanceNormalizationLayerKernel.cpp",
- "src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp",
- "src/core/CL/kernels/CLMaxUnpoolingLayerKernel.cpp",
- "src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp",
- "src/core/CL/kernels/CLMinMaxLayerKernel.cpp",
- "src/core/CL/kernels/CLNormalizationLayerKernel.cpp",
- "src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp",
- "src/core/CL/kernels/CLPadLayerKernel.cpp",
- "src/core/CL/kernels/CLPriorBoxLayerKernel.cpp",
- "src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.cpp",
- "src/core/CL/kernels/CLRangeKernel.cpp",
- "src/core/CL/kernels/CLReductionOperationKernel.cpp",
- "src/core/CL/kernels/CLRemapKernel.cpp",
- "src/core/CL/kernels/CLReorgLayerKernel.cpp",
- "src/core/CL/kernels/CLReverseKernel.cpp",
- "src/core/CL/kernels/CLROIAlignLayerKernel.cpp",
- "src/core/CL/kernels/CLROIPoolingLayerKernel.cpp",
- "src/core/CL/kernels/CLSelectKernel.cpp",
- "src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp",
- "src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp",
- "src/core/CL/kernels/CLStackLayerKernel.cpp",
- "src/core/CL/kernels/CLStridedSliceKernel.cpp",
- "src/core/CL/kernels/CLTileKernel.cpp",
- "src/core/CL/kernels/CLWeightsReshapeKernel.cpp"
- ]
- }
- }
- },
- "cpu" :
- {
- "runtime" :
- {
- "all" : [
- "src/cpu/CpuContext.cpp",
- "src/cpu/CpuQueue.cpp",
- "src/cpu/CpuTensor.cpp"
- ],
- "operators" :
- {
- "high_priority" : [
- "src/runtime/cpu/operators/CpuActivation.cpp",
- "src/runtime/cpu/operators/CpuDepthwiseConv2d.cpp",
- "src/runtime/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp",
- "src/runtime/cpu/operators/CpuDirectConv2d.cpp",
- "src/runtime/cpu/operators/CpuPermute.cpp",
- "src/runtime/cpu/operators/CpuPool2d.cpp"
- ],
- "internal" : [
- "src/runtime/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp"
- ],
- "all" : [
- "src/runtime/cpu/operators/CpuAdd.cpp",
- "src/runtime/cpu/operators/CpuCast.cpp",
- "src/runtime/cpu/operators/CpuConcatenate.cpp",
- "src/runtime/cpu/operators/CpuConvertFullyConnectedWeights.cpp",
- "src/runtime/cpu/operators/CpuCopy.cpp",
- "src/runtime/cpu/operators/CpuDequantize.cpp",
- "src/runtime/cpu/operators/CpuElementwise.cpp",
- "src/runtime/cpu/operators/CpuElementwiseUnary.cpp",
- "src/runtime/cpu/operators/CpuFill.cpp",
- "src/runtime/cpu/operators/CpuFlatten.cpp",
- "src/runtime/cpu/operators/CpuFloor.cpp",
- "src/runtime/cpu/operators/CpuGemmDirectConv2d.cpp",
- "src/runtime/cpu/operators/CpuGemmLowpOutputStage.cpp",
- "src/runtime/cpu/operators/CpuMul.cpp",
- "src/runtime/cpu/operators/CpuQuantize.cpp",
- "src/runtime/cpu/operators/CpuReshape.cpp",
- "src/runtime/cpu/operators/CpuScale.cpp",
- "src/runtime/cpu/operators/CpuSoftmax.cpp",
- "src/runtime/cpu/operators/CpuSub.cpp",
- "src/runtime/cpu/operators/CpuTranspose.cpp"
- ]
- }
- },
- "core" :
- {
- "kernels" :
- {
- "high_priority" : [
- "src/core/cpu/kernels/CpuActivationKernel.cpp",
- "src/core/cpu/kernels/CpuDepthwiseConv2dNativeKernel.cpp",
- "src/core/cpu/kernels/CpuDirectConv2dKernel.cpp",
- "src/core/cpu/kernels/CpuDirectConv2dOutputStageKernel.cpp",
- "src/core/cpu/kernels/CpuPermuteKernel.cpp",
- "src/core/cpu/kernels/CpuPool2dKernel.cpp",
- "src/core/cpu/kernels/CpuReshapeKernel.cpp"
- ],
- "all" : [
- "src/core/cpu/kernels/CpuAddKernel.cpp",
- "src/core/cpu/kernels/CpuCastKernel.cpp",
- "src/core/cpu/kernels/CpuConcatenateBatchKernel.cpp",
- "src/core/cpu/kernels/CpuConcatenateDepthKernel.cpp",
- "src/core/cpu/kernels/CpuConcatenateHeightKernel.cpp",
- "src/core/cpu/kernels/CpuConcatenateWidthKernel.cpp",
- "src/core/cpu/kernels/CpuConvertFullyConnectedWeightsKernel.cpp",
- "src/core/cpu/kernels/CpuCopyKernel.cpp",
- "src/core/cpu/kernels/CpuDequantizeKernel.cpp",
- "src/core/cpu/kernels/CpuElementwiseKernel.cpp",
- "src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp",
- "src/core/cpu/kernels/CpuFillKernel.cpp",
- "src/core/cpu/kernels/CpuFloorKernel.cpp",
- "src/core/cpu/kernels/CpuGemmLowpQuantizeDownInt32ScaleKernel.cpp",
- "src/core/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp",
- "src/core/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp",
- "src/core/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp",
- "src/core/cpu/kernels/CpuMulKernel.cpp",
- "src/core/cpu/kernels/CpuQuantizeKernel.cpp",
- "src/core/cpu/kernels/CpuScaleKernel.cpp",
- "src/core/cpu/kernels/CpuSoftmaxKernel.cpp",
- "src/core/cpu/kernels/CpuSubKernel.cpp",
- "src/core/cpu/kernels/CpuTransposeKernel.cpp"
- ]
- },
-
- "sve" :
- {
- "all" : [
- "src/core/cpu/kernels/add/sve/impl.cpp",
- "src/core/cpu/kernels/softmax/impl/sve/impl.cpp",
- "src/core/cpu/kernels/elementwise/sve/elementwise.cpp",
- "src/core/cpu/kernels/elementwise/sve/elementwise_unary.cpp"
- ],
- "fp32" : [
- "src/core/cpu/kernels/activation/sve/fp32.cpp",
- "src/core/cpu/kernels/scale/sve/fp32.cpp",
- "src/core/NEON/kernels/batchnormalization/impl/SVE/fp32.cpp"
- ],
- "fp16" : [
- "src/core/cpu/kernels/activation/sve/fp16.cpp",
- "src/core/cpu/kernels/scale/sve/fp16.cpp",
- "src/core/NEON/kernels/batchnormalization/impl/SVE/fp16.cpp"
- ],
- "qsymm16" : [
- "src/core/cpu/kernels/activation/sve/qsymm16.cpp",
- "src/core/cpu/kernels/add/sve/qsymm16.cpp"
- ],
- "qasymm8" : [
- "src/core/cpu/kernels/activation/sve/qasymm8.cpp",
- "src/core/cpu/kernels/add/sve/qasymm8.cpp",
- "src/core/cpu/kernels/scale/sve/qasymm8.cpp"
- ],
- "qasymm8_signed" : [
- "src/core/cpu/kernels/activation/sve/qasymm8_signed.cpp",
- "src/core/cpu/kernels/add/sve/qasymm8_signed.cpp",
- "src/core/cpu/kernels/scale/sve/qasymm8_signed.cpp"
- ],
- "integer" : [
- "src/core/cpu/kernels/add/sve/integer.cpp",
- "src/core/cpu/kernels/scale/sve/integer.cpp"
- ]
- },
-
- "neon":
- {
- "nchw" : [
- "src/core/cpu/kernels/pooling/neon/nchw/all.cpp"
- ],
- "fp32" : [
- "src/core/cpu/kernels/activation/neon/fp32.cpp",
- "src/core/cpu/kernels/floor/neon/fp32.cpp",
- "src/core/cpu/kernels/pooling/neon/fp32.cpp",
- "src/core/NEON/kernels/batchnormalization/impl/NEON/fp32.cpp"
- ],
- "fp16" : [
- "src/core/cpu/kernels/activation/neon/fp16.cpp",
- "src/core/cpu/kernels/floor/neon/fp16.cpp",
- "src/core/cpu/kernels/pooling/neon/fp16.cpp",
- "src/core/cpu/kernels/scale/neon/fp16.cpp",
- "src/core/NEON/kernels/batchnormalization/impl/NEON/fp16.cpp"
- ],
- "qsymm16" : [
- "src/core/cpu/kernels/activation/neon/qsymm16.cpp",
- "src/core/cpu/kernels/add/neon/qsymm16.cpp",
- "src/core/cpu/kernels/sub/neon/qsymm16.cpp"
-
- ],
- "qasymm8" : [
- "src/core/cpu/kernels/activation/neon/qasymm8.cpp",
- "src/core/cpu/kernels/add/neon/qasymm8.cpp",
- "src/core/cpu/kernels/pooling/neon/qasymm8.cpp",
- "src/core/cpu/kernels/scale/neon/qasymm8.cpp",
- "src/core/cpu/kernels/sub/neon/qasymm8.cpp"
- ],
- "qasymm8_signed" : [
- "src/core/cpu/kernels/activation/neon/qasymm8_signed.cpp",
- "src/core/cpu/kernels/add/neon/qasymm8_signed.cpp",
- "src/core/cpu/kernels/pooling/neon/qasymm8_signed.cpp",
- "src/core/cpu/kernels/scale/neon/qasymm8_signed.cpp",
- "src/core/cpu/kernels/sub/neon/qasymm8_signed.cpp"
- ],
- "integer" : [
- "src/core/cpu/kernels/sub/neon/integer.cpp",
- "src/core/cpu/kernels/add/neon/integer.cpp"
- ]
- }
+ "files": {
+ "kernel": [
+ "src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp",
+ "src/core/NEON/kernels/convolution/winograd/padding.cpp",
+ "src/core/NEON/kernels/convolution/winograd/winograd.cpp",
+ "src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_1x8_fp32_fp32_integers.cpp",
+ "src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_4x4_fp16_fp16_integers.cpp",
+ "src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_4x4_fp32_fp32_integers.cpp",
+ "src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_6x6_fp16_fp16_integers.cpp",
+ "src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_6x6_fp32_fp32_integers.cpp",
+ "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_2_7_fp32_fp32_integers.cpp",
+ "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_2x2_3x3_fp32_fp32_integers.cpp",
+ "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_2x2_5x5_fp32_fp32_integers.cpp",
+ "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_4_5_fp32_fp32_integers.cpp",
+ "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_4x4_3x3_fp16_fp16_integers.cpp",
+ "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_4x4_3x3_fp32_fp32_integers.cpp",
+ "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_6_3_fp32_fp32_integers.cpp",
+ "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_2_7_fp32_fp32_integers.cpp",
+ "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_2x2_3x3_fp32_fp32_integers.cpp",
+ "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_2x2_5x5_fp32_fp32_integers.cpp",
+ "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_4_5_fp32_fp32_integers.cpp",
+ "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_4x4_3x3_fp16_fp16_integers.cpp",
+ "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_4x4_3x3_fp32_fp32_integers.cpp",
+ "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_6_3_fp32_fp32_integers.cpp"
+ ]
}
+ }
}
+ }
} \ No newline at end of file
diff --git a/src/common/cpuinfo/CpuInfo.cpp b/src/common/cpuinfo/CpuInfo.cpp
index 436e7ea803..32504acc44 100644
--- a/src/common/cpuinfo/CpuInfo.cpp
+++ b/src/common/cpuinfo/CpuInfo.cpp
@@ -25,7 +25,6 @@
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Log.h"
-#include "src/common/cpuinfo/target/CpuInfoSveUtils.h"
#include "support/StringSupport.h"
#include "support/ToolchainSupport.h"
@@ -260,6 +259,20 @@ int get_max_cpus()
return max_cpus;
}
#endif /* !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) */
+
+#if defined(BARE_METAL) && defined(__aarch64__)
+/** Read the raw SVE feature ID register (ID_AA64ZFR0_EL1).
+ *
+ * The register is read with an MRS that is emitted as a literal instruction
+ * word through `.inst` rather than by mnemonic; presumably this keeps the file
+ * buildable with assemblers that do not recognise the register name
+ * (NOTE(review): confirm the motivation).
+ *
+ * The encoded instruction hard-codes x3 as its destination, so the value is
+ * copied from x3 into the compiler-chosen output register afterwards and x3
+ * is listed in the clobber list.
+ *
+ * @return The 64-bit value read from ID_AA64ZFR0_EL1.
+ */
+uint64_t get_sve_feature_reg()
+{
+ uint64_t svefr0 = 0;
+ __asm __volatile(
+ ".inst 0xd5380483 // mrs x3, ID_AA64ZFR0_EL1\n"
+ "MOV %0, X3"
+ : "=r"(svefr0)
+ :
+ : "x3");
+ return svefr0;
+}
+#endif /* defined(BARE_METAL) && defined(__aarch64__) */
} // namespace
CpuInfo::CpuInfo(CpuIsaInfo isa, std::vector<CpuModel> cpus)
diff --git a/src/common/cpuinfo/CpuInfo.h b/src/common/cpuinfo/CpuInfo.h
index f3056d2faf..c04c9f4ec8 100644
--- a/src/common/cpuinfo/CpuInfo.h
+++ b/src/common/cpuinfo/CpuInfo.h
@@ -79,17 +79,25 @@ public:
{
return _isa.bf16;
}
+ bool has_svebf16() const
+ {
+ return _isa.svebf16;
+ }
bool has_dotprod() const
{
return _isa.dot;
}
- bool has_immla() const
+ bool has_i8mm() const
+ {
+ return _isa.i8mm;
+ }
+ bool has_svei8mm() const
{
- return _isa.immla;
+ return _isa.svei8mm;
}
- bool has_fmmla() const
+ bool has_svef32mm() const
{
- return _isa.fmmla;
+ return _isa.svef32mm;
}
CpuModel cpu_model(uint32_t cpuid) const;
diff --git a/src/common/cpuinfo/CpuIsaInfo.cpp b/src/common/cpuinfo/CpuIsaInfo.cpp
index d99f9aec29..14466ef4e7 100644
--- a/src/common/cpuinfo/CpuIsaInfo.cpp
+++ b/src/common/cpuinfo/CpuIsaInfo.cpp
@@ -90,6 +90,10 @@ void decode_hwcaps(CpuIsaInfo &isa, const uint32_t hwcaps, const uint32_t hwcaps
{
isa.bf16 = true;
}
+ if(hwcaps2 & ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVEBF16)
+ {
+ isa.svebf16 = true;
+ }
// Instruction extensions
if(hwcaps & ARM_COMPUTE_CPU_FEATURE_HWCAP_ASIMDDP)
@@ -98,11 +102,15 @@ void decode_hwcaps(CpuIsaInfo &isa, const uint32_t hwcaps, const uint32_t hwcaps
}
if(hwcaps2 & ARM_COMPUTE_CPU_FEATURE_HWCAP2_I8MM)
{
- isa.immla = true;
+ isa.i8mm = true;
+ }
+ if(hwcaps2 & ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVEI8MM)
+ {
+ isa.svei8mm = true;
}
if(hwcaps2 & ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVEF32MM)
{
- isa.fmmla = true;
+ isa.svef32mm = true;
}
}
#else /* defined(__aarch64__) */
@@ -133,6 +141,10 @@ void decode_regs(CpuIsaInfo &isa, const uint64_t isar0, const uint64_t isar1, co
{
isa.bf16 = true;
}
+ if((svefr0 >> 20) & 0xf)
+ {
+ isa.svebf16 = true;
+ }
// Instruction extensions
if((isar0 >> 44) & 0xf)
@@ -141,11 +153,15 @@ void decode_regs(CpuIsaInfo &isa, const uint64_t isar0, const uint64_t isar1, co
}
if((isar1 >> 48) & 0xf)
{
- isa.immla = true;
+ isa.i8mm = true;
+ }
+ if((svefr0 >> 44) & 0xf)
+ {
+ isa.svei8mm = true;
}
if((svefr0 >> 52) & 0xf)
{
- isa.fmmla = true;
+ isa.svef32mm = true;
}
}
diff --git a/src/common/cpuinfo/CpuIsaInfo.h b/src/common/cpuinfo/CpuIsaInfo.h
index 1125f766dd..a2aace1b80 100644
--- a/src/common/cpuinfo/CpuIsaInfo.h
+++ b/src/common/cpuinfo/CpuIsaInfo.h
@@ -44,11 +44,13 @@ struct CpuIsaInfo
/* Data-type extensions support */
bool fp16{ false };
bool bf16{ false };
+ bool svebf16{ false };
/* Instruction support */
bool dot{ false };
- bool immla{ false };
- bool fmmla{ false };
+ bool i8mm{ false };
+ bool svei8mm{ false };
+ bool svef32mm{ false };
};
/** Identify ISA related information through system information
diff --git a/src/core/CPP/CPPTypes.cpp b/src/core/CPP/CPPTypes.cpp
index edcb9cb1ba..44cd000ada 100644
--- a/src/core/CPP/CPPTypes.cpp
+++ b/src/core/CPP/CPPTypes.cpp
@@ -36,6 +36,12 @@ struct CPUInfo::Impl
unsigned int L2_cache_size = 262144;
};
+/** Access the shared CPUInfo instance.
+ *
+ * The object is a function-local static, so it is constructed the first time
+ * this function is called and every caller receives a reference to the same
+ * instance.
+ *
+ * @return Reference to the shared CPUInfo object.
+ */
+CPUInfo &CPUInfo::get()
+{
+ static CPUInfo _cpuinfo;
+ return _cpuinfo;
+}
+
CPUInfo::CPUInfo()
: _impl(std::make_unique<Impl>())
{
@@ -49,11 +55,6 @@ unsigned int CPUInfo::get_cpu_num() const
return _impl->info.num_cpus();
}
-bool CPUInfo::has_sve() const
-{
- return _impl->info.has_sve();
-}
-
bool CPUInfo::has_fp16() const
{
return _impl->info.has_fp16();
@@ -64,11 +65,41 @@ bool CPUInfo::has_bf16() const
return _impl->info.has_bf16();
}
+/** Report the SVE BF16 capability flag held by the wrapped cpu info object.
+ *
+ * @return Result of the underlying has_svebf16() query.
+ */
+bool CPUInfo::has_svebf16() const
+{
+ return _impl->info.has_svebf16();
+}
+
bool CPUInfo::has_dotprod() const
{
return _impl->info.has_dotprod();
}
+/** Report the SVE F32MM capability flag held by the wrapped cpu info object.
+ *
+ * @return Result of the underlying has_svef32mm() query.
+ */
+bool CPUInfo::has_svef32mm() const
+{
+ return _impl->info.has_svef32mm();
+}
+
+/** Report the I8MM capability flag held by the wrapped cpu info object.
+ *
+ * @return Result of the underlying has_i8mm() query.
+ */
+bool CPUInfo::has_i8mm() const
+{
+ return _impl->info.has_i8mm();
+}
+
+/** Report the SVE I8MM capability flag held by the wrapped cpu info object.
+ *
+ * @return Result of the underlying has_svei8mm() query.
+ */
+bool CPUInfo::has_svei8mm() const
+{
+ return _impl->info.has_svei8mm();
+}
+
+/** Forward the SVE capability query to the wrapped cpu info object.
+ *
+ * @return Result of the underlying has_sve() query.
+ */
+bool CPUInfo::has_sve() const
+{
+ return _impl->info.has_sve();
+}
+
+/** Forward the SVE2 capability query to the wrapped cpu info object.
+ *
+ * @return Result of the underlying has_sve2() query.
+ */
+bool CPUInfo::has_sve2() const
+{
+ return _impl->info.has_sve2();
+}
+
CPUModel CPUInfo::get_cpu_model() const
{
return _impl->info.cpu_model();
diff --git a/src/core/NEON/SVEAsymm.h b/src/core/NEON/SVEAsymm.h
index 4b0ecd9eea..40b8e64b67 100644
--- a/src/core/NEON/SVEAsymm.h
+++ b/src/core/NEON/SVEAsymm.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020 Arm Limited.
+ * Copyright (c) 2020-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_SVEASYMM_H
#define ARM_COMPUTE_SVEASYMM_H
-#if defined(__ARM_FEATURE_SVE2)
+#if defined(ARM_COMPUTE_ENABLE_SVE2)
#include "src/core/NEON/SVEMath.h"
#include <arm_sve.h>
@@ -258,5 +258,5 @@ inline svuint16x2_t svquantize_qasymm16_z(svbool_t pg, const svfloat32x4_t qv, c
}
} // namespace arm_compute
#include "src/core/NEON/SVEAsymm.inl"
-#endif /* defined(__ARM_FEATURE_SVE2) */
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */
#endif // ARM_COMPUTE_NEASYMM_H
diff --git a/src/core/NEON/SVEAsymm.inl b/src/core/NEON/SVEAsymm.inl
index edf5733c36..e85cacd721 100644
--- a/src/core/NEON/SVEAsymm.inl
+++ b/src/core/NEON/SVEAsymm.inl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020 Arm Limited.
+ * Copyright (c) 2020-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,7 +23,7 @@
*/
namespace arm_compute
{
-#if defined(__ARM_FEATURE_SVE2)
+#if defined(ARM_COMPUTE_ENABLE_SVE2)
inline svuint8_t svmla_qasymm8_z(svbool_t pg, svuint8_t vd, svfloat32_t vs, svfloat32_t vo)
{
// Convert uint8 vectors to uint16 vectors
@@ -101,5 +101,5 @@ inline svint8_t svmla_qasymm8_signed_z(svbool_t pg, svint8_t vd, svfloat32_t vs,
const auto res = svqxtnt_s16(svqxtnb_s16(vd_low_s16), vd_high_s16);
return res;
}
-#endif /* (__ARM_FEATURE_SVE2) */
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */
} // namespace arm_compute
diff --git a/src/core/NEON/SVEMath.h b/src/core/NEON/SVEMath.h
index dde75e8088..5ada7ae0ff 100644
--- a/src/core/NEON/SVEMath.h
+++ b/src/core/NEON/SVEMath.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_SVEMATH_H
#define ARM_COMPUTE_SVEMATH_H
-#if defined(ENABLE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
#include "src/core/NEON/wrapper/intrinsics/svcvt.h"
#include "src/core/NEON/wrapper/intrinsics/svdup_n.h"
#include "src/core/NEON/wrapper/intrinsics/svreinterpret.h"
@@ -185,5 +185,5 @@ int_vec_type convert_float_to_int(const svfloat32_t &in_0, const svfloat32_t &in
} // namespace arm_compute
#include "src/core/NEON/SVEMath.inl"
-#endif /* defined(ENABLE_SVE) */
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */
#endif /* ARM_COMPUTE_SVEMATH_H */ \ No newline at end of file
diff --git a/src/core/NEON/SVEMath.inl b/src/core/NEON/SVEMath.inl
index 7625e5be34..5ebef5ad6a 100644
--- a/src/core/NEON/SVEMath.inl
+++ b/src/core/NEON/SVEMath.inl
@@ -24,7 +24,7 @@
#include <cmath>
#include <limits>
-#if defined(__ARM_FEATURE_SVE) && defined(ENABLE_SVE)
+#if defined(__ARM_FEATURE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE)
#ifndef M_PI
#define M_PI (3.14159265358979323846)
@@ -117,22 +117,22 @@ inline svfloat32_t svexp_f32_z(svbool_t pg, svfloat32_t x)
inline svfloat16_t svexp_f16_z(svbool_t pg, svfloat16_t x)
{
auto bottom = svcvt_f32_z(pg, x);
-#if defined(__ARM_FEATURE_SVE2)
+#if defined(ARM_COMPUTE_ENABLE_SVE2)
auto top = svcvtlt_f32_x(pg, x);
auto pg_top = pg;
-#else /* defined(__ARM_FEATURE_SVE2) */
+#else /* defined(ARM_COMPUTE_ENABLE_SVE2) */
auto pg_top = svptrue_b16();
auto top = svcvt_f32_z(pg_top, svreinterpret_f16(svrevh_z(svptrue_b16(), svreinterpret_u32(x))));
-#endif /* defined(__ARM_FEATURE_SVE2) */
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */
bottom = svexp_f32_z(pg, bottom);
top = svexp_f32_z(pg_top, top);
-#if defined(__ARM_FEATURE_SVE2)
+#if defined(ARM_COMPUTE_ENABLE_SVE2)
return svcvtnt_f16_m(svcvt_f16_z(pg, bottom), pg_top, top);
-#else /* defined(__ARM_FEATURE_SVE2) */
+#else /* defined(ARM_COMPUTE_ENABLE_SVE2) */
return svtrn1(svcvt_f16_z(pg, bottom), svcvt_f16_z(pg_top, top));
-#endif /* defined(__ARM_FEATURE_SVE2) */
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */
}
inline svfloat32_t svtanh_f32_z(svbool_t pg, svfloat32_t val)
@@ -196,22 +196,22 @@ inline svfloat32_t svlog_f32_z(svbool_t pg, svfloat32_t x)
inline svfloat16_t svlog_f16_z(svbool_t pg, svfloat16_t x)
{
auto bottom = svcvt_f32_z(pg, x);
-#if defined(__ARM_FEATURE_SVE2)
+#if defined(ARM_COMPUTE_ENABLE_SVE2)
auto top = svcvtlt_f32_x(pg, x);
auto pg_top = pg;
-#else /* defined(__ARM_FEATURE_SVE2) */
+#else /* defined(ARM_COMPUTE_ENABLE_SVE2) */
auto pg_top = svptrue_b16();
auto top = svcvt_f32_z(pg_top, svreinterpret_f16(svrevh_z(svptrue_b16(), svreinterpret_u32(x))));
-#endif /* defined(__ARM_FEATURE_SVE2) */
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */
bottom = svlog_f32_z(pg, bottom);
top = svlog_f32_z(pg_top, top);
-#if defined(__ARM_FEATURE_SVE2)
+#if defined(ARM_COMPUTE_ENABLE_SVE2)
return svcvtnt_f16_m(svcvt_f16_z(pg, bottom), pg_top, top);
-#else /* defined(__ARM_FEATURE_SVE2) */
+#else /* defined(ARM_COMPUTE_ENABLE_SVE2) */
return svtrn1(svcvt_f16_z(pg, bottom), svcvt_f16_z(pg_top, top));
-#endif /* defined(__ARM_FEATURE_SVE2) */
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */
}
inline svfloat32_t svsin_f32_z(svbool_t pg, svfloat32_t val)
@@ -269,22 +269,22 @@ inline svfloat32_t svsin_f32_z(svbool_t pg, svfloat32_t val)
inline svfloat16_t svsin_f16_z(svbool_t pg, svfloat16_t val)
{
auto bottom = svcvt_f32_z(pg, val);
-#if defined(__ARM_FEATURE_SVE2)
+#if defined(ARM_COMPUTE_ENABLE_SVE2)
auto top = svcvtlt_f32_x(pg, val);
auto pg_top = pg;
-#else /* defined(__ARM_FEATURE_SVE2) */
+#else /* defined(ARM_COMPUTE_ENABLE_SVE2) */
auto pg_top = svptrue_b16();
auto top = svcvt_f32_z(pg_top, svreinterpret_f16(svrevh_z(svptrue_b16(), svreinterpret_u32(val))));
-#endif /* defined(__ARM_FEATURE_SVE2) */
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */
bottom = svsin_f32_z(pg, bottom);
top = svsin_f32_z(pg_top, top);
-#if defined(__ARM_FEATURE_SVE2)
+#if defined(ARM_COMPUTE_ENABLE_SVE2)
return svcvtnt_f16_m(svcvt_f16_z(pg, bottom), pg_top, top);
-#else /* defined(__ARM_FEATURE_SVE2) */
+#else /* defined(ARM_COMPUTE_ENABLE_SVE2) */
return svtrn1(svcvt_f16_z(pg, bottom), svcvt_f16_z(pg_top, top));
-#endif /* defined(__ARM_FEATURE_SVE2) */
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */
}
inline svfloat32_t svpow_f32_z(svbool_t pg, svfloat32_t a, svfloat32_t b)
@@ -297,27 +297,27 @@ inline svfloat16_t svpow_f16_z(svbool_t pg, svfloat16_t a, svfloat16_t b)
auto a_bottom = svcvt_f32_z(pg, a);
auto b_bottom = svcvt_f32_z(pg, b);
-#if defined(__ARM_FEATURE_SVE2)
+#if defined(ARM_COMPUTE_ENABLE_SVE2)
auto pg_top = pg;
auto a_top = svcvtlt_f32_x(pg, a);
auto b_top = svcvtlt_f32_x(pg, b);
-#else /* defined(__ARM_FEATURE_SVE2) */
+#else /* defined(ARM_COMPUTE_ENABLE_SVE2) */
auto pg_top = svptrue_b16();
auto a_top = svcvt_f32_z(pg_top, svreinterpret_f16(svrevh_z(svptrue_b16(), svreinterpret_u32(a))));
auto b_top = svcvt_f32_z(pg_top, svreinterpret_f16(svrevh_z(svptrue_b16(), svreinterpret_u32(b))));
-#endif /* defined(__ARM_FEATURE_SVE2) */
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */
auto res_bottom = svpow_f32_z(pg, a_bottom, b_bottom);
auto res_top = svpow_f32_z(pg_top, a_top, b_top);
-#if defined(__ARM_FEATURE_SVE2)
+#if defined(ARM_COMPUTE_ENABLE_SVE2)
return svcvtnt_f16_m(svcvt_f16_z(pg, res_bottom), pg_top, res_top);
-#else /* defined(__ARM_FEATURE_SVE2) */
+#else /* defined(ARM_COMPUTE_ENABLE_SVE2) */
return svtrn1(svcvt_f16_z(pg, res_bottom), svcvt_f16_z(pg_top, res_top));
-#endif /* defined(__ARM_FEATURE_SVE2) */
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */
}
-#if defined(__ARM_FEATURE_SVE2)
+#if defined(ARM_COMPUTE_ENABLE_SVE2)
template <>
inline svuint8_t convert_float_to_int<svuint8_t>(const svfloat32_t &in_0, const svfloat32_t &in_1, const svfloat32_t &in_2, const svfloat32_t &in_3)
{
@@ -385,7 +385,7 @@ inline svint8_t convert_float_to_int<svint8_t>(const svfloat32_t &in_0, const sv
return out;
}
-#endif /* defined(__ARM_FEATURE_SVE2) */
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */
} // namespace arm_compute
-#endif /* defined(ENABLE_SVE) */
+#endif /* defined(__ARM_FEATURE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE) */
diff --git a/src/core/NEON/SVESymm.h b/src/core/NEON/SVESymm.h
index 30e1e172a3..c71d273b67 100644
--- a/src/core/NEON/SVESymm.h
+++ b/src/core/NEON/SVESymm.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020 Arm Limited.
+ * Copyright (c) 2020-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,7 +26,7 @@
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
-#if defined(__ARM_FEATURE_SVE2)
+#if defined(ARM_COMPUTE_ENABLE_SVE2)
#include "src/core/NEON/SVEMath.h"
#include <arm_sve.h>
@@ -123,5 +123,5 @@ inline svint16x2_t svquantize_qsymm16_z(svbool_t pg, const svfloat32x4_t qv, con
}
} // namespace arm_compute
-#endif /* defined(__ARM_FEATURE_SVE2) */
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */
#endif // ARM_COMPUTE_NESYMM_H \ No newline at end of file
diff --git a/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp b/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp
index 92000bb2f6..46551553c9 100644
--- a/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp
@@ -48,7 +48,8 @@ namespace
{
+/** Inputs handed to each kernel selector predicate in this file.
+ *
+ * `ci` is stored by reference, so an instance must not outlive the CPUInfo it
+ * was built from; the call sites in this file bind it to CPUInfo::get().
+ */
struct BatchNormalizationSelectorData
{
- DataType dt;
+ DataType dt; // Tensor data type the selector matches against.
+ const CPUInfo &ci; // CPU capabilities; the SVE selectors check ci.has_sve().
};
using BatchNormalizationSelectorPtr = std::add_pointer<bool(const BatchNormalizationSelectorData &data)>::type;
using BatchNormalizationKernelPtr = std::add_pointer<void(ITensor *, ITensor *, const ITensor *, const ITensor *, const ITensor *, const ITensor *,
@@ -63,19 +64,19 @@ struct BatchNormalizationKernel
static const BatchNormalizationKernel available_kernels[] =
{
-#if defined(ENABLE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
{
"fp16_sve_batch_normalization",
- [](const BatchNormalizationSelectorData & data) { return data.dt == DataType::F16; },
+ [](const BatchNormalizationSelectorData & data) { return data.dt == DataType::F16 && data.ci.has_sve(); },
REGISTER_FP16_SVE(arm_compute::cpu::fp16_sve_batch_normalization)
},
{
"f32_sve_batch_normalization",
- [](const BatchNormalizationSelectorData & data) { return data.dt == DataType::F32; },
+ [](const BatchNormalizationSelectorData & data) { return data.dt == DataType::F32 && data.ci.has_sve(); },
REGISTER_FP32_SVE(arm_compute::cpu::fp32_sve_batch_normalization)
},
-#endif /* !defined(ENABLE_SVE) */
-#if defined(ENABLE_NEON)
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */
+#if defined(ARM_COMPUTE_ENABLE_NEON)
#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
{
"fp16_neon_batch_normalization",
@@ -88,7 +89,7 @@ static const BatchNormalizationKernel available_kernels[] =
[](const BatchNormalizationSelectorData & data) { return data.dt == DataType::F32; },
REGISTER_FP32_NEON(arm_compute::cpu::fp32_neon_batch_normalization)
},
-#endif /* !defined(ENABLE_NEON) */
+#endif /* defined(ARM_COMPUTE_ENABLE_NEON) */
};
const BatchNormalizationKernel *get_implementation(const BatchNormalizationSelectorData &data)
@@ -109,7 +110,7 @@ validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const IT
{
ARM_COMPUTE_UNUSED(epsilon);
- const auto *uk = get_implementation(BatchNormalizationSelectorData{ input->data_type() });
+ const auto *uk = get_implementation(BatchNormalizationSelectorData{ input->data_type(), CPUInfo::get() });
ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr);
if(act_info.enabled())
@@ -387,7 +388,7 @@ void NEBatchNormalizationLayerKernel::run(const Window &window, const ThreadInfo
}
else
{
- const auto *uk = get_implementation(BatchNormalizationSelectorData{ _input->info()->data_type() });
+ const auto *uk = get_implementation(BatchNormalizationSelectorData{ _input->info()->data_type(), CPUInfo::get() });
uk->ukernel(_input, _output, _mean, _var, _beta, _gamma, _epsilon, _act_info, window);
}
}
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp16.cpp b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp16.cpp
index fdb36fc1d1..6ba7c78e97 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp16.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp16.cpp
@@ -33,13 +33,13 @@
#include "depthwise_implementation_constraints.hpp"
#if defined(__aarch64__)
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
#include "kernels/sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst.hpp"
#include "kernels/sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst.hpp"
#include "kernels/sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp"
#include "kernels/sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp"
#include "kernels/sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp"
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
#include "kernels/a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst.hpp"
#include "kernels/a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst.hpp"
#include "kernels/a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp"
@@ -83,12 +83,13 @@ namespace
static const DepthwiseImplementation<__fp16, __fp16> depthwise_fp16_methods[] = {
#if defined(__aarch64__)
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
{
DepthwiseMethod::DEPTHFIRST,
"sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst",
constraint(is_supported<sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst>,
- has_no_channel_multiplier),
+ has_no_channel_multiplier,
+ cpu_has_sve),
cycle_estimate<sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst>,
[] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
return new DepthwiseDepthfirst<sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst>(args);
@@ -98,7 +99,8 @@ static const DepthwiseImplementation<__fp16, __fp16> depthwise_fp16_methods[] =
DepthwiseMethod::DEPTHFIRST,
"sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst",
constraint(is_supported<sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst>,
- has_no_channel_multiplier),
+ has_no_channel_multiplier,
+ cpu_has_sve),
cycle_estimate<sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst>,
[] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
return new DepthwiseDepthfirst<sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst>(args);
@@ -108,7 +110,8 @@ static const DepthwiseImplementation<__fp16, __fp16> depthwise_fp16_methods[] =
DepthwiseMethod::DEPTHFIRST,
"sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst",
constraint(is_supported<sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst>,
- has_no_channel_multiplier),
+ has_no_channel_multiplier,
+ cpu_has_sve),
cycle_estimate<sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst>,
[] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
return new DepthwiseDepthfirst<sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst>(args);
@@ -118,7 +121,8 @@ static const DepthwiseImplementation<__fp16, __fp16> depthwise_fp16_methods[] =
DepthwiseMethod::DEPTHFIRST,
"sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst",
constraint(is_supported<sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst>,
- has_no_channel_multiplier),
+ has_no_channel_multiplier,
+ cpu_has_sve),
cycle_estimate<sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst>,
[] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
return new DepthwiseDepthfirst<sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst>(args);
@@ -128,13 +132,14 @@ static const DepthwiseImplementation<__fp16, __fp16> depthwise_fp16_methods[] =
DepthwiseMethod::DEPTHFIRST,
"sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst",
constraint(is_supported<sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst>,
- has_no_channel_multiplier),
+ has_no_channel_multiplier,
+ cpu_has_sve),
cycle_estimate<sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst>,
[] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
return new DepthwiseDepthfirst<sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst>(args);
},
},
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
{
DepthwiseMethod::DEPTHFIRST,
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp32.cpp b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp32.cpp
index aea750a475..ac43df979c 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp32.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp32.cpp
@@ -33,7 +33,7 @@
#include "depthwise_implementation_constraints.hpp"
#if defined(__aarch64__)
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
#include "kernels/sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst.hpp"
#include "kernels/sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst.hpp"
#include "kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp"
@@ -43,7 +43,7 @@
#include "kernels/sve_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst.hpp"
#include "kernels/sve_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst.hpp"
#include "kernels/sve_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst.hpp"
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
#include "kernels/a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst.hpp"
#include "kernels/a64_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst.hpp"
#include "kernels/a64_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp"
@@ -85,12 +85,13 @@ namespace
static const DepthwiseImplementation<float, float> depthwise_fp32_methods[] = {
#if defined(__aarch64__)
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
{
DepthwiseMethod::DEPTHFIRST,
"sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst",
constraint(is_supported<sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst>,
- has_no_channel_multiplier),
+ has_no_channel_multiplier,
+ cpu_has_sve),
cycle_estimate<sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst>,
[] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<float, float, float> * {
return new DepthwiseDepthfirst<sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst>(args);
@@ -100,7 +101,8 @@ static const DepthwiseImplementation<float, float> depthwise_fp32_methods[] = {
DepthwiseMethod::DEPTHFIRST,
"sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst",
constraint(is_supported<sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst>,
- has_no_channel_multiplier),
+ has_no_channel_multiplier,
+ cpu_has_sve),
cycle_estimate<sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst>,
[] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<float, float, float> * {
return new DepthwiseDepthfirst<sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst>(args);
@@ -110,7 +112,8 @@ static const DepthwiseImplementation<float, float> depthwise_fp32_methods[] = {
DepthwiseMethod::DEPTHFIRST,
"sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst",
constraint(is_supported<sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst>,
- has_no_channel_multiplier),
+ has_no_channel_multiplier,
+ cpu_has_sve),
cycle_estimate<sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst>,
[] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<float, float, float> * {
return new DepthwiseDepthfirst<sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst>(args);
@@ -120,7 +123,8 @@ static const DepthwiseImplementation<float, float> depthwise_fp32_methods[] = {
DepthwiseMethod::DEPTHFIRST,
"sve_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst",
constraint(is_supported<sve_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst>,
- has_no_channel_multiplier),
+ has_no_channel_multiplier,
+ cpu_has_sve),
cycle_estimate<sve_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst>,
[] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<float, float, float> * {
return new DepthwiseDepthfirst<sve_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst>(args);
@@ -130,7 +134,8 @@ static const DepthwiseImplementation<float, float> depthwise_fp32_methods[] = {
DepthwiseMethod::DEPTHFIRST,
"sve_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst",
constraint(is_supported<sve_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst>,
- has_no_channel_multiplier),
+ has_no_channel_multiplier,
+ cpu_has_sve),
cycle_estimate<sve_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst>,
[] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<float, float, float> * {
return new DepthwiseDepthfirst<sve_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst>(args);
@@ -139,7 +144,7 @@ static const DepthwiseImplementation<float, float> depthwise_fp32_methods[] = {
{
DepthwiseMethod::DEPTHFIRST,
"sve_fp32_nhwc_generic_output3x3_mla_depthfirst",
- constraint(has_no_channel_multiplier),
+ constraint(has_no_channel_multiplier, cpu_has_sve),
not_preferred,
[] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<float, float, float> * {
return new DepthwiseDepthfirstGeneric<sve_fp32_nhwc_generic_output9_mla_depthfirst, 3, 3>(args);
@@ -148,7 +153,7 @@ static const DepthwiseImplementation<float, float> depthwise_fp32_methods[] = {
{
DepthwiseMethod::DEPTHFIRST,
"sve_fp32_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst",
- constraint(is_supported<sve_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst>),
+ constraint(is_supported<sve_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst>, cpu_has_sve),
not_preferred_if_no_multiplier,
[] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<float, float, float> * {
return new DepthwiseDepthfirstWithMultiplier<sve_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst>(args);
@@ -157,7 +162,7 @@ static const DepthwiseImplementation<float, float> depthwise_fp32_methods[] = {
{
DepthwiseMethod::DEPTHFIRST,
"sve_fp32_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst",
- constraint(is_supported<sve_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst>),
+ constraint(is_supported<sve_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst>, cpu_has_sve),
not_preferred_if_no_multiplier,
[] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<float, float, float> * {
return new DepthwiseDepthfirstWithMultiplier<sve_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst>(args);
@@ -166,13 +171,13 @@ static const DepthwiseImplementation<float, float> depthwise_fp32_methods[] = {
{
DepthwiseMethod::DEPTHFIRST,
"sve_fp32_nhwc_generic_with_multiplier_output2x8_mla_depthfirst",
- nullptr,
+ constraint(cpu_has_sve),
not_preferred_if_no_multiplier,
[] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<float, float, float> * {
return new DepthwiseDepthfirstGenericWithMultiplier<sve_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst>(args);
},
},
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
{
DepthwiseMethod::DEPTHFIRST,
"a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst",
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_implementation_constraints.hpp b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_implementation_constraints.hpp
index b4814bef92..6526d001b3 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_implementation_constraints.hpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_implementation_constraints.hpp
@@ -85,6 +85,18 @@ bool cpu_has_dot_product(const DepthwiseArgs &args, const void *)
return args.cpu_info->has_dotprod();
}
+bool cpu_has_sve(const DepthwiseArgs &args, const void *) __attribute__ ((unused));
+bool cpu_has_sve(const DepthwiseArgs &args, const void *)
+{
+ return args.cpu_info->has_sve();
+}
+
+bool cpu_has_sve2(const DepthwiseArgs &args, const void *) __attribute__ ((unused));
+bool cpu_has_sve2(const DepthwiseArgs &args, const void *)
+{
+ return args.cpu_info->has_sve2();
+}
+
bool has_no_channel_multiplier(const DepthwiseArgs &args, const void *) __attribute__ ((unused));
bool has_no_channel_multiplier(const DepthwiseArgs &args, const void *)
{
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_s8q.cpp b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_s8q.cpp
index 40370fe59e..f38912d257 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_s8q.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_s8q.cpp
@@ -33,7 +33,7 @@
#include "depthwise_implementation_constraints.hpp"
#if defined(__aarch64__)
-#if defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
#include "kernels/sve_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst.hpp"
#include "kernels/sve_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst.hpp"
#include "kernels/sve_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp"
@@ -41,7 +41,7 @@
#include "kernels/sve_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp"
#include "kernels/sve_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst.hpp"
#include "kernels/sve_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst.hpp"
-#endif // defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
#include "kernels/a64_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst.hpp"
#include "kernels/a64_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst.hpp"
#include "kernels/a64_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp"
@@ -73,14 +73,15 @@ bool qp_weights_are_symmetric(const DepthwiseArgs &, const void *_qp)
static const DepthwiseImplementation<int8_t, int8_t, int8_t, Requantize32> depthwise_s8q_methods[] = {
#if defined(__aarch64__)
-#if defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
{
DepthwiseMethod::DEPTHFIRST,
"sve_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst",
constraint<Requantize32>(is_supported<sve_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst>,
has_no_channel_multiplier,
qp_has_no_left_shift,
- qp_weights_are_symmetric),
+ qp_weights_are_symmetric,
+ cpu_has_sve2),
nullptr,
[] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
return new DepthwiseDepthfirstQuantized<sve_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst>(args, qp);
@@ -91,7 +92,8 @@ static const DepthwiseImplementation<int8_t, int8_t, int8_t, Requantize32> depth
"sve_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst",
constraint<Requantize32>(is_supported<sve_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst>,
has_no_channel_multiplier,
- qp_has_no_left_shift),
+ qp_has_no_left_shift,
+ cpu_has_sve2),
nullptr,
[] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
return new DepthwiseDepthfirstQuantized<sve_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst>(args, qp);
@@ -102,7 +104,8 @@ static const DepthwiseImplementation<int8_t, int8_t, int8_t, Requantize32> depth
"sve_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst",
constraint<Requantize32>(is_supported<sve_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst>,
has_no_channel_multiplier,
- qp_has_no_left_shift),
+ qp_has_no_left_shift,
+ cpu_has_sve2),
nullptr,
[] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
return new DepthwiseDepthfirstQuantized<sve_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst>(args, qp);
@@ -113,7 +116,8 @@ static const DepthwiseImplementation<int8_t, int8_t, int8_t, Requantize32> depth
"sve_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst",
constraint<Requantize32>(is_supported<sve_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst>,
has_no_channel_multiplier,
- qp_has_no_left_shift),
+ qp_has_no_left_shift,
+ cpu_has_sve2),
nullptr,
[] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
return new DepthwiseDepthfirstQuantized<sve_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst>(args, qp);
@@ -124,7 +128,8 @@ static const DepthwiseImplementation<int8_t, int8_t, int8_t, Requantize32> depth
"sve_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst",
constraint<Requantize32>(is_supported<sve_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst>,
has_no_channel_multiplier,
- qp_has_no_left_shift),
+ qp_has_no_left_shift,
+ cpu_has_sve2),
nullptr,
[] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
return new DepthwiseDepthfirstQuantized<sve_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst>(args, qp);
@@ -134,7 +139,8 @@ static const DepthwiseImplementation<int8_t, int8_t, int8_t, Requantize32> depth
DepthwiseMethod::DEPTHFIRST,
"sve_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst",
constraint<Requantize32>(is_supported<sve_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst>,
- qp_has_no_left_shift),
+ qp_has_no_left_shift,
+ cpu_has_sve2),
nullptr,
[] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
return new DepthwiseDepthfirstWithMultiplierQuantized<sve_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst>(args, qp);
@@ -144,13 +150,14 @@ static const DepthwiseImplementation<int8_t, int8_t, int8_t, Requantize32> depth
DepthwiseMethod::DEPTHFIRST,
"sve_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst",
constraint<Requantize32>(is_supported<sve_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst>,
- qp_has_no_left_shift),
+ qp_has_no_left_shift,
+ cpu_has_sve2),
nullptr,
[] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
return new DepthwiseDepthfirstWithMultiplierQuantized<sve_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst>(args, qp);
},
},
-#endif // defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
{
DepthwiseMethod::DEPTHFIRST,
"a64_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst",
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_u8q.cpp b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_u8q.cpp
index 3e190d242a..67713c5bcc 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_u8q.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_u8q.cpp
@@ -33,14 +33,14 @@
#include "depthwise_implementation_constraints.hpp"
#if defined(__aarch64__)
-#if defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
#include "kernels/sve_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst.hpp"
#include "kernels/sve_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp"
#include "kernels/sve_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp"
#include "kernels/sve_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp"
#include "kernels/sve_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst.hpp"
#include "kernels/sve_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst.hpp"
-#endif // defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
#include "kernels/a64_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst.hpp"
#include "kernels/a64_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp"
#include "kernels/a64_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp"
@@ -60,13 +60,14 @@ namespace depthwise {
static const DepthwiseImplementation<uint8_t, uint8_t, uint8_t, Requantize32> depthwise_u8q_methods[] = {
#if defined(__aarch64__)
-#if defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
{
DepthwiseMethod::DEPTHFIRST,
"sve_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst",
constraint<Requantize32>(is_supported<sve_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst>,
has_no_channel_multiplier,
- qp_has_no_left_shift),
+ qp_has_no_left_shift,
+ cpu_has_sve2),
nullptr,
[] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<uint8_t, uint8_t, uint8_t> * {
return new DepthwiseDepthfirstQuantized<sve_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst>(args, qp);
@@ -77,7 +78,8 @@ static const DepthwiseImplementation<uint8_t, uint8_t, uint8_t, Requantize32> de
"sve_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst",
constraint<Requantize32>(is_supported<sve_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst>,
has_no_channel_multiplier,
- qp_has_no_left_shift),
+ qp_has_no_left_shift,
+ cpu_has_sve2),
nullptr,
[] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<uint8_t, uint8_t, uint8_t> * {
return new DepthwiseDepthfirstQuantized<sve_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst>(args, qp);
@@ -88,7 +90,8 @@ static const DepthwiseImplementation<uint8_t, uint8_t, uint8_t, Requantize32> de
"sve_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst",
constraint<Requantize32>(is_supported<sve_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst>,
has_no_channel_multiplier,
- qp_has_no_left_shift),
+ qp_has_no_left_shift,
+ cpu_has_sve2),
nullptr,
[] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<uint8_t, uint8_t, uint8_t> * {
return new DepthwiseDepthfirstQuantized<sve_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst>(args, qp);
@@ -99,7 +102,8 @@ static const DepthwiseImplementation<uint8_t, uint8_t, uint8_t, Requantize32> de
"sve_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst",
constraint<Requantize32>(is_supported<sve_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst>,
has_no_channel_multiplier,
- qp_has_no_left_shift),
+ qp_has_no_left_shift,
+ cpu_has_sve2),
nullptr,
[] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<uint8_t, uint8_t, uint8_t> * {
return new DepthwiseDepthfirstQuantized<sve_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst>(args, qp);
@@ -109,7 +113,8 @@ static const DepthwiseImplementation<uint8_t, uint8_t, uint8_t, Requantize32> de
DepthwiseMethod::DEPTHFIRST,
"sve_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst",
constraint<Requantize32>(is_supported<sve_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst>,
- qp_has_no_left_shift),
+ qp_has_no_left_shift,
+ cpu_has_sve2),
nullptr,
[] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<uint8_t, uint8_t, uint8_t> * {
return new DepthwiseDepthfirstWithMultiplierQuantized<sve_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst>(args, qp);
@@ -119,13 +124,14 @@ static const DepthwiseImplementation<uint8_t, uint8_t, uint8_t, Requantize32> de
DepthwiseMethod::DEPTHFIRST,
"sve_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst",
constraint<Requantize32>(is_supported<sve_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst>,
- qp_has_no_left_shift),
+ qp_has_no_left_shift,
+ cpu_has_sve2),
nullptr,
[] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<uint8_t, uint8_t, uint8_t> * {
return new DepthwiseDepthfirstWithMultiplierQuantized<sve_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst>(args, qp);
},
},
-#endif // defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
{
DepthwiseMethod::DEPTHFIRST,
"a64_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst",
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_u8s8u8q.cpp b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_u8s8u8q.cpp
index 537a7c5e01..af4426b69f 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_u8s8u8q.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_u8s8u8q.cpp
@@ -33,11 +33,11 @@
#include "depthwise_implementation_constraints.hpp"
#if defined(__aarch64__)
-#if defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
#include "kernels/sve_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp"
#include "kernels/sve_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp"
#include "kernels/sve_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp"
-#endif // defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
#include "kernels/a64_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp"
#include "kernels/a64_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp"
#include "kernels/a64_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp"
@@ -54,13 +54,14 @@ namespace depthwise {
static const DepthwiseImplementation<uint8_t, int8_t, uint8_t, Requantize32> depthwise_u8q_methods[] = {
#if defined(__aarch64__)
-#if defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
{
DepthwiseMethod::DEPTHFIRST,
"sve_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst",
constraint<Requantize32>(is_supported<sve_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst>,
has_no_channel_multiplier,
- qp_has_no_left_shift),
+ qp_has_no_left_shift,
+ cpu_has_sve2),
nullptr,
[] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<uint8_t, int8_t, uint8_t> * {
return new DepthwiseDepthfirstQuantized<sve_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst>(args, qp);
@@ -71,7 +72,8 @@ static const DepthwiseImplementation<uint8_t, int8_t, uint8_t, Requantize32> dep
"sve_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst",
constraint<Requantize32>(is_supported<sve_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst>,
has_no_channel_multiplier,
- qp_has_no_left_shift),
+ qp_has_no_left_shift,
+ cpu_has_sve2),
nullptr,
[] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<uint8_t, int8_t, uint8_t> * {
return new DepthwiseDepthfirstQuantized<sve_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst>(args, qp);
@@ -82,13 +84,14 @@ static const DepthwiseImplementation<uint8_t, int8_t, uint8_t, Requantize32> dep
"sve_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst",
constraint<Requantize32>(is_supported<sve_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst>,
has_no_channel_multiplier,
- qp_has_no_left_shift),
+ qp_has_no_left_shift,
+ cpu_has_sve2),
nullptr,
[] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<uint8_t, int8_t, uint8_t> * {
return new DepthwiseDepthfirstQuantized<sve_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst>(args, qp);
},
},
-#endif // defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
{
DepthwiseMethod::DEPTHFIRST,
"a64_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst",
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/interleaves/8b_mla.cpp b/src/core/NEON/kernels/arm_conv/depthwise/interleaves/8b_mla.cpp
index 6c5ef23684..04b904275c 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/interleaves/8b_mla.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/interleaves/8b_mla.cpp
@@ -110,14 +110,14 @@ void interleave_ ## ARCH ## _ ## TYPENAME ## _ ## KERN_ROWS ## x ## KERN_COLS #
namespace arm_conv {
namespace depthwise {
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
ADD_IMPLEMENTATION(sve, s8q, int8_t, SVE, 2, 3, 3)
ADD_IMPLEMENTATION(sve, s8q, int8_t, SVE, 2, 5, 5)
ADD_IMPLEMENTATION(sve, u8q, uint8_t, SVE, 2, 3, 3)
ADD_IMPLEMENTATION(sve, u8q, uint8_t, SVE, 2, 5, 5)
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
ADD_IMPLEMENTATION(a64, s8q, int8_t, None, 2, 3, 3)
ADD_IMPLEMENTATION(a64, s8q, int8_t, None, 2, 5, 5)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/interleaves/list.hpp b/src/core/NEON/kernels/arm_conv/depthwise/interleaves/list.hpp
index 41f0495acf..cb49a243af 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/interleaves/list.hpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/interleaves/list.hpp
@@ -27,7 +27,7 @@
namespace arm_conv {
namespace depthwise {
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
class interleave_sve_u8q_3x3_dot
{
@@ -71,7 +71,7 @@ class interleave_sve_s8q_5x5_mla
static size_t get_packed_size(const DepthwiseArgs &);
};
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
class interleave_a64_u8q_3x3_dot
{
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/interleaves/sve_s8q_3x3_dot.cpp b/src/core/NEON/kernels/arm_conv/depthwise/interleaves/sve_s8q_3x3_dot.cpp
index ea0c35b7ce..dfb6457ed9 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/interleaves/sve_s8q_3x3_dot.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/interleaves/sve_s8q_3x3_dot.cpp
@@ -22,7 +22,7 @@
* SOFTWARE.
*/
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
#include "arm_gemm.hpp"
#include "src/core/NEON/kernels/arm_gemm/utils.hpp"
@@ -133,4 +133,4 @@ void interleave_sve_s8q_3x3_dot::pack_parameters(unsigned int n_channels, void *
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/interleaves/sve_u8q_3x3_dot.cpp b/src/core/NEON/kernels/arm_conv/depthwise/interleaves/sve_u8q_3x3_dot.cpp
index edd32a43f5..6c16bdc2fb 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/interleaves/sve_u8q_3x3_dot.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/interleaves/sve_u8q_3x3_dot.cpp
@@ -22,7 +22,7 @@
* SOFTWARE.
*/
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
#include "arm_gemm.hpp"
#include "src/core/NEON/kernels/arm_gemm/utils.hpp"
@@ -133,4 +133,4 @@ void interleave_sve_u8q_3x3_dot::pack_parameters(unsigned int n_channels, void *
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp
index c444472c68..b8e59306d5 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp
@@ -28,7 +28,7 @@
#pragma once
-#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS)
+#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS)
namespace arm_conv {
namespace depthwise {
@@ -69,4 +69,4 @@ struct sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp
index b788c705e5..a4c1a40100 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp
@@ -25,7 +25,7 @@
#include <cstddef>
#include <cstdint>
-#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS)
+#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS)
namespace arm_conv {
namespace depthwise {
@@ -321,4 +321,4 @@ void sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst_direct_impl(
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp
index d8f905b33a..a845e7c0c6 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp
@@ -25,7 +25,7 @@
#include <cstddef>
#include <cstdint>
-#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS)
+#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS)
namespace arm_conv {
namespace depthwise {
@@ -281,4 +281,4 @@ void sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst_indirect_impl(
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst.hpp
index f5d31e63f8..e1f23aae66 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst.hpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst.hpp
@@ -28,7 +28,7 @@
#pragma once
-#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS)
+#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS)
namespace arm_conv {
namespace depthwise {
@@ -69,4 +69,4 @@ struct sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp
index aebf0bf7ac..0708f578a8 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp
@@ -25,7 +25,7 @@
#include <cstddef>
#include <cstdint>
-#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS)
+#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS)
namespace arm_conv {
namespace depthwise {
@@ -475,4 +475,4 @@ void sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst_direct_impl(
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp
index 65ecb6d218..770576c5da 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp
@@ -25,7 +25,7 @@
#include <cstddef>
#include <cstdint>
-#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS)
+#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS)
namespace arm_conv {
namespace depthwise {
@@ -492,4 +492,4 @@ void sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst_indirect_impl(
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst.hpp
index f976842b7a..7d035f0571 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst.hpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst.hpp
@@ -28,7 +28,7 @@
#pragma once
-#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS)
+#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS)
namespace arm_conv {
namespace depthwise {
@@ -69,4 +69,4 @@ struct sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp
index 8f0fce7e96..93e1908df7 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp
@@ -25,7 +25,7 @@
#include <cstddef>
#include <cstdint>
-#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS)
+#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS)
namespace arm_conv {
namespace depthwise {
@@ -685,4 +685,4 @@ void sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst_direct_impl(
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp
index 8148353f1a..8eaf0a46d6 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp
@@ -25,7 +25,7 @@
#include <cstddef>
#include <cstdint>
-#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS)
+#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS)
namespace arm_conv {
namespace depthwise {
@@ -743,4 +743,4 @@ void sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst_indirect_impl(
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp
index 98f50f8436..a6a4afb3b5 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp
@@ -28,7 +28,7 @@
#pragma once
-#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS)
+#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS)
namespace arm_conv {
namespace depthwise {
@@ -69,4 +69,4 @@ struct sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp
index e620604a16..2238bf08cd 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp
@@ -25,7 +25,7 @@
#include <cstddef>
#include <cstdint>
-#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS)
+#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS)
namespace arm_conv {
namespace depthwise {
@@ -342,4 +342,4 @@ void sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst_direct_impl(
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp
index 3ed743e3ed..0d5d4176aa 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp
@@ -25,7 +25,7 @@
#include <cstddef>
#include <cstdint>
-#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS)
+#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS)
namespace arm_conv {
namespace depthwise {
@@ -342,4 +342,4 @@ void sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst_indirect_impl(
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp
index 20f3ee0329..236f9bf43a 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp
@@ -28,7 +28,7 @@
#pragma once
-#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS)
+#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS)
namespace arm_conv {
namespace depthwise {
@@ -69,4 +69,4 @@ struct sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp
index f1ee5c53ce..6b1564e6c9 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp
@@ -25,7 +25,7 @@
#include <cstddef>
#include <cstdint>
-#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS)
+#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS)
namespace arm_conv {
namespace depthwise {
@@ -528,4 +528,4 @@ void sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst_direct_impl(
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp
index caa15a9816..be128b4aff 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp
@@ -25,7 +25,7 @@
#include <cstddef>
#include <cstdint>
-#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS)
+#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS)
namespace arm_conv {
namespace depthwise {
@@ -556,4 +556,4 @@ void sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst_indirect_impl(
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp
index 74716ddf1f..05e82d4e76 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp
@@ -28,7 +28,7 @@
#pragma once
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
namespace arm_conv {
namespace depthwise {
@@ -69,4 +69,4 @@ struct sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp
index d443855758..eddcffc196 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp
@@ -25,7 +25,7 @@
#include <cstddef>
#include <cstdint>
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
namespace arm_conv {
namespace depthwise {
@@ -252,4 +252,4 @@ void sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst_impl(
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp
index d899255e84..eb632eb4fe 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp
@@ -25,7 +25,7 @@
#include <cstddef>
#include <cstdint>
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
namespace arm_conv {
namespace depthwise {
@@ -361,4 +361,4 @@ void sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst_direct_impl(
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp
index e8a1539437..fb41ca0754 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp
@@ -25,7 +25,7 @@
#include <cstddef>
#include <cstdint>
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
namespace arm_conv {
namespace depthwise {
@@ -315,4 +315,4 @@ void sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst_indirect_impl(
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst_strided.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst_strided.hpp
index 173fc631d8..65cb735bde 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst_strided.hpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst_strided.hpp
@@ -26,7 +26,7 @@
#pragma once
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
namespace arm_conv {
namespace depthwise {
@@ -63,4 +63,4 @@ struct sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst_strided
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst_strided/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst_strided/generic.cpp
index cecc192c49..97c4d88119 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst_strided/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst_strided/generic.cpp
@@ -25,7 +25,7 @@
#include <cstddef>
#include <cstdint>
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
namespace arm_conv {
namespace depthwise {
@@ -244,4 +244,4 @@ void sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst_strided_impl(
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst.hpp
index 5ec78aa05f..ef5f4187f9 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst.hpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst.hpp
@@ -28,7 +28,7 @@
#pragma once
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
namespace arm_conv {
namespace depthwise {
@@ -69,4 +69,4 @@ struct sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp
index 4d0bd311cc..6bc333be41 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp
@@ -25,7 +25,7 @@
#include <cstddef>
#include <cstdint>
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
namespace arm_conv {
namespace depthwise {
@@ -535,4 +535,4 @@ void sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst_direct_impl(
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp
index 7c6fb306b7..3877ae2f03 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp
@@ -25,7 +25,7 @@
#include <cstddef>
#include <cstdint>
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
namespace arm_conv {
namespace depthwise {
@@ -544,4 +544,4 @@ void sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst_indirect_impl(
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst.hpp
index a9823e3917..fc9588cd58 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst.hpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst.hpp
@@ -28,7 +28,7 @@
#pragma once
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
namespace arm_conv {
namespace depthwise {
@@ -69,4 +69,4 @@ struct sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp
index 4c24ad9c15..7df8e481c0 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp
@@ -25,7 +25,7 @@
#include <cstddef>
#include <cstdint>
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
namespace arm_conv {
namespace depthwise {
@@ -685,4 +685,4 @@ void sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst_direct_impl(
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp
index ac0c4ec4e3..22e12a7b9a 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp
@@ -25,7 +25,7 @@
#include <cstddef>
#include <cstdint>
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
namespace arm_conv {
namespace depthwise {
@@ -817,4 +817,4 @@ void sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst_indirect_impl(
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp
index f5b6a4f8ff..2119c06965 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp
@@ -28,7 +28,7 @@
#pragma once
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
namespace arm_conv {
namespace depthwise {
@@ -69,4 +69,4 @@ struct sve_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp
index ad53872630..78e67e1be1 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp
@@ -25,7 +25,7 @@
#include <cstddef>
#include <cstdint>
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
namespace arm_conv {
namespace depthwise {
@@ -402,4 +402,4 @@ void sve_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst_direct_impl(
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp
index 06b3575d4b..8555cfea7c 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp
@@ -25,7 +25,7 @@
#include <cstddef>
#include <cstdint>
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
namespace arm_conv {
namespace depthwise {
@@ -394,4 +394,4 @@ void sve_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst_indirect_impl(
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp
index d49f7fdceb..6f1f187818 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp
@@ -28,7 +28,7 @@
#pragma once
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
namespace arm_conv {
namespace depthwise {
@@ -69,4 +69,4 @@ struct sve_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp
index f751186dce..edafe82770 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp
@@ -25,7 +25,7 @@
#include <cstddef>
#include <cstdint>
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
namespace arm_conv {
namespace depthwise {
@@ -528,4 +528,4 @@ void sve_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst_direct_impl(
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp
index 6e35ee86c5..1bfe7eb09c 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp
@@ -25,7 +25,7 @@
#include <cstddef>
#include <cstdint>
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
namespace arm_conv {
namespace depthwise {
@@ -630,4 +630,4 @@ void sve_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst_indirect_impl(
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_generic_output9_mla_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_generic_output9_mla_depthfirst.hpp
index dd2c519e3a..bd071d370c 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_generic_output9_mla_depthfirst.hpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_generic_output9_mla_depthfirst.hpp
@@ -28,7 +28,7 @@
#pragma once
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
namespace arm_conv {
namespace depthwise {
@@ -56,4 +56,4 @@ struct sve_fp32_nhwc_generic_output9_mla_depthfirst
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_generic_output9_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_generic_output9_mla_depthfirst/generic.cpp
index 370218e1d4..eac77516c2 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_generic_output9_mla_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_generic_output9_mla_depthfirst/generic.cpp
@@ -25,7 +25,7 @@
#include <cstddef>
#include <cstdint>
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
namespace arm_conv {
namespace depthwise {
@@ -163,4 +163,4 @@ void sve_fp32_nhwc_generic_output9_mla_depthfirst_impl(
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst.hpp
index 5cf3314c65..563f0fc59f 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst.hpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst.hpp
@@ -28,7 +28,7 @@
#pragma once
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
namespace arm_conv {
namespace depthwise {
@@ -67,4 +67,4 @@ struct sve_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst/generic.cpp
index ce640a207d..395b112460 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst/generic.cpp
@@ -25,7 +25,7 @@
#include <cstddef>
#include <cstdint>
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
namespace arm_conv {
namespace depthwise {
@@ -256,4 +256,4 @@ void sve_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst_imp
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst.hpp
index 3c2f77156d..e9378c2a12 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst.hpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst.hpp
@@ -28,7 +28,7 @@
#pragma once
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
namespace arm_conv {
namespace depthwise {
@@ -67,4 +67,4 @@ struct sve_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst/generic.cpp
index 453b00c0db..e7193d625f 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst/generic.cpp
@@ -25,7 +25,7 @@
#include <cstddef>
#include <cstdint>
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
namespace arm_conv {
namespace depthwise {
@@ -389,4 +389,4 @@ void sve_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst_imp
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst.hpp
index 7a4bd1dd1e..6849e562bc 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst.hpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst.hpp
@@ -28,7 +28,7 @@
#pragma once
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
namespace arm_conv {
namespace depthwise {
@@ -59,4 +59,4 @@ struct sve_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp
index 0124370067..b23cec8593 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp
@@ -25,7 +25,7 @@
#include <cstddef>
#include <cstdint>
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
namespace arm_conv {
namespace depthwise {
@@ -451,4 +451,4 @@ void sve_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst_im
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst.hpp
index 295e1f6450..39974fde88 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst.hpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst.hpp
@@ -29,7 +29,7 @@
#pragma once
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
namespace arm_conv {
namespace depthwise {
@@ -72,4 +72,4 @@ struct sve_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp
index 90f924a8ed..8e9e5f4aeb 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp
@@ -22,7 +22,7 @@
* SOFTWARE.
*/
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
#include "arm_gemm.hpp"
#include <cstdint>
@@ -454,4 +454,4 @@ void sve_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst_impl(const int8_t *const *cons
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp
index 7dd241a8cf..f788829572 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp
@@ -29,7 +29,7 @@
#pragma once
-#if defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#if defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
namespace arm_conv {
namespace depthwise {
@@ -72,4 +72,4 @@ struct sve_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#endif // defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp
index 8bf5badfaf..87387960f1 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp
@@ -27,7 +27,7 @@
#include <cstddef>
#include <cstdint>
-#if defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#if defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
namespace arm_conv {
namespace depthwise {
@@ -415,4 +415,4 @@ void sve_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst_impl(
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#endif // defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp
index 89507ef9ea..5c2b4f6f53 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp
@@ -29,7 +29,7 @@
#pragma once
-#if defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#if defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
namespace arm_conv {
namespace depthwise {
@@ -72,4 +72,4 @@ struct sve_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#endif // defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp
index b773ca1fe6..b4a1026aaa 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp
@@ -27,7 +27,7 @@
#include <cstddef>
#include <cstdint>
-#if defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#if defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
namespace arm_conv {
namespace depthwise {
@@ -456,4 +456,4 @@ void sve_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst_impl(
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#endif // defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp
index 54ac1c2e0b..948c5ad2e7 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp
@@ -29,7 +29,7 @@
#pragma once
-#if defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#if defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
namespace arm_conv {
namespace depthwise {
@@ -72,4 +72,4 @@ struct sve_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#endif // defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp
index c02bb584e5..565c145f92 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp
@@ -27,7 +27,7 @@
#include <cstddef>
#include <cstdint>
-#if defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#if defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
namespace arm_conv {
namespace depthwise {
@@ -657,4 +657,4 @@ void sve_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst_impl(
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#endif // defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst.hpp
index 7ab83e8659..176c4f878e 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst.hpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst.hpp
@@ -28,7 +28,7 @@
#pragma once
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
namespace arm_conv {
namespace depthwise {
@@ -67,4 +67,4 @@ struct sve_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp
index f531912e72..ea7acf5b6e 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp
@@ -27,7 +27,7 @@
#include <cstddef>
#include <cstdint>
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
namespace arm_conv {
namespace depthwise {
@@ -350,4 +350,4 @@ void sve_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst_impl
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst.hpp
index 2c33bdcd3a..10eee34d62 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst.hpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst.hpp
@@ -28,7 +28,7 @@
#pragma once
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
namespace arm_conv {
namespace depthwise {
@@ -67,4 +67,4 @@ struct sve_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp
index ffa2c6a7bc..6bc5935348 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp
@@ -27,7 +27,7 @@
#include <cstddef>
#include <cstdint>
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
namespace arm_conv {
namespace depthwise {
@@ -425,4 +425,4 @@ void sve_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst_impl
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst.hpp
index 4098f6f660..b5c6e983ae 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst.hpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst.hpp
@@ -29,7 +29,7 @@
#pragma once
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
namespace arm_conv {
namespace depthwise {
@@ -72,4 +72,4 @@ struct sve_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp
index 3345449fe1..095c1de8f2 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp
@@ -22,7 +22,7 @@
* SOFTWARE.
*/
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
#include "arm_gemm.hpp"
#include <cstdint>
@@ -385,4 +385,4 @@ void sve_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst_impl(const int8_t *const *con
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst.hpp
index 72b26a50a0..a087e801dc 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst.hpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst.hpp
@@ -29,7 +29,7 @@
#pragma once
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
namespace arm_conv {
namespace depthwise {
@@ -72,4 +72,4 @@ struct sve_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp
index ca6af57171..0d4b9e6687 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp
@@ -22,7 +22,7 @@
* SOFTWARE.
*/
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
#include "arm_gemm.hpp"
#include <cstdint>
@@ -454,4 +454,4 @@ void sve_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst_impl(const uint8_t *const *con
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp
index 6174dd0e9f..c501c67a5b 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp
@@ -29,7 +29,7 @@
#pragma once
-#if defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#if defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
namespace arm_conv {
namespace depthwise {
@@ -72,4 +72,4 @@ struct sve_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#endif // defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp
index 2ec7f6e7ea..40220ad84e 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp
@@ -27,7 +27,7 @@
#include <cstddef>
#include <cstdint>
-#if defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#if defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
namespace arm_conv {
namespace depthwise {
@@ -415,4 +415,4 @@ void sve_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst_impl(
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#endif // defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp
index 1f470f78aa..981864270d 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp
@@ -29,7 +29,7 @@
#pragma once
-#if defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#if defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
namespace arm_conv {
namespace depthwise {
@@ -72,4 +72,4 @@ struct sve_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#endif // defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp
index bc8f0ac1d9..39ab3534f5 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp
@@ -27,7 +27,7 @@
#include <cstddef>
#include <cstdint>
-#if defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#if defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
namespace arm_conv {
namespace depthwise {
@@ -456,4 +456,4 @@ void sve_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst_impl(
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#endif // defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp
index f025b08a29..b1b16c55d3 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp
@@ -29,7 +29,7 @@
#pragma once
-#if defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#if defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
namespace arm_conv {
namespace depthwise {
@@ -72,4 +72,4 @@ struct sve_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#endif // defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp
index 95423186b8..7f4272672c 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp
@@ -27,7 +27,7 @@
#include <cstddef>
#include <cstdint>
-#if defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#if defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
namespace arm_conv {
namespace depthwise {
@@ -657,4 +657,4 @@ void sve_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst_impl(
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#endif // defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst.hpp
index 9226a96662..dbf70c3f8e 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst.hpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst.hpp
@@ -28,7 +28,7 @@
#pragma once
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
namespace arm_conv {
namespace depthwise {
@@ -67,4 +67,4 @@ struct sve_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp
index bb9931c20f..1c8b8f9d19 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp
@@ -27,7 +27,7 @@
#include <cstddef>
#include <cstdint>
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
namespace arm_conv {
namespace depthwise {
@@ -350,4 +350,4 @@ void sve_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst_impl
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst.hpp
index 3023ed16e5..90fefdcda3 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst.hpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst.hpp
@@ -28,7 +28,7 @@
#pragma once
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
namespace arm_conv {
namespace depthwise {
@@ -67,4 +67,4 @@ struct sve_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp
index fc1e23e897..0085bbc6bc 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp
@@ -27,7 +27,7 @@
#include <cstddef>
#include <cstdint>
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
namespace arm_conv {
namespace depthwise {
@@ -425,4 +425,4 @@ void sve_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst_impl
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp
index 361f48bfbe..8ab2e5ba2a 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp
@@ -29,7 +29,7 @@
#pragma once
-#if defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#if defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
namespace arm_conv {
namespace depthwise {
@@ -72,4 +72,4 @@ struct sve_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#endif // defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp
index 4fc8999ea1..4b9be8f3e3 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp
@@ -27,7 +27,7 @@
#include <cstddef>
#include <cstdint>
-#if defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#if defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
namespace arm_conv {
namespace depthwise {
@@ -415,4 +415,4 @@ void sve_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst_impl(
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#endif // defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp
index dc33a3fe3f..f652e48e42 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp
@@ -29,7 +29,7 @@
#pragma once
-#if defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#if defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
namespace arm_conv {
namespace depthwise {
@@ -72,4 +72,4 @@ struct sve_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#endif // defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp
index 63960f08e1..400e62d248 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp
@@ -27,7 +27,7 @@
#include <cstddef>
#include <cstdint>
-#if defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#if defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
namespace arm_conv {
namespace depthwise {
@@ -456,4 +456,4 @@ void sve_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst_impl(
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#endif // defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp
index 906ef36c8f..f07ea13a03 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp
@@ -29,7 +29,7 @@
#pragma once
-#if defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#if defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
namespace arm_conv {
namespace depthwise {
@@ -72,4 +72,4 @@ struct sve_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#endif // defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp
index 6c321efa29..29582da0f6 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp
@@ -27,7 +27,7 @@
#include <cstddef>
#include <cstdint>
-#if defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#if defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
namespace arm_conv {
namespace depthwise {
@@ -657,4 +657,4 @@ void sve_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst_impl(
} // namespace depthwise
} // namespace arm_conv
-#endif // defined(__aarch64__) && defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#endif // defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst.hpp
index 8c7a497376..0167d78eb7 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst.hpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst.hpp
@@ -24,7 +24,7 @@
#pragma once
-#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS)
+#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS)
namespace arm_conv {
namespace pooling {
@@ -57,4 +57,4 @@ struct sve_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst
} // namespace pooling
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS)
diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp
index 3c1858633b..a1a530b94e 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp
@@ -26,7 +26,7 @@
#include <cstddef>
#include <cstdint>
-#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS)
+#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS)
namespace arm_conv {
namespace pooling {
@@ -204,4 +204,4 @@ void sve_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst_impl(
} // namespace pooling
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS)
diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_avg_generic_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_avg_generic_depthfirst.hpp
index 391d47cf41..02f2ce87a9 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_avg_generic_depthfirst.hpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_avg_generic_depthfirst.hpp
@@ -26,7 +26,7 @@
#pragma once
-#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS)
+#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS)
namespace arm_conv {
namespace pooling {
@@ -51,4 +51,4 @@ struct sve_fp16_nhwc_avg_generic_depthfirst
} // namespace pooling
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS)
diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_avg_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_avg_generic_depthfirst/generic.cpp
index 84a6acf80d..310df11e68 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_avg_generic_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_avg_generic_depthfirst/generic.cpp
@@ -24,7 +24,7 @@
#include <cstdint>
-#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS)
+#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS)
namespace arm_conv {
namespace pooling {
@@ -228,4 +228,4 @@ void sve_fp16_nhwc_avg_generic_depthfirst_impl(
} // namespace pooling
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS)
diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_max_2x2_s1_output2x2_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_max_2x2_s1_output2x2_depthfirst.hpp
index 5fb297eb49..5e4327d6b7 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_max_2x2_s1_output2x2_depthfirst.hpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_max_2x2_s1_output2x2_depthfirst.hpp
@@ -24,7 +24,7 @@
#pragma once
-#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS)
+#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS)
namespace arm_conv {
namespace pooling {
@@ -57,4 +57,4 @@ struct sve_fp16_nhwc_max_2x2_s1_output2x2_depthfirst
} // namespace pooling
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS)
diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp
index f6e23215b8..9abd0f5c1c 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp
@@ -26,7 +26,7 @@
#include <cstddef>
#include <cstdint>
-#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS)
+#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS)
namespace arm_conv {
namespace pooling {
@@ -143,4 +143,4 @@ void sve_fp16_nhwc_max_2x2_s1_output2x2_depthfirst_impl(
} // namespace pooling
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS)
diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_max_generic_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_max_generic_depthfirst.hpp
index 1c17c27619..44cdea31da 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_max_generic_depthfirst.hpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_max_generic_depthfirst.hpp
@@ -26,7 +26,7 @@
#pragma once
-#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS)
+#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS)
namespace arm_conv {
namespace pooling {
@@ -51,4 +51,4 @@ struct sve_fp16_nhwc_max_generic_depthfirst
} // namespace pooling
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS)
diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_max_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_max_generic_depthfirst/generic.cpp
index 58ab915605..fae1f014e7 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_max_generic_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_max_generic_depthfirst/generic.cpp
@@ -24,7 +24,7 @@
#include <cstdint>
-#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS)
+#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS)
namespace arm_conv {
namespace pooling {
@@ -220,4 +220,4 @@ void sve_fp16_nhwc_max_generic_depthfirst_impl(
} // namespace pooling
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE) && defined(__ARM_FP16_ARGS)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS)
diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst.hpp
index 9cbdb8a58d..55d2a47655 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst.hpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst.hpp
@@ -24,7 +24,7 @@
#pragma once
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
namespace arm_conv {
namespace pooling {
@@ -57,4 +57,4 @@ struct sve_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst
} // namespace pooling
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp
index 50f5da4c3d..6cad63ee88 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp
@@ -26,7 +26,7 @@
#include <cstddef>
#include <cstdint>
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
namespace arm_conv {
namespace pooling {
@@ -204,4 +204,4 @@ void sve_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst_impl(
} // namespace pooling
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_avg_generic_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_avg_generic_depthfirst.hpp
index 0daa046a02..0fcdcb23dc 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_avg_generic_depthfirst.hpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_avg_generic_depthfirst.hpp
@@ -26,7 +26,7 @@
#pragma once
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
namespace arm_conv {
namespace pooling {
@@ -51,4 +51,4 @@ struct sve_fp32_nhwc_avg_generic_depthfirst
} // namespace pooling
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_avg_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_avg_generic_depthfirst/generic.cpp
index c2f5745adc..3e02570a4f 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_avg_generic_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_avg_generic_depthfirst/generic.cpp
@@ -24,7 +24,7 @@
#include <cstdint>
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
namespace arm_conv {
namespace pooling {
@@ -228,4 +228,4 @@ void sve_fp32_nhwc_avg_generic_depthfirst_impl(
} // namespace pooling
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_max_2x2_s1_output2x2_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_max_2x2_s1_output2x2_depthfirst.hpp
index 086f49e957..b2c6912565 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_max_2x2_s1_output2x2_depthfirst.hpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_max_2x2_s1_output2x2_depthfirst.hpp
@@ -24,7 +24,7 @@
#pragma once
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
namespace arm_conv {
namespace pooling {
@@ -57,4 +57,4 @@ struct sve_fp32_nhwc_max_2x2_s1_output2x2_depthfirst
} // namespace pooling
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp
index 250cc24226..786e477050 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp
@@ -26,7 +26,7 @@
#include <cstddef>
#include <cstdint>
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
namespace arm_conv {
namespace pooling {
@@ -143,4 +143,4 @@ void sve_fp32_nhwc_max_2x2_s1_output2x2_depthfirst_impl(
} // namespace pooling
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_max_generic_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_max_generic_depthfirst.hpp
index 17e3e5f0ba..5f65b7f340 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_max_generic_depthfirst.hpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_max_generic_depthfirst.hpp
@@ -26,7 +26,7 @@
#pragma once
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
namespace arm_conv {
namespace pooling {
@@ -51,4 +51,4 @@ struct sve_fp32_nhwc_max_generic_depthfirst
} // namespace pooling
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_max_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_max_generic_depthfirst/generic.cpp
index 8166379ce4..a2f4398465 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_max_generic_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_max_generic_depthfirst/generic.cpp
@@ -24,7 +24,7 @@
#include <cstdint>
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
namespace arm_conv {
namespace pooling {
@@ -220,4 +220,4 @@ void sve_fp32_nhwc_max_generic_depthfirst_impl(
} // namespace pooling
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_avg_generic_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_avg_generic_depthfirst.hpp
index 2ae38b5b2f..06582fe5ce 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_avg_generic_depthfirst.hpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_avg_generic_depthfirst.hpp
@@ -26,7 +26,7 @@
#pragma once
-#if defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
namespace arm_conv {
namespace pooling {
@@ -51,4 +51,4 @@ struct sve_s8_nhwc_avg_generic_depthfirst
} // namespace pooling
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_avg_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_avg_generic_depthfirst/generic.cpp
index 2ea5b90561..3581095e8b 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_avg_generic_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_avg_generic_depthfirst/generic.cpp
@@ -27,7 +27,7 @@
#include <cmath>
-#if defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
namespace arm_conv {
namespace pooling {
@@ -413,4 +413,4 @@ void sve_s8_nhwc_avg_generic_depthfirst_impl(
} // namespace pooling
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_max_2x2_s1_output2x2_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_max_2x2_s1_output2x2_depthfirst.hpp
index 071e79c93d..46132f2864 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_max_2x2_s1_output2x2_depthfirst.hpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_max_2x2_s1_output2x2_depthfirst.hpp
@@ -24,7 +24,7 @@
#pragma once
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
namespace arm_conv {
namespace pooling {
@@ -57,4 +57,4 @@ struct sve_s8_nhwc_max_2x2_s1_output2x2_depthfirst
} // namespace pooling
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp
index bdf3f53292..beabe7b099 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp
@@ -26,7 +26,7 @@
#include <cstddef>
#include <cstdint>
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
namespace arm_conv {
namespace pooling {
@@ -143,4 +143,4 @@ void sve_s8_nhwc_max_2x2_s1_output2x2_depthfirst_impl(
} // namespace pooling
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_max_generic_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_max_generic_depthfirst.hpp
index 428902ad61..168cbf53c1 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_max_generic_depthfirst.hpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_max_generic_depthfirst.hpp
@@ -26,7 +26,7 @@
#pragma once
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
namespace arm_conv {
namespace pooling {
@@ -51,4 +51,4 @@ struct sve_s8_nhwc_max_generic_depthfirst
} // namespace pooling
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_max_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_max_generic_depthfirst/generic.cpp
index 3e88c8729c..11195f59ed 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_max_generic_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_max_generic_depthfirst/generic.cpp
@@ -24,7 +24,7 @@
#include <cstdint>
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
namespace arm_conv {
namespace pooling {
@@ -220,4 +220,4 @@ void sve_s8_nhwc_max_generic_depthfirst_impl(
} // namespace pooling
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8q_nhwc_avg_generic_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8q_nhwc_avg_generic_depthfirst.hpp
index 1242eaf530..637940e957 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8q_nhwc_avg_generic_depthfirst.hpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8q_nhwc_avg_generic_depthfirst.hpp
@@ -26,7 +26,7 @@
#pragma once
-#if defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
namespace arm_conv {
namespace pooling {
@@ -51,4 +51,4 @@ struct sve_s8q_nhwc_avg_generic_depthfirst
} // namespace pooling
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8q_nhwc_avg_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8q_nhwc_avg_generic_depthfirst/generic.cpp
index 928eb412b5..75be96e283 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8q_nhwc_avg_generic_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8q_nhwc_avg_generic_depthfirst/generic.cpp
@@ -28,7 +28,7 @@
#include <cmath>
-#if defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
namespace arm_conv {
namespace pooling {
@@ -454,4 +454,4 @@ void sve_s8q_nhwc_avg_generic_depthfirst_impl(
} // namespace pooling
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8q_nhwc_max_generic_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8q_nhwc_max_generic_depthfirst.hpp
index 84aa0d3d6b..5aced30e52 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8q_nhwc_max_generic_depthfirst.hpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8q_nhwc_max_generic_depthfirst.hpp
@@ -26,7 +26,7 @@
#pragma once
-#if defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
namespace arm_conv {
namespace pooling {
@@ -51,4 +51,4 @@ struct sve_s8q_nhwc_max_generic_depthfirst
} // namespace pooling
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8q_nhwc_max_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8q_nhwc_max_generic_depthfirst/generic.cpp
index 3717f8cb30..7f00d46d9d 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8q_nhwc_max_generic_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8q_nhwc_max_generic_depthfirst/generic.cpp
@@ -25,7 +25,7 @@
#include "pooling.hpp"
#include <cstdint>
-#if defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
namespace arm_conv {
namespace pooling {
@@ -383,4 +383,4 @@ void sve_s8q_nhwc_max_generic_depthfirst_impl(
} // namespace pooling
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_avg_generic_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_avg_generic_depthfirst.hpp
index 299e55c9be..a2bfec746b 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_avg_generic_depthfirst.hpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_avg_generic_depthfirst.hpp
@@ -26,7 +26,7 @@
#pragma once
-#if defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
namespace arm_conv {
namespace pooling {
@@ -51,4 +51,4 @@ struct sve_u8_nhwc_avg_generic_depthfirst
} // namespace pooling
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_avg_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_avg_generic_depthfirst/generic.cpp
index 51a69a42be..4c72461dd7 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_avg_generic_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_avg_generic_depthfirst/generic.cpp
@@ -27,7 +27,7 @@
#include <cmath>
-#if defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
namespace arm_conv {
namespace pooling {
@@ -413,4 +413,4 @@ void sve_u8_nhwc_avg_generic_depthfirst_impl(
} // namespace pooling
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_max_2x2_s1_output2x2_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_max_2x2_s1_output2x2_depthfirst.hpp
index 06df1515ad..11f485ceea 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_max_2x2_s1_output2x2_depthfirst.hpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_max_2x2_s1_output2x2_depthfirst.hpp
@@ -24,7 +24,7 @@
#pragma once
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
namespace arm_conv {
namespace pooling {
@@ -57,4 +57,4 @@ struct sve_u8_nhwc_max_2x2_s1_output2x2_depthfirst
} // namespace pooling
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp
index e921f345d5..92779d0d99 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp
@@ -26,7 +26,7 @@
#include <cstddef>
#include <cstdint>
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
namespace arm_conv {
namespace pooling {
@@ -143,4 +143,4 @@ void sve_u8_nhwc_max_2x2_s1_output2x2_depthfirst_impl(
} // namespace pooling
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_max_generic_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_max_generic_depthfirst.hpp
index 59cd4b9c78..92be064053 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_max_generic_depthfirst.hpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_max_generic_depthfirst.hpp
@@ -26,7 +26,7 @@
#pragma once
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
namespace arm_conv {
namespace pooling {
@@ -51,4 +51,4 @@ struct sve_u8_nhwc_max_generic_depthfirst
} // namespace pooling
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_max_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_max_generic_depthfirst/generic.cpp
index 164847480b..de81d1c54c 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_max_generic_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_max_generic_depthfirst/generic.cpp
@@ -24,7 +24,7 @@
#include <cstdint>
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
namespace arm_conv {
namespace pooling {
@@ -220,4 +220,4 @@ void sve_u8_nhwc_max_generic_depthfirst_impl(
} // namespace pooling
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_avg_generic_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_avg_generic_depthfirst.hpp
index f6fc1a58c1..91a9925e14 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_avg_generic_depthfirst.hpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_avg_generic_depthfirst.hpp
@@ -26,7 +26,7 @@
#pragma once
-#if defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
namespace arm_conv {
namespace pooling {
@@ -51,4 +51,4 @@ struct sve_u8q_nhwc_avg_generic_depthfirst
} // namespace pooling
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_avg_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_avg_generic_depthfirst/generic.cpp
index 373848ad2b..abf911c9d3 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_avg_generic_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_avg_generic_depthfirst/generic.cpp
@@ -28,7 +28,7 @@
#include <cmath>
-#if defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
namespace arm_conv {
namespace pooling {
@@ -483,4 +483,4 @@ void sve_u8q_nhwc_avg_generic_depthfirst_impl(
} // namespace pooling
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_max_generic_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_max_generic_depthfirst.hpp
index c3c0edd0d5..0d04ae5978 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_max_generic_depthfirst.hpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_max_generic_depthfirst.hpp
@@ -26,7 +26,7 @@
#pragma once
-#if defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
namespace arm_conv {
namespace pooling {
@@ -51,4 +51,4 @@ struct sve_u8q_nhwc_max_generic_depthfirst
} // namespace pooling
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_max_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_max_generic_depthfirst/generic.cpp
index c1c1d29613..b632af9118 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_max_generic_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_max_generic_depthfirst/generic.cpp
@@ -25,7 +25,7 @@
#include "pooling.hpp"
#include <cstdint>
-#if defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
namespace arm_conv {
namespace pooling {
@@ -413,4 +413,4 @@ void sve_u8q_nhwc_max_generic_depthfirst_impl(
} // namespace pooling
} // namespace arm_conv
-#endif // defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
diff --git a/src/core/NEON/kernels/arm_conv/pooling/pooling_fp16.cpp b/src/core/NEON/kernels/arm_conv/pooling/pooling_fp16.cpp
index 094c6aa301..42f23a158e 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/pooling_fp16.cpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/pooling_fp16.cpp
@@ -33,12 +33,12 @@
#include "kernels/cpp_nhwc_1x1_stride_any_depthfirst.hpp"
#if defined(__aarch64__)
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
#include "kernels/sve_fp16_nhwc_max_2x2_s1_output2x2_depthfirst.hpp"
#include "kernels/sve_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst.hpp"
#include "kernels/sve_fp16_nhwc_avg_generic_depthfirst.hpp"
#include "kernels/sve_fp16_nhwc_max_generic_depthfirst.hpp"
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
#include "kernels/a64_fp16_nhwc_max_2x2_s1_output2x2_depthfirst.hpp"
#include "kernels/a64_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst.hpp"
#include "kernels/a64_fp16_nhwc_avg_generic_depthfirst.hpp"
@@ -74,11 +74,13 @@ static const PoolingImplementation<__fp16, __fp16> pooling_fp16_methods[] = {
},
},
#if defined(__aarch64__)
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
{
PoolingMethod::DEPTHFIRST,
"sve_fp16_nhwc_max_2x2_s1_output2x2_depthfirst",
- is_supported<sve_fp16_nhwc_max_2x2_s1_output2x2_depthfirst>,
+ [] (const PoolingArgs &args, const Nothing &unused) -> bool {
+ return args.cpu_info->has_sve() && is_supported<sve_fp16_nhwc_max_2x2_s1_output2x2_depthfirst>(args, unused);
+ },
nullptr,
[] (const PoolingArgs &args, const Nothing &) -> PoolingCommon<__fp16, __fp16> * {
return new PoolingDepthfirst<sve_fp16_nhwc_max_2x2_s1_output2x2_depthfirst>(args);
@@ -87,7 +89,9 @@ static const PoolingImplementation<__fp16, __fp16> pooling_fp16_methods[] = {
{
PoolingMethod::DEPTHFIRST,
"sve_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst",
- is_supported<sve_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst>,
+ [] (const PoolingArgs &args, const Nothing &unused) -> bool {
+ return args.cpu_info->has_sve() && is_supported<sve_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst>(args, unused);
+ },
nullptr,
[] (const PoolingArgs &args, const Nothing &) -> PoolingCommon<__fp16, __fp16> * {
return new PoolingDepthfirst<sve_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst>(args);
@@ -96,7 +100,7 @@ static const PoolingImplementation<__fp16, __fp16> pooling_fp16_methods[] = {
{
PoolingMethod::DEPTHFIRST,
"sve_fp16_nhwc_avg_generic_depthfirst",
- [] (const PoolingArgs &args, const Nothing &) -> bool { return args.pool_type == PoolingType::AVERAGE; },
+ [] (const PoolingArgs &args, const Nothing &) -> bool { return args.cpu_info->has_sve() && args.pool_type == PoolingType::AVERAGE; },
nullptr,
[] (const PoolingArgs &args, const Nothing &) -> PoolingCommon<__fp16, __fp16> * {
return new PoolingDepthfirstGeneric<sve_fp16_nhwc_avg_generic_depthfirst>(args);
@@ -105,18 +109,20 @@ static const PoolingImplementation<__fp16, __fp16> pooling_fp16_methods[] = {
{
PoolingMethod::DEPTHFIRST,
"sve_fp16_nhwc_max_generic_depthfirst",
- [] (const PoolingArgs &args, const Nothing &) -> bool { return args.pool_type == PoolingType::MAX; },
+ [] (const PoolingArgs &args, const Nothing &) -> bool { return args.cpu_info->has_sve() && args.pool_type == PoolingType::MAX; },
nullptr,
[] (const PoolingArgs &args, const Nothing &) -> PoolingCommon<__fp16, __fp16> * {
return new PoolingDepthfirstGeneric<sve_fp16_nhwc_max_generic_depthfirst>(args);
},
},
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
{
PoolingMethod::DEPTHFIRST,
"a64_fp16_nhwc_max_2x2_s1_output2x2_depthfirst",
- is_supported<a64_fp16_nhwc_max_2x2_s1_output2x2_depthfirst>,
+ [] (const PoolingArgs &args, const Nothing &unused) -> bool {
+ return args.cpu_info->has_fp16() && is_supported<a64_fp16_nhwc_max_2x2_s1_output2x2_depthfirst>(args, unused);
+ },
nullptr,
[] (const PoolingArgs &args, const Nothing &) -> PoolingCommon<__fp16, __fp16> * {
return new PoolingDepthfirst<a64_fp16_nhwc_max_2x2_s1_output2x2_depthfirst>(args);
@@ -125,7 +131,9 @@ static const PoolingImplementation<__fp16, __fp16> pooling_fp16_methods[] = {
{
PoolingMethod::DEPTHFIRST,
"a64_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst",
- is_supported<a64_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst>,
+ [] (const PoolingArgs &args, const Nothing &unused) -> bool {
+ return args.cpu_info->has_fp16() && is_supported<a64_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst>(args, unused);
+ },
nullptr,
[] (const PoolingArgs &args, const Nothing &) -> PoolingCommon<__fp16, __fp16> * {
return new PoolingDepthfirst<a64_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst>(args);
@@ -134,7 +142,7 @@ static const PoolingImplementation<__fp16, __fp16> pooling_fp16_methods[] = {
{
PoolingMethod::DEPTHFIRST,
"a64_fp16_nhwc_avg_generic_depthfirst",
- [] (const PoolingArgs &args, const Nothing &) -> bool { return args.pool_type == PoolingType::AVERAGE; },
+ [] (const PoolingArgs &args, const Nothing &) -> bool { return args.cpu_info->has_fp16() && args.pool_type == PoolingType::AVERAGE; },
nullptr,
[] (const PoolingArgs &args, const Nothing &) -> PoolingCommon<__fp16, __fp16> * {
return new PoolingDepthfirstGeneric<a64_fp16_nhwc_avg_generic_depthfirst>(args);
@@ -143,7 +151,7 @@ static const PoolingImplementation<__fp16, __fp16> pooling_fp16_methods[] = {
{
PoolingMethod::DEPTHFIRST,
"a64_fp16_nhwc_max_generic_depthfirst",
- [] (const PoolingArgs &args, const Nothing &) -> bool { return args.pool_type == PoolingType::MAX; },
+ [] (const PoolingArgs &args, const Nothing &) -> bool { return args.cpu_info->has_fp16() && args.pool_type == PoolingType::MAX; },
nullptr,
[] (const PoolingArgs &args, const Nothing &) -> PoolingCommon<__fp16, __fp16> * {
return new PoolingDepthfirstGeneric<a64_fp16_nhwc_max_generic_depthfirst>(args);
diff --git a/src/core/NEON/kernels/arm_conv/pooling/pooling_fp32.cpp b/src/core/NEON/kernels/arm_conv/pooling/pooling_fp32.cpp
index 002115d78c..1905e1e9d6 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/pooling_fp32.cpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/pooling_fp32.cpp
@@ -30,12 +30,12 @@
#include "kernels/cpp_nhwc_1x1_stride_any_depthfirst.hpp"
#if defined(__aarch64__)
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
#include "kernels/sve_fp32_nhwc_max_2x2_s1_output2x2_depthfirst.hpp"
#include "kernels/sve_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst.hpp"
#include "kernels/sve_fp32_nhwc_avg_generic_depthfirst.hpp"
#include "kernels/sve_fp32_nhwc_max_generic_depthfirst.hpp"
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
#include "kernels/a64_fp32_nhwc_max_2x2_s1_output2x2_depthfirst.hpp"
#include "kernels/a64_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst.hpp"
#include "kernels/a64_fp32_nhwc_avg_generic_depthfirst.hpp"
@@ -71,11 +71,13 @@ static const PoolingImplementation<float, float> pooling_fp32_methods[] = {
},
},
#if defined(__aarch64__)
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
{
PoolingMethod::DEPTHFIRST,
"sve_fp32_nhwc_max_2x2_s1_output2x2_depthfirst",
- is_supported<sve_fp32_nhwc_max_2x2_s1_output2x2_depthfirst>,
+ [] (const PoolingArgs &args, const Nothing &unused) -> bool {
+ return args.cpu_info->has_sve() && is_supported<sve_fp32_nhwc_max_2x2_s1_output2x2_depthfirst>(args, unused);
+ },
nullptr,
[] (const PoolingArgs &args, const Nothing &) -> PoolingCommon<float, float> * {
return new PoolingDepthfirst<sve_fp32_nhwc_max_2x2_s1_output2x2_depthfirst>(args);
@@ -84,7 +86,9 @@ static const PoolingImplementation<float, float> pooling_fp32_methods[] = {
{
PoolingMethod::DEPTHFIRST,
"sve_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst",
- is_supported<sve_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst>,
+ [] (const PoolingArgs &args, const Nothing &unused) -> bool {
+ return args.cpu_info->has_sve() && is_supported<sve_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst>(args, unused);
+ },
nullptr,
[] (const PoolingArgs &args, const Nothing &) -> PoolingCommon<float, float> * {
return new PoolingDepthfirst<sve_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst>(args);
@@ -93,7 +97,9 @@ static const PoolingImplementation<float, float> pooling_fp32_methods[] = {
{
PoolingMethod::DEPTHFIRST,
"sve_fp32_nhwc_avg_generic_depthfirst",
- [] (const PoolingArgs &args, const Nothing &) -> bool { return args.pool_type == PoolingType::AVERAGE; },
+ [] (const PoolingArgs &args, const Nothing &) -> bool {
+ return args.cpu_info->has_sve() && args.pool_type == PoolingType::AVERAGE;
+ },
nullptr,
[] (const PoolingArgs &args, const Nothing &) -> PoolingCommon<float, float> * {
return new PoolingDepthfirstGeneric<sve_fp32_nhwc_avg_generic_depthfirst>(args);
@@ -102,13 +108,15 @@ static const PoolingImplementation<float, float> pooling_fp32_methods[] = {
{
PoolingMethod::DEPTHFIRST,
"sve_fp32_nhwc_max_generic_depthfirst",
- [] (const PoolingArgs &args, const Nothing &) -> bool { return args.pool_type == PoolingType::MAX; },
+ [] (const PoolingArgs &args, const Nothing &) -> bool {
+ return args.cpu_info->has_sve() && args.pool_type == PoolingType::MAX;
+ },
nullptr,
[] (const PoolingArgs &args, const Nothing &) -> PoolingCommon<float, float> * {
return new PoolingDepthfirstGeneric<sve_fp32_nhwc_max_generic_depthfirst>(args);
},
},
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
{
PoolingMethod::DEPTHFIRST,
"a64_fp32_nhwc_max_2x2_s1_output2x2_depthfirst",
diff --git a/src/core/NEON/kernels/arm_conv/pooling/pooling_s8.cpp b/src/core/NEON/kernels/arm_conv/pooling/pooling_s8.cpp
index 490fc0d863..1cad674e6e 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/pooling_s8.cpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/pooling_s8.cpp
@@ -30,13 +30,13 @@
#include "kernels/cpp_nhwc_1x1_stride_any_depthfirst.hpp"
#if defined(__aarch64__)
-#if defined(__ARM_FEATURE_SVE)
-#if defined(SVE2)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE2)
#include "kernels/sve_s8_nhwc_avg_generic_depthfirst.hpp"
-#endif // defined(SVE2)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE2)
#include "kernels/sve_s8_nhwc_max_2x2_s1_output2x2_depthfirst.hpp"
#include "kernels/sve_s8_nhwc_max_generic_depthfirst.hpp"
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
#include "kernels/a64_s8_nhwc_max_2x2_s1_output2x2_depthfirst.hpp"
#include "kernels/a64_s8_nhwc_avg_generic_depthfirst.hpp"
#include "kernels/a64_s8_nhwc_max_generic_depthfirst.hpp"
@@ -73,22 +73,24 @@ static const PoolingImplementation<int8_t, int8_t> pooling_s8_methods[] = {
},
},
#if defined(__aarch64__)
-#if defined(__ARM_FEATURE_SVE)
-#if defined(SVE2)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE2)
{
PoolingMethod::DEPTHFIRST,
"sve_s8_nhwc_avg_generic_depthfirst",
- [] (const PoolingArgs &args, const Nothing &) -> bool { return args.pool_type == PoolingType::AVERAGE; },
+ [] (const PoolingArgs &args, const Nothing &) -> bool { return args.cpu_info->has_sve2() && args.pool_type == PoolingType::AVERAGE; },
nullptr,
[] (const PoolingArgs &args, const Nothing &) -> PoolingCommon<int8_t, int8_t> * {
return new PoolingDepthfirstGeneric<sve_s8_nhwc_avg_generic_depthfirst>(args);
},
},
-#endif // defined(SVE2)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE2)
{
PoolingMethod::DEPTHFIRST,
"sve_s8_nhwc_max_2x2_s1_output2x2_depthfirst",
- is_supported<sve_s8_nhwc_max_2x2_s1_output2x2_depthfirst>,
+ [] (const PoolingArgs &args, const Nothing &unused) -> bool {
+ return args.cpu_info->has_sve() && is_supported<sve_s8_nhwc_max_2x2_s1_output2x2_depthfirst>(args, unused);
+ },
nullptr,
[] (const PoolingArgs &args, const Nothing &) -> PoolingCommon<int8_t, int8_t> * {
return new PoolingDepthfirst<sve_s8_nhwc_max_2x2_s1_output2x2_depthfirst>(args);
@@ -97,13 +99,13 @@ static const PoolingImplementation<int8_t, int8_t> pooling_s8_methods[] = {
{
PoolingMethod::DEPTHFIRST,
"sve_s8_nhwc_max_generic_depthfirst",
- [] (const PoolingArgs &args, const Nothing &) -> bool { return args.pool_type == PoolingType::MAX; },
+ [] (const PoolingArgs &args, const Nothing &) -> bool { return args.cpu_info->has_sve() && args.pool_type == PoolingType::MAX; },
nullptr,
[] (const PoolingArgs &args, const Nothing &) -> PoolingCommon<int8_t, int8_t> * {
return new PoolingDepthfirstGeneric<sve_s8_nhwc_max_generic_depthfirst>(args);
},
},
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
{
PoolingMethod::DEPTHFIRST,
"a64_s8_nhwc_max_2x2_s1_output2x2_depthfirst",
diff --git a/src/core/NEON/kernels/arm_conv/pooling/pooling_s8q.cpp b/src/core/NEON/kernels/arm_conv/pooling/pooling_s8q.cpp
index fd4e045035..bfc4dc0f15 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/pooling_s8q.cpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/pooling_s8q.cpp
@@ -28,10 +28,10 @@
#include "pooling_depthfirst_generic_quantized.hpp"
#if defined(__aarch64__)
-#if defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
#include "kernels/sve_s8q_nhwc_avg_generic_depthfirst.hpp"
#include "kernels/sve_s8q_nhwc_max_generic_depthfirst.hpp"
-#endif // defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
#include "kernels/a64_s8q_nhwc_avg_generic_depthfirst.hpp"
#include "kernels/a64_s8q_nhwc_max_generic_depthfirst.hpp"
#endif // defined(__aarch64__)
@@ -43,12 +43,12 @@ namespace pooling {
static const PoolingImplementation<int8_t, int8_t, Requantize32> pooling_u8_methods[] = {
#if defined(__aarch64__)
-#if defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
{
PoolingMethod::DEPTHFIRST,
"sve_s8q_nhwc_avg_generic_depthfirst",
[] (const PoolingArgs &args, const Requantize32 &) -> bool {
- return args.pool_type == PoolingType::AVERAGE;
+ return args.cpu_info->has_sve2() && args.pool_type == PoolingType::AVERAGE;
},
nullptr,
[] (const PoolingArgs &args, const Requantize32 &rq) -> PoolingCommon<int8_t, int8_t, Requantize32> * {
@@ -58,13 +58,13 @@ static const PoolingImplementation<int8_t, int8_t, Requantize32> pooling_u8_meth
{
PoolingMethod::DEPTHFIRST,
"sve_s8q_nhwc_max_generic_depthfirst",
- [] (const PoolingArgs &args, const Requantize32 &) -> bool { return args.pool_type == PoolingType::MAX; },
+ [] (const PoolingArgs &args, const Requantize32 &) -> bool { return args.cpu_info->has_sve2() && args.pool_type == PoolingType::MAX; },
nullptr,
[] (const PoolingArgs &args, const Requantize32 &rq) -> PoolingCommon<int8_t, int8_t, Requantize32> * {
return new PoolingDepthfirstGenericQuantized<sve_s8q_nhwc_max_generic_depthfirst>(args, rq);
},
},
-#endif // defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
{
PoolingMethod::DEPTHFIRST,
"a64_s8q_nhwc_avg_generic_depthfirst",
diff --git a/src/core/NEON/kernels/arm_conv/pooling/pooling_u8.cpp b/src/core/NEON/kernels/arm_conv/pooling/pooling_u8.cpp
index 052354922e..f6ea98002c 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/pooling_u8.cpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/pooling_u8.cpp
@@ -30,13 +30,13 @@
#include "kernels/cpp_nhwc_1x1_stride_any_depthfirst.hpp"
#if defined(__aarch64__)
-#if defined(__ARM_FEATURE_SVE)
-#if defined(SVE2)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE2)
#include "kernels/sve_u8_nhwc_avg_generic_depthfirst.hpp"
-#endif // defined(SVE2)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE2)
#include "kernels/sve_u8_nhwc_max_2x2_s1_output2x2_depthfirst.hpp"
#include "kernels/sve_u8_nhwc_max_generic_depthfirst.hpp"
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
#include "kernels/a64_u8_nhwc_max_2x2_s1_output2x2_depthfirst.hpp"
#include "kernels/a64_u8_nhwc_avg_generic_depthfirst.hpp"
#include "kernels/a64_u8_nhwc_max_generic_depthfirst.hpp"
@@ -73,8 +73,8 @@ static const PoolingImplementation<uint8_t, uint8_t> pooling_u8_methods[] = {
},
},
#if defined(__aarch64__)
-#if defined(__ARM_FEATURE_SVE)
-#if defined(SVE2)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE2)
{
PoolingMethod::DEPTHFIRST,
"sve_u8_nhwc_avg_generic_depthfirst",
@@ -82,7 +82,7 @@ static const PoolingImplementation<uint8_t, uint8_t> pooling_u8_methods[] = {
// This kernel can only be used when there is either no padding, or we don't care
// about the value of the padding. Otherwise, we would need to pass in the zero-point
// for the quantization regime.
- return (args.exclude_padding ||
+ return args.cpu_info->has_sve2() && (args.exclude_padding ||
(args.padding.top == 0 && args.padding.bottom == 0 &&
args.padding.left == 0 && args.padding.right == 0)
) && args.pool_type == PoolingType::AVERAGE;
@@ -92,11 +92,13 @@ static const PoolingImplementation<uint8_t, uint8_t> pooling_u8_methods[] = {
return new PoolingDepthfirstGeneric<sve_u8_nhwc_avg_generic_depthfirst>(args);
},
},
-#endif // defined(SVE2)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE2)
{
PoolingMethod::DEPTHFIRST,
"sve_u8_nhwc_max_2x2_s1_output2x2_depthfirst",
- is_supported<sve_u8_nhwc_max_2x2_s1_output2x2_depthfirst>,
+ [] (const PoolingArgs &args, const Nothing &unused) -> bool {
+ return args.cpu_info->has_sve() && is_supported<sve_u8_nhwc_max_2x2_s1_output2x2_depthfirst>(args, unused);
+ },
nullptr,
[] (const PoolingArgs &args, const Nothing &) -> PoolingCommon<uint8_t, uint8_t> * {
return new PoolingDepthfirst<sve_u8_nhwc_max_2x2_s1_output2x2_depthfirst>(args);
@@ -105,13 +107,13 @@ static const PoolingImplementation<uint8_t, uint8_t> pooling_u8_methods[] = {
{
PoolingMethod::DEPTHFIRST,
"sve_u8_nhwc_max_generic_depthfirst",
- [] (const PoolingArgs &args, const Nothing &) -> bool { return args.pool_type == PoolingType::MAX; },
+ [] (const PoolingArgs &args, const Nothing &) -> bool { return args.cpu_info->has_sve() && args.pool_type == PoolingType::MAX; },
nullptr,
[] (const PoolingArgs &args, const Nothing &) -> PoolingCommon<uint8_t, uint8_t> * {
return new PoolingDepthfirstGeneric<sve_u8_nhwc_max_generic_depthfirst>(args);
},
},
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
{
PoolingMethod::DEPTHFIRST,
"a64_u8_nhwc_max_2x2_s1_output2x2_depthfirst",
diff --git a/src/core/NEON/kernels/arm_conv/pooling/pooling_u8q.cpp b/src/core/NEON/kernels/arm_conv/pooling/pooling_u8q.cpp
index 41303fb418..647e319c82 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/pooling_u8q.cpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/pooling_u8q.cpp
@@ -28,10 +28,10 @@
#include "pooling_depthfirst_generic_quantized.hpp"
#if defined(__aarch64__)
-#if defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
#include "kernels/sve_u8q_nhwc_avg_generic_depthfirst.hpp"
#include "kernels/sve_u8q_nhwc_max_generic_depthfirst.hpp"
-#endif // defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
#include "kernels/a64_u8q_nhwc_avg_generic_depthfirst.hpp"
#include "kernels/a64_u8q_nhwc_max_generic_depthfirst.hpp"
#endif // defined(__aarch64__)
@@ -43,12 +43,12 @@ namespace pooling {
static const PoolingImplementation<uint8_t, uint8_t, Requantize32> pooling_u8_methods[] = {
#if defined(__aarch64__)
-#if defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
{
PoolingMethod::DEPTHFIRST,
"sve_u8q_nhwc_avg_generic_depthfirst",
[] (const PoolingArgs &args, const Requantize32 &) -> bool {
- return args.pool_type == PoolingType::AVERAGE;
+ return args.cpu_info->has_sve2() && args.pool_type == PoolingType::AVERAGE;
},
nullptr,
[] (const PoolingArgs &args, const Requantize32 &rq) -> PoolingCommon<uint8_t, uint8_t, Requantize32> * {
@@ -58,13 +58,13 @@ static const PoolingImplementation<uint8_t, uint8_t, Requantize32> pooling_u8_me
{
PoolingMethod::DEPTHFIRST,
"sve_u8q_nhwc_max_generic_depthfirst",
- [] (const PoolingArgs &args, const Requantize32 &) -> bool { return args.pool_type == PoolingType::MAX; },
+ [] (const PoolingArgs &args, const Requantize32 &) -> bool { return args.cpu_info->has_sve2() && args.pool_type == PoolingType::MAX; },
nullptr,
[] (const PoolingArgs &args, const Requantize32 &rq) -> PoolingCommon<uint8_t, uint8_t, Requantize32> * {
return new PoolingDepthfirstGenericQuantized<sve_u8q_nhwc_max_generic_depthfirst>(args, rq);
},
},
-#endif // defined(__ARM_FEATURE_SVE) && defined(SVE2)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
{
PoolingMethod::DEPTHFIRST,
"a64_u8q_nhwc_avg_generic_depthfirst",
diff --git a/src/core/NEON/kernels/arm_gemm/gemm_bf16.cpp b/src/core/NEON/kernels/arm_gemm/gemm_bf16.cpp
index d8134c4bb5..8244523696 100644
--- a/src/core/NEON/kernels/arm_gemm/gemm_bf16.cpp
+++ b/src/core/NEON/kernels/arm_gemm/gemm_bf16.cpp
@@ -44,26 +44,26 @@ namespace arm_gemm {
static const GemmImplementation<bfloat16, float> gemm_bf16_methods[] =
{
-#ifdef V8P6_BF
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_BF16
+#ifdef ARM_COMPUTE_ENABLE_SVE
{ // gemm_bf16_interleaved
GemmMethod::GEMM_INTERLEAVED,
"sve_interleaved_bf16fp32_mmla_8x3VL",
- [](const GemmArgs &args) { return args._ci->has_sve() && (args._Ksize>4); },
+ [](const GemmArgs &args) { return args._ci->has_svebf16() && (args._Ksize>4); },
[](const GemmArgs &args) { return args._ci->get_cpu_model() != CPUModel::KLEIN; },
[](const GemmArgs &args) { return new GemmInterleaved<cls_sve_interleaved_bf16fp32_mmla_8x3VL, bfloat16, float>(args); }
},
{
GemmMethod::GEMM_HYBRID,
"sve_hybrid_bf16fp32_dot_6x4VL",
- [](const GemmArgs &args) { return args._ci->has_sve(); },
+ [](const GemmArgs &args) { return args._ci->has_svebf16(); },
[](const GemmArgs &args) { return args._ci->get_cpu_model() != CPUModel::KLEIN && ((args._Ksize <= 128) && (args._Nsize <= 128)); },
[](const GemmArgs &args) { return new GemmHybridIndirect<cls_sve_hybrid_bf16fp32_dot_6x4VL, bfloat16, float>(args); }
},
{ // gemm_bf16_interleaved
GemmMethod::GEMM_INTERLEAVED,
"sve_interleaved_bf16fp32_dot_8x3VL",
- [](const GemmArgs &args) { return args._ci->has_sve() && (args._Ksize>2); },
+ [](const GemmArgs &args) { return args._ci->has_svebf16() && (args._Ksize>2); },
[](const GemmArgs &args) { return args._ci->get_cpu_model() != CPUModel::KLEIN; },
[](const GemmArgs &args) { return new GemmInterleaved<cls_sve_interleaved_bf16fp32_dot_8x3VL, bfloat16, float>(args); }
},
@@ -71,25 +71,25 @@ static const GemmImplementation<bfloat16, float> gemm_bf16_methods[] =
{ // gemm_bf16_interleaved
GemmMethod::GEMM_INTERLEAVED,
"a64_interleaved_bf16fp32_mmla_8x12",
- [](const GemmArgs &args) { return (args._Ksize>4); },
+ [](const GemmArgs &args) { return args._ci->has_bf16() && (args._Ksize>4); },
nullptr,
[](const GemmArgs &args) { return new GemmInterleaved<cls_a64_interleaved_bf16fp32_mmla_8x12, bfloat16, float>(args); }
},
{
GemmMethod::GEMM_HYBRID,
"a64_hybrid_bf16fp32_dot_6x16",
- nullptr,
+ [](const GemmArgs &args) { return args._ci->has_bf16(); },
nullptr,
[](const GemmArgs &args) { return new GemmHybridIndirect<cls_a64_hybrid_bf16fp32_dot_6x16, bfloat16, float>(args); }
},
{ // gemm_bf16_interleaved
GemmMethod::GEMM_INTERLEAVED,
"a64_interleaved_bf16fp32_dot_8x12",
- [](const GemmArgs &args) { return (args._Ksize>2); },
+ [](const GemmArgs &args) { return args._ci->has_bf16() && (args._Ksize>2); },
nullptr,
[](const GemmArgs &args) { return new GemmInterleaved<cls_a64_interleaved_bf16fp32_dot_8x12, bfloat16, float>(args); }
},
-#endif // V8P6_BF
+#endif // ARM_COMPUTE_ENABLE_BF16
#ifdef __aarch64__
{
GemmMethod::GEMM_INTERLEAVED,
diff --git a/src/core/NEON/kernels/arm_gemm/gemm_fp16.cpp b/src/core/NEON/kernels/arm_gemm/gemm_fp16.cpp
index 8e355c8f2c..b41d8dd097 100644
--- a/src/core/NEON/kernels/arm_gemm/gemm_fp16.cpp
+++ b/src/core/NEON/kernels/arm_gemm/gemm_fp16.cpp
@@ -43,7 +43,7 @@
namespace arm_gemm {
static const GemmImplementation<__fp16, __fp16> gemm_fp16_methods[] = {
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
{
GemmMethod::GEMM_HYBRID,
"sve_hybrid_fp16_mla_6x4VL",
diff --git a/src/core/NEON/kernels/arm_gemm/gemm_fp32.cpp b/src/core/NEON/kernels/arm_gemm/gemm_fp32.cpp
index d94814fb4c..1632e301ac 100644
--- a/src/core/NEON/kernels/arm_gemm/gemm_fp32.cpp
+++ b/src/core/NEON/kernels/arm_gemm/gemm_fp32.cpp
@@ -59,7 +59,7 @@ static const GemmImplementation<float, float> gemm_fp32_methods[] =
[](const GemmArgs &args) { return new GemvBatched<float, float>(args); }
},
#ifdef __aarch64__
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
{
GemmMethod::GEMM_HYBRID,
"sve_gemv_fp32_mla_8VL",
@@ -77,17 +77,17 @@ static const GemmImplementation<float, float> gemm_fp32_methods[] =
},
// MMLA next due to higher throughput (SVE only)
-#if defined(__ARM_FEATURE_SVE) && defined(MMLA_FP32)
+#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVEF32MM)
{
GemmMethod::GEMM_INTERLEAVED,
"sve_interleaved_fp32_mmla_8x3VL",
- [](const GemmArgs &args) { return args._ci->has_sve() && (args._Ksize>4); },
+ [](const GemmArgs &args) { return args._ci->has_svef32mm() && (args._Ksize>4); },
[](const GemmArgs &args) { return args._ci->get_cpu_model() != CPUModel::KLEIN; },
[](const GemmArgs &args) { return new GemmInterleaved<cls_sve_interleaved_fp32_mmla_8x3VL, float, float>(args); }
},
-#endif // __ARM_FEATURE_SVE && MMLA_FP32
+#endif // ARM_COMPUTE_ENABLE_SVE && ARM_COMPUTE_ENABLE_SVEF32MM
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
// SVE smallk / hybrid methods
{
GemmMethod::GEMM_HYBRID,
@@ -110,7 +110,7 @@ static const GemmImplementation<float, float> gemm_fp32_methods[] =
[](const GemmArgs &args) { return args._ci->get_cpu_model() != CPUModel::KLEIN && (((args._Ksize <= 256) && (args._Nsize <= 256)) || ((args._nmulti > 1) && ((args._Msize / args._maxthreads) < 8))); },
[](const GemmArgs &args) { return new GemmHybridIndirect<cls_sve_hybrid_fp32_mla_6x4VL, float, float>(args); }
},
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
// Cortex-A35 specific kernel - use for any problem on A35, and never in any other cases.
{
GemmMethod::GEMM_INTERLEAVED,
@@ -148,7 +148,7 @@ GemmImplementation<float, float>::with_estimate(
[](const GemmArgs &args) { return GemmHybridIndirect<cls_a64_hybrid_fp32_mla_6x16, float, float>::estimate_cycles(args, cls_a64_hybrid_fp32_mla_6x16::get_performance_parameters(args._ci)); },
[](const GemmArgs &args) { return new GemmHybridIndirect<cls_a64_hybrid_fp32_mla_6x16, float, float>(args); }
),
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
{
GemmMethod::GEMM_INTERLEAVED,
"sve_interleaved_fp32_mla_8x3VL",
@@ -156,7 +156,7 @@ GemmImplementation<float, float>::with_estimate(
[](const GemmArgs &args) { return args._ci->get_cpu_model() != CPUModel::KLEIN; },
[](const GemmArgs &args) { return new GemmInterleaved<cls_sve_interleaved_fp32_mla_8x3VL, float, float>(args); }
},
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
GemmImplementation<float, float>::with_estimate(
GemmMethod::GEMM_INTERLEAVED,
"a64_sgemm_8x12",
diff --git a/src/core/NEON/kernels/arm_gemm/gemm_int8.cpp b/src/core/NEON/kernels/arm_gemm/gemm_int8.cpp
index 60cf82f9c6..bfb3ca901f 100644
--- a/src/core/NEON/kernels/arm_gemm/gemm_int8.cpp
+++ b/src/core/NEON/kernels/arm_gemm/gemm_int8.cpp
@@ -46,16 +46,16 @@
namespace arm_gemm {
static const GemmImplementation<int8_t, int32_t> gemm_s8_methods[] = {
-#ifdef __ARM_FEATURE_SVE
-#ifdef MMLA_INT8
+#ifdef ARM_COMPUTE_ENABLE_SVE
+#ifdef ARM_COMPUTE_ENABLE_I8MM
{
GemmMethod::GEMM_INTERLEAVED,
"sve_interleaved_s8s32_mmla_8x3VL",
- [](const GemmArgs &args) { return args._ci->has_sve() && (args._Ksize>8); },
+ [](const GemmArgs &args) { return args._ci->has_svei8mm() && (args._Ksize>8); },
[](const GemmArgs &args) { return args._ci->get_cpu_model() != CPUModel::KLEIN; },
[](const GemmArgs &args) { return new GemmInterleaved<cls_sve_interleaved_s8s32_mmla_8x3VL, int8_t, int32_t>(args); }
},
-#endif
+#endif // ARM_COMPUTE_ENABLE_I8MM
{
GemmMethod::GEMM_HYBRID,
"sve_smallK_hybrid_s8s32_dot_8x1VL",
@@ -78,15 +78,15 @@ static const GemmImplementation<int8_t, int32_t> gemm_s8_methods[] = {
[](const GemmArgs &args) { return new GemmInterleaved<cls_sve_interleaved_s8s32_dot_8x3VL, int8_t, int32_t>(args); }
},
#endif // SVE
-#ifdef MMLA_INT8
+#ifdef ARM_COMPUTE_ENABLE_I8MM
{
GemmMethod::GEMM_INTERLEAVED,
"a64_interleaved_s8s32_mmla_8x12",
- [](const GemmArgs &args) { return (args._Ksize>8); },
+ [](const GemmArgs &args) { return args._ci->has_i8mm() && (args._Ksize>8); }, // a64 (non-SVE) kernel: gate on I8MM, not SVE I8MM
nullptr,
[](const GemmArgs &args) { return new GemmInterleaved<cls_a64_interleaved_s8s32_mmla_8x12, int8_t, int32_t>(args); }
},
-#endif
+#endif // ARM_COMPUTE_ENABLE_I8MM
{
GemmMethod::GEMM_HYBRID,
"a64_smallK_hybrid_s8s32_dot_8x4",
diff --git a/src/core/NEON/kernels/arm_gemm/gemm_qint8.cpp b/src/core/NEON/kernels/arm_gemm/gemm_qint8.cpp
index 094b6fdff4..985567f6f3 100644
--- a/src/core/NEON/kernels/arm_gemm/gemm_qint8.cpp
+++ b/src/core/NEON/kernels/arm_gemm/gemm_qint8.cpp
@@ -53,16 +53,16 @@ namespace arm_gemm {
static const GemmImplementation<int8_t, int8_t, Requantize32> gemm_qint8_methods[] =
{
-#ifdef __ARM_FEATURE_SVE
-#ifdef MMLA_INT8
+#ifdef ARM_COMPUTE_ENABLE_SVE
+#ifdef ARM_COMPUTE_ENABLE_I8MM
{
GemmMethod::GEMM_INTERLEAVED,
"sve_interleaved_s8s32_mmla_8x3VL",
- [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_sve() && (args._Ksize>8); },
+ [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_svei8mm() && (args._Ksize>8); },
[](const GemmArgs &args, const Requantize32 &) { return args._ci->get_cpu_model() != CPUModel::KLEIN; },
[](const GemmArgs &args, const Requantize32 &qp) { return new GemmInterleavedQuantized<cls_sve_interleaved_s8s32_mmla_8x3VL, int8_t, int8_t>(args, qp); }
},
-#endif
+#endif // ARM_COMPUTE_ENABLE_I8MM
{
GemmMethod::GEMM_HYBRID_QUANTIZED,
"sve_smallK_hybrid_s8s32_dot_8x1VL",
@@ -70,22 +70,22 @@ static const GemmImplementation<int8_t, int8_t, Requantize32> gemm_qint8_methods
[](const GemmArgs &args, const Requantize32 &) { return args._ci->get_cpu_model() != CPUModel::KLEIN; },
[](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridQuantized<cls_sve_smallK_hybrid_s8s32_dot_8x1VL, int8_t, int8_t>(args, qp); }
},
-#ifdef SVE2
+#ifdef ARM_COMPUTE_ENABLE_SVE2
{
GemmMethod::GEMM_HYBRID,
"sve_hybrid_s8qs_dot_6x4VL",
- [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_sve() && quant_hybrid_symmetric(qp); },
+ [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_sve2() && quant_hybrid_symmetric(qp); },
[](const GemmArgs &args, const Requantize32 &) { return args._ci->get_cpu_model() != CPUModel::KLEIN; },
[](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridIndirect<cls_sve_hybrid_s8qs_dot_6x4VL, int8_t, int8_t, Requantize32>(args, qp); }
},
{
GemmMethod::GEMM_HYBRID,
"sve_hybrid_s8qa_dot_4x4VL",
- [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_sve() && quant_hybrid_asymmetric(qp); },
+ [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_sve2() && quant_hybrid_asymmetric(qp); },
[](const GemmArgs &args, const Requantize32 &) { return args._ci->get_cpu_model() != CPUModel::KLEIN; },
[](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridIndirect<cls_sve_hybrid_s8qa_dot_4x4VL, int8_t, int8_t, Requantize32>(args, qp); }
},
-#endif
+#endif // ARM_COMPUTE_ENABLE_SVE2
{
GemmMethod::GEMM_HYBRID,
"sve_hybrid_s8s32_dot_6x4VL",
@@ -101,15 +101,15 @@ static const GemmImplementation<int8_t, int8_t, Requantize32> gemm_qint8_methods
[](const GemmArgs &args, const Requantize32 &qp) { return new GemmInterleavedQuantized<cls_sve_interleaved_s8s32_dot_8x3VL, int8_t, int8_t>(args, qp); }
},
#endif // SVE
-#ifdef MMLA_INT8
+#ifdef ARM_COMPUTE_ENABLE_I8MM
{
GemmMethod::GEMM_INTERLEAVED,
"a64_interleaved_s8s32_mmla_8x12",
- [](const GemmArgs &args, const Requantize32 &) { return (args._Ksize>8); },
+ [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_i8mm() && (args._Ksize>8); },
nullptr,
[](const GemmArgs &args, const Requantize32 &qp) { return new GemmInterleavedQuantized<cls_a64_interleaved_s8s32_mmla_8x12, int8_t, int8_t>(args, qp); }
},
-#endif
+#endif // ARM_COMPUTE_ENABLE_I8MM
{
GemmMethod::GEMM_HYBRID_QUANTIZED,
"a64_smallK_hybrid_s8s32_dot_8x4",
diff --git a/src/core/NEON/kernels/arm_gemm/gemm_quint8.cpp b/src/core/NEON/kernels/arm_gemm/gemm_quint8.cpp
index be27b3a117..f3f2f335fd 100644
--- a/src/core/NEON/kernels/arm_gemm/gemm_quint8.cpp
+++ b/src/core/NEON/kernels/arm_gemm/gemm_quint8.cpp
@@ -50,12 +50,12 @@ namespace arm_gemm {
static const GemmImplementation<uint8_t, uint8_t, Requantize32> gemm_quint8_methods[] =
{
-#ifdef __ARM_FEATURE_SVE
-#ifdef MMLA_INT8
+#ifdef ARM_COMPUTE_ENABLE_SVE
+#ifdef ARM_COMPUTE_ENABLE_I8MM
{
GemmMethod::GEMM_INTERLEAVED,
"sve_interleaved_u8u32_mmla_8x3VL",
- [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_sve() && (args._Ksize>8); },
+ [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_svei8mm() && (args._Ksize>8); },
[](const GemmArgs &args, const Requantize32 &) { return args._ci->get_cpu_model() != CPUModel::KLEIN; },
[](const GemmArgs &args, const Requantize32 &qp) { return new GemmInterleavedQuantized<cls_sve_interleaved_u8u32_mmla_8x3VL, uint8_t, uint8_t>(args, qp); }
},
@@ -67,15 +67,15 @@ static const GemmImplementation<uint8_t, uint8_t, Requantize32> gemm_quint8_meth
[](const GemmArgs &args, const Requantize32 &) { return args._ci->get_cpu_model() != CPUModel::KLEIN; },
[](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridQuantized<cls_sve_smallK_hybrid_u8u32_dot_8x1VL, uint8_t, uint8_t>(args, qp); }
},
-#ifdef SVE2 // Requantizing kernels include some SVE2 only instructions (SQRDMULH, SRSHL)
+#ifdef ARM_COMPUTE_ENABLE_SVE2 // Requantizing kernels include some SVE2 only instructions (SQRDMULH, SRSHL)
{
GemmMethod::GEMM_HYBRID,
"sve_hybrid_u8qa_dot_4x4VL",
- [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_sve() && quant_hybrid_asymmetric(qp); },
+ [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_sve2() && quant_hybrid_asymmetric(qp); },
[](const GemmArgs &args, const Requantize32 &) { return args._ci->get_cpu_model() != CPUModel::KLEIN; },
[](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridIndirect<cls_sve_hybrid_u8qa_dot_4x4VL, uint8_t, uint8_t, Requantize32>(args, qp); }
},
-#endif
+#endif // ARM_COMPUTE_ENABLE_SVE2
{
GemmMethod::GEMM_HYBRID,
"sve_hybrid_u8u32_dot_6x4VL",
@@ -91,11 +91,11 @@ static const GemmImplementation<uint8_t, uint8_t, Requantize32> gemm_quint8_meth
[](const GemmArgs &args, const Requantize32 &qp) { return new GemmInterleavedQuantized<cls_sve_interleaved_u8u32_dot_8x3VL, uint8_t, uint8_t>(args, qp); }
},
#endif
-#ifdef MMLA_INT8
+#ifdef ARM_COMPUTE_ENABLE_I8MM
{
GemmMethod::GEMM_INTERLEAVED,
"a64_interleaved_u8u32_mmla_8x12",
- [](const GemmArgs &args, const Requantize32 &) { return (args._Ksize>8); },
+ [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_i8mm() && (args._Ksize>8); },
[](const GemmArgs &args, const Requantize32 &) { return args._ci->get_cpu_model() != CPUModel::KLEIN; },
[](const GemmArgs &args, const Requantize32 &qp) { return new GemmInterleavedQuantized<cls_a64_interleaved_u8u32_mmla_8x12, uint8_t, uint8_t>(args, qp); }
},
diff --git a/src/core/NEON/kernels/arm_gemm/gemm_uint8.cpp b/src/core/NEON/kernels/arm_gemm/gemm_uint8.cpp
index 4de3d2b18a..4c05fd1b73 100644
--- a/src/core/NEON/kernels/arm_gemm/gemm_uint8.cpp
+++ b/src/core/NEON/kernels/arm_gemm/gemm_uint8.cpp
@@ -46,12 +46,12 @@
namespace arm_gemm {
static const GemmImplementation<uint8_t, uint32_t> gemm_u8_methods[] = {
-#ifdef __ARM_FEATURE_SVE
-#ifdef MMLA_INT8
+#ifdef ARM_COMPUTE_ENABLE_SVE
+#ifdef ARM_COMPUTE_ENABLE_I8MM
{
GemmMethod::GEMM_INTERLEAVED,
"sve_interleaved_u8u32_mmla_8x3VL",
- [](const GemmArgs &args) { return args._ci->has_sve() && (args._Ksize>8); },
+ [](const GemmArgs &args) { return args._ci->has_svei8mm() && (args._Ksize>8); },
[](const GemmArgs &args) { return args._ci->get_cpu_model() != CPUModel::KLEIN; },
[](const GemmArgs &args) { return new GemmInterleaved<cls_sve_interleaved_u8u32_mmla_8x3VL, uint8_t, uint32_t>(args); }
},
@@ -78,11 +78,11 @@ static const GemmImplementation<uint8_t, uint32_t> gemm_u8_methods[] = {
[](const GemmArgs &args) { return new GemmInterleaved<cls_sve_interleaved_u8u32_dot_8x3VL, uint8_t, uint32_t>(args); }
},
#endif
-#ifdef MMLA_INT8
+#ifdef ARM_COMPUTE_ENABLE_I8MM
{
GemmMethod::GEMM_INTERLEAVED,
"a64_interleaved_u8u32_mmla_8x12",
- [](const GemmArgs &args) { return (args._Ksize>8); },
+ [](const GemmArgs &args) { return args._ci->has_i8mm() && (args._Ksize>8); },
nullptr,
[](const GemmArgs &args) { return new GemmInterleaved<cls_a64_interleaved_u8u32_mmla_8x12, uint8_t, uint32_t>(args); }
},
diff --git a/src/core/NEON/kernels/arm_gemm/interleave_indirect.cpp b/src/core/NEON/kernels/arm_gemm/interleave_indirect.cpp
index 0d56b46e19..a6b1269927 100644
--- a/src/core/NEON/kernels/arm_gemm/interleave_indirect.cpp
+++ b/src/core/NEON/kernels/arm_gemm/interleave_indirect.cpp
@@ -320,12 +320,12 @@ template void IndirectInterleave<8, 1, VLType::None>(float *, const float * cons
template void ConvolutionInterleave<8, 1, VLType::None>(float *, const float *, size_t, const convolver<float> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void Interleave<8, 1, VLType::None>(float *, const float *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
-#if defined(__ARM_FEATURE_SVE) && defined(MMLA_FP32)
+#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVEF32MM)
/* FMMLA */
template void IndirectInterleave<8, 2, VLType::None>(float *, const float * const * const *, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void ConvolutionInterleave<8, 2, VLType::None>(float *, const float *, size_t, const convolver<float> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void Interleave<8, 2, VLType::None>(float *, const float *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
-#endif // SVE && MMLA_FP32
+#endif // ARM_COMPUTE_ENABLE_SVE && ARM_COMPUTE_ENABLE_SVEF32MM
/* FP16 */
#if defined(FP16_KERNELS) || defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
@@ -340,7 +340,7 @@ template void Interleave<8, 1, VLType::None>(float *, const __fp16 *, size_t, un
/* BF16 */
/* Arm® Neon™/SVE BFDOT */
-#ifdef V8P6_BF
+#ifdef ARM_COMPUTE_ENABLE_BF16
template void IndirectInterleave<8, 2, VLType::None>(bfloat16 *, const bfloat16 * const * const *, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void ConvolutionInterleave<8, 2, VLType::None>(bfloat16 *, const bfloat16 *, size_t, const convolver<bfloat16> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void Interleave<8, 2, VLType::None>(bfloat16 *, const bfloat16 *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
@@ -348,7 +348,7 @@ template void Interleave<8, 2, VLType::None>(bfloat16 *, const bfloat16 *, size_
template void IndirectInterleave<8, 4, VLType::None>(bfloat16 *, const bfloat16 * const * const *, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void ConvolutionInterleave<8, 4, VLType::None>(bfloat16 *, const bfloat16 *, size_t, const convolver<bfloat16> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void Interleave<8, 4, VLType::None>(bfloat16 *, const bfloat16 *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
-#endif // V8P6_BF
+#endif // ARM_COMPUTE_ENABLE_BF16
/* Arm® Neon™/SVE using FP32 kernel */
template void IndirectInterleave<8, 1, VLType::None>(float *, const bfloat16 * const * const *, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
@@ -375,12 +375,12 @@ template void IndirectInterleave<8, 4, VLType::None>(int8_t *, const int8_t * co
template void ConvolutionInterleave<8, 4, VLType::None>(int8_t *, const int8_t *, size_t, const convolver<int8_t> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void Interleave<8, 4, VLType::None>(int8_t *, const int8_t *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
-#ifdef MMLA_INT8
+#ifdef ARM_COMPUTE_ENABLE_I8MM
/* MMLA SMMLA (height 8, block 8) */
template void IndirectInterleave<8, 8, VLType::None>(int8_t *, const int8_t * const * const *, unsigned int, unsigned int, unsigned int y0, unsigned int ymax, unsigned int k0, unsigned int kmax, bool, int32_t);
template void ConvolutionInterleave<8, 8, VLType::None>(int8_t *, const int8_t *, size_t, const convolver<int8_t> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void Interleave<8, 8, VLType::None>(int8_t *, const int8_t *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
-#endif // MMLA_INT8
+#endif // ARM_COMPUTE_ENABLE_I8MM
/* Arm® Neon™ SDOT (height 8, block 1) */
template void IndirectInterleave<8, 1, VLType::None>(int16_t *, const int8_t * const * const *, unsigned int, unsigned int, unsigned int y0, unsigned int ymax, unsigned int k0, unsigned int kmax, bool, int32_t);
@@ -397,12 +397,12 @@ template void IndirectInterleave<8, 4, VLType::None>(uint8_t *, const uint8_t *
template void ConvolutionInterleave<8, 4, VLType::None>(uint8_t *, const uint8_t *, size_t, const convolver<uint8_t> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void Interleave<8, 4, VLType::None>(uint8_t *, const uint8_t *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
-#ifdef MMLA_INT8
+#ifdef ARM_COMPUTE_ENABLE_I8MM
/* MMLA UMMLA (height 8, block 8) */
template void IndirectInterleave<8, 8, VLType::None>(uint8_t *, const uint8_t * const * const *, unsigned int, unsigned int, unsigned int y0, unsigned int ymax, unsigned int k0, unsigned int kmax, bool, int32_t);
template void ConvolutionInterleave<8, 8, VLType::None>(uint8_t *, const uint8_t *, size_t, const convolver<uint8_t> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void Interleave<8, 8, VLType::None>(uint8_t *, const uint8_t *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
-#endif // MMLA_INT8
+#endif // ARM_COMPUTE_ENABLE_I8MM
/* Arm® Neon™ 16-bit (height 8, block 1) */
template void IndirectInterleave<8, 1, VLType::None>(uint16_t *, const uint8_t * const * const *, unsigned int, unsigned int, unsigned int y0, unsigned int ymax, unsigned int k0, unsigned int kmax, bool, int32_t);
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_gemv_fp32_mla_8VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_gemv_fp32_mla_8VL/generic.cpp
index c62e31936c..78387de90c 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_gemv_fp32_mla_8VL/generic.cpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_gemv_fp32_mla_8VL/generic.cpp
@@ -21,7 +21,7 @@
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "arm_gemm.hpp"
#include "../../utils.hpp"
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_dot_6x4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_dot_6x4VL.hpp
index 066bff4602..7b0282fa32 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_dot_6x4VL.hpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_dot_6x4VL.hpp
@@ -22,7 +22,7 @@
* IN THE SOFTWARE.
*/
#pragma once
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "../std_transforms_sve.hpp"
#include "../bfloat.hpp"
@@ -81,4 +81,4 @@ public:
} // namespace arm_gemm
#undef ARGLIST
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_dot_6x4VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_dot_6x4VL/generic.cpp
index 1233a98531..34a657f64f 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_dot_6x4VL/generic.cpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_dot_6x4VL/generic.cpp
@@ -21,7 +21,7 @@
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "arm_gemm.hpp"
#include "../../utils.hpp"
@@ -2153,4 +2153,4 @@ void sve_hybrid_bf16fp32_dot_6x4VL (
}
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL.hpp
index 5c8563952f..f98ccdc7d3 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL.hpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL.hpp
@@ -22,7 +22,7 @@
* IN THE SOFTWARE.
*/
#pragma once
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "../std_transforms_sve.hpp"
@@ -80,4 +80,4 @@ public:
} // namespace arm_gemm
#undef ARGLIST
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL/generic.cpp
index 7cc03bbfb5..c151179a1f 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL/generic.cpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL/generic.cpp
@@ -21,7 +21,7 @@
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "arm_gemm.hpp"
#include "../../utils.hpp"
@@ -3094,4 +3094,4 @@ void sve_hybrid_fp16_mla_6x4VL (
}
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL.hpp
index b696e73637..4c0a3a11e0 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL.hpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL.hpp
@@ -22,7 +22,7 @@
* IN THE SOFTWARE.
*/
#pragma once
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "../std_transforms_sve.hpp"
@@ -80,4 +80,4 @@ public:
} // namespace arm_gemm
#undef ARGLIST
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL/generic.cpp
index dee9a107ff..25d65826b9 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL/generic.cpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL/generic.cpp
@@ -21,7 +21,7 @@
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "arm_gemm.hpp"
#include "../../utils.hpp"
@@ -2152,4 +2152,4 @@ void sve_hybrid_fp32_mla_6x4VL (
}
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL.hpp
index 2273d97d5f..87f063d224 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL.hpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL.hpp
@@ -22,7 +22,7 @@
* IN THE SOFTWARE.
*/
#pragma once
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "../std_transforms_sve.hpp"
@@ -80,4 +80,4 @@ public:
} // namespace arm_gemm
#undef ARGLIST
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL/generic.cpp
index 863325f7f5..943e0ac148 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL/generic.cpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL/generic.cpp
@@ -21,7 +21,7 @@
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "arm_gemm.hpp"
#include "../../utils.hpp"
@@ -1616,4 +1616,4 @@ void sve_hybrid_fp32_mla_8x1VL (
}
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_dot_4x4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_dot_4x4VL.hpp
index bc93ced25b..c278b3fc6b 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_dot_4x4VL.hpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_dot_4x4VL.hpp
@@ -22,7 +22,7 @@
* IN THE SOFTWARE.
*/
#pragma once
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "../std_transforms_sve.hpp"
@@ -80,4 +80,4 @@ public:
} // namespace arm_gemm
#undef ARGLIST
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_dot_4x4VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_dot_4x4VL/generic.cpp
index 50b9ba524d..8a7465ba6b 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_dot_4x4VL/generic.cpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_dot_4x4VL/generic.cpp
@@ -21,7 +21,7 @@
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "arm_gemm.hpp"
#include "../../utils.hpp"
@@ -1529,4 +1529,4 @@ void sve_hybrid_s8qa_dot_4x4VL (
}
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_dot_6x4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_dot_6x4VL.hpp
index 61927236ad..57056b4c2a 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_dot_6x4VL.hpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_dot_6x4VL.hpp
@@ -22,7 +22,7 @@
* IN THE SOFTWARE.
*/
#pragma once
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "../std_transforms_sve.hpp"
@@ -80,4 +80,4 @@ public:
} // namespace arm_gemm
#undef ARGLIST
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_dot_6x4VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_dot_6x4VL/generic.cpp
index f901a814f9..0328c107e2 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_dot_6x4VL/generic.cpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_dot_6x4VL/generic.cpp
@@ -21,7 +21,7 @@
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "arm_gemm.hpp"
#include "../../utils.hpp"
@@ -2665,4 +2665,4 @@ void sve_hybrid_s8qs_dot_6x4VL (
}
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_6x4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_6x4VL.hpp
index b2c376196f..37258978d3 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_6x4VL.hpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_6x4VL.hpp
@@ -22,7 +22,7 @@
* IN THE SOFTWARE.
*/
#pragma once
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "../std_transforms_sve.hpp"
@@ -80,4 +80,4 @@ public:
} // namespace arm_gemm
#undef ARGLIST
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_6x4VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_6x4VL/generic.cpp
index 8862b3665a..9cddee941e 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_6x4VL/generic.cpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_6x4VL/generic.cpp
@@ -21,7 +21,7 @@
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "arm_gemm.hpp"
#include "../../utils.hpp"
@@ -1819,4 +1819,4 @@ void sve_hybrid_s8s32_dot_6x4VL (
}
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_dot_4x4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_dot_4x4VL.hpp
index cfb8adfc87..3de8d178cd 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_dot_4x4VL.hpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_dot_4x4VL.hpp
@@ -22,7 +22,7 @@
* IN THE SOFTWARE.
*/
#pragma once
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "../std_transforms_sve.hpp"
@@ -80,4 +80,4 @@ public:
} // namespace arm_gemm
#undef ARGLIST
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_dot_4x4VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_dot_4x4VL/generic.cpp
index 373d82930b..0bfc28776f 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_dot_4x4VL/generic.cpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_dot_4x4VL/generic.cpp
@@ -21,7 +21,7 @@
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "arm_gemm.hpp"
#include "../../utils.hpp"
@@ -1529,4 +1529,4 @@ void sve_hybrid_u8qa_dot_4x4VL (
}
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_6x4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_6x4VL.hpp
index 4ea1d17c4e..a2883bfa30 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_6x4VL.hpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_6x4VL.hpp
@@ -22,7 +22,7 @@
* IN THE SOFTWARE.
*/
#pragma once
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "../std_transforms_sve.hpp"
@@ -80,4 +80,4 @@ public:
} // namespace arm_gemm
#undef ARGLIST
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_6x4VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_6x4VL/generic.cpp
index 97f6665d85..413bc65288 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_6x4VL/generic.cpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_6x4VL/generic.cpp
@@ -21,7 +21,7 @@
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "arm_gemm.hpp"
#include "../../utils.hpp"
@@ -1819,4 +1819,4 @@ void sve_hybrid_u8u32_dot_6x4VL (
}
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_dot_8x3VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_dot_8x3VL.hpp
index 12bb758b68..d717b745c9 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_dot_8x3VL.hpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_dot_8x3VL.hpp
@@ -23,7 +23,7 @@
*/
#pragma once
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "../bfloat.hpp"
#include "../std_transforms_sve.hpp"
@@ -69,4 +69,4 @@ public:
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_dot_8x3VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_dot_8x3VL/generic.cpp
index adee900337..4f774b133f 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_dot_8x3VL/generic.cpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_dot_8x3VL/generic.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "../../bfloat.hpp"
#include "../../asmlib.hpp"
@@ -326,4 +326,4 @@ void sve_interleaved_bf16fp32_dot_8x3VL(const bfloat16 *Apanel, const bfloat16 *
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_mmla_8x3VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_mmla_8x3VL.hpp
index 2889dd7f0f..b7fc515341 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_mmla_8x3VL.hpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_mmla_8x3VL.hpp
@@ -23,7 +23,7 @@
*/
#pragma once
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "../bfloat.hpp"
#include "../std_transforms_sve.hpp"
@@ -69,4 +69,4 @@ public:
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_mmla_8x3VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_mmla_8x3VL/generic.cpp
index e43404e608..c720942140 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_mmla_8x3VL/generic.cpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_mmla_8x3VL/generic.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "../../bfloat.hpp"
#include "../../asmlib.hpp"
@@ -394,4 +394,4 @@ void sve_interleaved_bf16fp32_mmla_8x3VL(const bfloat16 *Apanel, const bfloat16
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_8x3VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_8x3VL.hpp
index eb946d9dfa..b797b8bec1 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_8x3VL.hpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_8x3VL.hpp
@@ -23,7 +23,7 @@
*/
#pragma once
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "../std_transforms_sve.hpp"
@@ -69,4 +69,4 @@ public:
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_8x3VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_8x3VL/generic.cpp
index 46b8770409..0f1937acc5 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_8x3VL/generic.cpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_8x3VL/generic.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "../../asmlib.hpp"
@@ -316,4 +316,4 @@ void sve_interleaved_fp16_mla_8x3VL(const __fp16 *Apanel, const __fp16 *Bpanel,
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_8x3VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_8x3VL.hpp
index b84ba83b6a..f4bb809fe8 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_8x3VL.hpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_8x3VL.hpp
@@ -23,7 +23,7 @@
*/
#pragma once
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "../std_transforms_sve.hpp"
@@ -69,4 +69,4 @@ public:
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_8x3VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_8x3VL/generic.cpp
index 1e05a308b5..10feaa130b 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_8x3VL/generic.cpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_8x3VL/generic.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "../../asmlib.hpp"
@@ -325,4 +325,4 @@ void sve_interleaved_fp32_mla_8x3VL(const float *Apanel, const float *Bpanel, fl
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mmla_8x3VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mmla_8x3VL.hpp
index 96216960ff..a355262fe2 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mmla_8x3VL.hpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mmla_8x3VL.hpp
@@ -23,7 +23,7 @@
*/
#pragma once
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "../std_transforms_sve.hpp"
@@ -69,4 +69,4 @@ public:
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mmla_8x3VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mmla_8x3VL/generic.cpp
index 39daf0ff20..a985a91b90 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mmla_8x3VL/generic.cpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mmla_8x3VL/generic.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "../../asmlib.hpp"
@@ -394,4 +394,4 @@ void sve_interleaved_fp32_mmla_8x3VL(const float *Apanel, const float *Bpanel, f
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_8x3VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_8x3VL.hpp
index 3e16915cd4..aa6d9e7ec8 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_8x3VL.hpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_8x3VL.hpp
@@ -23,7 +23,7 @@
*/
#pragma once
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include <cstdint>
#include "../std_transforms_sve.hpp"
@@ -70,4 +70,4 @@ public:
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_8x3VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_8x3VL/generic.cpp
index 674c2400bf..01c0f8cddc 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_8x3VL/generic.cpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_8x3VL/generic.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include <cstdint>
#include "../../asmlib.hpp"
@@ -326,4 +326,4 @@ void sve_interleaved_s8s32_dot_8x3VL(const int8_t *Apanel, const int8_t *Bpanel,
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_mmla_8x3VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_mmla_8x3VL.hpp
index 02b3451c54..671946b262 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_mmla_8x3VL.hpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_mmla_8x3VL.hpp
@@ -23,7 +23,7 @@
*/
#pragma once
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include <cstdint>
#include "../std_transforms_sve.hpp"
@@ -70,4 +70,4 @@ public:
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_mmla_8x3VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_mmla_8x3VL/generic.cpp
index 578aa01732..9420210aae 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_mmla_8x3VL/generic.cpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_mmla_8x3VL/generic.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include <cstdint>
#include "../../asmlib.hpp"
@@ -394,4 +394,4 @@ void sve_interleaved_s8s32_mmla_8x3VL(const int8_t *Apanel, const int8_t *Bpanel
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL.hpp
index 832a224199..7d39485164 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL.hpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL.hpp
@@ -23,7 +23,7 @@
*/
#pragma once
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include <cstdint>
#include "../std_transforms_sve.hpp"
@@ -70,4 +70,4 @@ public:
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL/generic.cpp
index 891869c767..2139bab69d 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL/generic.cpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL/generic.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include <cstdint>
#include "../../asmlib.hpp"
@@ -326,4 +326,4 @@ void sve_interleaved_u8u32_dot_8x3VL(const uint8_t *Apanel, const uint8_t *Bpane
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_mmla_8x3VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_mmla_8x3VL.hpp
index 4fdaab84bd..ca9cadd6d7 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_mmla_8x3VL.hpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_mmla_8x3VL.hpp
@@ -23,7 +23,7 @@
*/
#pragma once
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include <cstdint>
#include "../std_transforms_sve.hpp"
@@ -70,4 +70,4 @@ public:
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_mmla_8x3VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_mmla_8x3VL/generic.cpp
index fa08a9d091..d42385789c 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_mmla_8x3VL/generic.cpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_mmla_8x3VL/generic.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include <cstdint>
#include "../../asmlib.hpp"
@@ -394,4 +394,4 @@ void sve_interleaved_u8u32_mmla_8x3VL(const uint8_t *Apanel, const uint8_t *Bpan
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_fp32_mla_8x1VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_fp32_mla_8x1VL.hpp
index 2097d76a54..ab225589e1 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_fp32_mla_8x1VL.hpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_fp32_mla_8x1VL.hpp
@@ -23,7 +23,7 @@
*/
#pragma once
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
@@ -85,4 +85,4 @@ public:
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_fp32_mla_8x1VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_fp32_mla_8x1VL/generic.cpp
index e07cfa8218..cdad98c5f1 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_fp32_mla_8x1VL/generic.cpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_fp32_mla_8x1VL/generic.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include <algorithm>
@@ -18804,4 +18804,4 @@ void sve_smallK_hybrid_fp32_mla_8x1VL(const float *A, int lda, const float *B, f
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_s8s32_dot_8x1VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_s8s32_dot_8x1VL.hpp
index e50c05ba39..e735567e95 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_s8s32_dot_8x1VL.hpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_s8s32_dot_8x1VL.hpp
@@ -23,7 +23,7 @@
*/
#pragma once
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include <cstdint>
@@ -85,4 +85,4 @@ public:
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_s8s32_dot_8x1VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_s8s32_dot_8x1VL/generic.cpp
index 98004e98a5..cd01411722 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_s8s32_dot_8x1VL/generic.cpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_s8s32_dot_8x1VL/generic.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include <algorithm>
@@ -8968,4 +8968,4 @@ void sve_smallK_hybrid_s8s32_dot_8x1VL(const int8_t *A, int lda, const int8_t *B
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_u8u32_dot_8x1VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_u8u32_dot_8x1VL.hpp
index 60184be043..25dd10019d 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_u8u32_dot_8x1VL.hpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_u8u32_dot_8x1VL.hpp
@@ -23,7 +23,7 @@
*/
#pragma once
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include <cstdint>
@@ -85,4 +85,4 @@ public:
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_u8u32_dot_8x1VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_u8u32_dot_8x1VL/generic.cpp
index 6a8553216b..99a287b4f5 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_u8u32_dot_8x1VL/generic.cpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_u8u32_dot_8x1VL/generic.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include <algorithm>
@@ -8968,4 +8968,4 @@ void sve_smallK_hybrid_u8u32_dot_8x1VL(const uint8_t *A, int lda, const uint8_t
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/common/cpuinfo/target/CpuInfoSveUtils.cpp b/src/core/NEON/kernels/arm_gemm/mergeresults-sve.cpp
index 750a1b01d1..77d86b7dd8 100644
--- a/src/common/cpuinfo/target/CpuInfoSveUtils.cpp
+++ b/src/core/NEON/kernels/arm_gemm/mergeresults-sve.cpp
@@ -21,20 +21,21 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "src/common/cpuinfo/target/CpuInfoSveUtils.h"
-namespace arm_compute
-{
-namespace cpuinfo
-{
-uint64_t get_sve_feature_reg()
-{
- uint64_t reg = 0;
-#if defined(ENABLE_SVE)
- __asm __volatile("MRS %0, ID_AA64ZFR0_EL1"
- : "=r"(reg));
-#endif /* defined(DENABLE_SVE) */
- return reg;
-}
-} // namespace cpuinfo
-} // namespace arm_compute
+/* As some of the merges need these headers, but are all included in the
+ * arm_gemm namespace, put these headers here. */
+#include <algorithm>
+
+#include <arm_neon.h>
+
+#include "arm_gemm.hpp"
+#include "asmlib.hpp"
+#include "utils.hpp"
+
+#include "mergeresults.hpp"
+
+namespace arm_gemm {
+
+#include "merges/list-sve.hpp"
+
+} // namespace arm_gemm \ No newline at end of file
diff --git a/src/core/NEON/kernels/arm_gemm/mergeresults.cpp b/src/core/NEON/kernels/arm_gemm/mergeresults.cpp
index 17566db375..bbfe8f23d9 100644
--- a/src/core/NEON/kernels/arm_gemm/mergeresults.cpp
+++ b/src/core/NEON/kernels/arm_gemm/mergeresults.cpp
@@ -37,9 +37,13 @@ namespace arm_gemm {
template<unsigned int twidth, unsigned int height, bool sve=false, typename Tin, typename Tout>
void MergeResults(Tout * out, const Tin * in, int ldc, int y0, int ymax, int x0, int xmax, const Tout *bias, Activation act, bool append) {
+ // NOTE: The following code is disabled to avoid calling get_vector_length(), so templated MergeResults will not
+ // be correct for SVE cases. This is OK as we have specialisations for all needed SVE cases anyway.
+ //
// For SVE cases, multiply the width up by the vector length.
// Use the *input* type to determine this, since this will be what the kernel operated on.
- const int width = twidth * (sve ? get_vector_length<Tin>() : 1);
+ // const int width = twidth * (sve ? get_vector_length<Tin>() : 1);
+ const int width = twidth;
const int full_y_blocks = (ymax - y0) / height;
const int y_remainder = (ymax - y0) % height;
diff --git a/src/common/cpuinfo/target/CpuInfoSveUtils.h b/src/core/NEON/kernels/arm_gemm/merges/list-sve.hpp
index 73862b131c..aded4b3b8c 100644
--- a/src/common/cpuinfo/target/CpuInfoSveUtils.h
+++ b/src/core/NEON/kernels/arm_gemm/merges/list-sve.hpp
@@ -21,20 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef SRC_COMMON_CPUINFO_TARGET_CPUINFO_SVE_UTILS_H
-#define SRC_COMMON_CPUINFO_TARGET_CPUINFO_SVE_UTILS_H
-#include <cstdint>
-
-namespace arm_compute
-{
-namespace cpuinfo
-{
-/** Returns the contents of the SVE feature register (ID_AA64ZFR0_EL1)
- *
- * @return uint64_t The value of the register
- */
-uint64_t get_sve_feature_reg();
-} // namespace cpuinfo
-} // namespace arm_compute
-#endif /* SRC_COMMON_CPUINFO_CPUISAINFO_H */
+#include "sve_merge_fp16_3VLx8.hpp"
+#include "sve_merge_fp32_3VLx8.hpp"
+#include "sve_merge_s32_3VLx8.hpp"
+#include "sve_merge_u32_3VLx8.hpp" \ No newline at end of file
diff --git a/src/core/NEON/kernels/arm_gemm/merges/list.hpp b/src/core/NEON/kernels/arm_gemm/merges/list.hpp
index 825c2fd020..dae874ef94 100644
--- a/src/core/NEON/kernels/arm_gemm/merges/list.hpp
+++ b/src/core/NEON/kernels/arm_gemm/merges/list.hpp
@@ -27,8 +27,4 @@
#include "a64_merge_s32_12x8.hpp"
#include "a64_merge_s32_4x4.hpp"
#include "a64_merge_u32_12x8.hpp"
-#include "a64_merge_u32_4x4.hpp"
-#include "sve_merge_fp16_3VLx8.hpp"
-#include "sve_merge_fp32_3VLx8.hpp"
-#include "sve_merge_s32_3VLx8.hpp"
-#include "sve_merge_u32_3VLx8.hpp"
+#include "a64_merge_u32_4x4.hpp" \ No newline at end of file
diff --git a/src/core/NEON/kernels/arm_gemm/merges/sve_merge_fp16_3VLx8.hpp b/src/core/NEON/kernels/arm_gemm/merges/sve_merge_fp16_3VLx8.hpp
index cf1d10329b..4da32b459c 100644
--- a/src/core/NEON/kernels/arm_gemm/merges/sve_merge_fp16_3VLx8.hpp
+++ b/src/core/NEON/kernels/arm_gemm/merges/sve_merge_fp16_3VLx8.hpp
@@ -23,7 +23,7 @@
*/
#pragma once
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
template<>
void MergeResults<3, 8, true>(__fp16 *out, const __fp16 *in, const int ldout, const int y0, const int ymax, const int x0, const int xmax, const __fp16 *bias, Activation act, bool append)
@@ -1872,4 +1872,4 @@ void MergeResults<3, 8, true>(__fp16 *out, const __fp16 *in, const int ldout, co
}
}
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/merges/sve_merge_fp32_3VLx8.hpp b/src/core/NEON/kernels/arm_gemm/merges/sve_merge_fp32_3VLx8.hpp
index b0d10c085d..5505f1efe4 100644
--- a/src/core/NEON/kernels/arm_gemm/merges/sve_merge_fp32_3VLx8.hpp
+++ b/src/core/NEON/kernels/arm_gemm/merges/sve_merge_fp32_3VLx8.hpp
@@ -23,7 +23,7 @@
*/
#pragma once
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
template<>
void MergeResults<3, 8, true>(float *out, const float *in, const int ldout, const int y0, const int ymax, const int x0, const int xmax, const float *bias, Activation act, bool append)
@@ -1872,4 +1872,4 @@ void MergeResults<3, 8, true>(float *out, const float *in, const int ldout, cons
}
}
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/merges/sve_merge_s32_3VLx8.hpp b/src/core/NEON/kernels/arm_gemm/merges/sve_merge_s32_3VLx8.hpp
index 34b6fe3ef5..c009881254 100644
--- a/src/core/NEON/kernels/arm_gemm/merges/sve_merge_s32_3VLx8.hpp
+++ b/src/core/NEON/kernels/arm_gemm/merges/sve_merge_s32_3VLx8.hpp
@@ -23,7 +23,7 @@
*/
#pragma once
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
template<>
void MergeResults<3, 8, true>(int32_t *out, const int32_t *in, const int ldout, const int y0, const int ymax, const int x0, const int xmax, const int32_t *bias, Activation , bool append)
@@ -1394,4 +1394,4 @@ void MergeResults<3, 8, true>(int32_t *out, const int32_t *in, const int ldout,
}
}
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/merges/sve_merge_u32_3VLx8.hpp b/src/core/NEON/kernels/arm_gemm/merges/sve_merge_u32_3VLx8.hpp
index c4b2bb56d6..e992f6722c 100644
--- a/src/core/NEON/kernels/arm_gemm/merges/sve_merge_u32_3VLx8.hpp
+++ b/src/core/NEON/kernels/arm_gemm/merges/sve_merge_u32_3VLx8.hpp
@@ -23,7 +23,7 @@
*/
#pragma once
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
template<>
void MergeResults<3, 8, true>(uint32_t *out, const uint32_t *in, const int ldout, const int y0, const int ymax, const int x0, const int xmax, const uint32_t *bias, Activation , bool append)
@@ -1394,4 +1394,4 @@ void MergeResults<3, 8, true>(uint32_t *out, const uint32_t *in, const int ldout
}
}
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/utils.hpp b/src/core/NEON/kernels/arm_gemm/utils.hpp
index 1269ef62a6..e648ce2fb5 100644
--- a/src/core/NEON/kernels/arm_gemm/utils.hpp
+++ b/src/core/NEON/kernels/arm_gemm/utils.hpp
@@ -141,52 +141,36 @@ struct IndirectInputArg {
};
namespace utils {
-namespace {
-
-#ifdef __ARM_FEATURE_SVE
-template<size_t sz>
-inline unsigned long get_vector_length_sz() {
- unsigned long v;
-
- __asm (
- "cntb %0"
- : "=r" (v)
- );
-
- return v / sz;
-}
-
-#define VEC_LEN_SPEC(sz, opcode) template <> inline unsigned long get_vector_length_sz<sz>() { unsigned long v; __asm ( opcode " %0" : "=r" (v)); return v; }
-
-VEC_LEN_SPEC(8, "cntd")
-VEC_LEN_SPEC(4, "cntw")
-VEC_LEN_SPEC(2, "cnth")
-VEC_LEN_SPEC(1, "cntb")
-#endif
-
-} // anonymous namespace
-
template <typename T>
inline unsigned long get_vector_length() {
-#ifdef __ARM_FEATURE_SVE
- return get_vector_length_sz<sizeof(T)>();
-#else
+#if defined(ARM_COMPUTE_ENABLE_SVE)
+ uint64_t vl;
+
+ __asm __volatile (
+ ".inst 0x0420e3e0\n" // CNTB X0, ALL, MUL #1
+ "mov %0, X0\n"
+ : "=r" (vl)
+ :
+ : "x0"
+ );
+
+ return vl / sizeof(T);
+#else // !defined(ARM_COMPUTE_ENABLE_SVE)
return 16 / sizeof(T);
-#endif
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
}
template <typename T>
inline unsigned long get_vector_length(VLType vl_type) {
switch (vl_type) {
-#ifdef __ARM_FEATURE_SVE
+#if defined(ARM_COMPUTE_ENABLE_SVE)
case VLType::SVE:
- return get_vector_length_sz<sizeof(T)>();
-#endif
+ return get_vector_length<T>();
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
default:
return 16 / sizeof(T);
}
}
-
} // utils namespace
} // arm_gemm namespace
diff --git a/src/core/NEON/kernels/batchnormalization/impl/SVE/fp16.cpp b/src/core/NEON/kernels/batchnormalization/impl/SVE/fp16.cpp
index a715b9d3ee..c7cfd7457d 100644
--- a/src/core/NEON/kernels/batchnormalization/impl/SVE/fp16.cpp
+++ b/src/core/NEON/kernels/batchnormalization/impl/SVE/fp16.cpp
@@ -29,7 +29,7 @@
#include <cmath>
#include <cstddef>
-#if defined(ENABLE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
#include <arm_sve.h>
namespace arm_compute
diff --git a/src/core/NEON/kernels/batchnormalization/impl/SVE/fp32.cpp b/src/core/NEON/kernels/batchnormalization/impl/SVE/fp32.cpp
index 7cc570d8aa..b8a540158b 100644
--- a/src/core/NEON/kernels/batchnormalization/impl/SVE/fp32.cpp
+++ b/src/core/NEON/kernels/batchnormalization/impl/SVE/fp32.cpp
@@ -29,7 +29,7 @@
#include <cmath>
#include <cstddef>
-#if defined(ENABLE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
#include <arm_sve.h>
namespace arm_compute
diff --git a/src/core/NEON/wrapper/svtraits.h b/src/core/NEON/wrapper/svtraits.h
index 8d2d660659..1d599a246c 100644
--- a/src/core/NEON/wrapper/svtraits.h
+++ b/src/core/NEON/wrapper/svtraits.h
@@ -23,7 +23,7 @@
*/
#ifndef SRC_CORE_NEON_WRAPPER_SVTRAITS_H
#define SRC_CORE_NEON_WRAPPER_SVTRAITS_H
-#if defined(ENABLE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
#include "src/core/NEON/SVEMath.h"
#include <arm_sve.h>
@@ -66,5 +66,5 @@ DEFINE_TYPES(bfloat16_t)
} // namespace wrapper
} // namespace arm_compute
-#endif /* defined(ENABLE_SVE) */
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */
#endif /* #ifndef SRC_CORE_NEON_WRAPPER_SVTRAITS_H */
diff --git a/src/core/NEON/wrapper/traits.h b/src/core/NEON/wrapper/traits.h
index 81685140f1..ebb64d9d76 100644
--- a/src/core/NEON/wrapper/traits.h
+++ b/src/core/NEON/wrapper/traits.h
@@ -26,9 +26,9 @@
#include <arm_neon.h>
-#if defined(ENABLE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
#include <arm_sve.h>
-#endif /* defined(ENABLE_SVE) */
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */
namespace arm_compute
{
@@ -116,13 +116,13 @@ template <> struct neon_bitvector<float16_t, BitWidth::W128>{ using type = float
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-#if defined(ENABLE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
/** Create the appropriate SVE vector given its type */
template <typename T> struct sve_vector;
template <> struct sve_vector<uint8_t>{ using scalar_type = uint8_t; using type = svuint8_t; };
template <> struct sve_vector<int8_t>{ using scalar_type = int8_t; using type = svint8_t; };
-#endif /* defined(ENABLE_SVE) */
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */
#endif /* DOXYGEN_SKIP_THIS */
diff --git a/src/core/common/Registrars.h b/src/core/common/Registrars.h
index 44ddf9808d..65f6c7093d 100644
--- a/src/core/common/Registrars.h
+++ b/src/core/common/Registrars.h
@@ -26,17 +26,17 @@
#if defined(ENABLE_FP16_KERNELS)
-#if defined(ENABLE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
#define REGISTER_FP16_SVE(func_name) &(func_name)
-#else /* !defined(ENABLE_SVE) */
+#else /* !defined(ARM_COMPUTE_ENABLE_SVE) */
#define REGISTER_FP16_SVE(func_name) nullptr
-#endif /* defined(ENABLE_SVE) */
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */
-#if defined(ENABLE_NEON) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+#if defined(ARM_COMPUTE_ENABLE_NEON) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
#define REGISTER_FP16_NEON(func_name) &(func_name)
-#else /* !defined(ENABLE_NEON) */
+#else /* !defined(ARM_COMPUTE_ENABLE_NEON) */
#define REGISTER_FP16_NEON(func_name) nullptr
-#endif /* defined(ENABLE_NEON) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */
+#endif /* defined(ARM_COMPUTE_ENABLE_NEON) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */
#else /* !defined(ENABLE_FP16_KERNELS) */
#define REGISTER_FP16_NEON(func_name) nullptr
@@ -45,17 +45,17 @@
#if defined(ENABLE_FP32_KERNELS)
-#if defined(ENABLE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
#define REGISTER_FP32_SVE(func_name) &(func_name)
-#else /* !defined(ENABLE_SVE) */
+#else /* !defined(ARM_COMPUTE_ENABLE_SVE) */
#define REGISTER_FP32_SVE(func_name) nullptr
-#endif /* defined(ENABLE_SVE) */
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */
-#if defined(ENABLE_NEON)
+#if defined(ARM_COMPUTE_ENABLE_NEON)
#define REGISTER_FP32_NEON(func_name) &(func_name)
-#else /* !defined(ENABLE_NEON) */
+#else /* !defined(ARM_COMPUTE_ENABLE_NEON) */
#define REGISTER_FP32_NEON(func_name) nullptr
-#endif /* defined(ENABLE_NEON) */
+#endif /* defined(ARM_COMPUTE_ENABLE_NEON) */
#else /* defined(ENABLE_FP32_KERNELS) */
#define REGISTER_FP32_NEON(func_name) nullptr
@@ -66,11 +66,11 @@
#define REGISTER_QASYMM8_SIGNED_NEON(func_name) &(func_name)
-#if defined(ENABLE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
#define REGISTER_QASYMM8_SIGNED_SVE(func_name) &(func_name)
-#else /* !defined(ENABLE_SVE) */
+#else /* !defined(ARM_COMPUTE_ENABLE_SVE) */
#define REGISTER_QASYMM8_SIGNED_SVE(func_name) nullptr
-#endif /* defined(ENABLE_SVE) */
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */
#else /* defined(ENABLE_QASYMM8_SIGNED_KERNELS) */
#define REGISTER_QASYMM8_SIGNED_NEON(func_name) nullptr
@@ -80,11 +80,11 @@
#if defined(ENABLE_QASYMM8_KERNELS)
#define REGISTER_QASYMM8_NEON(func_name) &(func_name)
-#if defined(ENABLE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
#define REGISTER_QASYMM8_SVE(func_name) &(func_name)
-#else /* !defined(ENABLE_SVE) */
+#else /* !defined(ARM_COMPUTE_ENABLE_SVE) */
#define REGISTER_QASYMM8_SVE(func_name) nullptr
-#endif /* defined(ENABLE_SVE) */
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */
#else /* defined(ENABLE_QASYMM8_KERNELS) */
#define REGISTER_QASYMM8_NEON(func_name) nullptr
@@ -95,11 +95,11 @@
#define REGISTER_QSYMM16_NEON(func_name) &(func_name)
-#if defined(ENABLE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
#define REGISTER_QSYMM16_SVE(func_name) &(func_name)
-#else /* !defined(ENABLE_SVE) */
+#else /* !defined(ARM_COMPUTE_ENABLE_SVE) */
#define REGISTER_QSYMM16_SVE(func_name) nullptr
-#endif /* defined(ENABLE_SVE) */
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */
#else /* defined(ENABLE_QSYMM16_KERNELS) */
#define REGISTER_QSYMM16_NEON(func_name) nullptr
@@ -108,17 +108,17 @@
#if defined(ENABLE_INTEGER_KERNELS)
-#if defined(ENABLE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
#define REGISTER_INTEGER_SVE(func_name) &(func_name)
-#else /* !defined(ENABLE_SVE) */
+#else /* !defined(ARM_COMPUTE_ENABLE_SVE) */
#define REGISTER_INTEGER_SVE(func_name) nullptr
-#endif /* defined(ENABLE_SVE) */
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */
-#if defined(ENABLE_NEON)
+#if defined(ARM_COMPUTE_ENABLE_NEON)
#define REGISTER_INTEGER_NEON(func_name) &(func_name)
-#else /* !defined(ENABLE_NEON) */
+#else /* !defined(ARM_COMPUTE_ENABLE_NEON) */
#define REGISTER_INTEGER_NEON(func_name) nullptr
-#endif /* defined(ENABLE_NEON) */
+#endif /* defined(ARM_COMPUTE_ENABLE_NEON) */
#else /* defined(ENABLE_INTEGER_KERNELS) */
#define REGISTER_INTEGER_NEON(func_name) nullptr
diff --git a/src/core/cpu/kernels/CpuActivationKernel.cpp b/src/core/cpu/kernels/CpuActivationKernel.cpp
index 8a57a3b529..24642f1efb 100644
--- a/src/core/cpu/kernels/CpuActivationKernel.cpp
+++ b/src/core/cpu/kernels/CpuActivationKernel.cpp
@@ -45,7 +45,8 @@ namespace
{
struct ActivationSelectorData
{
- DataType dt;
+ DataType dt;
+ const CPUInfo &ci;
};
using ActivationSelectorPtr = std::add_pointer<bool(const ActivationSelectorData &data)>::type;
@@ -60,19 +61,19 @@ struct ActivationKernel
static const ActivationKernel available_kernels[] =
{
-#if defined(ENABLE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
{
"fp16_sve_activation",
- [](const ActivationSelectorData & data) { return data.dt == DataType::F16; },
+ [](const ActivationSelectorData & data) { return data.dt == DataType::F16 && data.ci.has_sve(); },
REGISTER_FP16_SVE(arm_compute::cpu::fp16_sve_activation)
},
{
"fp32_sve_activation",
- [](const ActivationSelectorData & data) { return data.dt == DataType::F32; },
+ [](const ActivationSelectorData & data) { return data.dt == DataType::F32 && data.ci.has_sve(); },
REGISTER_FP32_SVE(arm_compute::cpu::fp32_sve_activation)
},
-#endif /* defined(ENABLE_SVE) */
-#if defined(ENABLE_NEON)
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */
+#if defined(ARM_COMPUTE_ENABLE_NEON)
{
"fp16_neon_activation",
[](const ActivationSelectorData & data) { return data.dt == DataType::F16; },
@@ -83,24 +84,24 @@ static const ActivationKernel available_kernels[] =
[](const ActivationSelectorData & data) { return data.dt == DataType::F32; },
REGISTER_FP32_NEON(arm_compute::cpu::fp32_neon_activation)
},
-#endif /* defined(ENABLE_NEON) */
-#if defined(__ARM_FEATURE_SVE2)
+#endif /* defined(ARM_COMPUTE_ENABLE_NEON) */
+#if defined(ARM_COMPUTE_ENABLE_SVE2)
{
"qasymm8_sve_activation",
- [](const ActivationSelectorData & data) { return data.dt == DataType::QASYMM8; },
+ [](const ActivationSelectorData & data) { return data.dt == DataType::QASYMM8 && data.ci.has_sve2(); },
REGISTER_QASYMM8_SVE(arm_compute::cpu::qasymm8_sve_activation)
},
{
"qasymm8_signed_sve_activation",
- [](const ActivationSelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED; },
+ [](const ActivationSelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED && data.ci.has_sve2(); },
REGISTER_QASYMM8_SIGNED_SVE(arm_compute::cpu::qasymm8_signed_sve_activation)
},
{
"qsymm16_sve_activation",
- [](const ActivationSelectorData & data) { return data.dt == DataType::QSYMM16; },
+ [](const ActivationSelectorData & data) { return data.dt == DataType::QSYMM16 && data.ci.has_sve2(); },
REGISTER_QSYMM16_SVE(arm_compute::cpu::qsymm16_sve_activation)
},
-#else /* !defined(__ARM_FEATURE_SVE2) */
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */
{
"qasymm8_neon_activation",
[](const ActivationSelectorData & data) { return data.dt == DataType::QASYMM8; },
@@ -116,7 +117,6 @@ static const ActivationKernel available_kernels[] =
[](const ActivationSelectorData & data) { return data.dt == DataType::QSYMM16; },
REGISTER_QSYMM16_NEON(arm_compute::cpu::qsymm16_neon_activation)
},
-#endif /* defined(__ARM_FEATURE_SVE2) */
};
const ActivationKernel *get_implementation(const ActivationSelectorData &data)
@@ -155,7 +155,7 @@ Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst, const
ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(src);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8_SIGNED, DataType::QASYMM8, DataType::QSYMM16, DataType::F16, DataType::F32);
- const auto *uk = get_implementation(ActivationSelectorData{ src->data_type() });
+ const auto *uk = get_implementation(ActivationSelectorData{ src->data_type(), CPUInfo::get() });
ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr);
const DataType data_type = src->data_type();
@@ -243,7 +243,7 @@ void CpuActivationKernel::run_op(ITensorPack &tensors, const Window &window, con
const ITensor *src = tensors.get_const_tensor(TensorType::ACL_SRC);
ITensor *dst = tensors.get_tensor(TensorType::ACL_DST);
- const auto *uk = get_implementation(ActivationSelectorData{ src->info()->data_type() });
+ const auto *uk = get_implementation(ActivationSelectorData{ src->info()->data_type(), CPUInfo::get() });
uk->ukernel(src, dst, _act_info, window);
}
diff --git a/src/core/cpu/kernels/CpuAddKernel.cpp b/src/core/cpu/kernels/CpuAddKernel.cpp
index 7afdceae38..8d74b4027b 100644
--- a/src/core/cpu/kernels/CpuAddKernel.cpp
+++ b/src/core/cpu/kernels/CpuAddKernel.cpp
@@ -45,9 +45,15 @@ namespace
{
struct AddSelectorData
{
- DataType dt1;
- DataType dt2;
- DataType dt3;
+ /* Data types for all ITensorInfos:
+ dt1 -> src0
+ dt2 -> src1
+ dt3 -> dst
+ */
+ DataType dt1;
+ DataType dt2;
+ DataType dt3;
+ const CPUInfo &ci;
};
using AddSelectorPtr = std::add_pointer<bool(const AddSelectorData &data)>::type;
@@ -61,49 +67,99 @@ struct AddKernel
static const AddKernel available_kernels[] =
{
-#if defined(ENABLE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE2) /* Entries below are built only for SVE2; selectors must also confirm SVE2 at runtime */
+ {
+ "add_qasymm8_sve",
+ [](const AddSelectorData & data)
+ {
+ return ((data.dt1 == data.dt2) && (data.dt1 == DataType::QASYMM8)) && data.ci.has_sve2();
+ },
+ REGISTER_QASYMM8_SVE(arm_compute::cpu::add_qasymm8_sve)
+ },
+ {
+ "add_qasymm8_signed_sve",
+ [](const AddSelectorData & data)
+ {
+ return ((data.dt1 == data.dt2) && (data.dt1 == DataType::QASYMM8_SIGNED)) && data.ci.has_sve2();
+ },
+ REGISTER_QASYMM8_SIGNED_SVE(arm_compute::cpu::add_qasymm8_signed_sve)
+ },
+ {
+ "add_qsymm16_sve",
+ [](const AddSelectorData & data)
+ {
+ return ((data.dt1 == data.dt2) && (data.dt1 == DataType::QSYMM16)) && data.ci.has_sve2();
+ },
+ REGISTER_QSYMM16_SVE(arm_compute::cpu::add_qsymm16_sve)
+ },
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */
+#if defined(ARM_COMPUTE_ENABLE_SVE)
{
"add_same_sve",
- [](const AddSelectorData & data) { return ((data.dt1 == data.dt2) && (data.dt1 == DataType::F32)); },
+ [](const AddSelectorData & data)
+ {
+ return ((data.dt1 == data.dt2) && (data.dt1 == DataType::F32)) && data.ci.has_sve();
+ },
REGISTER_FP32_SVE(arm_compute::cpu::add_same_sve<float>)
},
{
"add_same_sve",
- [](const AddSelectorData & data) { return ((data.dt1 == data.dt2) && (data.dt1 == DataType::F16)); },
+ [](const AddSelectorData & data)
+ {
+ return ((data.dt1 == data.dt2) && (data.dt1 == DataType::F16)) && data.ci.has_sve();
+ },
REGISTER_FP16_SVE(arm_compute::cpu::add_same_sve<float16_t>)
},
{
"add_same_sve",
- [](const AddSelectorData & data) { return ((data.dt1 == data.dt2) && (data.dt1 == data.dt3) && (data.dt1 == DataType::U8)); },
+ [](const AddSelectorData & data)
+ {
+ return ((data.dt1 == data.dt2) && (data.dt1 == data.dt3) && (data.dt1 == DataType::U8)) && data.ci.has_sve();
+ },
REGISTER_INTEGER_SVE(arm_compute::cpu::add_same_sve<uint8_t>)
},
{
"add_same_sve",
- [](const AddSelectorData & data) { return ((data.dt1 == data.dt2) && (data.dt1 == data.dt3) && (data.dt1 == DataType::S16)); },
+ [](const AddSelectorData & data)
+ {
+ return ((data.dt1 == data.dt2) && (data.dt1 == data.dt3) && (data.dt1 == DataType::S16)) && data.ci.has_sve();
+ },
REGISTER_INTEGER_SVE(arm_compute::cpu::add_same_sve<int16_t>)
},
{
"add_same_sve",
- [](const AddSelectorData & data) { return ((data.dt1 == data.dt2) && (data.dt1 == data.dt3) && (data.dt1 == DataType::S32)); },
+ [](const AddSelectorData & data)
+ {
+ return ((data.dt1 == data.dt2) && (data.dt1 == data.dt3) && (data.dt1 == DataType::S32)) && data.ci.has_sve();
+ },
REGISTER_INTEGER_SVE(arm_compute::cpu::add_same_sve<int32_t>)
},
{
"add_u8_s16_s16_sve",
- [](const AddSelectorData & data) { return ((data.dt1 == DataType::U8) && (data.dt2 == DataType::S16)); },
+ [](const AddSelectorData & data)
+ {
+ return ((data.dt1 == DataType::U8) && (data.dt2 == DataType::S16)) && data.ci.has_sve();
+ },
REGISTER_INTEGER_SVE(arm_compute::cpu::add_u8_s16_s16_sve)
},
{
"add_s16_u8_s16_sve",
- [](const AddSelectorData & data) { return ((data.dt1 == DataType::S16) && (data.dt2 == DataType::U8)); },
+ [](const AddSelectorData & data)
+ {
+ return ((data.dt1 == DataType::S16) && (data.dt2 == DataType::U8)) && data.ci.has_sve();
+ },
REGISTER_INTEGER_SVE(arm_compute::cpu::add_s16_u8_s16_sve)
},
{
"add_u8_u8_s16_sve",
- [](const AddSelectorData & data) { return ((data.dt1 == data.dt2) && (data.dt3 == DataType::S16)); },
+ [](const AddSelectorData & data)
+ {
+ return ((data.dt1 == data.dt2) && (data.dt3 == DataType::S16)) && data.ci.has_sve();
+ },
REGISTER_INTEGER_SVE(arm_compute::cpu::add_u8_u8_s16_sve)
},
-#endif /* defined(ENABLE_SVE) */
-#if defined(ENABLE_NEON)
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */
+#if defined(ARM_COMPUTE_ENABLE_NEON)
{
"add_same_neon",
[](const AddSelectorData & data) { return ((data.dt1 == data.dt2) && (data.dt1 == DataType::F32)); },
@@ -112,7 +168,10 @@ static const AddKernel available_kernels[] =
#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
{
"add_same_neon",
- [](const AddSelectorData & data) { return ((data.dt1 == data.dt2) && (data.dt1 == DataType::F16)); },
+ [](const AddSelectorData & data)
+ {
+ return ((data.dt1 == data.dt2) && (data.dt1 == DataType::F16)) && data.ci.has_fp16();
+ },
REGISTER_FP16_NEON(arm_compute::cpu::add_same_neon<float16_t>)
},
#endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */
@@ -146,24 +205,8 @@ static const AddKernel available_kernels[] =
[](const AddSelectorData & data) { return ((data.dt1 == data.dt2) && (data.dt3 == DataType::S16)); },
REGISTER_INTEGER_NEON(arm_compute::cpu::add_u8_u8_s16_neon)
},
-#endif /* defined(ENABLE_NEON) */
-#if defined(__ARM_FEATURE_SVE2)
- {
- "add_qasymm8_sve",
- [](const AddSelectorData & data) { return ((data.dt1 == data.dt2) && (data.dt1 == DataType::QASYMM8)); },
- REGISTER_QASYMM8_SVE(arm_compute::cpu::add_qasymm8_sve)
- },
- {
- "add_qasymm8_signed_sve",
- [](const AddSelectorData & data) { return ((data.dt1 == data.dt2) && (data.dt1 == DataType::QASYMM8_SIGNED)); },
- REGISTER_QASYMM8_SIGNED_SVE(arm_compute::cpu::add_qasymm8_signed_sve)
- },
- {
- "add_qsymm16_sve",
- [](const AddSelectorData & data) { return ((data.dt1 == data.dt2) && (data.dt1 == DataType::QSYMM16)); },
- REGISTER_QSYMM16_SVE(arm_compute::cpu::add_qsymm16_sve)
- },
-#else /* !defined(__ARM_FEATURE_SVE2) */
+#endif /* defined(ARM_COMPUTE_ENABLE_NEON) */
+#if defined(ARM_COMPUTE_ENABLE_NEON) || defined(ARM_COMPUTE_ENABLE_SVE)
{
"add_qasymm8_neon",
[](const AddSelectorData & data) { return ((data.dt1 == data.dt2) && (data.dt1 == DataType::QASYMM8)); },
@@ -179,8 +222,7 @@ static const AddKernel available_kernels[] =
[](const AddSelectorData & data) { return ((data.dt1 == data.dt2) && (data.dt1 == DataType::QSYMM16)); },
REGISTER_QSYMM16_NEON(arm_compute::cpu::add_qsymm16_neon)
},
-#endif /* defined(ENABLE_NEON) */
-
+#endif /* defined(ARM_COMPUTE_ENABLE_NEON) || defined(ARM_COMPUTE_ENABLE_SVE) */
};
/** Micro-kernel selector
@@ -189,11 +231,11 @@ static const AddKernel available_kernels[] =
*
* @return A matching micro-kernel else nullptr
*/
-const AddKernel *get_implementation(DataType dt1, DataType dt2, DataType dt3)
+const AddKernel *get_implementation(const CPUInfo &cpuinfo, DataType dt1, DataType dt2, DataType dt3)
{
for(const auto &uk : available_kernels)
{
- if(uk.is_selected({ dt1, dt2, dt3 }))
+ if(uk.is_selected({ dt1, dt2, dt3, cpuinfo }))
{
return &uk;
}
@@ -241,7 +283,7 @@ Status validate_arguments(const ITensorInfo &src0, const ITensorInfo &src1, cons
"Wrong shape for dst");
}
- const auto *uk = get_implementation(src0.data_type(), src1.data_type(), dst.data_type());
+ const auto *uk = get_implementation(CPUInfo::get(), src0.data_type(), src1.data_type(), dst.data_type());
ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr);
return Status{};
@@ -327,7 +369,7 @@ void CpuAddKernel::run_op(ITensorPack &tensors, const Window &window, const Thre
const ITensor *src1 = tensors.get_const_tensor(TensorType::ACL_SRC_1);
ITensor *dst = tensors.get_tensor(TensorType::ACL_DST);
- const auto *uk = get_implementation(src0->info()->data_type(), src1->info()->data_type(), dst->info()->data_type());
+ const auto *uk = get_implementation(CPUInfo::get(), src0->info()->data_type(), src1->info()->data_type(), dst->info()->data_type());
ARM_COMPUTE_ERROR_ON(uk == nullptr || uk->ukernel == nullptr);
uk->ukernel(src0, src1, dst, _policy, window);
diff --git a/src/core/cpu/kernels/CpuElementwiseKernel.cpp b/src/core/cpu/kernels/CpuElementwiseKernel.cpp
index 643a870540..dc0c5b210d 100644
--- a/src/core/cpu/kernels/CpuElementwiseKernel.cpp
+++ b/src/core/cpu/kernels/CpuElementwiseKernel.cpp
@@ -43,7 +43,13 @@ namespace kernels
{
namespace
{
-using ElementwiseSelector = std::add_pointer<bool(DataType)>::type;
+struct ElementwiseSelectorData
+{
+ DataType dt;
+ const CPUInfo &ci;
+};
+
+using ElementwiseSelector = std::add_pointer<bool(const ElementwiseSelectorData &)>::type;
using UKernelType = CpuElementwiseKernel::ElementwiseFunction;
struct ElementwiseKernel
{
@@ -52,23 +58,6 @@ struct ElementwiseKernel
UKernelType *ukernel;
};
-template <DataType dt>
-inline bool is_selected(DataType data_type)
-{
- return dt == data_type;
-}
-
-template <DataType input_data_type, DataType output_data_type = input_data_type>
-static ElementwiseKernel generate_kernel(UKernelType *ukernel)
-{
- std::string kernel_name("op_");
- kernel_name += string_from_data_type(input_data_type) + "_";
- kernel_name += string_from_data_type(input_data_type) + "_";
- kernel_name += string_from_data_type(output_data_type);
-
- return { kernel_name.c_str(), is_selected<input_data_type>, ukernel };
-}
-
template <ArithmeticOperation op>
std::function<void(const ITensor *, const ITensor *, ITensor *, const Window &)>
configure_arithm_func(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst)
@@ -76,36 +65,85 @@ configure_arithm_func(const ITensorInfo *src0, const ITensorInfo *src1, ITensorI
ARM_COMPUTE_UNUSED(src1, dst);
static ElementwiseKernel kernels[] =
{
-#if defined(ENABLE_SVE)
- generate_kernel<DataType::F32>(REGISTER_FP32_SVE((arm_compute::cpu::elementwise_arithmetic_op<op, float32_t>))),
- generate_kernel<DataType::S32>(REGISTER_INTEGER_SVE((arm_compute::cpu::elementwise_arithmetic_op<op, int32_t>))),
- generate_kernel<DataType::S16>(REGISTER_INTEGER_SVE((arm_compute::cpu::elementwise_arithmetic_op<op, int16_t>))),
-#endif /* defined(ENABLE_SVE) */
-#if defined(ENABLE_NEON)
- generate_kernel<DataType::F32>(REGISTER_FP32_NEON((arm_compute::cpu::elementwise_arithm_op<op, typename wrapper::traits::neon_vector<float, 4>>))),
- generate_kernel<DataType::S32>(REGISTER_INTEGER_NEON((arm_compute::cpu::elementwise_arithm_op<op, typename wrapper::traits::neon_vector<int32_t, 4>>))),
-#endif /* defined(ENABLE_NEON) */
-#if defined(__ARM_FEATURE_SVE2)
- generate_kernel<DataType::QASYMM8>(REGISTER_QASYMM8_SVE((arm_compute::cpu::elementwise_arithmetic_quantized_op<op, uint8_t>))),
- generate_kernel<DataType::QASYMM8_SIGNED>(REGISTER_QASYMM8_SIGNED_SVE((arm_compute::cpu::elementwise_arithmetic_quantized_op<op, int8_t>))),
-#else /* !defined(__ARM_FEATURE_SVE2) */
- generate_kernel<DataType::QASYMM8>(REGISTER_QASYMM8_NEON((arm_compute::cpu::elementwise_arithm_op_quantized<op>))),
- generate_kernel<DataType::QASYMM8_SIGNED>(REGISTER_QASYMM8_SIGNED_NEON((arm_compute::cpu::elementwise_arithm_op_quantized_signed<op>))),
-#endif /* defined(__ARM_FEATURE_SVE2) */
-#if defined(ENABLE_SVE)
- generate_kernel<DataType::F16>(REGISTER_FP16_SVE((arm_compute::cpu::elementwise_arithmetic_op<op, float16_t>))),
-#endif /* defined(ENABLE_SVE) */
-#if defined(ENABLE_NEON)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
+ {
+ "sve_elementwise_fp32",
+ [](const ElementwiseSelectorData & data) { return data.dt == DataType::F32 && data.ci.has_sve(); },
+ REGISTER_FP32_SVE((arm_compute::cpu::elementwise_arithmetic_op<op, float32_t>))
+ },
+ {
+ "sve_elementwise_s32",
+ [](const ElementwiseSelectorData & data) { return data.dt == DataType::S32 && data.ci.has_sve(); },
+ REGISTER_INTEGER_SVE((arm_compute::cpu::elementwise_arithmetic_op<op, int32_t>))
+ },
+ {
+ "sve_elementwise_s16",
+ [](const ElementwiseSelectorData & data) { return data.dt == DataType::S16 && data.ci.has_sve(); },
+ REGISTER_INTEGER_SVE((arm_compute::cpu::elementwise_arithmetic_op<op, int16_t>))
+ },
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */
+#if defined(ARM_COMPUTE_ENABLE_NEON)
+ {
+ "neon_elementwise_f32",
+ [](const ElementwiseSelectorData & data) { return data.dt == DataType::F32; },
+ REGISTER_FP32_NEON((arm_compute::cpu::elementwise_arithm_op<op, typename wrapper::traits::neon_vector<float, 4>>))
+ },
+ {
+ "neon_elementwise_s32",
+ [](const ElementwiseSelectorData & data) { return data.dt == DataType::S32; },
+ REGISTER_INTEGER_NEON((arm_compute::cpu::elementwise_arithm_op<op, typename wrapper::traits::neon_vector<int32_t, 4>>))
+ },
+#endif /* defined(ARM_COMPUTE_ENABLE_NEON) */
+#if defined(ARM_COMPUTE_ENABLE_SVE2)
+ {
+ "sve2_elementwise_qu8",
+ [](const ElementwiseSelectorData & data) { return data.dt == DataType::QASYMM8 && data.ci.has_sve2(); },
+ REGISTER_QASYMM8_SVE((arm_compute::cpu::elementwise_arithmetic_quantized_op<op, uint8_t>))
+ },
+ {
+ "sve2_elementwise_qs8",
+ [](const ElementwiseSelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED && data.ci.has_sve2(); },
+ REGISTER_QASYMM8_SIGNED_SVE((arm_compute::cpu::elementwise_arithmetic_quantized_op<op, int8_t>))
+ },
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */
+#if defined(ARM_COMPUTE_ENABLE_NEON) || defined(ARM_COMPUTE_ENABLE_SVE)
+ {
+ "neon_elementwise_qu8",
+ [](const ElementwiseSelectorData & data) { return data.dt == DataType::QASYMM8; },
+ REGISTER_QASYMM8_NEON((arm_compute::cpu::elementwise_arithm_op_quantized<op>))
+ },
+ {
+ "neon_elementwise_qs8",
+ [](const ElementwiseSelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED; },
+ REGISTER_QASYMM8_SIGNED_NEON((arm_compute::cpu::elementwise_arithm_op_quantized_signed<op>))
+ },
+#endif /* defined(ARM_COMPUTE_ENABLE_NEON) || defined(ARM_COMPUTE_ENABLE_SVE) */
+#if defined(ARM_COMPUTE_ENABLE_SVE)
+ {
+ "sve_elementwise_f16",
+ [](const ElementwiseSelectorData & data) { return data.dt == DataType::F16 && data.ci.has_sve(); },
+ REGISTER_FP16_SVE((arm_compute::cpu::elementwise_arithmetic_op<op, float16_t>))
+ },
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */
+#if defined(ARM_COMPUTE_ENABLE_NEON)
#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
- generate_kernel<DataType::F16>(REGISTER_FP16_NEON((arm_compute::cpu::elementwise_arithm_op<op, typename wrapper::traits::neon_vector<float16_t, 8>>))),
+ {
+ "neon_elementwise_f16",
+ [](const ElementwiseSelectorData & data) { return data.dt == DataType::F16 && data.ci.has_fp16(); },
+ REGISTER_FP16_NEON((arm_compute::cpu::elementwise_arithm_op<op, typename wrapper::traits::neon_vector<float16_t, 8>>))
+ },
#endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */
- generate_kernel<DataType::S16>(REGISTER_INTEGER_NEON((arm_compute::cpu::elementwise_arithm_op<op, typename wrapper::traits::neon_vector<int16_t, 8>>))),
-#endif /* defined(ENABLE_NEON) */
+ {
+ "neon_elementwise_s16",
+ [](const ElementwiseSelectorData & data) { return data.dt == DataType::S16; },
+ REGISTER_INTEGER_NEON((arm_compute::cpu::elementwise_arithm_op<op, typename wrapper::traits::neon_vector<int16_t, 8>>))
+ },
+#endif /* defined(ARM_COMPUTE_ENABLE_NEON) */
};
for(const auto &uk : kernels)
{
- if(uk.is_selected(src0->data_type()))
+ if(uk.is_selected({ src0->data_type(), CPUInfo::get() }))
{
return uk.ukernel;
}
@@ -121,36 +159,93 @@ configure_comp_func(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInf
ARM_COMPUTE_UNUSED(src1, dst);
static ElementwiseKernel kernels[] =
{
-#if defined(ENABLE_SVE)
- generate_kernel<DataType::U8, DataType::U8>(REGISTER_INTEGER_SVE((arm_compute::cpu::elementwise_comparison_op<op, uint8_t>))),
- generate_kernel<DataType::F32, DataType::U8>(REGISTER_FP32_SVE((arm_compute::cpu::elementwise_comparison_op<op, float>))),
- generate_kernel<DataType::S16, DataType::U8>(REGISTER_INTEGER_SVE((arm_compute::cpu::elementwise_comparison_op<op, int16_t>))),
- generate_kernel<DataType::S32, DataType::U8>(REGISTER_INTEGER_SVE((arm_compute::cpu::elementwise_comparison_op<op, int32_t>))),
-#endif /* defined(ENABLE_SVE) */
-#if defined(ENABLE_NEON)
- generate_kernel<DataType::U8, DataType::U8>(REGISTER_INTEGER_NEON((arm_compute::cpu::elementwise_comp_op_8<op, uint8_t, uint8x16_t>))),
- generate_kernel<DataType::F32, DataType::U8>(REGISTER_FP32_NEON((arm_compute::cpu::elementwise_comp_op_32<op, float, float32x4_t>))),
- generate_kernel<DataType::S16, DataType::U8>(REGISTER_INTEGER_NEON((arm_compute::cpu::elementwise_comp_op_16<op, int16_t, int16x8_t>))),
- generate_kernel<DataType::S32, DataType::U8>(REGISTER_INTEGER_NEON((arm_compute::cpu::elementwise_comp_op_32<op, int32_t, int32x4_t>))),
-#endif /* defined(ENABLE_NEON) */
-#if defined(__ARM_FEATURE_SVE2)
- generate_kernel<DataType::QASYMM8_SIGNED, DataType::U8>(REGISTER_QASYMM8_SIGNED_SVE((arm_compute::cpu::elementwise_comparison_quantized_op<op, int8_t>))),
- generate_kernel<DataType::QASYMM8, DataType::U8>(REGISTER_QASYMM8_SVE((arm_compute::cpu::elementwise_comparison_quantized_op<op, uint8_t>))),
-#else /* !defined(__ARM_FEATURE_SVE2) */
- generate_kernel<DataType::QASYMM8_SIGNED, DataType::U8>(REGISTER_QASYMM8_SIGNED_NEON((arm_compute::cpu::elementwise_comp_op_quantized_signed<op>))),
- generate_kernel<DataType::QASYMM8, DataType::U8>(REGISTER_QASYMM8_NEON((arm_compute::cpu::elementwise_comp_op_quantized<op>))),
-#endif /* defined(__ARM_FEATURE_SVE2) */
-#if defined(ENABLE_SVE)
- generate_kernel<DataType::F16, DataType::U8>(REGISTER_FP16_SVE((arm_compute::cpu::elementwise_comparison_op<op, float16_t>))),
-#endif /* defined(ENABLE_SVE) */
-#if defined(ENABLE_NEON) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
- generate_kernel<DataType::F16, DataType::U8>(REGISTER_FP16_NEON((arm_compute::cpu::elementwise_comp_op_16<op, float16_t, float16x8_t>))),
-#endif /* defined(ENABLE_NEON) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */
+#if defined(ARM_COMPUTE_ENABLE_SVE)
+ {
+ "sve_comparison_u8",
+ [](const ElementwiseSelectorData & data) { return data.dt == DataType::U8 && data.ci.has_sve(); },
+ REGISTER_INTEGER_SVE((arm_compute::cpu::elementwise_comparison_op<op, uint8_t>))
+ },
+ {
+ "sve_comparison_f32",
+ [](const ElementwiseSelectorData & data) { return data.dt == DataType::F32 && data.ci.has_sve(); },
+ REGISTER_FP32_SVE((arm_compute::cpu::elementwise_comparison_op<op, float>))
+ },
+ {
+ "sve_comparison_s16",
+ [](const ElementwiseSelectorData & data) { return data.dt == DataType::S16 && data.ci.has_sve(); },
+ REGISTER_INTEGER_SVE((arm_compute::cpu::elementwise_comparison_op<op, int16_t>))
+ },
+ {
+ "sve_comparison_s32",
+ [](const ElementwiseSelectorData & data) { return data.dt == DataType::S32 && data.ci.has_sve(); },
+ REGISTER_INTEGER_SVE((arm_compute::cpu::elementwise_comparison_op<op, int32_t>))
+ },
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */
+#if defined(ARM_COMPUTE_ENABLE_NEON)
+ {
+ "neon_comparison_u8",
+ [](const ElementwiseSelectorData & data) { return data.dt == DataType::U8; },
+ REGISTER_INTEGER_NEON((arm_compute::cpu::elementwise_comp_op_8<op, uint8_t, uint8x16_t>))
+ },
+ {
+ "neon_comparison_f32",
+ [](const ElementwiseSelectorData & data) { return data.dt == DataType::F32; },
+ REGISTER_FP32_NEON((arm_compute::cpu::elementwise_comp_op_32<op, float, float32x4_t>))
+ },
+ {
+ "neon_comparison_s16",
+ [](const ElementwiseSelectorData & data) { return data.dt == DataType::S16; },
+ REGISTER_INTEGER_NEON((arm_compute::cpu::elementwise_comp_op_16<op, int16_t, int16x8_t>))
+ },
+ {
+ "neon_comparison_s32",
+ [](const ElementwiseSelectorData & data) { return data.dt == DataType::S32; },
+ REGISTER_INTEGER_NEON((arm_compute::cpu::elementwise_comp_op_32<op, int32_t, int32x4_t>))
+ },
+#endif /* defined(ARM_COMPUTE_ENABLE_NEON) */
+#if defined(ARM_COMPUTE_ENABLE_SVE2)
+ {
+ "sve_comparison_qu8",
+ [](const ElementwiseSelectorData & data) { return data.dt == DataType::QASYMM8 && data.ci.has_sve2(); },
+ REGISTER_QASYMM8_SVE((arm_compute::cpu::elementwise_comparison_quantized_op<op, uint8_t>))
+ },
+ {
+ "sve_comparison_qs8",
+ [](const ElementwiseSelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED && data.ci.has_sve2(); },
+ REGISTER_QASYMM8_SIGNED_SVE((arm_compute::cpu::elementwise_comparison_quantized_op<op, int8_t>))
+ },
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */
+#if defined(ARM_COMPUTE_ENABLE_NEON) || defined(ARM_COMPUTE_ENABLE_SVE)
+ {
+ "neon_comparison_qu8",
+ [](const ElementwiseSelectorData & data) { return data.dt == DataType::QASYMM8; },
+ REGISTER_QASYMM8_NEON((arm_compute::cpu::elementwise_comp_op_quantized<op>))
+ },
+ {
+ "neon_comparison_qs8",
+ [](const ElementwiseSelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED; },
+ REGISTER_QASYMM8_SIGNED_NEON((arm_compute::cpu::elementwise_comp_op_quantized_signed<op>))
+ },
+#endif /* defined(ARM_COMPUTE_ENABLE_NEON) || defined(ARM_COMPUTE_ENABLE_SVE) */
+#if defined(ARM_COMPUTE_ENABLE_SVE)
+ {
+ "sve_comparison_f16",
+ [](const ElementwiseSelectorData & data) { return data.dt == DataType::F16 && data.ci.has_sve(); },
+ REGISTER_FP16_SVE((arm_compute::cpu::elementwise_comparison_op<op, float16_t>))
+ },
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */
+#if defined(ARM_COMPUTE_ENABLE_NEON) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+ {
+ "neon_comparison_f16",
+ [](const ElementwiseSelectorData & data) { return data.dt == DataType::F16 && data.ci.has_fp16(); },
+ REGISTER_FP16_NEON((arm_compute::cpu::elementwise_comp_op_16<op, float16_t, float16x8_t>))
+ },
+#endif /* defined(ARM_COMPUTE_ENABLE_NEON) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */
};
for(const auto &uk : kernels)
{
- if(uk.is_selected(src0->data_type()))
+ if(uk.is_selected({ src0->data_type(), CPUInfo::get() }))
{
return uk.ukernel;
}
diff --git a/src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp b/src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp
index 2600a49b70..91fa75ebaf 100644
--- a/src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp
+++ b/src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp
@@ -54,7 +54,7 @@ struct ElementwiseUnaryKernel
static const ElementwiseUnaryKernel available_kernels[] =
{
-#if defined(ENABLE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
{
"fp32_sve_elementwise_unary",
[](DataType dt) { return dt == DataType::F32; },
@@ -70,8 +70,8 @@ static const ElementwiseUnaryKernel available_kernels[] =
[](DataType dt) { return dt == DataType::S32; },
REGISTER_INTEGER_SVE(arm_compute::cpu::elementwise_sve_op<int32_t>),
},
-#endif // defined(ENABLE_SVE)
-#if defined(ENABLE_NEON)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_NEON)
{
"fp32_neon_elementwise_unary",
[](DataType dt) { return dt == DataType::F32; },
@@ -89,7 +89,7 @@ static const ElementwiseUnaryKernel available_kernels[] =
[](DataType dt) { return dt == DataType::S32; },
REGISTER_INTEGER_NEON(arm_compute::cpu::elementwise_op<int32_t>),
},
-#endif // defined(ENABLE_NEON)
+#endif // defined(ARM_COMPUTE_ENABLE_NEON)
};
const ElementwiseUnaryKernel *get_implementation(DataType dt)
diff --git a/src/core/cpu/kernels/CpuScaleKernel.cpp b/src/core/cpu/kernels/CpuScaleKernel.cpp
index 29475fa63f..a072dbd896 100644
--- a/src/core/cpu/kernels/CpuScaleKernel.cpp
+++ b/src/core/cpu/kernels/CpuScaleKernel.cpp
@@ -50,7 +50,8 @@ namespace
{
struct ScaleSelectorData
{
- DataType dt;
+ DataType dt;
+ const CPUInfo &ci;
};
using ScaleSelectorPtr = std::add_pointer<bool(const ScaleSelectorData &data)>::type;
using ScaleKernelPtr = std::add_pointer<void(const ITensor *, ITensor *, const ITensor *, const ITensor *, const ITensor *,
@@ -64,43 +65,43 @@ struct ScaleKernel
static const ScaleKernel available_kernels[] =
{
-#if defined(ENABLE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
{
"fp16_sve_scale",
- [](const ScaleSelectorData & data) { return data.dt == DataType::F16; },
+ [](const ScaleSelectorData & data) { return data.dt == DataType::F16 && data.ci.has_sve(); },
REGISTER_FP16_SVE(arm_compute::cpu::fp16_sve_scale)
},
{
"f32_sve_scale",
- [](const ScaleSelectorData & data) { return data.dt == DataType::F32; },
+ [](const ScaleSelectorData & data) { return data.dt == DataType::F32 && data.ci.has_sve(); },
REGISTER_FP32_SVE(arm_compute::cpu::fp32_sve_scale)
},
{
"qasymm8_sve_scale",
- [](const ScaleSelectorData & data) { return data.dt == DataType::QASYMM8; },
+ [](const ScaleSelectorData & data) { return data.dt == DataType::QASYMM8 && data.ci.has_sve(); },
REGISTER_QASYMM8_SVE(arm_compute::cpu::qasymm8_sve_scale)
},
{
"qasymm8_signed_sve_scale",
- [](const ScaleSelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED; },
+ [](const ScaleSelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED && data.ci.has_sve(); },
REGISTER_QASYMM8_SIGNED_SVE(arm_compute::cpu::qasymm8_signed_sve_scale)
},
{
"u8_sve_scale",
- [](const ScaleSelectorData & data) { return data.dt == DataType::U8; },
+ [](const ScaleSelectorData & data) { return data.dt == DataType::U8 && data.ci.has_sve(); },
REGISTER_INTEGER_SVE(arm_compute::cpu::u8_sve_scale)
},
{
"s16_sve_scale",
- [](const ScaleSelectorData & data) { return data.dt == DataType::S16; },
+ [](const ScaleSelectorData & data) { return data.dt == DataType::S16 && data.ci.has_sve(); },
REGISTER_INTEGER_SVE(arm_compute::cpu::s16_sve_scale)
},
-#endif /* defined(ENABLE_SVE) */
-#if defined(ENABLE_NEON)
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */
+#if defined(ARM_COMPUTE_ENABLE_NEON)
#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
{
"common_neon_scale",
- [](const ScaleSelectorData & data) { return data.dt == DataType::F16; },
+ [](const ScaleSelectorData & data) { return data.dt == DataType::F16 && data.ci.has_fp16(); },
REGISTER_FP16_NEON(arm_compute::cpu::common_neon_scale<float16_t>)
},
#endif /* !defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */
@@ -129,7 +130,7 @@ static const ScaleKernel available_kernels[] =
[](const ScaleSelectorData & data) { return data.dt == DataType::S16; },
REGISTER_INTEGER_NEON(arm_compute::cpu::common_neon_scale<int16_t>)
},
-#endif /* defined(ENABLE_NEON) */
+#endif /* defined(ARM_COMPUTE_ENABLE_NEON) */
};
/** Micro-kernel selector
@@ -153,7 +154,7 @@ const ScaleKernel *get_implementation(const ScaleSelectorData &data)
Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dx, const ITensorInfo *dy,
const ITensorInfo *offsets, ITensorInfo *dst, const ScaleKernelInfo &info)
{
- const auto *uk = get_implementation(ScaleSelectorData{ src->data_type() });
+ const auto *uk = get_implementation(ScaleSelectorData{ src->data_type(), CPUInfo::get() });
ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr);
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(dst);
@@ -607,7 +608,7 @@ void CpuScaleKernel::run_op(ITensorPack &tensors, const Window &window, const Th
}
else
{
- const auto *uk = get_implementation(ScaleSelectorData{ src->info()->data_type() });
+ const auto *uk = get_implementation(ScaleSelectorData{ src->info()->data_type(), CPUInfo::get() });
uk->ukernel(src, dst, offsets, dx, dy, _policy, _border_mode, _constant_border_value, _sampling_offset, _align_corners, window);
}
}
diff --git a/src/core/cpu/kernels/CpuSoftmaxKernel.cpp b/src/core/cpu/kernels/CpuSoftmaxKernel.cpp
index 8ea186b16a..1e00e12050 100644
--- a/src/core/cpu/kernels/CpuSoftmaxKernel.cpp
+++ b/src/core/cpu/kernels/CpuSoftmaxKernel.cpp
@@ -47,7 +47,8 @@ namespace
{
struct SoftmaxSelectorData
{
- DataType dt;
+ DataType dt;
+ const CPUInfo &ci;
};
using SoftmaxSelectorPtr = std::add_pointer<bool(const SoftmaxSelectorData &data)>::type;
using SoftmaxLogits1DMaxKernelPtr = std::add_pointer<void(const ITensor *, ITensor *, const Window &)>::type;
@@ -69,20 +70,20 @@ struct SoftmaxLogits1DMaxKernel
static const SoftmaxLogits1DKernel available_logits_1d_kernels[] =
{
-#if defined(ENABLE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
{
"sve_softmax_logits_1d_float",
- [](const SoftmaxSelectorData & data) { return (data.dt == DataType::F32); },
+ [](const SoftmaxSelectorData & data) { return (data.dt == DataType::F32) && data.ci.has_sve(); },
REGISTER_FP32_SVE(arm_compute::cpu::sve_softmax_logits_1d_float<float>)
},
{
"sve_softmax_logits_1d_float",
- [](const SoftmaxSelectorData & data) { return (data.dt == DataType::F16); },
+ [](const SoftmaxSelectorData & data) { return (data.dt == DataType::F16) && data.ci.has_sve(); },
REGISTER_FP16_SVE(arm_compute::cpu::sve_softmax_logits_1d_float<float16_t>)
},
-#endif /* defined(ENABLE_SVE) */
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */
-#if defined(ENABLE_NEON)
+#if defined(ARM_COMPUTE_ENABLE_NEON)
{
"neon_softmax_logits_1d_float",
[](const SoftmaxSelectorData & data) { return (data.dt == DataType::F32); },
@@ -95,20 +96,20 @@ static const SoftmaxLogits1DKernel available_logits_1d_kernels[] =
REGISTER_FP16_NEON(arm_compute::cpu::neon_softmax_logits_1d_float<float16_t>)
},
#endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */
-#endif /* !defined(ENABLE_NEON) */
+#endif /* defined(ARM_COMPUTE_ENABLE_NEON) */
-#if defined(__ARM_FEATURE_SVE2)
+#if defined(ARM_COMPUTE_ENABLE_SVE2)
{
"sve_softmax_logits_1d_quantized",
- [](const SoftmaxSelectorData & data) { return (data.dt == DataType::QASYMM8); },
+ [](const SoftmaxSelectorData & data) { return (data.dt == DataType::QASYMM8) && data.ci.has_sve2(); },
REGISTER_QASYMM8_SVE(arm_compute::cpu::sve_softmax_logits_1d_quantized<qasymm8_t>)
},
{
"sve_softmax_logits_1d_quantized",
- [](const SoftmaxSelectorData & data) { return (data.dt == DataType::QASYMM8_SIGNED); },
+ [](const SoftmaxSelectorData & data) { return (data.dt == DataType::QASYMM8_SIGNED) && data.ci.has_sve2(); },
REGISTER_QASYMM8_SIGNED_SVE(arm_compute::cpu::sve_softmax_logits_1d_quantized<qasymm8_signed_t>)
},
-#else /* !defined(__ARM_FEATURE_SVE2) */
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */
{
"neon_softmax_logits_1d_quantized",
[](const SoftmaxSelectorData & data) { return (data.dt == DataType::QASYMM8); },
@@ -119,35 +120,33 @@ static const SoftmaxLogits1DKernel available_logits_1d_kernels[] =
[](const SoftmaxSelectorData & data) { return (data.dt == DataType::QASYMM8_SIGNED); },
REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::neon_softmax_logits_1d_quantized<qasymm8_signed_t>)
},
-#endif /* defined(__ARM_FEATURE_SVE2) */
-
};
static const SoftmaxLogits1DMaxKernel available_logits_1d_max_kernels[] =
{
-#if defined(ENABLE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
{
"sve_logits_1d_max",
- [](const SoftmaxSelectorData & data) { return (data.dt == DataType::F32); },
+ [](const SoftmaxSelectorData & data) { return (data.dt == DataType::F32) && data.ci.has_sve(); },
REGISTER_FP32_SVE(arm_compute::cpu::sve_logits_1d_max<float>)
},
{
"sve_logits_1d_max",
- [](const SoftmaxSelectorData & data) { return (data.dt == DataType::F16); },
+ [](const SoftmaxSelectorData & data) { return (data.dt == DataType::F16) && data.ci.has_sve(); },
REGISTER_FP16_SVE(arm_compute::cpu::sve_logits_1d_max<float16_t>)
},
{
"sve_logits_1d_max",
- [](const SoftmaxSelectorData & data) { return (data.dt == DataType::QASYMM8); },
+ [](const SoftmaxSelectorData & data) { return (data.dt == DataType::QASYMM8) && data.ci.has_sve(); },
REGISTER_QASYMM8_SVE(arm_compute::cpu::sve_logits_1d_max<qasymm8_t>)
},
{
"sve_logits_1d_max",
- [](const SoftmaxSelectorData & data) { return (data.dt == DataType::QASYMM8_SIGNED); },
+ [](const SoftmaxSelectorData & data) { return (data.dt == DataType::QASYMM8_SIGNED) && data.ci.has_sve(); },
REGISTER_QASYMM8_SIGNED_SVE(arm_compute::cpu::sve_logits_1d_max<qasymm8_signed_t>)
},
-#endif /* defined(ENABLE_SVE) */
-#if defined(ENABLE_NEON)
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */
+#if defined(ARM_COMPUTE_ENABLE_NEON)
{
"neon_logits_1d_max",
[](const SoftmaxSelectorData & data) { return (data.dt == DataType::F32); },
@@ -170,14 +169,14 @@ static const SoftmaxLogits1DMaxKernel available_logits_1d_max_kernels[] =
[](const SoftmaxSelectorData & data) { return (data.dt == DataType::QASYMM8_SIGNED); },
REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::neon_logits_1d_max<qasymm8_signed_t>)
},
-#endif /* defined(ENABLE_NEON) */
+#endif /* defined(ARM_COMPUTE_ENABLE_NEON) */
};
const SoftmaxLogits1DKernel *get_implementation_logits(const SoftmaxSelectorData &data)
{
for(const auto &uk : available_logits_1d_kernels)
{
- if(uk.is_selected({ data.dt }))
+ if(uk.is_selected({ data.dt, CPUInfo::get() }))
{
return &uk;
}
@@ -189,7 +188,7 @@ const SoftmaxLogits1DMaxKernel *get_implementation_logits_max(const SoftmaxSelec
{
for(const auto &uk : available_logits_1d_max_kernels)
{
- if(uk.is_selected({ data.dt }))
+ if(uk.is_selected({ data.dt, CPUInfo::get() }))
{
return &uk;
}
@@ -253,7 +252,7 @@ void CpuLogits1DMaxKernel::run_op(ITensorPack &tensors, const Window &window, co
const auto src = tensors.get_const_tensor(TensorType::ACL_SRC);
auto dst = tensors.get_tensor(TensorType::ACL_DST);
- const auto *uk = get_implementation_logits_max(SoftmaxSelectorData{ src->info()->data_type() });
+ const auto *uk = get_implementation_logits_max(SoftmaxSelectorData{ src->info()->data_type(), CPUInfo::get() });
uk->ukernel(src, dst, window);
}
@@ -364,7 +363,7 @@ void CpuLogits1DSoftmaxKernel<IS_LOG>::run_op(ITensorPack &tensors, const Window
void *tmp_for_thread = tmp->buffer() + (info.thread_id * tmp_size_for_thread);
- const auto *uk = get_implementation_logits(SoftmaxSelectorData{ src->info()->data_type() });
+ const auto *uk = get_implementation_logits(SoftmaxSelectorData{ src->info()->data_type(), CPUInfo::get() });
uk->ukernel(src, max, tmp_for_thread, dst, _beta, IS_LOG, window);
}
diff --git a/src/core/cpu/kernels/activation/sve/qasymm8.cpp b/src/core/cpu/kernels/activation/sve/qasymm8.cpp
index 228b4ae530..69fffd96c5 100644
--- a/src/core/cpu/kernels/activation/sve/qasymm8.cpp
+++ b/src/core/cpu/kernels/activation/sve/qasymm8.cpp
@@ -21,14 +21,13 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-
+#if defined(ARM_COMPUTE_ENABLE_SVE2)
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Window.h"
#include <cmath>
#include <cstddef>
-#if defined(__ARM_FEATURE_SVE2)
#include "src/core/NEON/SVEAsymm.h"
#include "src/core/NEON/SVEMath.h"
#include <arm_sve.h>
@@ -251,4 +250,4 @@ void qasymm8_sve_activation(const ITensor *src, ITensor *dst, const ActivationLa
}
} // namespace cpu
} // namespace arm_compute
-#endif /* defined(__ARM_FEATURE_SVE2) */
\ No newline at end of file
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */
\ No newline at end of file
diff --git a/src/core/cpu/kernels/activation/sve/qasymm8_signed.cpp b/src/core/cpu/kernels/activation/sve/qasymm8_signed.cpp
index 989f825eb9..53ee515ff9 100644
--- a/src/core/cpu/kernels/activation/sve/qasymm8_signed.cpp
+++ b/src/core/cpu/kernels/activation/sve/qasymm8_signed.cpp
@@ -28,7 +28,7 @@
#include <cmath>
#include <cstddef>
-#if defined(__ARM_FEATURE_SVE2)
+#if defined(ARM_COMPUTE_ENABLE_SVE2)
#include "src/core/NEON/SVEAsymm.h"
#include "src/core/NEON/SVEMath.h"
#include <arm_sve.h>
@@ -250,4 +250,4 @@ void qasymm8_signed_sve_activation(const ITensor *src, ITensor *dst, const Activ
}
} // namespace cpu
} // namespace arm_compute
-#endif /* defined(__ARM_FEATURE_SVE2) */
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */
diff --git a/src/core/cpu/kernels/activation/sve/qsymm16.cpp b/src/core/cpu/kernels/activation/sve/qsymm16.cpp
index 66974875da..ac549770a2 100644
--- a/src/core/cpu/kernels/activation/sve/qsymm16.cpp
+++ b/src/core/cpu/kernels/activation/sve/qsymm16.cpp
@@ -29,7 +29,7 @@
#include <cmath>
#include <cstddef>
-#if defined(__ARM_FEATURE_SVE2)
+#if defined(ARM_COMPUTE_ENABLE_SVE2)
#include "src/core/NEON/SVEMath.h"
#include "src/core/NEON/SVESymm.h"
#include <arm_sve.h>
@@ -117,4 +117,4 @@ void qsymm16_sve_activation(const ITensor *src, ITensor *dst, const ActivationLa
}
} // namespace cpu
} // namespace arm_compute
-#endif /* defined(__ARM_FEATURE_SVE2) */
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */
diff --git a/src/core/cpu/kernels/add/sve/impl.h b/src/core/cpu/kernels/add/sve/impl.h
index c38b1d47e0..32ff5d0496 100644
--- a/src/core/cpu/kernels/add/sve/impl.h
+++ b/src/core/cpu/kernels/add/sve/impl.h
@@ -24,7 +24,7 @@
#ifndef SRC_CORE_SVE_KERNELS_ADD_IMPL_H
#define SRC_CORE_SVE_KERNELS_ADD_IMPL_H
-#if defined(ENABLE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/Traits.h"
@@ -36,5 +36,5 @@ template <typename ScalarType>
void add_same_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window);
} // namespace cpu
} // namespace arm_compute
-#endif // defined(ENABLE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
#endif // SRC_CORE_SVE_KERNELS_ADD_IMPL_H
\ No newline at end of file
diff --git a/src/core/cpu/kernels/add/sve/list.h b/src/core/cpu/kernels/add/sve/list.h
index aebb43bb60..9e439497c9 100644
--- a/src/core/cpu/kernels/add/sve/list.h
+++ b/src/core/cpu/kernels/add/sve/list.h
@@ -24,7 +24,7 @@
#ifndef SRC_CORE_SVE_KERNELS_ADD_LIST_H
#define SRC_CORE_SVE_KERNELS_ADD_LIST_H
-#if defined(ENABLE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/Traits.h"
#include "src/core/NEON/SVEMath.h"
@@ -50,5 +50,5 @@ DECLARE_ADD_KERNEL(add_u8_u8_s16_sve);
} // namespace cpu
} // namespace arm_compute
-#endif // defined(ENABLE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
#endif // SRC_CORE_SVE_KERNELS_ADD_LIST_H
\ No newline at end of file
diff --git a/src/core/cpu/kernels/add/sve/qasymm8.cpp b/src/core/cpu/kernels/add/sve/qasymm8.cpp
index f6d1485e61..888ad878ca 100644
--- a/src/core/cpu/kernels/add/sve/qasymm8.cpp
+++ b/src/core/cpu/kernels/add/sve/qasymm8.cpp
@@ -21,13 +21,13 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#if defined(__ARM_FEATURE_SVE2)
+#if defined(ARM_COMPUTE_ENABLE_SVE2)
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/Traits.h"
-#include "src/core/NEON/wrapper/intrinsics/intrinsics.h"
#include "src/core/NEON/SVEMath.h"
+#include "src/core/NEON/wrapper/intrinsics/intrinsics.h"
#include <arm_sve.h>
namespace arm_compute
@@ -179,4 +179,4 @@ void add_qasymm8_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, con
}
} // namespace cpu
} // namespace arm_compute
-#endif /* defined(__ARM_FEATURE_SVE2) */ \ No newline at end of file
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */ \ No newline at end of file
diff --git a/src/core/cpu/kernels/add/sve/qasymm8_signed.cpp b/src/core/cpu/kernels/add/sve/qasymm8_signed.cpp
index 8102aa5c65..3b922c6c21 100644
--- a/src/core/cpu/kernels/add/sve/qasymm8_signed.cpp
+++ b/src/core/cpu/kernels/add/sve/qasymm8_signed.cpp
@@ -21,13 +21,13 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#if defined(__ARM_FEATURE_SVE2)
+#if defined(ARM_COMPUTE_ENABLE_SVE2)
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/Traits.h"
-#include "src/core/NEON/wrapper/intrinsics/intrinsics.h"
#include "src/core/NEON/SVEMath.h"
+#include "src/core/NEON/wrapper/intrinsics/intrinsics.h"
#include <arm_sve.h>
namespace arm_compute
@@ -178,4 +178,4 @@ void add_qasymm8_signed_sve(const ITensor *src0, const ITensor *src1, ITensor *d
}
} // namespace cpu
} // namespace arm_compute
-#endif /* defined(__ARM_FEATURE_SVE2) */ \ No newline at end of file
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */ \ No newline at end of file
diff --git a/src/core/cpu/kernels/add/sve/qsymm16.cpp b/src/core/cpu/kernels/add/sve/qsymm16.cpp
index fb62257b0a..eef5d245d3 100644
--- a/src/core/cpu/kernels/add/sve/qsymm16.cpp
+++ b/src/core/cpu/kernels/add/sve/qsymm16.cpp
@@ -21,13 +21,13 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#if defined(__ARM_FEATURE_SVE2)
+#if defined(ARM_COMPUTE_ENABLE_SVE2)
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/Traits.h"
-#include "src/core/NEON/wrapper/intrinsics/intrinsics.h"
#include "src/core/NEON/SVEMath.h"
+#include "src/core/NEON/wrapper/intrinsics/intrinsics.h"
#include <arm_sve.h>
namespace arm_compute
@@ -153,4 +153,4 @@ void add_qsymm16_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, con
}
} // namespace cpu
} // namespace arm_compute
-#endif /* defined(__ARM_FEATURE_SVE2) */ \ No newline at end of file
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */ \ No newline at end of file
diff --git a/src/core/cpu/kernels/elementwise/sve/elementwise_list.h b/src/core/cpu/kernels/elementwise/sve/elementwise_list.h
index a92a8648a8..fea38d2995 100644
--- a/src/core/cpu/kernels/elementwise/sve/elementwise_list.h
+++ b/src/core/cpu/kernels/elementwise/sve/elementwise_list.h
@@ -23,7 +23,7 @@
*/
#ifndef SRC_CORE_SVE_KERNELS_ELEMENTWISE_LIST_H
#define SRC_CORE_SVE_KERNELS_ELEMENTWISE_LIST_H
-#if defined(ENABLE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/Types.h"
@@ -167,5 +167,5 @@ template <ComparisonOperation op, typename ScalarType, typename OutputScalarType
void elementwise_comparison_op(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window);
} // namespace cpu
} // namespace arm_compute
-#endif // defined(ENABLE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
#endif /* SRC_CORE_SVE_KERNELS_ELEMENTWISE_LIST_H */
diff --git a/src/core/cpu/kernels/elementwise/sve/elementwise_quantized_list.h b/src/core/cpu/kernels/elementwise/sve/elementwise_quantized_list.h
index 6c5524e284..5e04128b44 100644
--- a/src/core/cpu/kernels/elementwise/sve/elementwise_quantized_list.h
+++ b/src/core/cpu/kernels/elementwise/sve/elementwise_quantized_list.h
@@ -24,7 +24,7 @@
#ifndef SRC_CORE_SVE_KERNELS_ELEMENTWISE_QUANTIZED_LIST_H
#define SRC_CORE_SVE_KERNELS_ELEMENTWISE_QUANTIZED_LIST_H
-#if defined(__ARM_FEATURE_SVE2)
+#if defined(ARM_COMPUTE_ENABLE_SVE2)
#include "src/core/NEON/wrapper/svtraits.h"
#include "src/core/cpu/kernels/elementwise/sve/elementwise_list.h"
@@ -362,5 +362,5 @@ void elementwise_comparison_quantized_op(const ITensor *in1, const ITensor *in2,
} // namespace cpu
} // namespace arm_compute
-#endif /* defined(__ARM_FEATURE_SVE2) */
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */
#endif /* SRC_CORE_SVE_KERNELS_ELEMENTWISE_QUANTIZED_LIST_H */ \ No newline at end of file
diff --git a/src/core/cpu/kernels/elementwise/sve/elementwise_unary_list.h b/src/core/cpu/kernels/elementwise/sve/elementwise_unary_list.h
index 63490421e9..c2b495f27c 100644
--- a/src/core/cpu/kernels/elementwise/sve/elementwise_unary_list.h
+++ b/src/core/cpu/kernels/elementwise/sve/elementwise_unary_list.h
@@ -25,7 +25,7 @@
#define SRC_CORE_SVE_KERNELS_ELEMENTWISE_UNARY_LIST_H
#include "arm_compute/core/Types.h"
-#if defined(ENABLE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
namespace arm_compute
{
@@ -35,5 +35,5 @@ template <typename ScalarType>
void elementwise_sve_op(const ITensor *in, ITensor *out, const Window &window, ElementWiseUnary op);
} // namespace cpu
} // namespace arm_compute
-#endif // defined(ENABLE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
#endif // SRC_CORE_NEON_KERNELS_ELEMENTWISE_UNARY_LIST_H \ No newline at end of file
diff --git a/src/core/cpu/kernels/scale/sve/fp16.cpp b/src/core/cpu/kernels/scale/sve/fp16.cpp
index 5b9377c6e6..76e7735b8a 100644
--- a/src/core/cpu/kernels/scale/sve/fp16.cpp
+++ b/src/core/cpu/kernels/scale/sve/fp16.cpp
@@ -22,7 +22,7 @@
* SOFTWARE.
*/
-#if defined(ENABLE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensorPack.h"
#include "arm_compute/core/Window.h"
@@ -173,4 +173,4 @@ void fp16_sve_scale(const ITensor *src, ITensor *dst, const ITensor *offsets, co
} // namespace cpu
} // namespace arm_compute
-#endif // ENABLE_SVE \ No newline at end of file
+#endif // ARM_COMPUTE_ENABLE_SVE \ No newline at end of file
diff --git a/src/core/cpu/kernels/scale/sve/fp32.cpp b/src/core/cpu/kernels/scale/sve/fp32.cpp
index 05fbedf20d..030e109cdf 100644
--- a/src/core/cpu/kernels/scale/sve/fp32.cpp
+++ b/src/core/cpu/kernels/scale/sve/fp32.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#if defined(ENABLE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensorPack.h"
#include "arm_compute/core/Window.h"
@@ -171,4 +171,4 @@ void fp32_sve_scale(const ITensor *src, ITensor *dst, const ITensor *offsets, co
} // namespace cpu
} // namespace arm_compute
-#endif // ENABLE_SVE \ No newline at end of file
+#endif // ARM_COMPUTE_ENABLE_SVE \ No newline at end of file
diff --git a/src/core/cpu/kernels/scale/sve/integer.cpp b/src/core/cpu/kernels/scale/sve/integer.cpp
index d7e270c661..486c674612 100644
--- a/src/core/cpu/kernels/scale/sve/integer.cpp
+++ b/src/core/cpu/kernels/scale/sve/integer.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#if defined(ENABLE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensorPack.h"
#include "arm_compute/core/Window.h"
@@ -297,4 +297,4 @@ void s16_sve_scale(const ITensor *src, ITensor *dst, const ITensor *offsets, con
} // namespace cpu
} // namespace arm_compute
-#endif // ENABLE_SVE \ No newline at end of file
+#endif // ARM_COMPUTE_ENABLE_SVE \ No newline at end of file
diff --git a/src/core/cpu/kernels/scale/sve/qasymm8.cpp b/src/core/cpu/kernels/scale/sve/qasymm8.cpp
index f747037938..c9122ad40b 100644
--- a/src/core/cpu/kernels/scale/sve/qasymm8.cpp
+++ b/src/core/cpu/kernels/scale/sve/qasymm8.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#if defined(ENABLE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensorPack.h"
#include "arm_compute/core/Window.h"
@@ -204,4 +204,4 @@ void qasymm8_sve_scale(const ITensor *src, ITensor *dst, const ITensor *offsets,
} // namespace cpu
} // namespace arm_compute
-#endif // defined(ENABLE_SVE) \ No newline at end of file
+#endif // defined(ARM_COMPUTE_ENABLE_SVE) \ No newline at end of file
diff --git a/src/core/cpu/kernels/scale/sve/qasymm8_signed.cpp b/src/core/cpu/kernels/scale/sve/qasymm8_signed.cpp
index 584ec7a0da..0843e61fd4 100644
--- a/src/core/cpu/kernels/scale/sve/qasymm8_signed.cpp
+++ b/src/core/cpu/kernels/scale/sve/qasymm8_signed.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#if defined(ENABLE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensorPack.h"
#include "arm_compute/core/Window.h"
@@ -204,4 +204,4 @@ void qasymm8_signed_sve_scale(const ITensor *src, ITensor *dst, const ITensor *o
} // namespace cpu
} // namespace arm_compute
-#endif // ENABLE_SVE \ No newline at end of file
+#endif // ARM_COMPUTE_ENABLE_SVE \ No newline at end of file
diff --git a/src/core/cpu/kernels/softmax/impl/sve/impl.cpp b/src/core/cpu/kernels/softmax/impl/sve/impl.cpp
index 4ed5a4fbea..7a577fd565 100644
--- a/src/core/cpu/kernels/softmax/impl/sve/impl.cpp
+++ b/src/core/cpu/kernels/softmax/impl/sve/impl.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#if defined(ENABLE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/Types.h"
@@ -182,4 +182,4 @@ template void sve_softmax_logits_1d_float<float16_t>(const ITensor *in, const IT
ITensor *out, const float beta, bool is_log, const Window &window);
} // namespace cpu
} // namespace arm_compute
-#endif /* defined(ENABLE_SVE) */
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */
diff --git a/src/core/cpu/kernels/softmax/impl/sve/list.h b/src/core/cpu/kernels/softmax/impl/sve/list.h
index 7ddb358b8e..b4e1e1b186 100644
--- a/src/core/cpu/kernels/softmax/impl/sve/list.h
+++ b/src/core/cpu/kernels/softmax/impl/sve/list.h
@@ -24,7 +24,7 @@
#ifndef SRC_CORE_SVE_KERNELS_SOFTMAX_LIST_H
#define SRC_CORE_SVE_KERNELS_SOFTMAX_LIST_H
-#if defined(ENABLE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/Traits.h"
#include "src/core/NEON/SVEMath.h"
@@ -42,7 +42,7 @@ template <typename ScalarType>
void sve_softmax_logits_1d_float(const ITensor *in, const ITensor *max, void *const tmp,
ITensor *out, const float beta, bool is_log, const Window &window);
-#if defined(__ARM_FEATURE_SVE2)
+#if defined(ARM_COMPUTE_ENABLE_SVE2)
template <typename ScalarType>
void sve_softmax_logits_1d_quantized(const ITensor *in, const ITensor *max, void *const tmp,
ITensor *out, float beta, bool is_log, const Window &window)
@@ -215,9 +215,9 @@ void sve_softmax_logits_1d_quantized(const ITensor *in, const ITensor *max, void
},
in_it, max_it, out_it);
}
-#endif /* defined(__ARM_FEATURE_SVE2) */
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */
} // namespace cpu
} // namespace arm_compute
-#endif /* defined(ENABLE_SVE) */
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */
#endif /* SRC_CORE_SVE_KERNELS_SOFTMAX_LIST_H */
diff --git a/src/cpu/CpuContext.cpp b/src/cpu/CpuContext.cpp
index 18fa2e7469..a1c6413c98 100644
--- a/src/cpu/CpuContext.cpp
+++ b/src/cpu/CpuContext.cpp
@@ -90,67 +90,66 @@ AllocatorWrapper populate_allocator(AclAllocator *external_allocator)
return is_valid ? AllocatorWrapper(*external_allocator) : AllocatorWrapper(default_allocator);
}
-CpuCapabilities populate_capabilities_legacy(const CPUInfo &cpu_info)
+cpuinfo::CpuIsaInfo populate_capabilities_legacy(const CPUInfo &cpu_info)
{
- CpuCapabilities caps;
+ cpuinfo::CpuIsaInfo isa_caps;
// Extract SIMD extension
- caps.neon = true;
-#ifdef SVE2
- caps.sve2 = true;
-#endif /* SVE2 */
+ isa_caps.neon = true;
+ isa_caps.sve = cpu_info.has_sve();
+ isa_caps.sve2 = cpu_info.has_sve2();
+
// Extract data-type support
- caps.fp16 = cpu_info.has_fp16();
-#ifdef V8P6_BF
- caps.bf16 = true;
-#endif /* V8P6_BF */
+ isa_caps.fp16 = cpu_info.has_fp16();
+ isa_caps.bf16 = cpu_info.has_bf16();
+ isa_caps.svebf16 = cpu_info.has_svebf16();
// Extract ISA extensions
- caps.dot = cpu_info.has_dotprod();
-#ifdef MMLA_FP32
- caps.mmla_fp = true;
-#endif /* MMLA_FP32 */
-#ifdef MMLA_INT8
- caps.mmla_int8 = true;
-#endif /* MMLA_INT8 */
+ isa_caps.dot = cpu_info.has_dotprod();
+ isa_caps.i8mm = cpu_info.has_i8mm();
+ isa_caps.svei8mm = cpu_info.has_svei8mm();
+ isa_caps.svef32mm = cpu_info.has_svef32mm();
- return caps;
+ return isa_caps;
}
-CpuCapabilities populate_capabilities_flags(AclTargetCapabilities external_caps)
+cpuinfo::CpuIsaInfo populate_capabilities_flags(AclTargetCapabilities external_caps)
{
- CpuCapabilities caps;
+ cpuinfo::CpuIsaInfo isa_caps;
// Extract SIMD extension
- caps.neon = external_caps & AclCpuCapabilitiesNeon;
- caps.sve = external_caps & AclCpuCapabilitiesSve;
- caps.sve2 = external_caps & AclCpuCapabilitiesSve2;
+ isa_caps.neon = external_caps & AclCpuCapabilitiesNeon;
+ isa_caps.sve = external_caps & AclCpuCapabilitiesSve;
+ isa_caps.sve2 = external_caps & AclCpuCapabilitiesSve2;
+
// Extract data-type support
- caps.fp16 = external_caps & AclCpuCapabilitiesFp16;
- caps.bf16 = external_caps & AclCpuCapabilitiesBf16;
+ isa_caps.fp16 = external_caps & AclCpuCapabilitiesFp16;
+ isa_caps.bf16 = external_caps & AclCpuCapabilitiesBf16;
+
// Extract ISA extensions
- caps.dot = external_caps & AclCpuCapabilitiesDot;
- caps.mmla_fp = external_caps & AclCpuCapabilitiesMmlaFp;
- caps.mmla_int8 = external_caps & AclCpuCapabilitiesMmlaInt8;
+ isa_caps.dot = external_caps & AclCpuCapabilitiesDot;
+ isa_caps.i8mm = external_caps & AclCpuCapabilitiesMmlaInt8;
+ isa_caps.svef32mm = external_caps & AclCpuCapabilitiesMmlaFp;
- return caps;
+ return isa_caps;
}
CpuCapabilities populate_capabilities(AclTargetCapabilities external_caps,
int32_t max_threads)
{
- // Extract legacy structure
- CPUInfo cpu_info;
-
CpuCapabilities caps;
+
+ // Extract legacy structure
+ cpuinfo::CpuIsaInfo isa_caps;
if(external_caps != AclCpuCapabilitiesAuto)
{
- caps = populate_capabilities_flags(external_caps);
+ isa_caps = populate_capabilities_flags(external_caps);
}
else
{
- caps = populate_capabilities_legacy(cpu_info);
+ isa_caps = populate_capabilities_legacy(CPUInfo::get());
}
+ caps.cpu_info = cpuinfo::CpuInfo(isa_caps, {});
// Set max number of threads
#if defined(BARE_METAL)
diff --git a/src/cpu/CpuContext.h b/src/cpu/CpuContext.h
index e909767a7b..9a59af39c1 100644
--- a/src/cpu/CpuContext.h
+++ b/src/cpu/CpuContext.h
@@ -26,6 +26,7 @@
#include "src/common/AllocatorWrapper.h"
#include "src/common/IContext.h"
+#include "src/common/cpuinfo/CpuInfo.h"
namespace arm_compute
{
@@ -34,17 +35,8 @@ namespace cpu
/** Structure that encodes the CPU capabilities to be used */
struct CpuCapabilities
{
- bool neon{ false };
- bool sve{ false };
- bool sve2{ false };
-
- bool fp16{ false };
- bool bf16{ false };
- bool dot{ false };
- bool mmla_int8{ false };
- bool mmla_fp{ false };
-
- int32_t max_threads{ -1 };
+ cpuinfo::CpuInfo cpu_info{};
+ int32_t max_threads{ -1 };
};
/** CPU context implementation class */
diff --git a/src/runtime/CPP/CPPScheduler.cpp b/src/runtime/CPP/CPPScheduler.cpp
index f112d456c7..3d7f1f16b1 100644
--- a/src/runtime/CPP/CPPScheduler.cpp
+++ b/src/runtime/CPP/CPPScheduler.cpp
@@ -493,7 +493,7 @@ void CPPScheduler::run_workloads(std::vector<IScheduler::Workload> &workloads)
}
ThreadFeeder feeder(num_threads_to_use, workloads.size());
ThreadInfo info;
- info.cpu_info = &_cpu_info;
+ info.cpu_info = &cpu_info();
info.num_threads = num_threads_to_use;
unsigned int t = 0;
auto thread_it = _impl->_threads.begin();
diff --git a/src/runtime/CPP/SingleThreadScheduler.cpp b/src/runtime/CPP/SingleThreadScheduler.cpp
index 70536b7ccc..5890553f6f 100644
--- a/src/runtime/CPP/SingleThreadScheduler.cpp
+++ b/src/runtime/CPP/SingleThreadScheduler.cpp
@@ -49,7 +49,7 @@ void SingleThreadScheduler::schedule(ICPPKernel *kernel, const Hints &hints)
}
ThreadInfo info;
- info.cpu_info = &_cpu_info;
+ info.cpu_info = &cpu_info();
kernel->run(kernel->window(), info);
}
@@ -57,14 +57,14 @@ void SingleThreadScheduler::schedule_op(ICPPKernel *kernel, const Hints &hints,
{
ARM_COMPUTE_UNUSED(hints);
ThreadInfo info;
- info.cpu_info = &_cpu_info;
+ info.cpu_info = &cpu_info();
kernel->run_op(tensors, window, info);
}
void SingleThreadScheduler::run_workloads(std::vector<Workload> &workloads)
{
ThreadInfo info;
- info.cpu_info = &_cpu_info;
+ info.cpu_info = &cpu_info();
for(auto &wl : workloads)
{
wl(info);
diff --git a/src/runtime/IScheduler.cpp b/src/runtime/IScheduler.cpp
index df04fed401..004b8a46b6 100644
--- a/src/runtime/IScheduler.cpp
+++ b/src/runtime/IScheduler.cpp
@@ -32,7 +32,6 @@
namespace arm_compute
{
IScheduler::IScheduler()
- : _cpu_info()
{
// Work out the best possible number of execution threads
_num_threads_hint = cpuinfo::num_threads_hint();
@@ -40,7 +39,7 @@ IScheduler::IScheduler()
CPUInfo &IScheduler::cpu_info()
{
- return _cpu_info;
+ return CPUInfo::get();
}
void IScheduler::set_num_threads_with_affinity(unsigned int num_threads, BindFunc func)
@@ -111,7 +110,7 @@ void IScheduler::schedule_common(ICPPKernel *kernel, const Hints &hints, const W
if(!kernel->is_parallelisable() || num_threads == 1)
{
ThreadInfo info;
- info.cpu_info = &_cpu_info;
+ info.cpu_info = &cpu_info();
if(tensors.empty())
{
kernel->run(max_window, info);
diff --git a/src/runtime/NEON/functions/NEFFT2D.cpp b/src/runtime/NEON/functions/NEFFT2D.cpp
index 3b787cd523..5aaf587cdf 100644
--- a/src/runtime/NEON/functions/NEFFT2D.cpp
+++ b/src/runtime/NEON/functions/NEFFT2D.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,9 +26,6 @@
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/Scheduler.h"
-#include "src/core/NEON/kernels/NEFFTDigitReverseKernel.h"
-#include "src/core/NEON/kernels/NEFFTRadixStageKernel.h"
-#include "src/core/NEON/kernels/NEFFTScaleKernel.h"
namespace arm_compute
{
diff --git a/src/runtime/OMP/OMPScheduler.cpp b/src/runtime/OMP/OMPScheduler.cpp
index ca763f907b..e9b0bf4426 100644
--- a/src/runtime/OMP/OMPScheduler.cpp
+++ b/src/runtime/OMP/OMPScheduler.cpp
@@ -66,7 +66,7 @@ void OMPScheduler::schedule_op(ICPPKernel *kernel, const Hints &hints, const Win
if(!kernel->is_parallelisable() || num_threads == 1)
{
ThreadInfo info;
- info.cpu_info = &_cpu_info;
+ info.cpu_info = &cpu_info();
kernel->run_op(tensors, max_window, info);
}
else
@@ -96,7 +96,7 @@ void OMPScheduler::run_workloads(std::vector<arm_compute::IScheduler::Workload>
}
ThreadInfo info;
- info.cpu_info = &_cpu_info;
+ info.cpu_info = &cpu_info();
info.num_threads = num_threads;
#pragma omp parallel firstprivate(info) num_threads(num_threads)
{
diff --git a/tests/validation/cpu/unit/Context.cpp b/tests/validation/cpu/unit/Context.cpp
index 57ca866032..42247ba1da 100644
--- a/tests/validation/cpu/unit/Context.cpp
+++ b/tests/validation/cpu/unit/Context.cpp
@@ -94,13 +94,13 @@ TEST_CASE(CpuCapabilities, framework::DatasetMode::ALL)
opts.copts.capabilities = AclCpuCapabilitiesDot | AclCpuCapabilitiesMmlaInt8 | AclCpuCapabilitiesSve2;
arm_compute::cpu::CpuContext ctx(&opts.copts);
- ARM_COMPUTE_ASSERT(ctx.capabilities().dot == true);
- ARM_COMPUTE_ASSERT(ctx.capabilities().mmla_int8 == true);
- ARM_COMPUTE_ASSERT(ctx.capabilities().sve2 == true);
- ARM_COMPUTE_ASSERT(ctx.capabilities().fp16 == false);
+ ARM_COMPUTE_ASSERT(ctx.capabilities().cpu_info.has_dotprod() == true);
+ ARM_COMPUTE_ASSERT(ctx.capabilities().cpu_info.has_i8mm() == true);
+ ARM_COMPUTE_ASSERT(ctx.capabilities().cpu_info.has_sve2() == true);
+ ARM_COMPUTE_ASSERT(ctx.capabilities().cpu_info.has_fp16() == false);
arm_compute::cpu::CpuContext ctx_legacy(nullptr);
- ARM_COMPUTE_ASSERT(ctx_legacy.capabilities().neon == true);
+ ARM_COMPUTE_ASSERT(ctx_legacy.capabilities().cpu_info.has_neon() == true);
}
TEST_SUITE_END() // Context