From 20fca524baf99402f742ce38c538f2fd07d5fff9 Mon Sep 17 00:00:00 2001 From: Michalis Spyrou Date: Mon, 7 Jun 2021 14:23:57 +0100 Subject: Create core library using high priority operators A smaller core library is created using a subset of the operators. Changed the structure of filelist.json in order to include more information about the kernels and make the selection easier. Resolves: COMPMID-4514 Change-Id: I079ca7d8e64346174eebdd13b834e1dd4dc36ca2 Signed-off-by: Michalis Spyrou Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5786 Tested-by: Arm Jenkins Reviewed-by: Georgios Pinitas Comments-Addressed: Arm Jenkins --- src/core/NEON/kernels/arm_gemm/gemm_bf16.cpp | 18 ++++---- src/core/NEON/kernels/arm_gemm/gemm_fp16.cpp | 2 +- src/core/NEON/kernels/arm_gemm/gemm_fp32.cpp | 16 +++---- src/core/NEON/kernels/arm_gemm/gemm_int8.cpp | 14 +++--- src/core/NEON/kernels/arm_gemm/gemm_qint8.cpp | 22 +++++----- src/core/NEON/kernels/arm_gemm/gemm_quint8.cpp | 16 +++---- src/core/NEON/kernels/arm_gemm/gemm_uint8.cpp | 10 ++--- .../NEON/kernels/arm_gemm/interleave_indirect.cpp | 16 +++---- .../kernels/sve_gemv_fp32_mla_8VL/generic.cpp | 2 +- .../kernels/sve_hybrid_bf16fp32_dot_6x4VL.hpp | 4 +- .../sve_hybrid_bf16fp32_dot_6x4VL/generic.cpp | 4 +- .../arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL.hpp | 4 +- .../kernels/sve_hybrid_fp16_mla_6x4VL/generic.cpp | 4 +- .../arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL.hpp | 4 +- .../kernels/sve_hybrid_fp32_mla_6x4VL/generic.cpp | 4 +- .../arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL.hpp | 4 +- .../kernels/sve_hybrid_fp32_mla_8x1VL/generic.cpp | 4 +- .../arm_gemm/kernels/sve_hybrid_s8qa_dot_4x4VL.hpp | 4 +- .../kernels/sve_hybrid_s8qa_dot_4x4VL/generic.cpp | 4 +- .../arm_gemm/kernels/sve_hybrid_s8qs_dot_6x4VL.hpp | 4 +- .../kernels/sve_hybrid_s8qs_dot_6x4VL/generic.cpp | 4 +- .../kernels/sve_hybrid_s8s32_dot_6x4VL.hpp | 4 +- .../kernels/sve_hybrid_s8s32_dot_6x4VL/generic.cpp | 4 +- 
.../arm_gemm/kernels/sve_hybrid_u8qa_dot_4x4VL.hpp | 4 +- .../kernels/sve_hybrid_u8qa_dot_4x4VL/generic.cpp | 4 +- .../kernels/sve_hybrid_u8u32_dot_6x4VL.hpp | 4 +- .../kernels/sve_hybrid_u8u32_dot_6x4VL/generic.cpp | 4 +- .../kernels/sve_interleaved_bf16fp32_dot_8x3VL.hpp | 4 +- .../sve_interleaved_bf16fp32_dot_8x3VL/generic.cpp | 4 +- .../sve_interleaved_bf16fp32_mmla_8x3VL.hpp | 4 +- .../generic.cpp | 4 +- .../kernels/sve_interleaved_fp16_mla_8x3VL.hpp | 4 +- .../sve_interleaved_fp16_mla_8x3VL/generic.cpp | 4 +- .../kernels/sve_interleaved_fp32_mla_8x3VL.hpp | 4 +- .../sve_interleaved_fp32_mla_8x3VL/generic.cpp | 4 +- .../kernels/sve_interleaved_fp32_mmla_8x3VL.hpp | 4 +- .../sve_interleaved_fp32_mmla_8x3VL/generic.cpp | 4 +- .../kernels/sve_interleaved_s8s32_dot_8x3VL.hpp | 4 +- .../sve_interleaved_s8s32_dot_8x3VL/generic.cpp | 4 +- .../kernels/sve_interleaved_s8s32_mmla_8x3VL.hpp | 4 +- .../sve_interleaved_s8s32_mmla_8x3VL/generic.cpp | 4 +- .../kernels/sve_interleaved_u8u32_dot_8x3VL.hpp | 4 +- .../sve_interleaved_u8u32_dot_8x3VL/generic.cpp | 4 +- .../kernels/sve_interleaved_u8u32_mmla_8x3VL.hpp | 4 +- .../sve_interleaved_u8u32_mmla_8x3VL/generic.cpp | 4 +- .../kernels/sve_smallK_hybrid_fp32_mla_8x1VL.hpp | 4 +- .../sve_smallK_hybrid_fp32_mla_8x1VL/generic.cpp | 4 +- .../kernels/sve_smallK_hybrid_s8s32_dot_8x1VL.hpp | 4 +- .../sve_smallK_hybrid_s8s32_dot_8x1VL/generic.cpp | 4 +- .../kernels/sve_smallK_hybrid_u8u32_dot_8x1VL.hpp | 4 +- .../sve_smallK_hybrid_u8u32_dot_8x1VL/generic.cpp | 4 +- .../NEON/kernels/arm_gemm/mergeresults-sve.cpp | 41 ++++++++++++++++++ src/core/NEON/kernels/arm_gemm/mergeresults.cpp | 6 ++- src/core/NEON/kernels/arm_gemm/merges/list-sve.hpp | 28 ++++++++++++ src/core/NEON/kernels/arm_gemm/merges/list.hpp | 6 +-- .../arm_gemm/merges/sve_merge_fp16_3VLx8.hpp | 4 +- .../arm_gemm/merges/sve_merge_fp32_3VLx8.hpp | 4 +- .../arm_gemm/merges/sve_merge_s32_3VLx8.hpp | 4 +- .../arm_gemm/merges/sve_merge_u32_3VLx8.hpp | 4 +- 
src/core/NEON/kernels/arm_gemm/utils.hpp | 50 ++++++++-------------- 60 files changed, 242 insertions(+), 189 deletions(-) create mode 100644 src/core/NEON/kernels/arm_gemm/mergeresults-sve.cpp create mode 100644 src/core/NEON/kernels/arm_gemm/merges/list-sve.hpp (limited to 'src/core/NEON/kernels/arm_gemm') diff --git a/src/core/NEON/kernels/arm_gemm/gemm_bf16.cpp b/src/core/NEON/kernels/arm_gemm/gemm_bf16.cpp index d8134c4bb5..8244523696 100644 --- a/src/core/NEON/kernels/arm_gemm/gemm_bf16.cpp +++ b/src/core/NEON/kernels/arm_gemm/gemm_bf16.cpp @@ -44,26 +44,26 @@ namespace arm_gemm { static const GemmImplementation gemm_bf16_methods[] = { -#ifdef V8P6_BF -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_BF16 +#ifdef ARM_COMPUTE_ENABLE_SVE { // gemm_bf16_interleaved GemmMethod::GEMM_INTERLEAVED, "sve_interleaved_bf16fp32_mmla_8x3VL", - [](const GemmArgs &args) { return args._ci->has_sve() && (args._Ksize>4); }, + [](const GemmArgs &args) { return args._ci->has_svebf16() && (args._Ksize>4); }, [](const GemmArgs &args) { return args._ci->get_cpu_model() != CPUModel::KLEIN; }, [](const GemmArgs &args) { return new GemmInterleaved(args); } }, { GemmMethod::GEMM_HYBRID, "sve_hybrid_bf16fp32_dot_6x4VL", - [](const GemmArgs &args) { return args._ci->has_sve(); }, + [](const GemmArgs &args) { return args._ci->has_svebf16(); }, [](const GemmArgs &args) { return args._ci->get_cpu_model() != CPUModel::KLEIN && ((args._Ksize <= 128) && (args._Nsize <= 128)); }, [](const GemmArgs &args) { return new GemmHybridIndirect(args); } }, { // gemm_bf16_interleaved GemmMethod::GEMM_INTERLEAVED, "sve_interleaved_bf16fp32_dot_8x3VL", - [](const GemmArgs &args) { return args._ci->has_sve() && (args._Ksize>2); }, + [](const GemmArgs &args) { return args._ci->has_svebf16() && (args._Ksize>2); }, [](const GemmArgs &args) { return args._ci->get_cpu_model() != CPUModel::KLEIN; }, [](const GemmArgs &args) { return new GemmInterleaved(args); } }, @@ -71,25 +71,25 @@ static const 
GemmImplementation gemm_bf16_methods[] = { // gemm_bf16_interleaved GemmMethod::GEMM_INTERLEAVED, "a64_interleaved_bf16fp32_mmla_8x12", - [](const GemmArgs &args) { return (args._Ksize>4); }, + [](const GemmArgs &args) { return args._ci->has_bf16() && (args._Ksize>4); }, nullptr, [](const GemmArgs &args) { return new GemmInterleaved(args); } }, { GemmMethod::GEMM_HYBRID, "a64_hybrid_bf16fp32_dot_6x16", - nullptr, + [](const GemmArgs &args) { return args._ci->has_bf16(); }, nullptr, [](const GemmArgs &args) { return new GemmHybridIndirect(args); } }, { // gemm_bf16_interleaved GemmMethod::GEMM_INTERLEAVED, "a64_interleaved_bf16fp32_dot_8x12", - [](const GemmArgs &args) { return (args._Ksize>2); }, + [](const GemmArgs &args) { return args._ci->has_bf16() && (args._Ksize>2); }, nullptr, [](const GemmArgs &args) { return new GemmInterleaved(args); } }, -#endif // V8P6_BF +#endif // ARM_COMPUTE_ENABLE_BF16 #ifdef __aarch64__ { GemmMethod::GEMM_INTERLEAVED, diff --git a/src/core/NEON/kernels/arm_gemm/gemm_fp16.cpp b/src/core/NEON/kernels/arm_gemm/gemm_fp16.cpp index 8e355c8f2c..b41d8dd097 100644 --- a/src/core/NEON/kernels/arm_gemm/gemm_fp16.cpp +++ b/src/core/NEON/kernels/arm_gemm/gemm_fp16.cpp @@ -43,7 +43,7 @@ namespace arm_gemm { static const GemmImplementation<__fp16, __fp16> gemm_fp16_methods[] = { -#if defined(__ARM_FEATURE_SVE) +#if defined(ARM_COMPUTE_ENABLE_SVE) { GemmMethod::GEMM_HYBRID, "sve_hybrid_fp16_mla_6x4VL", diff --git a/src/core/NEON/kernels/arm_gemm/gemm_fp32.cpp b/src/core/NEON/kernels/arm_gemm/gemm_fp32.cpp index d94814fb4c..1632e301ac 100644 --- a/src/core/NEON/kernels/arm_gemm/gemm_fp32.cpp +++ b/src/core/NEON/kernels/arm_gemm/gemm_fp32.cpp @@ -59,7 +59,7 @@ static const GemmImplementation gemm_fp32_methods[] = [](const GemmArgs &args) { return new GemvBatched(args); } }, #ifdef __aarch64__ -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE { GemmMethod::GEMM_HYBRID, "sve_gemv_fp32_mla_8VL", @@ -77,17 +77,17 @@ static const 
GemmImplementation gemm_fp32_methods[] = }, // MMLA next due to higher throughput (SVE only) -#if defined(__ARM_FEATURE_SVE) && defined(MMLA_FP32) +#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVEF32MM) { GemmMethod::GEMM_INTERLEAVED, "sve_interleaved_fp32_mmla_8x3VL", - [](const GemmArgs &args) { return args._ci->has_sve() && (args._Ksize>4); }, + [](const GemmArgs &args) { return args._ci->has_svef32mm() && (args._Ksize>4); }, [](const GemmArgs &args) { return args._ci->get_cpu_model() != CPUModel::KLEIN; }, [](const GemmArgs &args) { return new GemmInterleaved(args); } }, -#endif // __ARM_FEATURE_SVE && MMLA_FP32 +#endif // ARM_COMPUTE_ENABLE_SVE && ARM_COMPUTE_ENABLE_SVEF32MM -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE // SVE smallk / hybrid methods { GemmMethod::GEMM_HYBRID, @@ -110,7 +110,7 @@ static const GemmImplementation gemm_fp32_methods[] = [](const GemmArgs &args) { return args._ci->get_cpu_model() != CPUModel::KLEIN && (((args._Ksize <= 256) && (args._Nsize <= 256)) || ((args._nmulti > 1) && ((args._Msize / args._maxthreads) < 8))); }, [](const GemmArgs &args) { return new GemmHybridIndirect(args); } }, -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE // Cortex-A35 specific kernel - use for any problem on A35, and never in any other cases. 
{ GemmMethod::GEMM_INTERLEAVED, @@ -148,7 +148,7 @@ GemmImplementation::with_estimate( [](const GemmArgs &args) { return GemmHybridIndirect::estimate_cycles(args, cls_a64_hybrid_fp32_mla_6x16::get_performance_parameters(args._ci)); }, [](const GemmArgs &args) { return new GemmHybridIndirect(args); } ), -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE { GemmMethod::GEMM_INTERLEAVED, "sve_interleaved_fp32_mla_8x3VL", @@ -156,7 +156,7 @@ GemmImplementation::with_estimate( [](const GemmArgs &args) { return args._ci->get_cpu_model() != CPUModel::KLEIN; }, [](const GemmArgs &args) { return new GemmInterleaved(args); } }, -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE GemmImplementation::with_estimate( GemmMethod::GEMM_INTERLEAVED, "a64_sgemm_8x12", diff --git a/src/core/NEON/kernels/arm_gemm/gemm_int8.cpp b/src/core/NEON/kernels/arm_gemm/gemm_int8.cpp index 60cf82f9c6..bfb3ca901f 100644 --- a/src/core/NEON/kernels/arm_gemm/gemm_int8.cpp +++ b/src/core/NEON/kernels/arm_gemm/gemm_int8.cpp @@ -46,16 +46,16 @@ namespace arm_gemm { static const GemmImplementation gemm_s8_methods[] = { -#ifdef __ARM_FEATURE_SVE -#ifdef MMLA_INT8 +#ifdef ARM_COMPUTE_ENABLE_SVE +#ifdef ARM_COMPUTE_ENABLE_I8MM { GemmMethod::GEMM_INTERLEAVED, "sve_interleaved_s8s32_mmla_8x3VL", - [](const GemmArgs &args) { return args._ci->has_sve() && (args._Ksize>8); }, + [](const GemmArgs &args) { return args._ci->has_svei8mm() && (args._Ksize>8); }, [](const GemmArgs &args) { return args._ci->get_cpu_model() != CPUModel::KLEIN; }, [](const GemmArgs &args) { return new GemmInterleaved(args); } }, -#endif +#endif // ARM_COMPUTE_ENABLE_I8MM { GemmMethod::GEMM_HYBRID, "sve_smallK_hybrid_s8s32_dot_8x1VL", @@ -78,15 +78,15 @@ static const GemmImplementation gemm_s8_methods[] = { [](const GemmArgs &args) { return new GemmInterleaved(args); } }, #endif // SVE -#ifdef MMLA_INT8 +#ifdef ARM_COMPUTE_ENABLE_I8MM { GemmMethod::GEMM_INTERLEAVED, "a64_interleaved_s8s32_mmla_8x12", - [](const GemmArgs 
&args) { return (args._Ksize>8); }, + [](const GemmArgs &args) { return args._ci->has_i8mm() && (args._Ksize>8); }, /* a64 (non-SVE) MMLA kernel: gate on base I8MM, matching the qint8/quint8/uint8 tables; has_svei8mm() is only for the sve_* kernels */ nullptr, [](const GemmArgs &args) { return new GemmInterleaved(args); } }, -#endif +#endif // ARM_COMPUTE_ENABLE_I8MM { GemmMethod::GEMM_HYBRID, "a64_smallK_hybrid_s8s32_dot_8x4", diff --git a/src/core/NEON/kernels/arm_gemm/gemm_qint8.cpp b/src/core/NEON/kernels/arm_gemm/gemm_qint8.cpp index 094b6fdff4..985567f6f3 100644 --- a/src/core/NEON/kernels/arm_gemm/gemm_qint8.cpp +++ b/src/core/NEON/kernels/arm_gemm/gemm_qint8.cpp @@ -53,16 +53,16 @@ namespace arm_gemm { static const GemmImplementation gemm_qint8_methods[] = { -#ifdef __ARM_FEATURE_SVE -#ifdef MMLA_INT8 +#ifdef ARM_COMPUTE_ENABLE_SVE +#ifdef ARM_COMPUTE_ENABLE_I8MM { GemmMethod::GEMM_INTERLEAVED, "sve_interleaved_s8s32_mmla_8x3VL", - [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_sve() && (args._Ksize>8); }, + [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_svei8mm() && (args._Ksize>8); }, [](const GemmArgs &args, const Requantize32 &) { return args._ci->get_cpu_model() != CPUModel::KLEIN; }, [](const GemmArgs &args, const Requantize32 &qp) { return new GemmInterleavedQuantized(args, qp); } }, -#endif +#endif // ARM_COMPUTE_ENABLE_I8MM { GemmMethod::GEMM_HYBRID_QUANTIZED, "sve_smallK_hybrid_s8s32_dot_8x1VL", @@ -70,22 +70,22 @@ static const GemmImplementation gemm_qint8_methods [](const GemmArgs &args, const Requantize32 &) { return args._ci->get_cpu_model() != CPUModel::KLEIN; }, [](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridQuantized(args, qp); } }, -#ifdef SVE2 +#ifdef ARM_COMPUTE_ENABLE_SVE2 { GemmMethod::GEMM_HYBRID, "sve_hybrid_s8qs_dot_6x4VL", - [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_sve() && quant_hybrid_symmetric(qp); }, + [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_sve2() && quant_hybrid_symmetric(qp); }, [](const GemmArgs &args, const Requantize32 
&) { return args._ci->get_cpu_model() != CPUModel::KLEIN; }, [](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridIndirect(args, qp); } }, { GemmMethod::GEMM_HYBRID, "sve_hybrid_s8qa_dot_4x4VL", - [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_sve() && quant_hybrid_asymmetric(qp); }, + [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_sve2() && quant_hybrid_asymmetric(qp); }, [](const GemmArgs &args, const Requantize32 &) { return args._ci->get_cpu_model() != CPUModel::KLEIN; }, [](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridIndirect(args, qp); } }, -#endif +#endif // ARM_COMPUTE_ENABLE_SVE2 { GemmMethod::GEMM_HYBRID, "sve_hybrid_s8s32_dot_6x4VL", @@ -101,15 +101,15 @@ static const GemmImplementation gemm_qint8_methods [](const GemmArgs &args, const Requantize32 &qp) { return new GemmInterleavedQuantized(args, qp); } }, #endif // SVE -#ifdef MMLA_INT8 +#ifdef ARM_COMPUTE_ENABLE_I8MM { GemmMethod::GEMM_INTERLEAVED, "a64_interleaved_s8s32_mmla_8x12", - [](const GemmArgs &args, const Requantize32 &) { return (args._Ksize>8); }, + [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_i8mm() && (args._Ksize>8); }, nullptr, [](const GemmArgs &args, const Requantize32 &qp) { return new GemmInterleavedQuantized(args, qp); } }, -#endif +#endif // ARM_COMPUTE_ENABLE_I8MM { GemmMethod::GEMM_HYBRID_QUANTIZED, "a64_smallK_hybrid_s8s32_dot_8x4", diff --git a/src/core/NEON/kernels/arm_gemm/gemm_quint8.cpp b/src/core/NEON/kernels/arm_gemm/gemm_quint8.cpp index be27b3a117..f3f2f335fd 100644 --- a/src/core/NEON/kernels/arm_gemm/gemm_quint8.cpp +++ b/src/core/NEON/kernels/arm_gemm/gemm_quint8.cpp @@ -50,12 +50,12 @@ namespace arm_gemm { static const GemmImplementation gemm_quint8_methods[] = { -#ifdef __ARM_FEATURE_SVE -#ifdef MMLA_INT8 +#ifdef ARM_COMPUTE_ENABLE_SVE +#ifdef ARM_COMPUTE_ENABLE_I8MM { GemmMethod::GEMM_INTERLEAVED, "sve_interleaved_u8u32_mmla_8x3VL", - [](const 
GemmArgs &args, const Requantize32 &) { return args._ci->has_sve() && (args._Ksize>8); }, + [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_svei8mm() && (args._Ksize>8); }, [](const GemmArgs &args, const Requantize32 &) { return args._ci->get_cpu_model() != CPUModel::KLEIN; }, [](const GemmArgs &args, const Requantize32 &qp) { return new GemmInterleavedQuantized(args, qp); } }, @@ -67,15 +67,15 @@ static const GemmImplementation gemm_quint8_meth [](const GemmArgs &args, const Requantize32 &) { return args._ci->get_cpu_model() != CPUModel::KLEIN; }, [](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridQuantized(args, qp); } }, -#ifdef SVE2 // Requantizing kernels include some SVE2 only instructions (SQRDMULH, SRSHL) +#ifdef ARM_COMPUTE_ENABLE_SVE2 // Requantizing kernels include some SVE2 only instructions (SQRDMULH, SRSHL) { GemmMethod::GEMM_HYBRID, "sve_hybrid_u8qa_dot_4x4VL", - [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_sve() && quant_hybrid_asymmetric(qp); }, + [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_sve2() && quant_hybrid_asymmetric(qp); }, [](const GemmArgs &args, const Requantize32 &) { return args._ci->get_cpu_model() != CPUModel::KLEIN; }, [](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridIndirect(args, qp); } }, -#endif +#endif // ARM_COMPUTE_ENABLE_SVE2 { GemmMethod::GEMM_HYBRID, "sve_hybrid_u8u32_dot_6x4VL", @@ -91,11 +91,11 @@ static const GemmImplementation gemm_quint8_meth [](const GemmArgs &args, const Requantize32 &qp) { return new GemmInterleavedQuantized(args, qp); } }, #endif -#ifdef MMLA_INT8 +#ifdef ARM_COMPUTE_ENABLE_I8MM { GemmMethod::GEMM_INTERLEAVED, "a64_interleaved_u8u32_mmla_8x12", - [](const GemmArgs &args, const Requantize32 &) { return (args._Ksize>8); }, + [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_i8mm() && (args._Ksize>8); }, [](const GemmArgs &args, const Requantize32 &) { return 
args._ci->get_cpu_model() != CPUModel::KLEIN; }, [](const GemmArgs &args, const Requantize32 &qp) { return new GemmInterleavedQuantized(args, qp); } }, diff --git a/src/core/NEON/kernels/arm_gemm/gemm_uint8.cpp b/src/core/NEON/kernels/arm_gemm/gemm_uint8.cpp index 4de3d2b18a..4c05fd1b73 100644 --- a/src/core/NEON/kernels/arm_gemm/gemm_uint8.cpp +++ b/src/core/NEON/kernels/arm_gemm/gemm_uint8.cpp @@ -46,12 +46,12 @@ namespace arm_gemm { static const GemmImplementation gemm_u8_methods[] = { -#ifdef __ARM_FEATURE_SVE -#ifdef MMLA_INT8 +#ifdef ARM_COMPUTE_ENABLE_SVE +#ifdef ARM_COMPUTE_ENABLE_I8MM { GemmMethod::GEMM_INTERLEAVED, "sve_interleaved_u8u32_mmla_8x3VL", - [](const GemmArgs &args) { return args._ci->has_sve() && (args._Ksize>8); }, + [](const GemmArgs &args) { return args._ci->has_svei8mm() && (args._Ksize>8); }, [](const GemmArgs &args) { return args._ci->get_cpu_model() != CPUModel::KLEIN; }, [](const GemmArgs &args) { return new GemmInterleaved(args); } }, @@ -78,11 +78,11 @@ static const GemmImplementation gemm_u8_methods[] = { [](const GemmArgs &args) { return new GemmInterleaved(args); } }, #endif -#ifdef MMLA_INT8 +#ifdef ARM_COMPUTE_ENABLE_I8MM { GemmMethod::GEMM_INTERLEAVED, "a64_interleaved_u8u32_mmla_8x12", - [](const GemmArgs &args) { return (args._Ksize>8); }, + [](const GemmArgs &args) { return args._ci->has_i8mm() && (args._Ksize>8); }, nullptr, [](const GemmArgs &args) { return new GemmInterleaved(args); } }, diff --git a/src/core/NEON/kernels/arm_gemm/interleave_indirect.cpp b/src/core/NEON/kernels/arm_gemm/interleave_indirect.cpp index 0d56b46e19..a6b1269927 100644 --- a/src/core/NEON/kernels/arm_gemm/interleave_indirect.cpp +++ b/src/core/NEON/kernels/arm_gemm/interleave_indirect.cpp @@ -320,12 +320,12 @@ template void IndirectInterleave<8, 1, VLType::None>(float *, const float * cons template void ConvolutionInterleave<8, 1, VLType::None>(float *, const float *, size_t, const convolver &, unsigned int, unsigned int, unsigned int, unsigned 
int, unsigned int, bool, int32_t); template void Interleave<8, 1, VLType::None>(float *, const float *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); -#if defined(__ARM_FEATURE_SVE) && defined(MMLA_FP32) +#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVEF32MM) /* FMMLA */ template void IndirectInterleave<8, 2, VLType::None>(float *, const float * const * const *, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); template void ConvolutionInterleave<8, 2, VLType::None>(float *, const float *, size_t, const convolver &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); template void Interleave<8, 2, VLType::None>(float *, const float *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); -#endif // SVE && MMLA_FP32 +#endif // ARM_COMPUTE_ENABLE_SVE && ARM_COMPUTE_ENABLE_SVEF32MM /* FP16 */ #if defined(FP16_KERNELS) || defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) @@ -340,7 +340,7 @@ template void Interleave<8, 1, VLType::None>(float *, const __fp16 *, size_t, un /* BF16 */ /* Arm® Neon™/SVE BFDOT */ -#ifdef V8P6_BF +#ifdef ARM_COMPUTE_ENABLE_BF16 template void IndirectInterleave<8, 2, VLType::None>(bfloat16 *, const bfloat16 * const * const *, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); template void ConvolutionInterleave<8, 2, VLType::None>(bfloat16 *, const bfloat16 *, size_t, const convolver &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); template void Interleave<8, 2, VLType::None>(bfloat16 *, const bfloat16 *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); @@ -348,7 +348,7 @@ template void Interleave<8, 2, VLType::None>(bfloat16 *, const bfloat16 *, size_ template void IndirectInterleave<8, 4, VLType::None>(bfloat16 *, const bfloat16 * const * const *, unsigned int, 
unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); template void ConvolutionInterleave<8, 4, VLType::None>(bfloat16 *, const bfloat16 *, size_t, const convolver &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); template void Interleave<8, 4, VLType::None>(bfloat16 *, const bfloat16 *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); -#endif // V8P6_BF +#endif // ARM_COMPUTE_ENABLE_BF16 /* Arm® Neon™/SVE using FP32 kernel */ template void IndirectInterleave<8, 1, VLType::None>(float *, const bfloat16 * const * const *, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); @@ -375,12 +375,12 @@ template void IndirectInterleave<8, 4, VLType::None>(int8_t *, const int8_t * co template void ConvolutionInterleave<8, 4, VLType::None>(int8_t *, const int8_t *, size_t, const convolver &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); template void Interleave<8, 4, VLType::None>(int8_t *, const int8_t *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); -#ifdef MMLA_INT8 +#ifdef ARM_COMPUTE_ENABLE_I8MM /* MMLA SMMLA (height 8, block 8) */ template void IndirectInterleave<8, 8, VLType::None>(int8_t *, const int8_t * const * const *, unsigned int, unsigned int, unsigned int y0, unsigned int ymax, unsigned int k0, unsigned int kmax, bool, int32_t); template void ConvolutionInterleave<8, 8, VLType::None>(int8_t *, const int8_t *, size_t, const convolver &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); template void Interleave<8, 8, VLType::None>(int8_t *, const int8_t *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); -#endif // MMLA_INT8 +#endif // ARM_COMPUTE_ENABLE_I8MM /* Arm® Neon™ SDOT (height 8, block 1) */ template void IndirectInterleave<8, 1, VLType::None>(int16_t *, const int8_t * 
const * const *, unsigned int, unsigned int, unsigned int y0, unsigned int ymax, unsigned int k0, unsigned int kmax, bool, int32_t); @@ -397,12 +397,12 @@ template void IndirectInterleave<8, 4, VLType::None>(uint8_t *, const uint8_t * template void ConvolutionInterleave<8, 4, VLType::None>(uint8_t *, const uint8_t *, size_t, const convolver &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); template void Interleave<8, 4, VLType::None>(uint8_t *, const uint8_t *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); -#ifdef MMLA_INT8 +#ifdef ARM_COMPUTE_ENABLE_I8MM /* MMLA SMMLA (height 8, block 8) */ template void IndirectInterleave<8, 8, VLType::None>(uint8_t *, const uint8_t * const * const *, unsigned int, unsigned int, unsigned int y0, unsigned int ymax, unsigned int k0, unsigned int kmax, bool, int32_t); template void ConvolutionInterleave<8, 8, VLType::None>(uint8_t *, const uint8_t *, size_t, const convolver &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); template void Interleave<8, 8, VLType::None>(uint8_t *, const uint8_t *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); -#endif // MMLA_INT8 +#endif // ARM_COMPUTE_ENABLE_I8MM /* Arm® Neon™ 16-bit (height 8, block 1) */ template void IndirectInterleave<8, 1, VLType::None>(uint16_t *, const uint8_t * const * const *, unsigned int, unsigned int, unsigned int y0, unsigned int ymax, unsigned int k0, unsigned int kmax, bool, int32_t); diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_gemv_fp32_mla_8VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_gemv_fp32_mla_8VL/generic.cpp index c62e31936c..78387de90c 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_gemv_fp32_mla_8VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_gemv_fp32_mla_8VL/generic.cpp @@ -21,7 +21,7 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
DEALINGS * IN THE SOFTWARE. */ -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "arm_gemm.hpp" #include "../../utils.hpp" diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_dot_6x4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_dot_6x4VL.hpp index 066bff4602..7b0282fa32 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_dot_6x4VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_dot_6x4VL.hpp @@ -22,7 +22,7 @@ * IN THE SOFTWARE. */ #pragma once -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "../std_transforms_sve.hpp" #include "../bfloat.hpp" @@ -81,4 +81,4 @@ public: } // namespace arm_gemm #undef ARGLIST -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_dot_6x4VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_dot_6x4VL/generic.cpp index 1233a98531..34a657f64f 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_dot_6x4VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_dot_6x4VL/generic.cpp @@ -21,7 +21,7 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "arm_gemm.hpp" #include "../../utils.hpp" @@ -2153,4 +2153,4 @@ void sve_hybrid_bf16fp32_dot_6x4VL ( } } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL.hpp index 5c8563952f..f98ccdc7d3 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL.hpp @@ -22,7 +22,7 @@ * IN THE SOFTWARE. 
*/ #pragma once -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "../std_transforms_sve.hpp" @@ -80,4 +80,4 @@ public: } // namespace arm_gemm #undef ARGLIST -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL/generic.cpp index 7cc03bbfb5..c151179a1f 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL/generic.cpp @@ -21,7 +21,7 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "arm_gemm.hpp" #include "../../utils.hpp" @@ -3094,4 +3094,4 @@ void sve_hybrid_fp16_mla_6x4VL ( } } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL.hpp index b696e73637..4c0a3a11e0 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL.hpp @@ -22,7 +22,7 @@ * IN THE SOFTWARE. 
*/ #pragma once -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "../std_transforms_sve.hpp" @@ -80,4 +80,4 @@ public: } // namespace arm_gemm #undef ARGLIST -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL/generic.cpp index dee9a107ff..25d65826b9 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL/generic.cpp @@ -21,7 +21,7 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "arm_gemm.hpp" #include "../../utils.hpp" @@ -2152,4 +2152,4 @@ void sve_hybrid_fp32_mla_6x4VL ( } } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL.hpp index 2273d97d5f..87f063d224 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL.hpp @@ -22,7 +22,7 @@ * IN THE SOFTWARE. 
*/ #pragma once -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "../std_transforms_sve.hpp" @@ -80,4 +80,4 @@ public: } // namespace arm_gemm #undef ARGLIST -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL/generic.cpp index 863325f7f5..943e0ac148 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL/generic.cpp @@ -21,7 +21,7 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "arm_gemm.hpp" #include "../../utils.hpp" @@ -1616,4 +1616,4 @@ void sve_hybrid_fp32_mla_8x1VL ( } } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_dot_4x4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_dot_4x4VL.hpp index bc93ced25b..c278b3fc6b 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_dot_4x4VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_dot_4x4VL.hpp @@ -22,7 +22,7 @@ * IN THE SOFTWARE. 
*/ #pragma once -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "../std_transforms_sve.hpp" @@ -80,4 +80,4 @@ public: } // namespace arm_gemm #undef ARGLIST -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_dot_4x4VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_dot_4x4VL/generic.cpp index 50b9ba524d..8a7465ba6b 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_dot_4x4VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_dot_4x4VL/generic.cpp @@ -21,7 +21,7 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "arm_gemm.hpp" #include "../../utils.hpp" @@ -1529,4 +1529,4 @@ void sve_hybrid_s8qa_dot_4x4VL ( } } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_dot_6x4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_dot_6x4VL.hpp index 61927236ad..57056b4c2a 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_dot_6x4VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_dot_6x4VL.hpp @@ -22,7 +22,7 @@ * IN THE SOFTWARE. 
*/ #pragma once -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "../std_transforms_sve.hpp" @@ -80,4 +80,4 @@ public: } // namespace arm_gemm #undef ARGLIST -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_dot_6x4VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_dot_6x4VL/generic.cpp index f901a814f9..0328c107e2 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_dot_6x4VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_dot_6x4VL/generic.cpp @@ -21,7 +21,7 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "arm_gemm.hpp" #include "../../utils.hpp" @@ -2665,4 +2665,4 @@ void sve_hybrid_s8qs_dot_6x4VL ( } } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_6x4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_6x4VL.hpp index b2c376196f..37258978d3 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_6x4VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_6x4VL.hpp @@ -22,7 +22,7 @@ * IN THE SOFTWARE. 
*/ #pragma once -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "../std_transforms_sve.hpp" @@ -80,4 +80,4 @@ public: } // namespace arm_gemm #undef ARGLIST -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_6x4VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_6x4VL/generic.cpp index 8862b3665a..9cddee941e 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_6x4VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_6x4VL/generic.cpp @@ -21,7 +21,7 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "arm_gemm.hpp" #include "../../utils.hpp" @@ -1819,4 +1819,4 @@ void sve_hybrid_s8s32_dot_6x4VL ( } } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_dot_4x4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_dot_4x4VL.hpp index cfb8adfc87..3de8d178cd 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_dot_4x4VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_dot_4x4VL.hpp @@ -22,7 +22,7 @@ * IN THE SOFTWARE. 
*/ #pragma once -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "../std_transforms_sve.hpp" @@ -80,4 +80,4 @@ public: } // namespace arm_gemm #undef ARGLIST -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_dot_4x4VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_dot_4x4VL/generic.cpp index 373d82930b..0bfc28776f 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_dot_4x4VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_dot_4x4VL/generic.cpp @@ -21,7 +21,7 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "arm_gemm.hpp" #include "../../utils.hpp" @@ -1529,4 +1529,4 @@ void sve_hybrid_u8qa_dot_4x4VL ( } } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_6x4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_6x4VL.hpp index 4ea1d17c4e..a2883bfa30 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_6x4VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_6x4VL.hpp @@ -22,7 +22,7 @@ * IN THE SOFTWARE. 
*/ #pragma once -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "../std_transforms_sve.hpp" @@ -80,4 +80,4 @@ public: } // namespace arm_gemm #undef ARGLIST -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_6x4VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_6x4VL/generic.cpp index 97f6665d85..413bc65288 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_6x4VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_6x4VL/generic.cpp @@ -21,7 +21,7 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "arm_gemm.hpp" #include "../../utils.hpp" @@ -1819,4 +1819,4 @@ void sve_hybrid_u8u32_dot_6x4VL ( } } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_dot_8x3VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_dot_8x3VL.hpp index 12bb758b68..d717b745c9 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_dot_8x3VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_dot_8x3VL.hpp @@ -23,7 +23,7 @@ */ #pragma once -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "../bfloat.hpp" #include "../std_transforms_sve.hpp" @@ -69,4 +69,4 @@ public: } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_dot_8x3VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_dot_8x3VL/generic.cpp index adee900337..4f774b133f 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_dot_8x3VL/generic.cpp +++ 
b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_dot_8x3VL/generic.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "../../bfloat.hpp" #include "../../asmlib.hpp" @@ -326,4 +326,4 @@ void sve_interleaved_bf16fp32_dot_8x3VL(const bfloat16 *Apanel, const bfloat16 * } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_mmla_8x3VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_mmla_8x3VL.hpp index 2889dd7f0f..b7fc515341 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_mmla_8x3VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_mmla_8x3VL.hpp @@ -23,7 +23,7 @@ */ #pragma once -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "../bfloat.hpp" #include "../std_transforms_sve.hpp" @@ -69,4 +69,4 @@ public: } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_mmla_8x3VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_mmla_8x3VL/generic.cpp index e43404e608..c720942140 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_mmla_8x3VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_mmla_8x3VL/generic.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "../../bfloat.hpp" #include "../../asmlib.hpp" @@ -394,4 +394,4 @@ void sve_interleaved_bf16fp32_mmla_8x3VL(const bfloat16 *Apanel, const bfloat16 } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_8x3VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_8x3VL.hpp index eb946d9dfa..b797b8bec1 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_8x3VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_8x3VL.hpp @@ -23,7 +23,7 @@ */ #pragma once -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "../std_transforms_sve.hpp" @@ -69,4 +69,4 @@ public: } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_8x3VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_8x3VL/generic.cpp index 46b8770409..0f1937acc5 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_8x3VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_8x3VL/generic.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "../../asmlib.hpp" @@ -316,4 +316,4 @@ void sve_interleaved_fp16_mla_8x3VL(const __fp16 *Apanel, const __fp16 *Bpanel, } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_8x3VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_8x3VL.hpp index b84ba83b6a..f4bb809fe8 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_8x3VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_8x3VL.hpp @@ -23,7 +23,7 @@ */ #pragma once -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "../std_transforms_sve.hpp" @@ -69,4 +69,4 @@ public: } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_8x3VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_8x3VL/generic.cpp index 1e05a308b5..10feaa130b 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_8x3VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_8x3VL/generic.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "../../asmlib.hpp" @@ -325,4 +325,4 @@ void sve_interleaved_fp32_mla_8x3VL(const float *Apanel, const float *Bpanel, fl } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mmla_8x3VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mmla_8x3VL.hpp index 96216960ff..a355262fe2 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mmla_8x3VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mmla_8x3VL.hpp @@ -23,7 +23,7 @@ */ #pragma once -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "../std_transforms_sve.hpp" @@ -69,4 +69,4 @@ public: } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mmla_8x3VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mmla_8x3VL/generic.cpp index 39daf0ff20..a985a91b90 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mmla_8x3VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mmla_8x3VL/generic.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include "../../asmlib.hpp" @@ -394,4 +394,4 @@ void sve_interleaved_fp32_mmla_8x3VL(const float *Apanel, const float *Bpanel, f } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_8x3VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_8x3VL.hpp index 3e16915cd4..aa6d9e7ec8 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_8x3VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_8x3VL.hpp @@ -23,7 +23,7 @@ */ #pragma once -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include #include "../std_transforms_sve.hpp" @@ -70,4 +70,4 @@ public: } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_8x3VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_8x3VL/generic.cpp index 674c2400bf..01c0f8cddc 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_8x3VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_8x3VL/generic.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include #include "../../asmlib.hpp" @@ -326,4 +326,4 @@ void sve_interleaved_s8s32_dot_8x3VL(const int8_t *Apanel, const int8_t *Bpanel, } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_mmla_8x3VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_mmla_8x3VL.hpp index 02b3451c54..671946b262 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_mmla_8x3VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_mmla_8x3VL.hpp @@ -23,7 +23,7 @@ */ #pragma once -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include #include "../std_transforms_sve.hpp" @@ -70,4 +70,4 @@ public: } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_mmla_8x3VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_mmla_8x3VL/generic.cpp index 578aa01732..9420210aae 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_mmla_8x3VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_mmla_8x3VL/generic.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include #include "../../asmlib.hpp" @@ -394,4 +394,4 @@ void sve_interleaved_s8s32_mmla_8x3VL(const int8_t *Apanel, const int8_t *Bpanel } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL.hpp index 832a224199..7d39485164 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL.hpp @@ -23,7 +23,7 @@ */ #pragma once -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include #include "../std_transforms_sve.hpp" @@ -70,4 +70,4 @@ public: } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL/generic.cpp index 891869c767..2139bab69d 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL/generic.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include #include "../../asmlib.hpp" @@ -326,4 +326,4 @@ void sve_interleaved_u8u32_dot_8x3VL(const uint8_t *Apanel, const uint8_t *Bpane } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_mmla_8x3VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_mmla_8x3VL.hpp index 4fdaab84bd..ca9cadd6d7 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_mmla_8x3VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_mmla_8x3VL.hpp @@ -23,7 +23,7 @@ */ #pragma once -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include #include "../std_transforms_sve.hpp" @@ -70,4 +70,4 @@ public: } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_mmla_8x3VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_mmla_8x3VL/generic.cpp index fa08a9d091..d42385789c 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_mmla_8x3VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_mmla_8x3VL/generic.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include #include "../../asmlib.hpp" @@ -394,4 +394,4 @@ void sve_interleaved_u8u32_mmla_8x3VL(const uint8_t *Apanel, const uint8_t *Bpan } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_fp32_mla_8x1VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_fp32_mla_8x1VL.hpp index 2097d76a54..ab225589e1 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_fp32_mla_8x1VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_fp32_mla_8x1VL.hpp @@ -23,7 +23,7 @@ */ #pragma once -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE @@ -85,4 +85,4 @@ public: } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_fp32_mla_8x1VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_fp32_mla_8x1VL/generic.cpp index e07cfa8218..cdad98c5f1 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_fp32_mla_8x1VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_fp32_mla_8x1VL/generic.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include @@ -18804,4 +18804,4 @@ void sve_smallK_hybrid_fp32_mla_8x1VL(const float *A, int lda, const float *B, f } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_s8s32_dot_8x1VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_s8s32_dot_8x1VL.hpp index e50c05ba39..e735567e95 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_s8s32_dot_8x1VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_s8s32_dot_8x1VL.hpp @@ -23,7 +23,7 @@ */ #pragma once -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include @@ -85,4 +85,4 @@ public: } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_s8s32_dot_8x1VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_s8s32_dot_8x1VL/generic.cpp index 98004e98a5..cd01411722 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_s8s32_dot_8x1VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_s8s32_dot_8x1VL/generic.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include @@ -8968,4 +8968,4 @@ void sve_smallK_hybrid_s8s32_dot_8x1VL(const int8_t *A, int lda, const int8_t *B } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_u8u32_dot_8x1VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_u8u32_dot_8x1VL.hpp index 60184be043..25dd10019d 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_u8u32_dot_8x1VL.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_u8u32_dot_8x1VL.hpp @@ -23,7 +23,7 @@ */ #pragma once -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include @@ -85,4 +85,4 @@ public: } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_u8u32_dot_8x1VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_u8u32_dot_8x1VL/generic.cpp index 6a8553216b..99a287b4f5 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_u8u32_dot_8x1VL/generic.cpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_u8u32_dot_8x1VL/generic.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE #include @@ -8968,4 +8968,4 @@ void sve_smallK_hybrid_u8u32_dot_8x1VL(const uint8_t *A, int lda, const uint8_t } // namespace arm_gemm -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/mergeresults-sve.cpp b/src/core/NEON/kernels/arm_gemm/mergeresults-sve.cpp new file mode 100644 index 0000000000..77d86b7dd8 --- /dev/null +++ b/src/core/NEON/kernels/arm_gemm/mergeresults-sve.cpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2021 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* As some of the merges need these headers, but are all included in the + * arm_gemm namespace, put these headers here. 
*/ +#include + +#include + +#include "arm_gemm.hpp" +#include "asmlib.hpp" +#include "utils.hpp" + +#include "mergeresults.hpp" + +namespace arm_gemm { + +#include "merges/list-sve.hpp" + +} // namespace arm_gemm \ No newline at end of file diff --git a/src/core/NEON/kernels/arm_gemm/mergeresults.cpp b/src/core/NEON/kernels/arm_gemm/mergeresults.cpp index 17566db375..bbfe8f23d9 100644 --- a/src/core/NEON/kernels/arm_gemm/mergeresults.cpp +++ b/src/core/NEON/kernels/arm_gemm/mergeresults.cpp @@ -37,9 +37,13 @@ namespace arm_gemm { template void MergeResults(Tout * out, const Tin * in, int ldc, int y0, int ymax, int x0, int xmax, const Tout *bias, Activation act, bool append) { + // NOTE: The following code is disabled to avoid calling get_vector_length(), so templated MergeResults will not + // be correct for SVE cases. This is OK as we have specialisations for all needed SVE cases anyway. + // // For SVE cases, multiply the width up by the vector length. // Use the *input* type to determine this, since this will be what the kernel operated on. - const int width = twidth * (sve ? get_vector_length() : 1); + // const int width = twidth * (sve ? get_vector_length() : 1); + const int width = twidth; const int full_y_blocks = (ymax - y0) / height; const int y_remainder = (ymax - y0) % height; diff --git a/src/core/NEON/kernels/arm_gemm/merges/list-sve.hpp b/src/core/NEON/kernels/arm_gemm/merges/list-sve.hpp new file mode 100644 index 0000000000..aded4b3b8c --- /dev/null +++ b/src/core/NEON/kernels/arm_gemm/merges/list-sve.hpp @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2021 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "sve_merge_fp16_3VLx8.hpp" +#include "sve_merge_fp32_3VLx8.hpp" +#include "sve_merge_s32_3VLx8.hpp" +#include "sve_merge_u32_3VLx8.hpp" \ No newline at end of file diff --git a/src/core/NEON/kernels/arm_gemm/merges/list.hpp b/src/core/NEON/kernels/arm_gemm/merges/list.hpp index 825c2fd020..dae874ef94 100644 --- a/src/core/NEON/kernels/arm_gemm/merges/list.hpp +++ b/src/core/NEON/kernels/arm_gemm/merges/list.hpp @@ -27,8 +27,4 @@ #include "a64_merge_s32_12x8.hpp" #include "a64_merge_s32_4x4.hpp" #include "a64_merge_u32_12x8.hpp" -#include "a64_merge_u32_4x4.hpp" -#include "sve_merge_fp16_3VLx8.hpp" -#include "sve_merge_fp32_3VLx8.hpp" -#include "sve_merge_s32_3VLx8.hpp" -#include "sve_merge_u32_3VLx8.hpp" +#include "a64_merge_u32_4x4.hpp" \ No newline at end of file diff --git a/src/core/NEON/kernels/arm_gemm/merges/sve_merge_fp16_3VLx8.hpp b/src/core/NEON/kernels/arm_gemm/merges/sve_merge_fp16_3VLx8.hpp index cf1d10329b..4da32b459c 100644 --- a/src/core/NEON/kernels/arm_gemm/merges/sve_merge_fp16_3VLx8.hpp +++ b/src/core/NEON/kernels/arm_gemm/merges/sve_merge_fp16_3VLx8.hpp @@ -23,7 +23,7 @@ */ #pragma once -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE template<> void MergeResults<3, 8, true>(__fp16 *out, const __fp16 *in, const int ldout, const int y0, const int ymax, const int x0, const int xmax, const __fp16 *bias, Activation act, bool append) @@ -1872,4 +1872,4 @@ void MergeResults<3, 8, true>(__fp16 *out, const __fp16 *in, const int ldout, co } } -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/merges/sve_merge_fp32_3VLx8.hpp b/src/core/NEON/kernels/arm_gemm/merges/sve_merge_fp32_3VLx8.hpp index b0d10c085d..5505f1efe4 100644 --- a/src/core/NEON/kernels/arm_gemm/merges/sve_merge_fp32_3VLx8.hpp +++ b/src/core/NEON/kernels/arm_gemm/merges/sve_merge_fp32_3VLx8.hpp @@ -23,7 +23,7 @@ */ #pragma once -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE template<> void 
MergeResults<3, 8, true>(float *out, const float *in, const int ldout, const int y0, const int ymax, const int x0, const int xmax, const float *bias, Activation act, bool append) @@ -1872,4 +1872,4 @@ void MergeResults<3, 8, true>(float *out, const float *in, const int ldout, cons } } -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/merges/sve_merge_s32_3VLx8.hpp b/src/core/NEON/kernels/arm_gemm/merges/sve_merge_s32_3VLx8.hpp index 34b6fe3ef5..c009881254 100644 --- a/src/core/NEON/kernels/arm_gemm/merges/sve_merge_s32_3VLx8.hpp +++ b/src/core/NEON/kernels/arm_gemm/merges/sve_merge_s32_3VLx8.hpp @@ -23,7 +23,7 @@ */ #pragma once -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE template<> void MergeResults<3, 8, true>(int32_t *out, const int32_t *in, const int ldout, const int y0, const int ymax, const int x0, const int xmax, const int32_t *bias, Activation , bool append) @@ -1394,4 +1394,4 @@ void MergeResults<3, 8, true>(int32_t *out, const int32_t *in, const int ldout, } } -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/merges/sve_merge_u32_3VLx8.hpp b/src/core/NEON/kernels/arm_gemm/merges/sve_merge_u32_3VLx8.hpp index c4b2bb56d6..e992f6722c 100644 --- a/src/core/NEON/kernels/arm_gemm/merges/sve_merge_u32_3VLx8.hpp +++ b/src/core/NEON/kernels/arm_gemm/merges/sve_merge_u32_3VLx8.hpp @@ -23,7 +23,7 @@ */ #pragma once -#ifdef __ARM_FEATURE_SVE +#ifdef ARM_COMPUTE_ENABLE_SVE template<> void MergeResults<3, 8, true>(uint32_t *out, const uint32_t *in, const int ldout, const int y0, const int ymax, const int x0, const int xmax, const uint32_t *bias, Activation , bool append) @@ -1394,4 +1394,4 @@ void MergeResults<3, 8, true>(uint32_t *out, const uint32_t *in, const int ldout } } -#endif // __ARM_FEATURE_SVE +#endif // ARM_COMPUTE_ENABLE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/utils.hpp b/src/core/NEON/kernels/arm_gemm/utils.hpp index 
1269ef62a6..e648ce2fb5 100644 --- a/src/core/NEON/kernels/arm_gemm/utils.hpp +++ b/src/core/NEON/kernels/arm_gemm/utils.hpp @@ -141,52 +141,36 @@ struct IndirectInputArg { }; namespace utils { -namespace { - -#ifdef __ARM_FEATURE_SVE -template -inline unsigned long get_vector_length_sz() { - unsigned long v; - - __asm ( - "cntb %0" - : "=r" (v) - ); - - return v / sz; -} - -#define VEC_LEN_SPEC(sz, opcode) template <> inline unsigned long get_vector_length_sz() { unsigned long v; __asm ( opcode " %0" : "=r" (v)); return v; } - -VEC_LEN_SPEC(8, "cntd") -VEC_LEN_SPEC(4, "cntw") -VEC_LEN_SPEC(2, "cnth") -VEC_LEN_SPEC(1, "cntb") -#endif - -} // anonymous namespace - template inline unsigned long get_vector_length() { -#ifdef __ARM_FEATURE_SVE - return get_vector_length_sz(); -#else +#if defined(ARM_COMPUTE_ENABLE_SVE) + uint64_t vl; + + __asm __volatile ( + ".inst 0x0420e3e0\n" // CNTB X0, ALL, MUL #1 + "mov %0, X0\n" + : "=r" (vl) + : + : "x0" + ); + + return vl / sizeof(T); +#else // !defined(ARM_COMPUTE_ENABLE_SVE) return 16 / sizeof(T); -#endif +#endif // defined(ARM_COMPUTE_ENABLE_SVE) } template inline unsigned long get_vector_length(VLType vl_type) { switch (vl_type) { -#ifdef __ARM_FEATURE_SVE +#if defined(ARM_COMPUTE_ENABLE_SVE) case VLType::SVE: - return get_vector_length_sz(); -#endif + return get_vector_length(); +#endif // defined(ARM_COMPUTE_ENABLE_SVE) default: return 16 / sizeof(T); } } - } // utils namespace } // arm_gemm namespace -- cgit v1.2.1