aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/kernels/arm_gemm
diff options
context:
space:
mode:
authorMichalis Spyrou <michalis.spyrou@arm.com>2021-06-07 14:23:57 +0100
committerGeorgios Pinitas <georgios.pinitas@arm.com>2021-06-23 12:25:50 +0000
commit20fca524baf99402f742ce38c538f2fd07d5fff9 (patch)
treeb63d98383d1ba22bb3ca59d393e4ab9d47a9c762 /src/core/NEON/kernels/arm_gemm
parent1d359279e22874121def2ce4bfdb633d94ea5ade (diff)
downloadComputeLibrary-20fca524baf99402f742ce38c538f2fd07d5fff9.tar.gz
Create core library using high priority operators
A smaller core library is created using a subset of the operators. Changed the structure of filelist.json in order to include more information about the kernels and make the selection easier. Resolves: COMPMID-4514 Change-Id: I079ca7d8e64346174eebdd13b834e1dd4dc36ca2 Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5786 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/arm_gemm')
-rw-r--r--src/core/NEON/kernels/arm_gemm/gemm_bf16.cpp18
-rw-r--r--src/core/NEON/kernels/arm_gemm/gemm_fp16.cpp2
-rw-r--r--src/core/NEON/kernels/arm_gemm/gemm_fp32.cpp16
-rw-r--r--src/core/NEON/kernels/arm_gemm/gemm_int8.cpp14
-rw-r--r--src/core/NEON/kernels/arm_gemm/gemm_qint8.cpp22
-rw-r--r--src/core/NEON/kernels/arm_gemm/gemm_quint8.cpp16
-rw-r--r--src/core/NEON/kernels/arm_gemm/gemm_uint8.cpp10
-rw-r--r--src/core/NEON/kernels/arm_gemm/interleave_indirect.cpp16
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_gemv_fp32_mla_8VL/generic.cpp2
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_dot_6x4VL.hpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_dot_6x4VL/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL.hpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL.hpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL.hpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_dot_4x4VL.hpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_dot_4x4VL/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_dot_6x4VL.hpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_dot_6x4VL/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_6x4VL.hpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_6x4VL/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_dot_4x4VL.hpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_dot_4x4VL/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_6x4VL.hpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_6x4VL/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_dot_8x3VL.hpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_dot_8x3VL/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_mmla_8x3VL.hpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_mmla_8x3VL/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_8x3VL.hpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_8x3VL/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_8x3VL.hpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_8x3VL/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mmla_8x3VL.hpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mmla_8x3VL/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_8x3VL.hpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_8x3VL/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_mmla_8x3VL.hpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_mmla_8x3VL/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL.hpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_mmla_8x3VL.hpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_mmla_8x3VL/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_fp32_mla_8x1VL.hpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_fp32_mla_8x1VL/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_s8s32_dot_8x1VL.hpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_s8s32_dot_8x1VL/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_u8u32_dot_8x1VL.hpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_u8u32_dot_8x1VL/generic.cpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/mergeresults-sve.cpp41
-rw-r--r--src/core/NEON/kernels/arm_gemm/mergeresults.cpp6
-rw-r--r--src/core/NEON/kernels/arm_gemm/merges/list-sve.hpp28
-rw-r--r--src/core/NEON/kernels/arm_gemm/merges/list.hpp6
-rw-r--r--src/core/NEON/kernels/arm_gemm/merges/sve_merge_fp16_3VLx8.hpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/merges/sve_merge_fp32_3VLx8.hpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/merges/sve_merge_s32_3VLx8.hpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/merges/sve_merge_u32_3VLx8.hpp4
-rw-r--r--src/core/NEON/kernels/arm_gemm/utils.hpp50
60 files changed, 242 insertions, 189 deletions
diff --git a/src/core/NEON/kernels/arm_gemm/gemm_bf16.cpp b/src/core/NEON/kernels/arm_gemm/gemm_bf16.cpp
index d8134c4bb5..8244523696 100644
--- a/src/core/NEON/kernels/arm_gemm/gemm_bf16.cpp
+++ b/src/core/NEON/kernels/arm_gemm/gemm_bf16.cpp
@@ -44,26 +44,26 @@ namespace arm_gemm {
static const GemmImplementation<bfloat16, float> gemm_bf16_methods[] =
{
-#ifdef V8P6_BF
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_BF16
+#ifdef ARM_COMPUTE_ENABLE_SVE
{ // gemm_bf16_interleaved
GemmMethod::GEMM_INTERLEAVED,
"sve_interleaved_bf16fp32_mmla_8x3VL",
- [](const GemmArgs &args) { return args._ci->has_sve() && (args._Ksize>4); },
+ [](const GemmArgs &args) { return args._ci->has_svebf16() && (args._Ksize>4); },
[](const GemmArgs &args) { return args._ci->get_cpu_model() != CPUModel::KLEIN; },
[](const GemmArgs &args) { return new GemmInterleaved<cls_sve_interleaved_bf16fp32_mmla_8x3VL, bfloat16, float>(args); }
},
{
GemmMethod::GEMM_HYBRID,
"sve_hybrid_bf16fp32_dot_6x4VL",
- [](const GemmArgs &args) { return args._ci->has_sve(); },
+ [](const GemmArgs &args) { return args._ci->has_svebf16(); },
[](const GemmArgs &args) { return args._ci->get_cpu_model() != CPUModel::KLEIN && ((args._Ksize <= 128) && (args._Nsize <= 128)); },
[](const GemmArgs &args) { return new GemmHybridIndirect<cls_sve_hybrid_bf16fp32_dot_6x4VL, bfloat16, float>(args); }
},
{ // gemm_bf16_interleaved
GemmMethod::GEMM_INTERLEAVED,
"sve_interleaved_bf16fp32_dot_8x3VL",
- [](const GemmArgs &args) { return args._ci->has_sve() && (args._Ksize>2); },
+ [](const GemmArgs &args) { return args._ci->has_svebf16() && (args._Ksize>2); },
[](const GemmArgs &args) { return args._ci->get_cpu_model() != CPUModel::KLEIN; },
[](const GemmArgs &args) { return new GemmInterleaved<cls_sve_interleaved_bf16fp32_dot_8x3VL, bfloat16, float>(args); }
},
@@ -71,25 +71,25 @@ static const GemmImplementation<bfloat16, float> gemm_bf16_methods[] =
{ // gemm_bf16_interleaved
GemmMethod::GEMM_INTERLEAVED,
"a64_interleaved_bf16fp32_mmla_8x12",
- [](const GemmArgs &args) { return (args._Ksize>4); },
+ [](const GemmArgs &args) { return args._ci->has_bf16() && (args._Ksize>4); },
nullptr,
[](const GemmArgs &args) { return new GemmInterleaved<cls_a64_interleaved_bf16fp32_mmla_8x12, bfloat16, float>(args); }
},
{
GemmMethod::GEMM_HYBRID,
"a64_hybrid_bf16fp32_dot_6x16",
- nullptr,
+ [](const GemmArgs &args) { return args._ci->has_bf16(); },
nullptr,
[](const GemmArgs &args) { return new GemmHybridIndirect<cls_a64_hybrid_bf16fp32_dot_6x16, bfloat16, float>(args); }
},
{ // gemm_bf16_interleaved
GemmMethod::GEMM_INTERLEAVED,
"a64_interleaved_bf16fp32_dot_8x12",
- [](const GemmArgs &args) { return (args._Ksize>2); },
+ [](const GemmArgs &args) { return args._ci->has_bf16() && (args._Ksize>2); },
nullptr,
[](const GemmArgs &args) { return new GemmInterleaved<cls_a64_interleaved_bf16fp32_dot_8x12, bfloat16, float>(args); }
},
-#endif // V8P6_BF
+#endif // ARM_COMPUTE_ENABLE_BF16
#ifdef __aarch64__
{
GemmMethod::GEMM_INTERLEAVED,
diff --git a/src/core/NEON/kernels/arm_gemm/gemm_fp16.cpp b/src/core/NEON/kernels/arm_gemm/gemm_fp16.cpp
index 8e355c8f2c..b41d8dd097 100644
--- a/src/core/NEON/kernels/arm_gemm/gemm_fp16.cpp
+++ b/src/core/NEON/kernels/arm_gemm/gemm_fp16.cpp
@@ -43,7 +43,7 @@
namespace arm_gemm {
static const GemmImplementation<__fp16, __fp16> gemm_fp16_methods[] = {
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
{
GemmMethod::GEMM_HYBRID,
"sve_hybrid_fp16_mla_6x4VL",
diff --git a/src/core/NEON/kernels/arm_gemm/gemm_fp32.cpp b/src/core/NEON/kernels/arm_gemm/gemm_fp32.cpp
index d94814fb4c..1632e301ac 100644
--- a/src/core/NEON/kernels/arm_gemm/gemm_fp32.cpp
+++ b/src/core/NEON/kernels/arm_gemm/gemm_fp32.cpp
@@ -59,7 +59,7 @@ static const GemmImplementation<float, float> gemm_fp32_methods[] =
[](const GemmArgs &args) { return new GemvBatched<float, float>(args); }
},
#ifdef __aarch64__
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
{
GemmMethod::GEMM_HYBRID,
"sve_gemv_fp32_mla_8VL",
@@ -77,17 +77,17 @@ static const GemmImplementation<float, float> gemm_fp32_methods[] =
},
// MMLA next due to higher throughput (SVE only)
-#if defined(__ARM_FEATURE_SVE) && defined(MMLA_FP32)
+#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVEF32MM)
{
GemmMethod::GEMM_INTERLEAVED,
"sve_interleaved_fp32_mmla_8x3VL",
- [](const GemmArgs &args) { return args._ci->has_sve() && (args._Ksize>4); },
+ [](const GemmArgs &args) { return args._ci->has_svef32mm() && (args._Ksize>4); },
[](const GemmArgs &args) { return args._ci->get_cpu_model() != CPUModel::KLEIN; },
[](const GemmArgs &args) { return new GemmInterleaved<cls_sve_interleaved_fp32_mmla_8x3VL, float, float>(args); }
},
-#endif // __ARM_FEATURE_SVE && MMLA_FP32
+#endif // ARM_COMPUTE_ENABLE_SVE && ARM_COMPUTE_ENABLE_SVEF32MM
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
// SVE smallk / hybrid methods
{
GemmMethod::GEMM_HYBRID,
@@ -110,7 +110,7 @@ static const GemmImplementation<float, float> gemm_fp32_methods[] =
[](const GemmArgs &args) { return args._ci->get_cpu_model() != CPUModel::KLEIN && (((args._Ksize <= 256) && (args._Nsize <= 256)) || ((args._nmulti > 1) && ((args._Msize / args._maxthreads) < 8))); },
[](const GemmArgs &args) { return new GemmHybridIndirect<cls_sve_hybrid_fp32_mla_6x4VL, float, float>(args); }
},
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
// Cortex-A35 specific kernel - use for any problem on A35, and never in any other cases.
{
GemmMethod::GEMM_INTERLEAVED,
@@ -148,7 +148,7 @@ GemmImplementation<float, float>::with_estimate(
[](const GemmArgs &args) { return GemmHybridIndirect<cls_a64_hybrid_fp32_mla_6x16, float, float>::estimate_cycles(args, cls_a64_hybrid_fp32_mla_6x16::get_performance_parameters(args._ci)); },
[](const GemmArgs &args) { return new GemmHybridIndirect<cls_a64_hybrid_fp32_mla_6x16, float, float>(args); }
),
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
{
GemmMethod::GEMM_INTERLEAVED,
"sve_interleaved_fp32_mla_8x3VL",
@@ -156,7 +156,7 @@ GemmImplementation<float, float>::with_estimate(
[](const GemmArgs &args) { return args._ci->get_cpu_model() != CPUModel::KLEIN; },
[](const GemmArgs &args) { return new GemmInterleaved<cls_sve_interleaved_fp32_mla_8x3VL, float, float>(args); }
},
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
GemmImplementation<float, float>::with_estimate(
GemmMethod::GEMM_INTERLEAVED,
"a64_sgemm_8x12",
diff --git a/src/core/NEON/kernels/arm_gemm/gemm_int8.cpp b/src/core/NEON/kernels/arm_gemm/gemm_int8.cpp
index 60cf82f9c6..bfb3ca901f 100644
--- a/src/core/NEON/kernels/arm_gemm/gemm_int8.cpp
+++ b/src/core/NEON/kernels/arm_gemm/gemm_int8.cpp
@@ -46,16 +46,16 @@
namespace arm_gemm {
static const GemmImplementation<int8_t, int32_t> gemm_s8_methods[] = {
-#ifdef __ARM_FEATURE_SVE
-#ifdef MMLA_INT8
+#ifdef ARM_COMPUTE_ENABLE_SVE
+#ifdef ARM_COMPUTE_ENABLE_I8MM
{
GemmMethod::GEMM_INTERLEAVED,
"sve_interleaved_s8s32_mmla_8x3VL",
- [](const GemmArgs &args) { return args._ci->has_sve() && (args._Ksize>8); },
+ [](const GemmArgs &args) { return args._ci->has_svei8mm() && (args._Ksize>8); },
[](const GemmArgs &args) { return args._ci->get_cpu_model() != CPUModel::KLEIN; },
[](const GemmArgs &args) { return new GemmInterleaved<cls_sve_interleaved_s8s32_mmla_8x3VL, int8_t, int32_t>(args); }
},
-#endif
+#endif // ARM_COMPUTE_ENABLE_I8MM
{
GemmMethod::GEMM_HYBRID,
"sve_smallK_hybrid_s8s32_dot_8x1VL",
@@ -78,15 +78,15 @@ static const GemmImplementation<int8_t, int32_t> gemm_s8_methods[] = {
[](const GemmArgs &args) { return new GemmInterleaved<cls_sve_interleaved_s8s32_dot_8x3VL, int8_t, int32_t>(args); }
},
#endif // SVE
-#ifdef MMLA_INT8
+#ifdef ARM_COMPUTE_ENABLE_I8MM
{
GemmMethod::GEMM_INTERLEAVED,
"a64_interleaved_s8s32_mmla_8x12",
- [](const GemmArgs &args) { return (args._Ksize>8); },
+ [](const GemmArgs &args) { return args._ci->has_svei8mm() && (args._Ksize>8); },
nullptr,
[](const GemmArgs &args) { return new GemmInterleaved<cls_a64_interleaved_s8s32_mmla_8x12, int8_t, int32_t>(args); }
},
-#endif
+#endif // ARM_COMPUTE_ENABLE_I8MM
{
GemmMethod::GEMM_HYBRID,
"a64_smallK_hybrid_s8s32_dot_8x4",
diff --git a/src/core/NEON/kernels/arm_gemm/gemm_qint8.cpp b/src/core/NEON/kernels/arm_gemm/gemm_qint8.cpp
index 094b6fdff4..985567f6f3 100644
--- a/src/core/NEON/kernels/arm_gemm/gemm_qint8.cpp
+++ b/src/core/NEON/kernels/arm_gemm/gemm_qint8.cpp
@@ -53,16 +53,16 @@ namespace arm_gemm {
static const GemmImplementation<int8_t, int8_t, Requantize32> gemm_qint8_methods[] =
{
-#ifdef __ARM_FEATURE_SVE
-#ifdef MMLA_INT8
+#ifdef ARM_COMPUTE_ENABLE_SVE
+#ifdef ARM_COMPUTE_ENABLE_I8MM
{
GemmMethod::GEMM_INTERLEAVED,
"sve_interleaved_s8s32_mmla_8x3VL",
- [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_sve() && (args._Ksize>8); },
+ [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_svei8mm() && (args._Ksize>8); },
[](const GemmArgs &args, const Requantize32 &) { return args._ci->get_cpu_model() != CPUModel::KLEIN; },
[](const GemmArgs &args, const Requantize32 &qp) { return new GemmInterleavedQuantized<cls_sve_interleaved_s8s32_mmla_8x3VL, int8_t, int8_t>(args, qp); }
},
-#endif
+#endif // ARM_COMPUTE_ENABLE_I8MM
{
GemmMethod::GEMM_HYBRID_QUANTIZED,
"sve_smallK_hybrid_s8s32_dot_8x1VL",
@@ -70,22 +70,22 @@ static const GemmImplementation<int8_t, int8_t, Requantize32> gemm_qint8_methods
[](const GemmArgs &args, const Requantize32 &) { return args._ci->get_cpu_model() != CPUModel::KLEIN; },
[](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridQuantized<cls_sve_smallK_hybrid_s8s32_dot_8x1VL, int8_t, int8_t>(args, qp); }
},
-#ifdef SVE2
+#ifdef ARM_COMPUTE_ENABLE_SVE2
{
GemmMethod::GEMM_HYBRID,
"sve_hybrid_s8qs_dot_6x4VL",
- [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_sve() && quant_hybrid_symmetric(qp); },
+ [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_sve2() && quant_hybrid_symmetric(qp); },
[](const GemmArgs &args, const Requantize32 &) { return args._ci->get_cpu_model() != CPUModel::KLEIN; },
[](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridIndirect<cls_sve_hybrid_s8qs_dot_6x4VL, int8_t, int8_t, Requantize32>(args, qp); }
},
{
GemmMethod::GEMM_HYBRID,
"sve_hybrid_s8qa_dot_4x4VL",
- [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_sve() && quant_hybrid_asymmetric(qp); },
+ [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_sve2() && quant_hybrid_asymmetric(qp); },
[](const GemmArgs &args, const Requantize32 &) { return args._ci->get_cpu_model() != CPUModel::KLEIN; },
[](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridIndirect<cls_sve_hybrid_s8qa_dot_4x4VL, int8_t, int8_t, Requantize32>(args, qp); }
},
-#endif
+#endif // ARM_COMPUTE_ENABLE_SVE2
{
GemmMethod::GEMM_HYBRID,
"sve_hybrid_s8s32_dot_6x4VL",
@@ -101,15 +101,15 @@ static const GemmImplementation<int8_t, int8_t, Requantize32> gemm_qint8_methods
[](const GemmArgs &args, const Requantize32 &qp) { return new GemmInterleavedQuantized<cls_sve_interleaved_s8s32_dot_8x3VL, int8_t, int8_t>(args, qp); }
},
#endif // SVE
-#ifdef MMLA_INT8
+#ifdef ARM_COMPUTE_ENABLE_I8MM
{
GemmMethod::GEMM_INTERLEAVED,
"a64_interleaved_s8s32_mmla_8x12",
- [](const GemmArgs &args, const Requantize32 &) { return (args._Ksize>8); },
+ [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_i8mm() && (args._Ksize>8); },
nullptr,
[](const GemmArgs &args, const Requantize32 &qp) { return new GemmInterleavedQuantized<cls_a64_interleaved_s8s32_mmla_8x12, int8_t, int8_t>(args, qp); }
},
-#endif
+#endif // ARM_COMPUTE_ENABLE_I8MM
{
GemmMethod::GEMM_HYBRID_QUANTIZED,
"a64_smallK_hybrid_s8s32_dot_8x4",
diff --git a/src/core/NEON/kernels/arm_gemm/gemm_quint8.cpp b/src/core/NEON/kernels/arm_gemm/gemm_quint8.cpp
index be27b3a117..f3f2f335fd 100644
--- a/src/core/NEON/kernels/arm_gemm/gemm_quint8.cpp
+++ b/src/core/NEON/kernels/arm_gemm/gemm_quint8.cpp
@@ -50,12 +50,12 @@ namespace arm_gemm {
static const GemmImplementation<uint8_t, uint8_t, Requantize32> gemm_quint8_methods[] =
{
-#ifdef __ARM_FEATURE_SVE
-#ifdef MMLA_INT8
+#ifdef ARM_COMPUTE_ENABLE_SVE
+#ifdef ARM_COMPUTE_ENABLE_I8MM
{
GemmMethod::GEMM_INTERLEAVED,
"sve_interleaved_u8u32_mmla_8x3VL",
- [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_sve() && (args._Ksize>8); },
+ [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_svei8mm() && (args._Ksize>8); },
[](const GemmArgs &args, const Requantize32 &) { return args._ci->get_cpu_model() != CPUModel::KLEIN; },
[](const GemmArgs &args, const Requantize32 &qp) { return new GemmInterleavedQuantized<cls_sve_interleaved_u8u32_mmla_8x3VL, uint8_t, uint8_t>(args, qp); }
},
@@ -67,15 +67,15 @@ static const GemmImplementation<uint8_t, uint8_t, Requantize32> gemm_quint8_meth
[](const GemmArgs &args, const Requantize32 &) { return args._ci->get_cpu_model() != CPUModel::KLEIN; },
[](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridQuantized<cls_sve_smallK_hybrid_u8u32_dot_8x1VL, uint8_t, uint8_t>(args, qp); }
},
-#ifdef SVE2 // Requantizing kernels include some SVE2 only instructions (SQRDMULH, SRSHL)
+#ifdef ARM_COMPUTE_ENABLE_SVE2 // Requantizing kernels include some SVE2 only instructions (SQRDMULH, SRSHL)
{
GemmMethod::GEMM_HYBRID,
"sve_hybrid_u8qa_dot_4x4VL",
- [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_sve() && quant_hybrid_asymmetric(qp); },
+ [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_sve2() && quant_hybrid_asymmetric(qp); },
[](const GemmArgs &args, const Requantize32 &) { return args._ci->get_cpu_model() != CPUModel::KLEIN; },
[](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridIndirect<cls_sve_hybrid_u8qa_dot_4x4VL, uint8_t, uint8_t, Requantize32>(args, qp); }
},
-#endif
+#endif // ARM_COMPUTE_ENABLE_SVE2
{
GemmMethod::GEMM_HYBRID,
"sve_hybrid_u8u32_dot_6x4VL",
@@ -91,11 +91,11 @@ static const GemmImplementation<uint8_t, uint8_t, Requantize32> gemm_quint8_meth
[](const GemmArgs &args, const Requantize32 &qp) { return new GemmInterleavedQuantized<cls_sve_interleaved_u8u32_dot_8x3VL, uint8_t, uint8_t>(args, qp); }
},
#endif
-#ifdef MMLA_INT8
+#ifdef ARM_COMPUTE_ENABLE_I8MM
{
GemmMethod::GEMM_INTERLEAVED,
"a64_interleaved_u8u32_mmla_8x12",
- [](const GemmArgs &args, const Requantize32 &) { return (args._Ksize>8); },
+ [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_i8mm() && (args._Ksize>8); },
[](const GemmArgs &args, const Requantize32 &) { return args._ci->get_cpu_model() != CPUModel::KLEIN; },
[](const GemmArgs &args, const Requantize32 &qp) { return new GemmInterleavedQuantized<cls_a64_interleaved_u8u32_mmla_8x12, uint8_t, uint8_t>(args, qp); }
},
diff --git a/src/core/NEON/kernels/arm_gemm/gemm_uint8.cpp b/src/core/NEON/kernels/arm_gemm/gemm_uint8.cpp
index 4de3d2b18a..4c05fd1b73 100644
--- a/src/core/NEON/kernels/arm_gemm/gemm_uint8.cpp
+++ b/src/core/NEON/kernels/arm_gemm/gemm_uint8.cpp
@@ -46,12 +46,12 @@
namespace arm_gemm {
static const GemmImplementation<uint8_t, uint32_t> gemm_u8_methods[] = {
-#ifdef __ARM_FEATURE_SVE
-#ifdef MMLA_INT8
+#ifdef ARM_COMPUTE_ENABLE_SVE
+#ifdef ARM_COMPUTE_ENABLE_I8MM
{
GemmMethod::GEMM_INTERLEAVED,
"sve_interleaved_u8u32_mmla_8x3VL",
- [](const GemmArgs &args) { return args._ci->has_sve() && (args._Ksize>8); },
+ [](const GemmArgs &args) { return args._ci->has_svei8mm() && (args._Ksize>8); },
[](const GemmArgs &args) { return args._ci->get_cpu_model() != CPUModel::KLEIN; },
[](const GemmArgs &args) { return new GemmInterleaved<cls_sve_interleaved_u8u32_mmla_8x3VL, uint8_t, uint32_t>(args); }
},
@@ -78,11 +78,11 @@ static const GemmImplementation<uint8_t, uint32_t> gemm_u8_methods[] = {
[](const GemmArgs &args) { return new GemmInterleaved<cls_sve_interleaved_u8u32_dot_8x3VL, uint8_t, uint32_t>(args); }
},
#endif
-#ifdef MMLA_INT8
+#ifdef ARM_COMPUTE_ENABLE_I8MM
{
GemmMethod::GEMM_INTERLEAVED,
"a64_interleaved_u8u32_mmla_8x12",
- [](const GemmArgs &args) { return (args._Ksize>8); },
+ [](const GemmArgs &args) { return args._ci->has_i8mm() && (args._Ksize>8); },
nullptr,
[](const GemmArgs &args) { return new GemmInterleaved<cls_a64_interleaved_u8u32_mmla_8x12, uint8_t, uint32_t>(args); }
},
diff --git a/src/core/NEON/kernels/arm_gemm/interleave_indirect.cpp b/src/core/NEON/kernels/arm_gemm/interleave_indirect.cpp
index 0d56b46e19..a6b1269927 100644
--- a/src/core/NEON/kernels/arm_gemm/interleave_indirect.cpp
+++ b/src/core/NEON/kernels/arm_gemm/interleave_indirect.cpp
@@ -320,12 +320,12 @@ template void IndirectInterleave<8, 1, VLType::None>(float *, const float * cons
template void ConvolutionInterleave<8, 1, VLType::None>(float *, const float *, size_t, const convolver<float> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void Interleave<8, 1, VLType::None>(float *, const float *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
-#if defined(__ARM_FEATURE_SVE) && defined(MMLA_FP32)
+#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVEF32MM)
/* FMMLA */
template void IndirectInterleave<8, 2, VLType::None>(float *, const float * const * const *, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void ConvolutionInterleave<8, 2, VLType::None>(float *, const float *, size_t, const convolver<float> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void Interleave<8, 2, VLType::None>(float *, const float *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
-#endif // SVE && MMLA_FP32
+#endif // ARM_COMPUTE_ENABLE_SVE && ARM_COMPUTE_ENABLE_SVEF32MM
/* FP16 */
#if defined(FP16_KERNELS) || defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
@@ -340,7 +340,7 @@ template void Interleave<8, 1, VLType::None>(float *, const __fp16 *, size_t, un
/* BF16 */
/* Arm® Neon™/SVE BFDOT */
-#ifdef V8P6_BF
+#ifdef ARM_COMPUTE_ENABLE_BF16
template void IndirectInterleave<8, 2, VLType::None>(bfloat16 *, const bfloat16 * const * const *, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void ConvolutionInterleave<8, 2, VLType::None>(bfloat16 *, const bfloat16 *, size_t, const convolver<bfloat16> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void Interleave<8, 2, VLType::None>(bfloat16 *, const bfloat16 *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
@@ -348,7 +348,7 @@ template void Interleave<8, 2, VLType::None>(bfloat16 *, const bfloat16 *, size_
template void IndirectInterleave<8, 4, VLType::None>(bfloat16 *, const bfloat16 * const * const *, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void ConvolutionInterleave<8, 4, VLType::None>(bfloat16 *, const bfloat16 *, size_t, const convolver<bfloat16> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void Interleave<8, 4, VLType::None>(bfloat16 *, const bfloat16 *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
-#endif // V8P6_BF
+#endif // ARM_COMPUTE_ENABLE_BF16
/* Arm® Neon™/SVE using FP32 kernel */
template void IndirectInterleave<8, 1, VLType::None>(float *, const bfloat16 * const * const *, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
@@ -375,12 +375,12 @@ template void IndirectInterleave<8, 4, VLType::None>(int8_t *, const int8_t * co
template void ConvolutionInterleave<8, 4, VLType::None>(int8_t *, const int8_t *, size_t, const convolver<int8_t> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void Interleave<8, 4, VLType::None>(int8_t *, const int8_t *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
-#ifdef MMLA_INT8
+#ifdef ARM_COMPUTE_ENABLE_I8MM
/* MMLA SMMLA (height 8, block 8) */
template void IndirectInterleave<8, 8, VLType::None>(int8_t *, const int8_t * const * const *, unsigned int, unsigned int, unsigned int y0, unsigned int ymax, unsigned int k0, unsigned int kmax, bool, int32_t);
template void ConvolutionInterleave<8, 8, VLType::None>(int8_t *, const int8_t *, size_t, const convolver<int8_t> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void Interleave<8, 8, VLType::None>(int8_t *, const int8_t *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
-#endif // MMLA_INT8
+#endif // ARM_COMPUTE_ENABLE_I8MM
/* Arm® Neon™ SDOT (height 8, block 1) */
template void IndirectInterleave<8, 1, VLType::None>(int16_t *, const int8_t * const * const *, unsigned int, unsigned int, unsigned int y0, unsigned int ymax, unsigned int k0, unsigned int kmax, bool, int32_t);
@@ -397,12 +397,12 @@ template void IndirectInterleave<8, 4, VLType::None>(uint8_t *, const uint8_t *
template void ConvolutionInterleave<8, 4, VLType::None>(uint8_t *, const uint8_t *, size_t, const convolver<uint8_t> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void Interleave<8, 4, VLType::None>(uint8_t *, const uint8_t *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
-#ifdef MMLA_INT8
+#ifdef ARM_COMPUTE_ENABLE_I8MM
/* MMLA SMMLA (height 8, block 8) */
template void IndirectInterleave<8, 8, VLType::None>(uint8_t *, const uint8_t * const * const *, unsigned int, unsigned int, unsigned int y0, unsigned int ymax, unsigned int k0, unsigned int kmax, bool, int32_t);
template void ConvolutionInterleave<8, 8, VLType::None>(uint8_t *, const uint8_t *, size_t, const convolver<uint8_t> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void Interleave<8, 8, VLType::None>(uint8_t *, const uint8_t *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
-#endif // MMLA_INT8
+#endif // ARM_COMPUTE_ENABLE_I8MM
/* Arm® Neon™ 16-bit (height 8, block 1) */
template void IndirectInterleave<8, 1, VLType::None>(uint16_t *, const uint8_t * const * const *, unsigned int, unsigned int, unsigned int y0, unsigned int ymax, unsigned int k0, unsigned int kmax, bool, int32_t);
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_gemv_fp32_mla_8VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_gemv_fp32_mla_8VL/generic.cpp
index c62e31936c..78387de90c 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_gemv_fp32_mla_8VL/generic.cpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_gemv_fp32_mla_8VL/generic.cpp
@@ -21,7 +21,7 @@
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "arm_gemm.hpp"
#include "../../utils.hpp"
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_dot_6x4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_dot_6x4VL.hpp
index 066bff4602..7b0282fa32 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_dot_6x4VL.hpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_dot_6x4VL.hpp
@@ -22,7 +22,7 @@
* IN THE SOFTWARE.
*/
#pragma once
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "../std_transforms_sve.hpp"
#include "../bfloat.hpp"
@@ -81,4 +81,4 @@ public:
} // namespace arm_gemm
#undef ARGLIST
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_dot_6x4VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_dot_6x4VL/generic.cpp
index 1233a98531..34a657f64f 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_dot_6x4VL/generic.cpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_dot_6x4VL/generic.cpp
@@ -21,7 +21,7 @@
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "arm_gemm.hpp"
#include "../../utils.hpp"
@@ -2153,4 +2153,4 @@ void sve_hybrid_bf16fp32_dot_6x4VL (
}
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL.hpp
index 5c8563952f..f98ccdc7d3 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL.hpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL.hpp
@@ -22,7 +22,7 @@
* IN THE SOFTWARE.
*/
#pragma once
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "../std_transforms_sve.hpp"
@@ -80,4 +80,4 @@ public:
} // namespace arm_gemm
#undef ARGLIST
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL/generic.cpp
index 7cc03bbfb5..c151179a1f 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL/generic.cpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL/generic.cpp
@@ -21,7 +21,7 @@
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "arm_gemm.hpp"
#include "../../utils.hpp"
@@ -3094,4 +3094,4 @@ void sve_hybrid_fp16_mla_6x4VL (
}
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL.hpp
index b696e73637..4c0a3a11e0 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL.hpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL.hpp
@@ -22,7 +22,7 @@
* IN THE SOFTWARE.
*/
#pragma once
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "../std_transforms_sve.hpp"
@@ -80,4 +80,4 @@ public:
} // namespace arm_gemm
#undef ARGLIST
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL/generic.cpp
index dee9a107ff..25d65826b9 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL/generic.cpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL/generic.cpp
@@ -21,7 +21,7 @@
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "arm_gemm.hpp"
#include "../../utils.hpp"
@@ -2152,4 +2152,4 @@ void sve_hybrid_fp32_mla_6x4VL (
}
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL.hpp
index 2273d97d5f..87f063d224 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL.hpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL.hpp
@@ -22,7 +22,7 @@
* IN THE SOFTWARE.
*/
#pragma once
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "../std_transforms_sve.hpp"
@@ -80,4 +80,4 @@ public:
} // namespace arm_gemm
#undef ARGLIST
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL/generic.cpp
index 863325f7f5..943e0ac148 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL/generic.cpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL/generic.cpp
@@ -21,7 +21,7 @@
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "arm_gemm.hpp"
#include "../../utils.hpp"
@@ -1616,4 +1616,4 @@ void sve_hybrid_fp32_mla_8x1VL (
}
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_dot_4x4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_dot_4x4VL.hpp
index bc93ced25b..c278b3fc6b 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_dot_4x4VL.hpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_dot_4x4VL.hpp
@@ -22,7 +22,7 @@
* IN THE SOFTWARE.
*/
#pragma once
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "../std_transforms_sve.hpp"
@@ -80,4 +80,4 @@ public:
} // namespace arm_gemm
#undef ARGLIST
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_dot_4x4VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_dot_4x4VL/generic.cpp
index 50b9ba524d..8a7465ba6b 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_dot_4x4VL/generic.cpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_dot_4x4VL/generic.cpp
@@ -21,7 +21,7 @@
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "arm_gemm.hpp"
#include "../../utils.hpp"
@@ -1529,4 +1529,4 @@ void sve_hybrid_s8qa_dot_4x4VL (
}
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_dot_6x4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_dot_6x4VL.hpp
index 61927236ad..57056b4c2a 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_dot_6x4VL.hpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_dot_6x4VL.hpp
@@ -22,7 +22,7 @@
* IN THE SOFTWARE.
*/
#pragma once
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "../std_transforms_sve.hpp"
@@ -80,4 +80,4 @@ public:
} // namespace arm_gemm
#undef ARGLIST
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_dot_6x4VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_dot_6x4VL/generic.cpp
index f901a814f9..0328c107e2 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_dot_6x4VL/generic.cpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_dot_6x4VL/generic.cpp
@@ -21,7 +21,7 @@
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "arm_gemm.hpp"
#include "../../utils.hpp"
@@ -2665,4 +2665,4 @@ void sve_hybrid_s8qs_dot_6x4VL (
}
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_6x4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_6x4VL.hpp
index b2c376196f..37258978d3 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_6x4VL.hpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_6x4VL.hpp
@@ -22,7 +22,7 @@
* IN THE SOFTWARE.
*/
#pragma once
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "../std_transforms_sve.hpp"
@@ -80,4 +80,4 @@ public:
} // namespace arm_gemm
#undef ARGLIST
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_6x4VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_6x4VL/generic.cpp
index 8862b3665a..9cddee941e 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_6x4VL/generic.cpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_6x4VL/generic.cpp
@@ -21,7 +21,7 @@
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "arm_gemm.hpp"
#include "../../utils.hpp"
@@ -1819,4 +1819,4 @@ void sve_hybrid_s8s32_dot_6x4VL (
}
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_dot_4x4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_dot_4x4VL.hpp
index cfb8adfc87..3de8d178cd 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_dot_4x4VL.hpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_dot_4x4VL.hpp
@@ -22,7 +22,7 @@
* IN THE SOFTWARE.
*/
#pragma once
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "../std_transforms_sve.hpp"
@@ -80,4 +80,4 @@ public:
} // namespace arm_gemm
#undef ARGLIST
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_dot_4x4VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_dot_4x4VL/generic.cpp
index 373d82930b..0bfc28776f 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_dot_4x4VL/generic.cpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_dot_4x4VL/generic.cpp
@@ -21,7 +21,7 @@
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "arm_gemm.hpp"
#include "../../utils.hpp"
@@ -1529,4 +1529,4 @@ void sve_hybrid_u8qa_dot_4x4VL (
}
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_6x4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_6x4VL.hpp
index 4ea1d17c4e..a2883bfa30 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_6x4VL.hpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_6x4VL.hpp
@@ -22,7 +22,7 @@
* IN THE SOFTWARE.
*/
#pragma once
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "../std_transforms_sve.hpp"
@@ -80,4 +80,4 @@ public:
} // namespace arm_gemm
#undef ARGLIST
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_6x4VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_6x4VL/generic.cpp
index 97f6665d85..413bc65288 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_6x4VL/generic.cpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_6x4VL/generic.cpp
@@ -21,7 +21,7 @@
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "arm_gemm.hpp"
#include "../../utils.hpp"
@@ -1819,4 +1819,4 @@ void sve_hybrid_u8u32_dot_6x4VL (
}
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_dot_8x3VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_dot_8x3VL.hpp
index 12bb758b68..d717b745c9 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_dot_8x3VL.hpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_dot_8x3VL.hpp
@@ -23,7 +23,7 @@
*/
#pragma once
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "../bfloat.hpp"
#include "../std_transforms_sve.hpp"
@@ -69,4 +69,4 @@ public:
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_dot_8x3VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_dot_8x3VL/generic.cpp
index adee900337..4f774b133f 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_dot_8x3VL/generic.cpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_dot_8x3VL/generic.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "../../bfloat.hpp"
#include "../../asmlib.hpp"
@@ -326,4 +326,4 @@ void sve_interleaved_bf16fp32_dot_8x3VL(const bfloat16 *Apanel, const bfloat16 *
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_mmla_8x3VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_mmla_8x3VL.hpp
index 2889dd7f0f..b7fc515341 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_mmla_8x3VL.hpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_mmla_8x3VL.hpp
@@ -23,7 +23,7 @@
*/
#pragma once
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "../bfloat.hpp"
#include "../std_transforms_sve.hpp"
@@ -69,4 +69,4 @@ public:
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_mmla_8x3VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_mmla_8x3VL/generic.cpp
index e43404e608..c720942140 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_mmla_8x3VL/generic.cpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_mmla_8x3VL/generic.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "../../bfloat.hpp"
#include "../../asmlib.hpp"
@@ -394,4 +394,4 @@ void sve_interleaved_bf16fp32_mmla_8x3VL(const bfloat16 *Apanel, const bfloat16
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_8x3VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_8x3VL.hpp
index eb946d9dfa..b797b8bec1 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_8x3VL.hpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_8x3VL.hpp
@@ -23,7 +23,7 @@
*/
#pragma once
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "../std_transforms_sve.hpp"
@@ -69,4 +69,4 @@ public:
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_8x3VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_8x3VL/generic.cpp
index 46b8770409..0f1937acc5 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_8x3VL/generic.cpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_8x3VL/generic.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "../../asmlib.hpp"
@@ -316,4 +316,4 @@ void sve_interleaved_fp16_mla_8x3VL(const __fp16 *Apanel, const __fp16 *Bpanel,
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_8x3VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_8x3VL.hpp
index b84ba83b6a..f4bb809fe8 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_8x3VL.hpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_8x3VL.hpp
@@ -23,7 +23,7 @@
*/
#pragma once
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "../std_transforms_sve.hpp"
@@ -69,4 +69,4 @@ public:
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_8x3VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_8x3VL/generic.cpp
index 1e05a308b5..10feaa130b 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_8x3VL/generic.cpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_8x3VL/generic.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "../../asmlib.hpp"
@@ -325,4 +325,4 @@ void sve_interleaved_fp32_mla_8x3VL(const float *Apanel, const float *Bpanel, fl
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mmla_8x3VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mmla_8x3VL.hpp
index 96216960ff..a355262fe2 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mmla_8x3VL.hpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mmla_8x3VL.hpp
@@ -23,7 +23,7 @@
*/
#pragma once
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "../std_transforms_sve.hpp"
@@ -69,4 +69,4 @@ public:
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mmla_8x3VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mmla_8x3VL/generic.cpp
index 39daf0ff20..a985a91b90 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mmla_8x3VL/generic.cpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mmla_8x3VL/generic.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include "../../asmlib.hpp"
@@ -394,4 +394,4 @@ void sve_interleaved_fp32_mmla_8x3VL(const float *Apanel, const float *Bpanel, f
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_8x3VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_8x3VL.hpp
index 3e16915cd4..aa6d9e7ec8 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_8x3VL.hpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_8x3VL.hpp
@@ -23,7 +23,7 @@
*/
#pragma once
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include <cstdint>
#include "../std_transforms_sve.hpp"
@@ -70,4 +70,4 @@ public:
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_8x3VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_8x3VL/generic.cpp
index 674c2400bf..01c0f8cddc 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_8x3VL/generic.cpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_8x3VL/generic.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include <cstdint>
#include "../../asmlib.hpp"
@@ -326,4 +326,4 @@ void sve_interleaved_s8s32_dot_8x3VL(const int8_t *Apanel, const int8_t *Bpanel,
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_mmla_8x3VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_mmla_8x3VL.hpp
index 02b3451c54..671946b262 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_mmla_8x3VL.hpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_mmla_8x3VL.hpp
@@ -23,7 +23,7 @@
*/
#pragma once
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include <cstdint>
#include "../std_transforms_sve.hpp"
@@ -70,4 +70,4 @@ public:
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_mmla_8x3VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_mmla_8x3VL/generic.cpp
index 578aa01732..9420210aae 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_mmla_8x3VL/generic.cpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_mmla_8x3VL/generic.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include <cstdint>
#include "../../asmlib.hpp"
@@ -394,4 +394,4 @@ void sve_interleaved_s8s32_mmla_8x3VL(const int8_t *Apanel, const int8_t *Bpanel
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL.hpp
index 832a224199..7d39485164 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL.hpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL.hpp
@@ -23,7 +23,7 @@
*/
#pragma once
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include <cstdint>
#include "../std_transforms_sve.hpp"
@@ -70,4 +70,4 @@ public:
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL/generic.cpp
index 891869c767..2139bab69d 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL/generic.cpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL/generic.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include <cstdint>
#include "../../asmlib.hpp"
@@ -326,4 +326,4 @@ void sve_interleaved_u8u32_dot_8x3VL(const uint8_t *Apanel, const uint8_t *Bpane
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_mmla_8x3VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_mmla_8x3VL.hpp
index 4fdaab84bd..ca9cadd6d7 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_mmla_8x3VL.hpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_mmla_8x3VL.hpp
@@ -23,7 +23,7 @@
*/
#pragma once
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include <cstdint>
#include "../std_transforms_sve.hpp"
@@ -70,4 +70,4 @@ public:
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_mmla_8x3VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_mmla_8x3VL/generic.cpp
index fa08a9d091..d42385789c 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_mmla_8x3VL/generic.cpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_mmla_8x3VL/generic.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include <cstdint>
#include "../../asmlib.hpp"
@@ -394,4 +394,4 @@ void sve_interleaved_u8u32_mmla_8x3VL(const uint8_t *Apanel, const uint8_t *Bpan
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_fp32_mla_8x1VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_fp32_mla_8x1VL.hpp
index 2097d76a54..ab225589e1 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_fp32_mla_8x1VL.hpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_fp32_mla_8x1VL.hpp
@@ -23,7 +23,7 @@
*/
#pragma once
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
@@ -85,4 +85,4 @@ public:
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_fp32_mla_8x1VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_fp32_mla_8x1VL/generic.cpp
index e07cfa8218..cdad98c5f1 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_fp32_mla_8x1VL/generic.cpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_fp32_mla_8x1VL/generic.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include <algorithm>
@@ -18804,4 +18804,4 @@ void sve_smallK_hybrid_fp32_mla_8x1VL(const float *A, int lda, const float *B, f
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_s8s32_dot_8x1VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_s8s32_dot_8x1VL.hpp
index e50c05ba39..e735567e95 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_s8s32_dot_8x1VL.hpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_s8s32_dot_8x1VL.hpp
@@ -23,7 +23,7 @@
*/
#pragma once
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include <cstdint>
@@ -85,4 +85,4 @@ public:
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_s8s32_dot_8x1VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_s8s32_dot_8x1VL/generic.cpp
index 98004e98a5..cd01411722 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_s8s32_dot_8x1VL/generic.cpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_s8s32_dot_8x1VL/generic.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include <algorithm>
@@ -8968,4 +8968,4 @@ void sve_smallK_hybrid_s8s32_dot_8x1VL(const int8_t *A, int lda, const int8_t *B
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_u8u32_dot_8x1VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_u8u32_dot_8x1VL.hpp
index 60184be043..25dd10019d 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_u8u32_dot_8x1VL.hpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_u8u32_dot_8x1VL.hpp
@@ -23,7 +23,7 @@
*/
#pragma once
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include <cstdint>
@@ -85,4 +85,4 @@ public:
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_u8u32_dot_8x1VL/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_u8u32_dot_8x1VL/generic.cpp
index 6a8553216b..99a287b4f5 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_u8u32_dot_8x1VL/generic.cpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_u8u32_dot_8x1VL/generic.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
#include <algorithm>
@@ -8968,4 +8968,4 @@ void sve_smallK_hybrid_u8u32_dot_8x1VL(const uint8_t *A, int lda, const uint8_t
} // namespace arm_gemm
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/mergeresults-sve.cpp b/src/core/NEON/kernels/arm_gemm/mergeresults-sve.cpp
new file mode 100644
index 0000000000..77d86b7dd8
--- /dev/null
+++ b/src/core/NEON/kernels/arm_gemm/mergeresults-sve.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* As some of the merges need these headers, but are all included in the
+ * arm_gemm namespace, put these headers here. */
+#include <algorithm>
+
+#include <arm_neon.h>
+
+#include "arm_gemm.hpp"
+#include "asmlib.hpp"
+#include "utils.hpp"
+
+#include "mergeresults.hpp"
+
+namespace arm_gemm {
+
+#include "merges/list-sve.hpp"
+
+} // namespace arm_gemm \ No newline at end of file
diff --git a/src/core/NEON/kernels/arm_gemm/mergeresults.cpp b/src/core/NEON/kernels/arm_gemm/mergeresults.cpp
index 17566db375..bbfe8f23d9 100644
--- a/src/core/NEON/kernels/arm_gemm/mergeresults.cpp
+++ b/src/core/NEON/kernels/arm_gemm/mergeresults.cpp
@@ -37,9 +37,13 @@ namespace arm_gemm {
template<unsigned int twidth, unsigned int height, bool sve=false, typename Tin, typename Tout>
void MergeResults(Tout * out, const Tin * in, int ldc, int y0, int ymax, int x0, int xmax, const Tout *bias, Activation act, bool append) {
+ // NOTE: The following code is disabled to avoid calling get_vector_length(), so templated MergeResults will not
+ // be correct for SVE cases. This is OK as we have specialisations for all needed SVE cases anyway.
+ //
// For SVE cases, multiply the width up by the vector length.
// Use the *input* type to determine this, since this will be what the kernel operated on.
- const int width = twidth * (sve ? get_vector_length<Tin>() : 1);
+ // const int width = twidth * (sve ? get_vector_length<Tin>() : 1);
+ const int width = twidth;
const int full_y_blocks = (ymax - y0) / height;
const int y_remainder = (ymax - y0) % height;
diff --git a/src/core/NEON/kernels/arm_gemm/merges/list-sve.hpp b/src/core/NEON/kernels/arm_gemm/merges/list-sve.hpp
new file mode 100644
index 0000000000..aded4b3b8c
--- /dev/null
+++ b/src/core/NEON/kernels/arm_gemm/merges/list-sve.hpp
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "sve_merge_fp16_3VLx8.hpp"
+#include "sve_merge_fp32_3VLx8.hpp"
+#include "sve_merge_s32_3VLx8.hpp"
+#include "sve_merge_u32_3VLx8.hpp" \ No newline at end of file
diff --git a/src/core/NEON/kernels/arm_gemm/merges/list.hpp b/src/core/NEON/kernels/arm_gemm/merges/list.hpp
index 825c2fd020..dae874ef94 100644
--- a/src/core/NEON/kernels/arm_gemm/merges/list.hpp
+++ b/src/core/NEON/kernels/arm_gemm/merges/list.hpp
@@ -27,8 +27,4 @@
#include "a64_merge_s32_12x8.hpp"
#include "a64_merge_s32_4x4.hpp"
#include "a64_merge_u32_12x8.hpp"
-#include "a64_merge_u32_4x4.hpp"
-#include "sve_merge_fp16_3VLx8.hpp"
-#include "sve_merge_fp32_3VLx8.hpp"
-#include "sve_merge_s32_3VLx8.hpp"
-#include "sve_merge_u32_3VLx8.hpp"
+#include "a64_merge_u32_4x4.hpp" \ No newline at end of file
diff --git a/src/core/NEON/kernels/arm_gemm/merges/sve_merge_fp16_3VLx8.hpp b/src/core/NEON/kernels/arm_gemm/merges/sve_merge_fp16_3VLx8.hpp
index cf1d10329b..4da32b459c 100644
--- a/src/core/NEON/kernels/arm_gemm/merges/sve_merge_fp16_3VLx8.hpp
+++ b/src/core/NEON/kernels/arm_gemm/merges/sve_merge_fp16_3VLx8.hpp
@@ -23,7 +23,7 @@
*/
#pragma once
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
template<>
void MergeResults<3, 8, true>(__fp16 *out, const __fp16 *in, const int ldout, const int y0, const int ymax, const int x0, const int xmax, const __fp16 *bias, Activation act, bool append)
@@ -1872,4 +1872,4 @@ void MergeResults<3, 8, true>(__fp16 *out, const __fp16 *in, const int ldout, co
}
}
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/merges/sve_merge_fp32_3VLx8.hpp b/src/core/NEON/kernels/arm_gemm/merges/sve_merge_fp32_3VLx8.hpp
index b0d10c085d..5505f1efe4 100644
--- a/src/core/NEON/kernels/arm_gemm/merges/sve_merge_fp32_3VLx8.hpp
+++ b/src/core/NEON/kernels/arm_gemm/merges/sve_merge_fp32_3VLx8.hpp
@@ -23,7 +23,7 @@
*/
#pragma once
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
template<>
void MergeResults<3, 8, true>(float *out, const float *in, const int ldout, const int y0, const int ymax, const int x0, const int xmax, const float *bias, Activation act, bool append)
@@ -1872,4 +1872,4 @@ void MergeResults<3, 8, true>(float *out, const float *in, const int ldout, cons
}
}
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/merges/sve_merge_s32_3VLx8.hpp b/src/core/NEON/kernels/arm_gemm/merges/sve_merge_s32_3VLx8.hpp
index 34b6fe3ef5..c009881254 100644
--- a/src/core/NEON/kernels/arm_gemm/merges/sve_merge_s32_3VLx8.hpp
+++ b/src/core/NEON/kernels/arm_gemm/merges/sve_merge_s32_3VLx8.hpp
@@ -23,7 +23,7 @@
*/
#pragma once
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
template<>
void MergeResults<3, 8, true>(int32_t *out, const int32_t *in, const int ldout, const int y0, const int ymax, const int x0, const int xmax, const int32_t *bias, Activation , bool append)
@@ -1394,4 +1394,4 @@ void MergeResults<3, 8, true>(int32_t *out, const int32_t *in, const int ldout,
}
}
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/merges/sve_merge_u32_3VLx8.hpp b/src/core/NEON/kernels/arm_gemm/merges/sve_merge_u32_3VLx8.hpp
index c4b2bb56d6..e992f6722c 100644
--- a/src/core/NEON/kernels/arm_gemm/merges/sve_merge_u32_3VLx8.hpp
+++ b/src/core/NEON/kernels/arm_gemm/merges/sve_merge_u32_3VLx8.hpp
@@ -23,7 +23,7 @@
*/
#pragma once
-#ifdef __ARM_FEATURE_SVE
+#ifdef ARM_COMPUTE_ENABLE_SVE
template<>
void MergeResults<3, 8, true>(uint32_t *out, const uint32_t *in, const int ldout, const int y0, const int ymax, const int x0, const int xmax, const uint32_t *bias, Activation , bool append)
@@ -1394,4 +1394,4 @@ void MergeResults<3, 8, true>(uint32_t *out, const uint32_t *in, const int ldout
}
}
-#endif // __ARM_FEATURE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/core/NEON/kernels/arm_gemm/utils.hpp b/src/core/NEON/kernels/arm_gemm/utils.hpp
index 1269ef62a6..e648ce2fb5 100644
--- a/src/core/NEON/kernels/arm_gemm/utils.hpp
+++ b/src/core/NEON/kernels/arm_gemm/utils.hpp
@@ -141,52 +141,36 @@ struct IndirectInputArg {
};
namespace utils {
-namespace {
-
-#ifdef __ARM_FEATURE_SVE
-template<size_t sz>
-inline unsigned long get_vector_length_sz() {
- unsigned long v;
-
- __asm (
- "cntb %0"
- : "=r" (v)
- );
-
- return v / sz;
-}
-
-#define VEC_LEN_SPEC(sz, opcode) template <> inline unsigned long get_vector_length_sz<sz>() { unsigned long v; __asm ( opcode " %0" : "=r" (v)); return v; }
-
-VEC_LEN_SPEC(8, "cntd")
-VEC_LEN_SPEC(4, "cntw")
-VEC_LEN_SPEC(2, "cnth")
-VEC_LEN_SPEC(1, "cntb")
-#endif
-
-} // anonymous namespace
-
template <typename T>
inline unsigned long get_vector_length() {
-#ifdef __ARM_FEATURE_SVE
- return get_vector_length_sz<sizeof(T)>();
-#else
+#if defined(ARM_COMPUTE_ENABLE_SVE)
+ uint64_t vl;
+
+ __asm __volatile (
+ ".inst 0x0420e3e0\n" // CNTB X0, ALL, MUL #1
+ "mov %0, X0\n"
+ : "=r" (vl)
+ :
+ : "x0"
+ );
+
+ return vl / sizeof(T);
+#else // !defined(ARM_COMPUTE_ENABLE_SVE)
return 16 / sizeof(T);
-#endif
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
}
template <typename T>
inline unsigned long get_vector_length(VLType vl_type) {
switch (vl_type) {
-#ifdef __ARM_FEATURE_SVE
+#if defined(ARM_COMPUTE_ENABLE_SVE)
case VLType::SVE:
- return get_vector_length_sz<sizeof(T)>();
-#endif
+ return get_vector_length<T>();
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
default:
return 16 / sizeof(T);
}
}
-
} // utils namespace
} // arm_gemm namespace