about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12.hpp 15
-rw-r--r-- src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_4x4.hpp 4
-rw-r--r-- src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12.hpp 7
-rw-r--r-- src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32bf16fp32_mmla_4x6VL.hpp 2
-rw-r--r-- src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32bf16fp32_mmla_6x4VL.hpp 2
5 files changed, 21 insertions, 9 deletions
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12.hpp
index 83ccb4681b..758f2b1f8c 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12.hpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12.hpp
@@ -66,11 +66,14 @@ public:
if (std::is_same<T, int8_t>::value) {
switch (ci->get_cpu_model()) {
case CPUModel::A510:
- return { 19.73, 2.81, 0.27 };
+ return { 19.73, 3.38, 0.27 };
case CPUModel::A55r1:
return { 15.361, 0.9341, 0.1636 };
+ case CPUModel::V1:
+ return { 62.40, 4.71, 0.67 };
+
default:
return { 29.0698, 3.9793, 0.4003 };
}
@@ -79,10 +82,16 @@ public:
if (std::is_same<T, int32_t>::value) {
switch (ci->get_cpu_model()) {
case CPUModel::A510:
- return { 19.73, 3.41, 3.70 };
+ return { 19.73, 3.38, 3.70 };
+
+ case CPUModel::A55r1:
+ return { 14.286, 1.171, 1.209 };
+
+ case CPUModel::V1:
+ return { 61.58, 4.78, 10.83 };
default:
- return { 31.81, 3.68, 8.01 };
+ return { 31.82, 3.51, 8.03 };
}
}
}
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_4x4.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_4x4.hpp
index 07c4769479..45ba8dc423 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_4x4.hpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_4x4.hpp
@@ -69,7 +69,7 @@ public:
template<typename T>
static PerformanceParameters get_performance_parameters(const CPUInfo *ci) {
- if (std::is_same<T, int32_t>::value) {
+ if (std::is_same<T, uint32_t>::value) {
switch (ci->get_cpu_model()) {
case CPUModel::A510:
return { 2.64, 2.72, 2.64 };
@@ -79,7 +79,7 @@ public:
}
}
- if (std::is_same<T, int8_t>::value) {
+ if (std::is_same<T, uint8_t>::value) {
switch(ci->get_cpu_model()) {
case CPUModel::A510:
return { 2.64, 1.79, 0.10 };
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12.hpp
index 0329f57615..21c9f59661 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12.hpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12.hpp
@@ -71,7 +71,7 @@ public:
template<typename T>
static PerformanceParameters get_performance_parameters(const CPUInfo *ci) {
- if (std::is_same<T, int8_t>::value) {
+ if (std::is_same<T, uint8_t>::value) {
switch (ci->get_cpu_model()) {
case CPUModel::A510:
return { 19.73, 3.38, 0.27 };
@@ -87,11 +87,14 @@ public:
}
}
- if (std::is_same<T, int32_t>::value) {
+ if (std::is_same<T, uint32_t>::value) {
switch (ci->get_cpu_model()) {
case CPUModel::A510:
return { 19.73, 3.38, 3.70 };
+ case CPUModel::A55r1:
+ return { 14.286, 1.171, 1.209 };
+
case CPUModel::V1:
return { 61.58, 4.78, 10.83 };
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32bf16fp32_mmla_4x6VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32bf16fp32_mmla_4x6VL.hpp
index 2142f1067d..30e265fbc0 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32bf16fp32_mmla_4x6VL.hpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32bf16fp32_mmla_4x6VL.hpp
@@ -83,7 +83,7 @@ public:
case CPUModel::A510:
return { 5.42 };
case CPUModel::V1:
- return { 28.40 };
+ return { 34.56 };
}
}
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32bf16fp32_mmla_6x4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32bf16fp32_mmla_6x4VL.hpp
index d941ccc0e9..61c7ad17e7 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32bf16fp32_mmla_6x4VL.hpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32bf16fp32_mmla_6x4VL.hpp
@@ -83,7 +83,7 @@ public:
case CPUModel::A510:
return { 5.31 };
case CPUModel::V1:
- return { 26.64 };
+ return { 28.93 };
}
}