aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Georgios Pinitas <georgios.pinitas@arm.com> 2021-07-27 10:46:13 +0100
committer Georgios Pinitas <georgios.pinitas@arm.com> 2021-07-27 14:06:08 +0000
commit 387f80ed5b4c8ef32cfdba4ea27734dd257f87f0 (patch)
tree 4abcbc7da5119411ea278c30d20b1787b29f8978
parent 5e61cb060133bd2db98225e1649027e5b18c2415 (diff)
download ComputeLibrary-387f80ed5b4c8ef32cfdba4ea27734dd257f87f0.tar.gz
Update GEMM assembly performance parameters
Resolves: COMPMID-4709
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Change-Id: I20913c6153b8b86edf6e83d5f64175dcdf76dab4
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6002
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r-- src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12.hpp 15
-rw-r--r-- src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_4x4.hpp 4
-rw-r--r-- src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12.hpp 7
-rw-r--r-- src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32bf16fp32_mmla_4x6VL.hpp 2
-rw-r--r-- src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32bf16fp32_mmla_6x4VL.hpp 2
5 files changed, 21 insertions, 9 deletions
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12.hpp
index 83ccb4681b..758f2b1f8c 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12.hpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12.hpp
@@ -66,11 +66,14 @@ public:
if (std::is_same<T, int8_t>::value) {
switch (ci->get_cpu_model()) {
case CPUModel::A510:
- return { 19.73, 2.81, 0.27 };
+ return { 19.73, 3.38, 0.27 };
case CPUModel::A55r1:
return { 15.361, 0.9341, 0.1636 };
+ case CPUModel::V1:
+ return { 62.40, 4.71, 0.67 };
+
default:
return { 29.0698, 3.9793, 0.4003 };
}
@@ -79,10 +82,16 @@ public:
if (std::is_same<T, int32_t>::value) {
switch (ci->get_cpu_model()) {
case CPUModel::A510:
- return { 19.73, 3.41, 3.70 };
+ return { 19.73, 3.38, 3.70 };
+
+ case CPUModel::A55r1:
+ return { 14.286, 1.171, 1.209 };
+
+ case CPUModel::V1:
+ return { 61.58, 4.78, 10.83 };
default:
- return { 31.81, 3.68, 8.01 };
+ return { 31.82, 3.51, 8.03 };
}
}
}
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_4x4.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_4x4.hpp
index 07c4769479..45ba8dc423 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_4x4.hpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_4x4.hpp
@@ -69,7 +69,7 @@ public:
template<typename T>
static PerformanceParameters get_performance_parameters(const CPUInfo *ci) {
- if (std::is_same<T, int32_t>::value) {
+ if (std::is_same<T, uint32_t>::value) {
switch (ci->get_cpu_model()) {
case CPUModel::A510:
return { 2.64, 2.72, 2.64 };
@@ -79,7 +79,7 @@ public:
}
}
- if (std::is_same<T, int8_t>::value) {
+ if (std::is_same<T, uint8_t>::value) {
switch(ci->get_cpu_model()) {
case CPUModel::A510:
return { 2.64, 1.79, 0.10 };
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12.hpp
index 0329f57615..21c9f59661 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12.hpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12.hpp
@@ -71,7 +71,7 @@ public:
template<typename T>
static PerformanceParameters get_performance_parameters(const CPUInfo *ci) {
- if (std::is_same<T, int8_t>::value) {
+ if (std::is_same<T, uint8_t>::value) {
switch (ci->get_cpu_model()) {
case CPUModel::A510:
return { 19.73, 3.38, 0.27 };
@@ -87,11 +87,14 @@ public:
}
}
- if (std::is_same<T, int32_t>::value) {
+ if (std::is_same<T, uint32_t>::value) {
switch (ci->get_cpu_model()) {
case CPUModel::A510:
return { 19.73, 3.38, 3.70 };
+ case CPUModel::A55r1:
+ return { 14.286, 1.171, 1.209 };
+
case CPUModel::V1:
return { 61.58, 4.78, 10.83 };
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32bf16fp32_mmla_4x6VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32bf16fp32_mmla_4x6VL.hpp
index 2142f1067d..30e265fbc0 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32bf16fp32_mmla_4x6VL.hpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32bf16fp32_mmla_4x6VL.hpp
@@ -83,7 +83,7 @@ public:
case CPUModel::A510:
return { 5.42 };
case CPUModel::V1:
- return { 28.40 };
+ return { 34.56 };
}
}
diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32bf16fp32_mmla_6x4VL.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32bf16fp32_mmla_6x4VL.hpp
index d941ccc0e9..61c7ad17e7 100644
--- a/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32bf16fp32_mmla_6x4VL.hpp
+++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32bf16fp32_mmla_6x4VL.hpp
@@ -83,7 +83,7 @@ public:
case CPUModel::A510:
return { 5.31 };
case CPUModel::V1:
- return { 26.64 };
+ return { 28.93 };
}
}