From 4ee8b1599dbaf7634d25607fa5ac96ba3dc6b0f2 Mon Sep 17 00:00:00 2001
From: Georgios Pinitas <georgios.pinitas@arm.com>
Date: Fri, 16 Jul 2021 16:16:43 +0100
Subject: Update GEMM assembly kernels

- Introduce Fp32 kernels with internal calculations in Bfloat16 when
fast_mode is enabled
- Improve kernel selection heuristics

Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Change-Id: I68a9e7e862b6fd2721b46e0d7cc791091c4ab279
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5965
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
---
 src/common/cpuinfo/CpuModel.cpp | 33 ++++++++++++++++++++++++++-------
 1 file changed, 26 insertions(+), 7 deletions(-)

(limited to 'src/common/cpuinfo/CpuModel.cpp')

diff --git a/src/common/cpuinfo/CpuModel.cpp b/src/common/cpuinfo/CpuModel.cpp
index 9f4d5d1433..2328f62515 100644
--- a/src/common/cpuinfo/CpuModel.cpp
+++ b/src/common/cpuinfo/CpuModel.cpp
@@ -50,8 +50,10 @@ bool model_supports_fp16(CpuModel model)
         case CpuModel::GENERIC_FP16:
         case CpuModel::GENERIC_FP16_DOT:
         case CpuModel::A55r1:
+        case CpuModel::A510:
         case CpuModel::X1:
-        case CpuModel::KLEIN:
+        case CpuModel::V1:
+        case CpuModel::A64FX:
             return true;
         default:
             return false;
@@ -64,8 +66,10 @@ bool model_supports_dot(CpuModel model)
     {
         case CpuModel::GENERIC_FP16_DOT:
         case CpuModel::A55r1:
+        case CpuModel::A510:
         case CpuModel::X1:
-        case CpuModel::KLEIN:
+        case CpuModel::V1:
+        case CpuModel::A64FX:
             return true;
         default:
             return false;
@@ -76,7 +80,9 @@ bool model_supports_sve(CpuModel model)
 {
     switch(model)
     {
-        case CpuModel::KLEIN:
+        case CpuModel::A510:
+        case CpuModel::V1:
+        case CpuModel::A64FX:
             return true;
         default:
             return false;
@@ -92,9 +98,9 @@ CpuModel midr_to_model(uint32_t midr)
     const int variant     = (midr >> 20) & 0xF;
     const int cpunum      = (midr >> 4) & 0xFFF;
 
+    // Only CPUs we have code paths for are detected.  All other CPUs can be safely classed as "GENERIC"
     if(implementer == 0x41) // Arm CPUs
     {
-        // Only CPUs we have code paths for are detected.  All other CPUs can be safely classed as "GENERIC"
         switch(cpunum)
         {
             case 0xd03: // A53
@@ -134,11 +140,26 @@ CpuModel midr_to_model(uint32_t midr)
             case 0xd4a: // E1
                 model = CpuModel::GENERIC_FP16_DOT;
                 break;
+            case 0xd40: // V1
+                model = CpuModel::V1;
+                break;
             case 0xd44: // X1
                 model = CpuModel::X1;
                 break;
             case 0xd46:
-                model = CpuModel::KLEIN;
+                model = CpuModel::A510;
+                break;
+            default:
+                model = CpuModel::GENERIC;
+                break;
+        }
+    }
+    else if(implementer == 0x46)
+    {
+        switch(cpunum)
+        {
+            case 0x001: // A64FX
+                model = CpuModel::A64FX;
                 break;
             default:
                 model = CpuModel::GENERIC;
@@ -147,7 +168,6 @@ CpuModel midr_to_model(uint32_t midr)
     }
     else if(implementer == 0x48)
     {
-        // Only CPUs we have code paths for are detected.  All other CPUs can be safely classed as "GENERIC"
         switch(cpunum)
         {
             case 0xd40: // A76
@@ -160,7 +180,6 @@ CpuModel midr_to_model(uint32_t midr)
     }
     else if(implementer == 0x51)
     {
-        // Only CPUs we have code paths for are detected.  All other CPUs can be safely classed as "GENERIC"
         switch(cpunum)
         {
             case 0x800: // A73
-- 
cgit v1.2.1