diff options
-rw-r--r-- | src/common/cpuinfo/CpuInfo.h | 9 | ||||
-rw-r--r-- | src/common/cpuinfo/CpuIsaInfo.cpp | 117 | ||||
-rw-r--r-- | src/cpu/CpuContext.cpp | 42 |
3 files changed, 49 insertions, 119 deletions
diff --git a/src/common/cpuinfo/CpuInfo.h b/src/common/cpuinfo/CpuInfo.h index c04c9f4ec8..135ff96931 100644 --- a/src/common/cpuinfo/CpuInfo.h +++ b/src/common/cpuinfo/CpuInfo.h @@ -100,6 +100,15 @@ public: return _isa.svef32mm; } + const CpuIsaInfo &isa() const + { + return _isa; + } + const std::vector<CpuModel> &cpus() const + { + return _cpus; + } + CpuModel cpu_model(uint32_t cpuid) const; CpuModel cpu_model() const; uint32_t num_cpus() const; diff --git a/src/common/cpuinfo/CpuIsaInfo.cpp b/src/common/cpuinfo/CpuIsaInfo.cpp index 14466ef4e7..845ad1c4e9 100644 --- a/src/common/cpuinfo/CpuIsaInfo.cpp +++ b/src/common/cpuinfo/CpuIsaInfo.cpp @@ -49,69 +49,36 @@ namespace cpuinfo { namespace { +inline bool is_feature_supported(uint64_t features, uint64_t feature_mask) +{ + return (features & feature_mask); +} + #if defined(__arm__) void decode_hwcaps(CpuIsaInfo &isa, const uint32_t hwcaps, const uint32_t hwcaps2) { ARM_COMPUTE_UNUSED(hwcaps2); - - if(hwcaps & ARM_COMPUTE_CPU_FEATURE_HWCAP_HALF) - { - isa.fp16 = true; - } - if(hwcaps & ARM_COMPUTE_CPU_FEATURE_HWCAP_NEON) - { - isa.neon = true; - } + isa.fp16 = is_feature_supported(hwcaps, ARM_COMPUTE_CPU_FEATURE_HWCAP_HALF); + isa.neon = is_feature_supported(hwcaps, ARM_COMPUTE_CPU_FEATURE_HWCAP_NEON); } #elif defined(__aarch64__) void decode_hwcaps(CpuIsaInfo &isa, const uint32_t hwcaps, const uint32_t hwcaps2) { // High-level SIMD support - if(hwcaps & ARM_COMPUTE_CPU_FEATURE_HWCAP_ASIMD) - { - isa.neon = true; - } - if(hwcaps & ARM_COMPUTE_CPU_FEATURE_HWCAP_SVE) - { - isa.sve = true; - } - if(hwcaps2 & ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVE2) - { - isa.sve2 = true; - } + isa.neon = is_feature_supported(hwcaps, ARM_COMPUTE_CPU_FEATURE_HWCAP_ASIMD); + isa.sve = is_feature_supported(hwcaps, ARM_COMPUTE_CPU_FEATURE_HWCAP_SVE); + isa.sve2 = is_feature_supported(hwcaps2, ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVE2); // Data-type support - const uint32_t fp16_support_mask = ARM_COMPUTE_CPU_FEATURE_HWCAP_FPHP | ARM_COMPUTE_CPU_FEATURE_HWCAP_ASIMDHP; - if(hwcaps & fp16_support_mask) - { - isa.fp16 = true; - } - if(hwcaps2 & ARM_COMPUTE_CPU_FEATURE_HWCAP2_BF16) - { - isa.bf16 = true; - } - if(hwcaps2 & ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVEBF16) - { - isa.svebf16 = true; - } + isa.fp16 = is_feature_supported(hwcaps, ARM_COMPUTE_CPU_FEATURE_HWCAP_FPHP | ARM_COMPUTE_CPU_FEATURE_HWCAP_ASIMDHP); + isa.bf16 = is_feature_supported(hwcaps2, ARM_COMPUTE_CPU_FEATURE_HWCAP2_BF16); + isa.svebf16 = is_feature_supported(hwcaps2, ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVEBF16); // Instruction extensions - if(hwcaps & ARM_COMPUTE_CPU_FEATURE_HWCAP_ASIMDDP) - { - isa.dot = true; - } - if(hwcaps2 & ARM_COMPUTE_CPU_FEATURE_HWCAP2_I8MM) - { - isa.i8mm = true; - } - if(hwcaps2 & ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVEI8MM) - { - isa.svei8mm = true; - } - if(hwcaps2 & ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVEF32MM) - { - isa.svef32mm = true; - } + isa.dot = is_feature_supported(hwcaps, ARM_COMPUTE_CPU_FEATURE_HWCAP_ASIMDDP); + isa.i8mm = is_feature_supported(hwcaps2, ARM_COMPUTE_CPU_FEATURE_HWCAP2_I8MM); + isa.svei8mm = is_feature_supported(hwcaps2, ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVEI8MM); + isa.svef32mm = is_feature_supported(hwcaps2, ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVEF32MM); } #else /* defined(__aarch64__) */ void decode_hwcaps(CpuIsaInfo &isa, const uint32_t hwcaps, const uint32_t hwcaps2) @@ -122,47 +89,25 @@ void decode_hwcaps(CpuIsaInfo &isa, const uint32_t hwcaps, const uint32_t hwcaps void decode_regs(CpuIsaInfo &isa, const uint64_t isar0, const uint64_t isar1, const uint64_t pfr0, const uint64_t svefr0) { - // High-level SIMD support - if((pfr0 >> 32) & 0xf) - { - isa.sve = true; - } - if(svefr0 & 0xf) + auto is_supported = [](uint64_t feature_reg, uint8_t feature_pos) -> bool { - isa.sve2 = true; - } + return ((feature_reg >> feature_pos) & 0xf); + }; + + // High-level SIMD support + isa.sve = is_supported(pfr0, 32); + isa.sve2 = is_supported(svefr0, 0); // Data-type support - if((pfr0 >> 16) & 0xf) - { - isa.fp16 = true; - } - if((isar1 >> 44) & 0xf) - { - isa.bf16 = true; - } - if((svefr0 >> 20) & 0xf) - { - isa.svebf16 = true; - } + isa.fp16 = is_supported(pfr0, 16); + isa.bf16 = is_supported(isar1, 44); + isa.svebf16 = is_supported(svefr0, 20); // Instruction extensions - if((isar0 >> 44) & 0xf) - { - isa.dot = true; - } - if((isar1 >> 48) & 0xf) - { - isa.i8mm = true; - } - if((svefr0 >> 44) & 0xf) - { - isa.svei8mm = true; - } - if((svefr0 >> 52) & 0xf) - { - isa.svef32mm = true; - } + isa.dot = is_supported(isar0, 44); + isa.i8mm = is_supported(isar1, 48); + isa.svei8mm = is_supported(svefr0, 44); + isa.svef32mm = is_supported(svefr0, 52); } /** Handle features from whitelisted models in case of problematic kernels diff --git a/src/cpu/CpuContext.cpp b/src/cpu/CpuContext.cpp index a1c6413c98..1a971a6a16 100644 --- a/src/cpu/CpuContext.cpp +++ b/src/cpu/CpuContext.cpp @@ -90,29 +90,6 @@ AllocatorWrapper populate_allocator(AclAllocator *external_allocator) return is_valid ? AllocatorWrapper(*external_allocator) : AllocatorWrapper(default_allocator); } -cpuinfo::CpuIsaInfo populate_capabilities_legacy(const CPUInfo &cpu_info) -{ - cpuinfo::CpuIsaInfo isa_caps; - - // Extract SIMD extension - isa_caps.neon = true; - isa_caps.sve = cpu_info.has_sve(); - isa_caps.sve2 = cpu_info.has_sve2(); - - // Extract data-type support - isa_caps.fp16 = cpu_info.has_fp16(); - isa_caps.bf16 = cpu_info.has_bf16(); - isa_caps.svebf16 = cpu_info.has_svebf16(); - - // Extract ISA extensions - isa_caps.dot = cpu_info.has_dotprod(); - isa_caps.i8mm = cpu_info.has_i8mm(); - isa_caps.svei8mm = cpu_info.has_svei8mm(); - isa_caps.svef32mm = cpu_info.has_svef32mm(); - - return isa_caps; -} - cpuinfo::CpuIsaInfo populate_capabilities_flags(AclTargetCapabilities external_caps) { cpuinfo::CpuIsaInfo isa_caps; @@ -123,8 +100,9 @@ cpuinfo::CpuIsaInfo populate_capabilities_flags(AclTargetCapabilities external_c isa_caps.sve2 = external_caps & AclCpuCapabilitiesSve2; // Extract data-type support - isa_caps.fp16 = external_caps & AclCpuCapabilitiesFp16; - isa_caps.bf16 = external_caps & AclCpuCapabilitiesBf16; + isa_caps.fp16 = external_caps & AclCpuCapabilitiesFp16; + isa_caps.bf16 = external_caps & AclCpuCapabilitiesBf16; + isa_caps.svebf16 = isa_caps.bf16; // Extract ISA extensions isa_caps.dot = external_caps & AclCpuCapabilitiesDot; @@ -139,17 +117,15 @@ CpuCapabilities populate_capabilities(AclTargetCapabilities external_caps, { CpuCapabilities caps; - // Extract legacy structure - cpuinfo::CpuIsaInfo isa_caps; + // Populate capabilities with system information + caps.cpu_info = cpuinfo::CpuInfo::build(); if(external_caps != AclCpuCapabilitiesAuto) { - isa_caps = populate_capabilities_flags(external_caps); - } - else - { - isa_caps = populate_capabilities_legacy(CPUInfo::get()); + cpuinfo::CpuIsaInfo isa = populate_capabilities_flags(external_caps); + auto cpus = caps.cpu_info.cpus(); + + caps.cpu_info = cpuinfo::CpuInfo(isa, cpus); } - caps.cpu_info = cpuinfo::CpuInfo(isa_caps, {}); // Set max number of threads #if defined(BARE_METAL) |