diff options
Diffstat (limited to 'src/cpu/CpuContext.cpp')
-rw-r--r-- | src/cpu/CpuContext.cpp | 108 |
1 file changed, 44 insertions, 64 deletions
diff --git a/src/cpu/CpuContext.cpp b/src/cpu/CpuContext.cpp index 18fa2e7469..b745af8229 100644 --- a/src/cpu/CpuContext.cpp +++ b/src/cpu/CpuContext.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Arm Limited. + * Copyright (c) 2021-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,12 +24,24 @@ #include "src/cpu/CpuContext.h" #include "arm_compute/core/CPP/CPPTypes.h" + #include "src/cpu/CpuQueue.h" #include "src/cpu/CpuTensor.h" #include <cstdlib> +#if !defined(__APPLE__) && !defined(__OpenBSD__) #include <malloc.h> +#if defined(_WIN64) +#define posix_memalign _aligned_realloc +#define posix_memalign_free _aligned_free +#endif // defined(_WIN64) +#endif // !defined(__APPLE__) && !defined(__OpenBSD__) + +#ifndef BARE_METAL +#include <thread> +#endif /* BARE_METAL */ + namespace arm_compute { namespace cpu @@ -50,19 +62,19 @@ void *default_aligned_allocate(void *user_data, size_t size, size_t alignment) { ARM_COMPUTE_UNUSED(user_data); void *ptr = nullptr; -#if defined(BARE_METAL) || defined(__APPLE__) +#if defined(BARE_METAL) size_t rem = size % alignment; size_t real_size = (rem) ? 
(size + alignment - rem) : size; ptr = memalign(alignment, real_size); -#else /* defined(BARE_METAL) || defined(__APPLE__) */ - if(posix_memalign(&ptr, alignment, size) != 0) +#else /* defined(BARE_METAL) */ + if (posix_memalign(&ptr, alignment, size) != 0) { // posix_memalign returns non-zero on failures, the return values will be // - EINVAL: wrong alignment // - ENOMEM: insufficient memory ARM_COMPUTE_LOG_ERROR_ACL("posix_memalign failed, the returned pointer will be invalid"); } -#endif /* defined(BARE_METAL) || defined(__APPLE__) */ +#endif /* defined(BARE_METAL) */ return ptr; } void default_aligned_free(void *user_data, void *ptr) @@ -70,17 +82,13 @@ void default_aligned_free(void *user_data, void *ptr) ARM_COMPUTE_UNUSED(user_data); free(ptr); } -static AclAllocator default_allocator = { &default_allocate, - &default_free, - &default_aligned_allocate, - &default_aligned_free, - nullptr - }; +static AclAllocator default_allocator = {&default_allocate, &default_free, &default_aligned_allocate, + &default_aligned_free, nullptr}; AllocatorWrapper populate_allocator(AclAllocator *external_allocator) { bool is_valid = (external_allocator != nullptr); - if(is_valid) + if (is_valid) { is_valid = is_valid && (external_allocator->alloc != nullptr); is_valid = is_valid && (external_allocator->free != nullptr); @@ -90,66 +98,40 @@ AllocatorWrapper populate_allocator(AclAllocator *external_allocator) return is_valid ? 
AllocatorWrapper(*external_allocator) : AllocatorWrapper(default_allocator); } -CpuCapabilities populate_capabilities_legacy(const CPUInfo &cpu_info) +cpuinfo::CpuIsaInfo populate_capabilities_flags(AclTargetCapabilities external_caps) { - CpuCapabilities caps; + cpuinfo::CpuIsaInfo isa_caps; // Extract SIMD extension - caps.neon = true; -#ifdef SVE2 - caps.sve2 = true; -#endif /* SVE2 */ + isa_caps.neon = external_caps & AclCpuCapabilitiesNeon; + isa_caps.sve = external_caps & AclCpuCapabilitiesSve; + isa_caps.sve2 = external_caps & AclCpuCapabilitiesSve2; + // Extract data-type support - caps.fp16 = cpu_info.has_fp16(); -#ifdef V8P6_BF - caps.bf16 = true; -#endif /* V8P6_BF */ + isa_caps.fp16 = external_caps & AclCpuCapabilitiesFp16; + isa_caps.bf16 = external_caps & AclCpuCapabilitiesBf16; + isa_caps.svebf16 = isa_caps.bf16; // Extract ISA extensions - caps.dot = cpu_info.has_dotprod(); -#ifdef MMLA_FP32 - caps.mmla_fp = true; -#endif /* MMLA_FP32 */ -#ifdef MMLA_INT8 - caps.mmla_int8 = true; -#endif /* MMLA_INT8 */ + isa_caps.dot = external_caps & AclCpuCapabilitiesDot; + isa_caps.i8mm = external_caps & AclCpuCapabilitiesMmlaInt8; + isa_caps.svef32mm = external_caps & AclCpuCapabilitiesMmlaFp; - return caps; + return isa_caps; } -CpuCapabilities populate_capabilities_flags(AclTargetCapabilities external_caps) +CpuCapabilities populate_capabilities(AclTargetCapabilities external_caps, int32_t max_threads) { CpuCapabilities caps; - // Extract SIMD extension - caps.neon = external_caps & AclCpuCapabilitiesNeon; - caps.sve = external_caps & AclCpuCapabilitiesSve; - caps.sve2 = external_caps & AclCpuCapabilitiesSve2; - // Extract data-type support - caps.fp16 = external_caps & AclCpuCapabilitiesFp16; - caps.bf16 = external_caps & AclCpuCapabilitiesBf16; - // Extract ISA extensions - caps.dot = external_caps & AclCpuCapabilitiesDot; - caps.mmla_fp = external_caps & AclCpuCapabilitiesMmlaFp; - caps.mmla_int8 = external_caps & AclCpuCapabilitiesMmlaInt8; - - return 
caps; -} - -CpuCapabilities populate_capabilities(AclTargetCapabilities external_caps, - int32_t max_threads) -{ - // Extract legacy structure - CPUInfo cpu_info; - - CpuCapabilities caps; - if(external_caps != AclCpuCapabilitiesAuto) + // Populate capabilities with system information + caps.cpu_info = cpuinfo::CpuInfo::build(); + if (external_caps != AclCpuCapabilitiesAuto) { - caps = populate_capabilities_flags(external_caps); - } - else - { - caps = populate_capabilities_legacy(cpu_info); + cpuinfo::CpuIsaInfo isa = populate_capabilities_flags(external_caps); + auto cpus = caps.cpu_info.cpus(); + + caps.cpu_info = cpuinfo::CpuInfo(isa, cpus); } // Set max number of threads @@ -165,11 +147,9 @@ CpuCapabilities populate_capabilities(AclTargetCapabilities external_caps, } // namespace CpuContext::CpuContext(const AclContextOptions *options) - : IContext(Target::Cpu), - _allocator(default_allocator), - _caps(populate_capabilities(AclCpuCapabilitiesAuto, -1)) + : IContext(Target::Cpu), _allocator(default_allocator), _caps(populate_capabilities(AclCpuCapabilitiesAuto, -1)) { - if(options != nullptr) + if (options != nullptr) { _allocator = populate_allocator(options->allocator); _caps = populate_capabilities(options->capabilities, options->max_compute_units); @@ -189,7 +169,7 @@ AllocatorWrapper &CpuContext::allocator() ITensorV2 *CpuContext::create_tensor(const AclTensorDescriptor &desc, bool allocate) { CpuTensor *tensor = new CpuTensor(this, desc); - if(tensor != nullptr && allocate) + if (tensor != nullptr && allocate) { tensor->allocate(); } |