From eb82fd2aa786715c3b6a941dc6d6deac4ce8e2a0 Mon Sep 17 00:00:00 2001 From: Pablo Tello Date: Fri, 23 Feb 2018 13:43:50 +0000 Subject: COMPMID-881: RSH new arm_gemm interface. Change-Id: I1e2a1a77097d8017c274af3f97eba6964f80f5fa Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/122592 Tested-by: Jenkins Reviewed-by: Anthony Barbier --- .../core/NEON/kernels/assembly/newgemm_lib.hpp | 410 +++++++++++++++++++++ 1 file changed, 410 insertions(+) create mode 100644 arm_compute/core/NEON/kernels/assembly/newgemm_lib.hpp (limited to 'arm_compute/core/NEON/kernels/assembly/newgemm_lib.hpp') diff --git a/arm_compute/core/NEON/kernels/assembly/newgemm_lib.hpp b/arm_compute/core/NEON/kernels/assembly/newgemm_lib.hpp new file mode 100644 index 0000000000..b7cc3d773b --- /dev/null +++ b/arm_compute/core/NEON/kernels/assembly/newgemm_lib.hpp @@ -0,0 +1,410 @@ +/* + * Copyright (c) 2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +extern int l1_cache_size; +extern int l2_cache_size; +extern int force_cpu; + +#ifdef __ANDROID__ +inline unsigned long stoul( const std::string& str, std::size_t* pos = 0, int base = 10 ) +{ + char *end; + const unsigned long ret = strtoul( str.c_str(), &end, base); + *pos = end - str.c_str(); + return ret; +} +inline int stoi( const std::string& str, std::size_t* pos = 0, int base = 10 ) +{ + return atoi(str.c_str()); +} +#endif + + +#ifndef BARE_METAL +#include + +/* Get HWCAP bits from asm/hwcap.h */ +#include +#endif /* !BARE_METAL */ + +/* Make sure the bits we care about are defined, just in case asm/hwcap.h is + * out of date (or for bare metal mode) */ +#ifndef HWCAP_ASIMDHP +#define HWCAP_ASIMDHP (1 << 10) +#endif + +#ifndef HWCAP_CPUID +#define HWCAP_CPUID (1 << 11) +#endif + +#ifndef HWCAP_ASIMDDP +#define HWCAP_ASIMDDP (1 << 20) +#endif + +#define CPUINFO_HACK + +//unsigned int get_cpu_impl(); + + +/* CPU models - we only need to detect CPUs we have + * microarchitecture-specific code for. + * + * Architecture features are detected via HWCAPs. + */ +enum class CPUModel { + GENERIC = 0x0001, + A53 = 0x0010, + A55r0 = 0x0011, + A55r1 = 0x0012, +}; + +class CPUInfo +{ +private: + struct PerCPUData { + CPUModel model = CPUModel::GENERIC; + uint32_t midr = 0; + bool model_set = false; + }; + + std::vector _percpu={}; + + bool _cpuid = false; + bool _fp16 = false; + bool _dotprod = false; + + unsigned int L1_cache_size = 32768; + unsigned int L2_cache_size = 262144; + + /* Convert an MIDR register value to a CPUModel enum value. */ + CPUModel midr_to_model(const unsigned int midr) const { + CPUModel model; + + // Unpack variant and CPU ID + int variant = (midr >> 20) & 0xF; + int cpunum = (midr >> 4) & 0xFFF; + + /* Only CPUs we have code paths for are detected. All other CPUs + * can be safely classed as "GENERIC" + */ + + switch(cpunum) { + case 0xd03: + model = CPUModel::A53; + break; + + case 0xd05: + if (variant) { + model = CPUModel::A55r1; + } else { + model = CPUModel::A55r0; + } + break; + + default: + model = CPUModel::GENERIC; + break; + } + + return model; + } + + /* If the CPUID capability is present, MIDR information is provided in + /sys. Use that to populate the CPU model table. */ + void populate_models_cpuid() { + for (unsigned long int i=0; i<_percpu.size(); i++) { + std::stringstream str; + str << "/sys/devices/system/cpu/cpu" << i << "/regs/identification/midr_el1"; + std::ifstream file; + + file.open(str.str(), std::ios::in); + + if (file.is_open()) { + std::string line; + + if (bool(getline(file, line))) { + const unsigned long midr = stoul(line, nullptr, 16); + + _percpu[i].midr = (midr & 0xffffffff); + _percpu[i].model = midr_to_model(_percpu[i].midr); + _percpu[i].model_set = true; + } + } + } + } + + /* If "long-form" cpuinfo is present, parse that to populate models. */ + void populate_models_cpuinfo() { + std::regex proc_regex("^processor.*(\\d+)$"); + std::regex imp_regex("^CPU implementer.*0x(..)$"); + std::regex var_regex("^CPU variant.*0x(.)$"); + std::regex part_regex("^CPU part.*0x(...)$"); + std::regex rev_regex("^CPU revision.*(\\d+)$"); + + std::ifstream file; + file.open("/proc/cpuinfo", std::ios::in); + + if (file.is_open()) { + std::string line; + int midr=0; + int curcpu=-1; + + while(bool(getline(file, line))) { + std::smatch match; + + if (std::regex_match(line, match, proc_regex)) { + std::string id = match[1]; + int newcpu=stoi(id, nullptr, 0); + + if (curcpu >= 0 && midr==0) { + // Matched a new CPU ID without any description of the previous one - looks like old format. + return; + } + + if (curcpu >= 0) { + _percpu[curcpu].midr = midr; + _percpu[curcpu].model = midr_to_model(midr); + _percpu[curcpu].model_set = true; + + printf("CPU %d: %x\n",curcpu,midr); + } + + midr=0; + curcpu=newcpu; + + continue; + } + + if (std::regex_match(line, match, imp_regex)) { + int impv = stoi(match[1], nullptr, 16); + midr |= (impv << 24); + continue; + } + + if (std::regex_match(line, match, var_regex)) { + int varv = stoi(match[1], nullptr, 16); + midr |= (varv << 16); + continue; + } + + if (std::regex_match(line, match, part_regex)) { + int partv = stoi(match[1], nullptr, 16); + midr |= (partv << 4); + continue; + } + + if (std::regex_match(line, match, rev_regex)) { + int regv = stoi(match[1], nullptr, 10); + midr |= (regv); + midr |= (0xf << 16); + continue; + } + } + + if (curcpu >= 0) { + _percpu[curcpu].midr = midr; + _percpu[curcpu].model = midr_to_model(midr); + _percpu[curcpu].model_set = true; + + printf("CPU %d: %x\n",curcpu,midr); + } + } + } + + /* Identify the maximum valid CPUID in the system. This reads + * /sys/devices/system/cpu/present to get the information. */ + int get_max_cpus() { + int max_cpus = 1; + +#ifndef BARE_METAL + std::ifstream CPUspresent; + CPUspresent.open("/sys/devices/system/cpu/present", std::ios::in); + bool success = false; + + if (CPUspresent.is_open()) { + std::string line; + + if (bool(getline(CPUspresent, line))) { + /* The content of this file is a list of ranges or single values, e.g. + * 0-5, or 1-3,5,7 or similar. As we are interested in the + * max valid ID, we just need to find the last valid + * delimiter ('-' or ',') and parse the integer immediately after that. + */ + auto startfrom=line.begin(); + + for (auto i=line.begin(); i cpuid) { + _percpu[cpuid].model = model; + _percpu[cpuid].model_set = true; + } + } + + bool has_fp16() const { + return _fp16; + } + + bool has_dotprod() const { + return _dotprod; + } + + CPUModel get_cpu_model(unsigned long cpuid) const { + if (cpuid < _percpu.size()) { + return _percpu[cpuid].model; + } + + return CPUModel::GENERIC; + } + + CPUModel get_cpu_model() const { +#ifdef BARE_METAL + return get_cpu_model(0); +#else + return get_cpu_model(sched_getcpu()); +#endif + } + + unsigned int get_L1_cache_size() const { + return L1_cache_size; + } + + void set_L1_cache_size(unsigned int size) { + L1_cache_size = size; + } + + unsigned int get_L2_cache_size() const { + return L2_cache_size; + } + + void set_L2_cache_size(unsigned int size) { + L2_cache_size = size; + } +}; + +CPUInfo *get_CPUInfo(); -- cgit v1.2.1