From 1bd531dec0b4eb745fb8856d14c1aba2b8a73026 Mon Sep 17 00:00:00 2001 From: Tim Hall Date: Sun, 1 Nov 2020 20:59:36 +0000 Subject: MLBEDSW-3249: Vela config file examples - Added sample vela.ini config file - Changed vela config format, split into system config and memory mode - Removed unused CPU cycle performance estimation - Added new CLI options for --memory-mode and --verbose-config - Changed CLI option --config to take multiple files - Removed CLI option --global-memory-clock-scales - Changed error helper functions to raise a VelaError exception - Refactored to create a new is_spilling_enabled function Signed-off-by: Tim Hall Change-Id: I27c41577e37a3859edb9524cd99784be10ef0a0d --- ethosu/vela/architecture_features.py | 331 ++++++++++++++++++++++++----------- 1 file changed, 226 insertions(+), 105 deletions(-) (limited to 'ethosu/vela/architecture_features.py') diff --git a/ethosu/vela/architecture_features.py b/ethosu/vela/architecture_features.py index 9ca4304c..7b6c3bed 100644 --- a/ethosu/vela/architecture_features.py +++ b/ethosu/vela/architecture_features.py @@ -21,7 +21,8 @@ from configparser import ConfigParser import numpy as np -from .errors import OptionError +from .errors import CliOptionError +from .errors import ConfigOptionError from .ethos_u55_regs.ethos_u55_regs import resampling_mode from .numeric_util import full_shape from .numeric_util import round_up @@ -131,6 +132,12 @@ class Accelerator(enum.Enum): return [e.value for e in cls] +@enum.unique +class MemPort(enum.Enum): + Axi0 = enum.auto() + Axi1 = enum.auto() + + class ArchitectureFeatures: """This class is a container for various parameters of the Ethos-U core and system configuration that can be tuned, either by command line @@ -169,26 +176,29 @@ class ArchitectureFeatures: OFMSplitDepth = 16 SubKernelMax = Block(8, 8, 65536) + DEFAULT_CONFIG = "internal-default" + def __init__( self, - vela_config: ConfigParser, + vela_config_files, accelerator_config, system_config, + memory_mode, override_block_config, block_config_limit, - global_memory_clock_scale, max_blockdep, weight_estimation_scaling, + verbose_config, ): accelerator_config = accelerator_config.lower() - self.vela_config = vela_config if accelerator_config not in Accelerator.member_list(): - raise OptionError("--accelerator-config", self.accelerator_config, "Unknown accelerator configuration") + raise CliOptionError("--accelerator-config", self.accelerator_config, "Unknown accelerator configuration") self.accelerator_config = Accelerator(accelerator_config) accel_config = ArchitectureFeatures.accelerator_configs[self.accelerator_config] self.config = accel_config self.system_config = system_config + self.memory_mode = memory_mode self.is_ethos_u65_system = self.accelerator_config in (Accelerator.Ethos_U65_256, Accelerator.Ethos_U65_512) self.max_outstanding_dma = 2 if self.is_ethos_u65_system else 1 @@ -201,14 +211,6 @@ class ArchitectureFeatures: self.override_block_config = override_block_config self.block_config_limit = block_config_limit - self.global_memory_clock_scale = global_memory_clock_scale - if self.global_memory_clock_scale <= 0.0 or self.global_memory_clock_scale > 1.0: - raise Exception( - "Invalid global_memory_clock_scale = " - + str(self.global_memory_clock_scale) - + " (must be > 0.0 and <= 1.0)" - ) - self.max_blockdep = max_blockdep self.weight_estimation_scaling = weight_estimation_scaling @@ -220,20 +222,13 @@ class ArchitectureFeatures: self.num_elem_wise_units = accel_config.elem_units self.num_macs_per_cycle = dpu_min_height * dpu_min_width * dpu_dot_product_width * dpu_min_ofm_channels - self.memory_clock_scales = np.zeros(MemArea.Size) - self.memory_port_widths = np.zeros(MemArea.Size) - - # Get system configuration - self.__read_sys_config(self.is_ethos_u65_system) + # Get system configuration and memory mode + self._get_vela_config(vela_config_files, verbose_config) - # apply the global memory clock scales to the individual ones from the system config - for mem in MemArea.all(): - self.memory_clock_scales[mem] *= self.global_memory_clock_scale + self.axi_port_width = 128 if self.is_ethos_u65_system else 64 + self.memory_bandwidths_per_cycle = self.axi_port_width * self.memory_clock_scales / 8 - self.memory_clocks = self.memory_clock_scales * self.npu_clock - self.memory_bandwidths_per_cycle = self.memory_port_widths * self.memory_clock_scales / 8 - - self.memory_bandwidths_per_second = self.memory_bandwidths_per_cycle * self.npu_clock + self.memory_bandwidths_per_second = self.memory_bandwidths_per_cycle * self.core_clock # Get output/activation performance numbers self._generate_output_perf_tables(self.accelerator_config) @@ -303,7 +298,7 @@ class ArchitectureFeatures: self.cycles_weight = 40 self.max_sram_used_weight = 1000 - if self.is_ethos_u65_system and (self.fast_storage_mem_area != self.feature_map_storage_mem_area): + if self.is_spilling_enabled(): self.max_sram_used_weight = 0 # Shared Buffer Block allocations @@ -582,100 +577,226 @@ class ArchitectureFeatures: return blockdep - def cpu_cycle_estimate(self, op): + def is_spilling_enabled(self): """ - Gets estimated performance of a CPU operation, based on a linear model of intercept, slope, - specified in the vela config file, in ConfigParser file format (.ini file). - Example configuration snippet: - [CpuPerformance.MyOperationType] - Cortex-Mx.intercept= - Cortex-Mx.slope= + Spilling is a feature that allows the Ethos-U to use a dedicated SRAM as a cache for various types of data """ - section = "CpuPerformance." + op.type.name - if self.vela_config is not None and section in self.vela_config: - op_config = self.vela_config[section] - try: - intercept = float(op_config.get(self.cpu_config + ".intercept", op_config["default.intercept"])) - slope = float(op_config.get(self.cpu_config + ".slope", op_config["default.slope"])) - n_elements = op.inputs[0].elements() - cycles = intercept + n_elements * slope - return cycles - except Exception: - print("Error: Reading CPU cycle estimate in vela configuration file, section {}".format(section)) - raise - - print("Warning: No configured CPU performance estimate for", op.type) - return 0 - - def __read_sys_config(self, is_ethos_u65_system): + return ( + self._mem_port_mapping(self.cache_mem_area) == MemArea.Sram and self.cache_mem_area != self.arena_mem_area + ) + + def _mem_port_mapping(self, mem_port): + mem_port_mapping = {MemPort.Axi0: self.axi0_port, MemPort.Axi1: self.axi1_port} + return mem_port_mapping[mem_port] + + def _set_default_sys_config(self): + print(f"Warning: Using {ArchitectureFeatures.DEFAULT_CONFIG} values for system configuration") + # ArchitectureFeatures.DEFAULT_CONFIG values + if self.is_ethos_u65_system: + # Default Ethos-U65 system configuration + # Ethos-U65 Client-Server: SRAM (16 GB/s) and DRAM (12 GB/s) + self.core_clock = 1e9 + self.axi0_port = MemArea.Sram + self.axi1_port = MemArea.Dram + self.memory_clock_scales[MemArea.Sram] = 1.0 + self.memory_clock_scales[MemArea.Dram] = 0.75 # 3 / 4 + else: + # Default Ethos-U55 system configuration + # Ethos-U55 High-End Embedded: SRAM (4 GB/s) and Flash (0.5 GB/s) + self.core_clock = 500e6 + self.axi0_port = MemArea.Sram + self.axi1_port = MemArea.OffChipFlash + self.memory_clock_scales[MemArea.Sram] = 1.0 + self.memory_clock_scales[MemArea.OffChipFlash] = 0.125 # 1 / 8 + + def _set_default_mem_mode(self): + print(f"Warning: Using {ArchitectureFeatures.DEFAULT_CONFIG} values for memory mode") + # ArchitectureFeatures.DEFAULT_CONFIG values + if self.is_ethos_u65_system: + # Default Ethos-U65 memory mode + # Dedicated SRAM: SRAM is only used by the Ethos-U + self.const_mem_area = MemPort.Axi1 + self.arena_mem_area = MemPort.Axi1 + self.cache_mem_area = MemPort.Axi0 + self.cache_sram_size = 384 * 1024 + else: + # Default Ethos-U65 memory mode + self.const_mem_area = MemPort.Axi1 + self.arena_mem_area = MemPort.Axi0 + self.cache_mem_area = MemPort.Axi0 + + def _get_vela_config(self, vela_config_files, verbose_config): """ - Gets the system configuration with the given name from the vela configuration file - Example configuration snippet: - [SysConfig.MyConfigName] - npu_freq= - cpu=Cortex-Mx - ... + Gets the system configuration and memory modes from one or more Vela configuration file(s) or uses some + defaults. """ - # Get system configuration from the vela configuration file - if self.vela_config is None: - print("Warning: Using default values for system configuration") - else: - section_key = "SysConfig." + self.system_config - if section_key not in self.vela_config: - raise OptionError("--system-config", self.system_config, "Unknown system configuration") - try: - self.npu_clock = float(self.__sys_config("npu_freq", "500e6")) - self.cpu_config = self.__sys_config("cpu", "Cortex-M7") + # all properties are optional and are initialised to a value of 1 (or the equivalent) + self.core_clock = 1 + self.axi0_port = MemArea(1) + self.axi1_port = MemArea(1) + self.memory_clock_scales = np.ones(MemArea.Size) + self.const_mem_area = MemPort(1) + self.arena_mem_area = MemPort(1) + self.cache_mem_area = MemPort(1) + self.cache_sram_size = 1 + + # read configuration file(s) + self.vela_config = None + + if vela_config_files is not None: + self.vela_config = ConfigParser() + self.vela_config.read(vela_config_files) + + # read system configuration + sys_cfg_section = "System_Config." + self.system_config + + if self.vela_config is not None and self.vela_config.has_section(sys_cfg_section): + self.core_clock = float(self._read_config(sys_cfg_section, "core_clock", self.core_clock)) + self.axi0_port = MemArea[self._read_config(sys_cfg_section, "axi0_port", self.axi0_port)] + self.axi1_port = MemArea[self._read_config(sys_cfg_section, "axi1_port", self.axi1_port)] + + for mem_area in (self.axi0_port, self.axi1_port): + self.memory_clock_scales[mem_area] = float( + self._read_config( + sys_cfg_section, mem_area.name + "_clock_scale", self.memory_clock_scales[mem_area] + ) + ) - self.memory_clock_scales[MemArea.Sram] = float(self.__sys_config("Sram_clock_scale", "1")) - self.memory_port_widths[MemArea.Sram] = int(self.__sys_config("Sram_port_width", "64")) + elif self.system_config == ArchitectureFeatures.DEFAULT_CONFIG: + self._set_default_sys_config() - self.memory_clock_scales[MemArea.OnChipFlash] = float(self.__sys_config("OnChipFlash_clock_scale", "1")) - self.memory_port_widths[MemArea.OnChipFlash] = int(self.__sys_config("OnChipFlash_port_width", "64")) + elif vela_config_files is None: + raise CliOptionError("--config", vela_config_files, "CLI Option not specified") - self.memory_clock_scales[MemArea.OffChipFlash] = float( - self.__sys_config("OffChipFlash_clock_scale", "0.25") + else: + raise CliOptionError( + "--system-config", + self.system_config, + "Section {} not found in Vela config file".format(sys_cfg_section), ) - self.memory_port_widths[MemArea.OffChipFlash] = int(self.__sys_config("OffChipFlash_port_width", "32")) - self.memory_clock_scales[MemArea.Dram] = float(self.__sys_config("Dram_clock_scale", "1")) - self.memory_port_widths[MemArea.Dram] = int(self.__sys_config("Dram_port_width", "32")) + # read the memory mode + mem_mode_section = "Memory_Mode." + self.memory_mode - self.fast_storage_mem_area = MemArea[self.__sys_config("fast_storage_mem_area", "Sram")] - self.feature_map_storage_mem_area = MemArea[self.__sys_config("feature_map_storage_mem_area", "Sram")] + if self.vela_config is not None and self.vela_config.has_section(mem_mode_section): + self.const_mem_area = MemPort[ + self._read_config(mem_mode_section, "const_mem_area", self.const_mem_area.name) + ] + self.arena_mem_area = MemPort[ + self._read_config(mem_mode_section, "arena_mem_area", self.arena_mem_area.name) + ] + self.cache_mem_area = MemPort[ + self._read_config(mem_mode_section, "cache_mem_area", self.cache_mem_area.name) + ] + self.cache_sram_size = int(self._read_config(mem_mode_section, "cache_sram_size", self.cache_sram_size)) - self.permanent_storage_mem_area = MemArea[self.__sys_config("permanent_storage_mem_area", "OffChipFlash")] - if is_ethos_u65_system: - if self.permanent_storage_mem_area is not MemArea.Dram: - raise Exception( - "Invalid permanent_storage_mem_area = " - + str(self.permanent_storage_mem_area) - + " (must be 'DRAM' for Ethos-U65)." - ) - else: - if self.permanent_storage_mem_area not in set((MemArea.OnChipFlash, MemArea.OffChipFlash)): - raise Exception( - "Invalid permanent_storage_mem_area = " - + str(self.permanent_storage_mem_area) - + " (must be 'OnChipFlash' or 'OffChipFlash' for Ethos-U55)." - " To store the weights and other constant data in SRAM on Ethos-U55 select 'OnChipFlash'" - ) + elif self.memory_mode == ArchitectureFeatures.DEFAULT_CONFIG: + self._set_default_mem_mode() + + elif vela_config_files is None: + raise CliOptionError("--config", vela_config_files, "CLI Option not specified") + + else: + raise CliOptionError( + "--memory-mode", self.memory_mode, "Section {} not found in Vela config file".format(mem_mode_section), + ) - self.sram_size = 1024 * int(self.__sys_config("sram_size_kb", "204800")) + # override sram to onchipflash + if self._mem_port_mapping(self.const_mem_area) == MemArea.Sram: + if self.const_mem_area == self.arena_mem_area == self.cache_mem_area: + print( + "Info: Changing const_mem_area from Sram to OnChipFlash. This will use the same characteristics as" + " Sram." + ) + if self.const_mem_area == MemPort.Axi0: + self.const_mem_area = MemPort.Axi1 + self.axi1_port = MemArea.OnChipFlash + else: + self.const_mem_area = MemPort.Axi0 + self.axi0_port = MemArea.OnChipFlash + self.memory_clock_scales[MemArea.OnChipFlash] = self.memory_clock_scales[MemArea.Sram] + + # check configuration + if self._mem_port_mapping(self.cache_mem_area) != MemArea.Sram: + raise ConfigOptionError("cache_mem_area", self._mem_port_mapping(self.cache_mem_area).name, "Sram") + + if self.is_ethos_u65_system: + if self._mem_port_mapping(self.const_mem_area) not in ( + MemArea.Dram, + MemArea.OnChipFlash, + MemArea.OffChipFlash, + ): + raise ConfigOptionError( + "const_mem_area", + self._mem_port_mapping(self.const_mem_area).name, + "Dram or OnChipFlash or OffChipFlash", + ) - except Exception: - print("Error: Reading System Configuration in vela configuration file, section {}".format(section_key)) - raise + if self._mem_port_mapping(self.arena_mem_area) not in (MemArea.Sram, MemArea.Dram): + raise ConfigOptionError( + "arena_mem_area", self._mem_port_mapping(self.arena_mem_area).name, "Sram or Dram" + ) + else: + if self._mem_port_mapping(self.const_mem_area) not in (MemArea.OnChipFlash, MemArea.OffChipFlash): + raise ConfigOptionError( + "const_mem_area", self._mem_port_mapping(self.const_mem_area).name, "OnChipFlash or OffChipFlash" + ) - def __sys_config(self, key, default_value): + if self._mem_port_mapping(self.arena_mem_area) != MemArea.Sram: + raise ConfigOptionError("arena_mem_area", self._mem_port_mapping(self.arena_mem_area).name, "Sram") + + # assign existing memory areas + self.permanent_storage_mem_area = self._mem_port_mapping(self.const_mem_area) + self.feature_map_storage_mem_area = self._mem_port_mapping(self.arena_mem_area) + self.fast_storage_mem_area = self._mem_port_mapping(self.cache_mem_area) + + self.sram_size = self.cache_sram_size if self.is_spilling_enabled() else 9999 * 1024 * 1024 + + # display the system configuration and memory mode + if verbose_config: + print(f"System Configuration ({self.system_config}):") + print(f" core_clock = {self.core_clock}") + print(f" axi0_port = {self.axi0_port.name}") + print(f" axi1_port = {self.axi1_port.name}") + for mem in (MemArea.Sram, MemArea.Dram, MemArea.OnChipFlash, MemArea.OffChipFlash): + print(f" {mem.name}_clock_scales = {self.memory_clock_scales[mem]}") + + print(f"Memory Mode ({self.memory_mode}):") + print(f" const_mem_area = {self.const_mem_area.name}") + print(f" arena_mem_area = {self.arena_mem_area.name}") + print(f" cache_mem_area = {self.cache_mem_area.name}") + print(f" cache_sram_size = {self.cache_sram_size}") + + print("Architecture Settings:") + print(f" permanent_storage_mem_area = {self.permanent_storage_mem_area.name}") + print(f" feature_map_storage_mem_area = {self.feature_map_storage_mem_area.name}") + print(f" fast_storage_mem_area = {self.fast_storage_mem_area.name}") + print(f" sram_size = {self.sram_size}") + + def _read_config(self, section, key, current_value): """ - Gets the system configuration value with the given key from the vela config file. + Reads a given key from a particular section in the Vela config file. If the section contains the 'inherit' + option then we recurse into the section specified. If inherited sections result in multiple keys for a + particular option then the key from the parent section is used, regardless of the parsing order """ - if self.vela_config is None: - return default_value - section = "SysConfig." + self.system_config - result = self.vela_config[section].get(key, None) - if result is None: - raise Exception("Error: System Configuration Missing key {} in section [{}] ".format(key, section)) + if not self.vela_config.has_section(section): + raise ConfigOptionError( + "section", "{}. The section was not found in the Vela config file(s)".format(section) + ) + + result = str(current_value) + if self.vela_config.has_option(section, "inherit"): + inheritance_section = self.vela_config.get(section, "inherit") + # check for recursion loop + if inheritance_section == section: + raise ConfigOptionError( + "inherit", + "{}. This references its own section and recursion is not allowed".format(inheritance_section), + ) + result = self._read_config(inheritance_section, key, result) + + if self.vela_config.has_option(section, key): + result = self.vela_config.get(section, key) + return result -- cgit v1.2.1