aboutsummaryrefslogtreecommitdiff
path: root/ethosu
diff options
context:
space:
mode:
author Tim Hall <tim.hall@arm.com> 2020-11-01 20:59:36 +0000
committer Tim Hall <tim.hall@arm.com> 2020-11-20 12:55:47 +0000
commit 1bd531dec0b4eb745fb8856d14c1aba2b8a73026 (patch)
tree a0265a0accd2395277fe88be27164d09541abc7f /ethosu
parent c8a73868d40cf63380f634baeb51aa7aa993fc0c (diff)
download ethos-u-vela-1bd531dec0b4eb745fb8856d14c1aba2b8a73026.tar.gz
MLBEDSW-3249: Vela config file examples
- Added sample vela.ini config file
- Changed vela config format, split into system config and memory mode
- Removed unused CPU cycle performance estimation
- Added new CLI options for --memory-mode and --verbose-config
- Changed CLI option --config to take multiple files
- Removed CLI option --global-memory-clock-scales
- Changed error helper functions to raise a VelaError exception
- Refactored to create a new is_spilling_enabled function

Signed-off-by: Tim Hall <tim.hall@arm.com>
Change-Id: I27c41577e37a3859edb9524cd99784be10ef0a0d
Diffstat (limited to 'ethosu')
-rw-r--r-- ethosu/vela/architecture_features.py 331
-rw-r--r-- ethosu/vela/compiler_driver.py 35
-rw-r--r-- ethosu/vela/errors.py 59
-rw-r--r-- ethosu/vela/high_level_command_to_npu_op.py 23
-rw-r--r-- ethosu/vela/npu_performance.py 32
-rw-r--r-- ethosu/vela/register_command_stream_generator.py 7
-rw-r--r-- ethosu/vela/scheduler.py 8
-rw-r--r-- ethosu/vela/stats_writer.py 22
-rw-r--r-- ethosu/vela/test/testutil.py 7
-rw-r--r-- ethosu/vela/vela.py 52
10 files changed, 351 insertions, 225 deletions
diff --git a/ethosu/vela/architecture_features.py b/ethosu/vela/architecture_features.py
index 9ca4304..7b6c3be 100644
--- a/ethosu/vela/architecture_features.py
+++ b/ethosu/vela/architecture_features.py
@@ -21,7 +21,8 @@ from configparser import ConfigParser
import numpy as np
-from .errors import OptionError
+from .errors import CliOptionError
+from .errors import ConfigOptionError
from .ethos_u55_regs.ethos_u55_regs import resampling_mode
from .numeric_util import full_shape
from .numeric_util import round_up
@@ -131,6 +132,12 @@ class Accelerator(enum.Enum):
return [e.value for e in cls]
+@enum.unique
+class MemPort(enum.Enum):
+ Axi0 = enum.auto()
+ Axi1 = enum.auto()
+
+
class ArchitectureFeatures:
"""This class is a container for various parameters of the Ethos-U core
and system configuration that can be tuned, either by command line
@@ -169,26 +176,29 @@ class ArchitectureFeatures:
OFMSplitDepth = 16
SubKernelMax = Block(8, 8, 65536)
+ DEFAULT_CONFIG = "internal-default"
+
def __init__(
self,
- vela_config: ConfigParser,
+ vela_config_files,
accelerator_config,
system_config,
+ memory_mode,
override_block_config,
block_config_limit,
- global_memory_clock_scale,
max_blockdep,
weight_estimation_scaling,
+ verbose_config,
):
accelerator_config = accelerator_config.lower()
- self.vela_config = vela_config
if accelerator_config not in Accelerator.member_list():
- raise OptionError("--accelerator-config", self.accelerator_config, "Unknown accelerator configuration")
+ raise CliOptionError("--accelerator-config", self.accelerator_config, "Unknown accelerator configuration")
self.accelerator_config = Accelerator(accelerator_config)
accel_config = ArchitectureFeatures.accelerator_configs[self.accelerator_config]
self.config = accel_config
self.system_config = system_config
+ self.memory_mode = memory_mode
self.is_ethos_u65_system = self.accelerator_config in (Accelerator.Ethos_U65_256, Accelerator.Ethos_U65_512)
self.max_outstanding_dma = 2 if self.is_ethos_u65_system else 1
@@ -201,14 +211,6 @@ class ArchitectureFeatures:
self.override_block_config = override_block_config
self.block_config_limit = block_config_limit
- self.global_memory_clock_scale = global_memory_clock_scale
- if self.global_memory_clock_scale <= 0.0 or self.global_memory_clock_scale > 1.0:
- raise Exception(
- "Invalid global_memory_clock_scale = "
- + str(self.global_memory_clock_scale)
- + " (must be > 0.0 and <= 1.0)"
- )
-
self.max_blockdep = max_blockdep
self.weight_estimation_scaling = weight_estimation_scaling
@@ -220,20 +222,13 @@ class ArchitectureFeatures:
self.num_elem_wise_units = accel_config.elem_units
self.num_macs_per_cycle = dpu_min_height * dpu_min_width * dpu_dot_product_width * dpu_min_ofm_channels
- self.memory_clock_scales = np.zeros(MemArea.Size)
- self.memory_port_widths = np.zeros(MemArea.Size)
-
- # Get system configuration
- self.__read_sys_config(self.is_ethos_u65_system)
+ # Get system configuration and memory mode
+ self._get_vela_config(vela_config_files, verbose_config)
- # apply the global memory clock scales to the individual ones from the system config
- for mem in MemArea.all():
- self.memory_clock_scales[mem] *= self.global_memory_clock_scale
+ self.axi_port_width = 128 if self.is_ethos_u65_system else 64
+ self.memory_bandwidths_per_cycle = self.axi_port_width * self.memory_clock_scales / 8
- self.memory_clocks = self.memory_clock_scales * self.npu_clock
- self.memory_bandwidths_per_cycle = self.memory_port_widths * self.memory_clock_scales / 8
-
- self.memory_bandwidths_per_second = self.memory_bandwidths_per_cycle * self.npu_clock
+ self.memory_bandwidths_per_second = self.memory_bandwidths_per_cycle * self.core_clock
# Get output/activation performance numbers
self._generate_output_perf_tables(self.accelerator_config)
@@ -303,7 +298,7 @@ class ArchitectureFeatures:
self.cycles_weight = 40
self.max_sram_used_weight = 1000
- if self.is_ethos_u65_system and (self.fast_storage_mem_area != self.feature_map_storage_mem_area):
+ if self.is_spilling_enabled():
self.max_sram_used_weight = 0
# Shared Buffer Block allocations
@@ -582,100 +577,226 @@ class ArchitectureFeatures:
return blockdep
- def cpu_cycle_estimate(self, op):
+ def is_spilling_enabled(self):
"""
- Gets estimated performance of a CPU operation, based on a linear model of intercept, slope,
- specified in the vela config file, in ConfigParser file format (.ini file).
- Example configuration snippet:
- [CpuPerformance.MyOperationType]
- Cortex-Mx.intercept=<some float value>
- Cortex-Mx.slope=<some float value>
+ Spilling is a feature that allows the Ethos-U to use a dedicated SRAM as a cache for various types of data
"""
- section = "CpuPerformance." + op.type.name
- if self.vela_config is not None and section in self.vela_config:
- op_config = self.vela_config[section]
- try:
- intercept = float(op_config.get(self.cpu_config + ".intercept", op_config["default.intercept"]))
- slope = float(op_config.get(self.cpu_config + ".slope", op_config["default.slope"]))
- n_elements = op.inputs[0].elements()
- cycles = intercept + n_elements * slope
- return cycles
- except Exception:
- print("Error: Reading CPU cycle estimate in vela configuration file, section {}".format(section))
- raise
-
- print("Warning: No configured CPU performance estimate for", op.type)
- return 0
-
- def __read_sys_config(self, is_ethos_u65_system):
+ return (
+ self._mem_port_mapping(self.cache_mem_area) == MemArea.Sram and self.cache_mem_area != self.arena_mem_area
+ )
+
+ def _mem_port_mapping(self, mem_port):
+ mem_port_mapping = {MemPort.Axi0: self.axi0_port, MemPort.Axi1: self.axi1_port}
+ return mem_port_mapping[mem_port]
+
+ def _set_default_sys_config(self):
+ print(f"Warning: Using {ArchitectureFeatures.DEFAULT_CONFIG} values for system configuration")
+ # ArchitectureFeatures.DEFAULT_CONFIG values
+ if self.is_ethos_u65_system:
+ # Default Ethos-U65 system configuration
+ # Ethos-U65 Client-Server: SRAM (16 GB/s) and DRAM (12 GB/s)
+ self.core_clock = 1e9
+ self.axi0_port = MemArea.Sram
+ self.axi1_port = MemArea.Dram
+ self.memory_clock_scales[MemArea.Sram] = 1.0
+ self.memory_clock_scales[MemArea.Dram] = 0.75 # 3 / 4
+ else:
+ # Default Ethos-U55 system configuration
+ # Ethos-U55 High-End Embedded: SRAM (4 GB/s) and Flash (0.5 GB/s)
+ self.core_clock = 500e6
+ self.axi0_port = MemArea.Sram
+ self.axi1_port = MemArea.OffChipFlash
+ self.memory_clock_scales[MemArea.Sram] = 1.0
+ self.memory_clock_scales[MemArea.OffChipFlash] = 0.125 # 1 / 8
+
+ def _set_default_mem_mode(self):
+ print(f"Warning: Using {ArchitectureFeatures.DEFAULT_CONFIG} values for memory mode")
+ # ArchitectureFeatures.DEFAULT_CONFIG values
+ if self.is_ethos_u65_system:
+ # Default Ethos-U65 memory mode
+ # Dedicated SRAM: SRAM is only used by the Ethos-U
+ self.const_mem_area = MemPort.Axi1
+ self.arena_mem_area = MemPort.Axi1
+ self.cache_mem_area = MemPort.Axi0
+ self.cache_sram_size = 384 * 1024
+ else:
+ # Default Ethos-U55 memory mode
+ self.const_mem_area = MemPort.Axi1
+ self.arena_mem_area = MemPort.Axi0
+ self.cache_mem_area = MemPort.Axi0
+
+ def _get_vela_config(self, vela_config_files, verbose_config):
"""
- Gets the system configuration with the given name from the vela configuration file
- Example configuration snippet:
- [SysConfig.MyConfigName]
- npu_freq=<some float value>
- cpu=Cortex-Mx
- ...
+ Gets the system configuration and memory modes from one or more Vela configuration file(s) or uses some
+ defaults.
"""
- # Get system configuration from the vela configuration file
- if self.vela_config is None:
- print("Warning: Using default values for system configuration")
- else:
- section_key = "SysConfig." + self.system_config
- if section_key not in self.vela_config:
- raise OptionError("--system-config", self.system_config, "Unknown system configuration")
- try:
- self.npu_clock = float(self.__sys_config("npu_freq", "500e6"))
- self.cpu_config = self.__sys_config("cpu", "Cortex-M7")
+ # all properties are optional and are initialised to a value of 1 (or the equivalent)
+ self.core_clock = 1
+ self.axi0_port = MemArea(1)
+ self.axi1_port = MemArea(1)
+ self.memory_clock_scales = np.ones(MemArea.Size)
+ self.const_mem_area = MemPort(1)
+ self.arena_mem_area = MemPort(1)
+ self.cache_mem_area = MemPort(1)
+ self.cache_sram_size = 1
+
+ # read configuration file(s)
+ self.vela_config = None
+
+ if vela_config_files is not None:
+ self.vela_config = ConfigParser()
+ self.vela_config.read(vela_config_files)
+
+ # read system configuration
+ sys_cfg_section = "System_Config." + self.system_config
+
+ if self.vela_config is not None and self.vela_config.has_section(sys_cfg_section):
+ self.core_clock = float(self._read_config(sys_cfg_section, "core_clock", self.core_clock))
+ self.axi0_port = MemArea[self._read_config(sys_cfg_section, "axi0_port", self.axi0_port)]
+ self.axi1_port = MemArea[self._read_config(sys_cfg_section, "axi1_port", self.axi1_port)]
+
+ for mem_area in (self.axi0_port, self.axi1_port):
+ self.memory_clock_scales[mem_area] = float(
+ self._read_config(
+ sys_cfg_section, mem_area.name + "_clock_scale", self.memory_clock_scales[mem_area]
+ )
+ )
- self.memory_clock_scales[MemArea.Sram] = float(self.__sys_config("Sram_clock_scale", "1"))
- self.memory_port_widths[MemArea.Sram] = int(self.__sys_config("Sram_port_width", "64"))
+ elif self.system_config == ArchitectureFeatures.DEFAULT_CONFIG:
+ self._set_default_sys_config()
- self.memory_clock_scales[MemArea.OnChipFlash] = float(self.__sys_config("OnChipFlash_clock_scale", "1"))
- self.memory_port_widths[MemArea.OnChipFlash] = int(self.__sys_config("OnChipFlash_port_width", "64"))
+ elif vela_config_files is None:
+ raise CliOptionError("--config", vela_config_files, "CLI Option not specified")
- self.memory_clock_scales[MemArea.OffChipFlash] = float(
- self.__sys_config("OffChipFlash_clock_scale", "0.25")
+ else:
+ raise CliOptionError(
+ "--system-config",
+ self.system_config,
+ "Section {} not found in Vela config file".format(sys_cfg_section),
)
- self.memory_port_widths[MemArea.OffChipFlash] = int(self.__sys_config("OffChipFlash_port_width", "32"))
- self.memory_clock_scales[MemArea.Dram] = float(self.__sys_config("Dram_clock_scale", "1"))
- self.memory_port_widths[MemArea.Dram] = int(self.__sys_config("Dram_port_width", "32"))
+ # read the memory mode
+ mem_mode_section = "Memory_Mode." + self.memory_mode
- self.fast_storage_mem_area = MemArea[self.__sys_config("fast_storage_mem_area", "Sram")]
- self.feature_map_storage_mem_area = MemArea[self.__sys_config("feature_map_storage_mem_area", "Sram")]
+ if self.vela_config is not None and self.vela_config.has_section(mem_mode_section):
+ self.const_mem_area = MemPort[
+ self._read_config(mem_mode_section, "const_mem_area", self.const_mem_area.name)
+ ]
+ self.arena_mem_area = MemPort[
+ self._read_config(mem_mode_section, "arena_mem_area", self.arena_mem_area.name)
+ ]
+ self.cache_mem_area = MemPort[
+ self._read_config(mem_mode_section, "cache_mem_area", self.cache_mem_area.name)
+ ]
+ self.cache_sram_size = int(self._read_config(mem_mode_section, "cache_sram_size", self.cache_sram_size))
- self.permanent_storage_mem_area = MemArea[self.__sys_config("permanent_storage_mem_area", "OffChipFlash")]
- if is_ethos_u65_system:
- if self.permanent_storage_mem_area is not MemArea.Dram:
- raise Exception(
- "Invalid permanent_storage_mem_area = "
- + str(self.permanent_storage_mem_area)
- + " (must be 'DRAM' for Ethos-U65)."
- )
- else:
- if self.permanent_storage_mem_area not in set((MemArea.OnChipFlash, MemArea.OffChipFlash)):
- raise Exception(
- "Invalid permanent_storage_mem_area = "
- + str(self.permanent_storage_mem_area)
- + " (must be 'OnChipFlash' or 'OffChipFlash' for Ethos-U55)."
- " To store the weights and other constant data in SRAM on Ethos-U55 select 'OnChipFlash'"
- )
+ elif self.memory_mode == ArchitectureFeatures.DEFAULT_CONFIG:
+ self._set_default_mem_mode()
+
+ elif vela_config_files is None:
+ raise CliOptionError("--config", vela_config_files, "CLI Option not specified")
+
+ else:
+ raise CliOptionError(
+ "--memory-mode", self.memory_mode, "Section {} not found in Vela config file".format(mem_mode_section),
+ )
- self.sram_size = 1024 * int(self.__sys_config("sram_size_kb", "204800"))
+ # override sram to onchipflash
+ if self._mem_port_mapping(self.const_mem_area) == MemArea.Sram:
+ if self.const_mem_area == self.arena_mem_area == self.cache_mem_area:
+ print(
+ "Info: Changing const_mem_area from Sram to OnChipFlash. This will use the same characteristics as"
+ " Sram."
+ )
+ if self.const_mem_area == MemPort.Axi0:
+ self.const_mem_area = MemPort.Axi1
+ self.axi1_port = MemArea.OnChipFlash
+ else:
+ self.const_mem_area = MemPort.Axi0
+ self.axi0_port = MemArea.OnChipFlash
+ self.memory_clock_scales[MemArea.OnChipFlash] = self.memory_clock_scales[MemArea.Sram]
+
+ # check configuration
+ if self._mem_port_mapping(self.cache_mem_area) != MemArea.Sram:
+ raise ConfigOptionError("cache_mem_area", self._mem_port_mapping(self.cache_mem_area).name, "Sram")
+
+ if self.is_ethos_u65_system:
+ if self._mem_port_mapping(self.const_mem_area) not in (
+ MemArea.Dram,
+ MemArea.OnChipFlash,
+ MemArea.OffChipFlash,
+ ):
+ raise ConfigOptionError(
+ "const_mem_area",
+ self._mem_port_mapping(self.const_mem_area).name,
+ "Dram or OnChipFlash or OffChipFlash",
+ )
- except Exception:
- print("Error: Reading System Configuration in vela configuration file, section {}".format(section_key))
- raise
+ if self._mem_port_mapping(self.arena_mem_area) not in (MemArea.Sram, MemArea.Dram):
+ raise ConfigOptionError(
+ "arena_mem_area", self._mem_port_mapping(self.arena_mem_area).name, "Sram or Dram"
+ )
+ else:
+ if self._mem_port_mapping(self.const_mem_area) not in (MemArea.OnChipFlash, MemArea.OffChipFlash):
+ raise ConfigOptionError(
+ "const_mem_area", self._mem_port_mapping(self.const_mem_area).name, "OnChipFlash or OffChipFlash"
+ )
- def __sys_config(self, key, default_value):
+ if self._mem_port_mapping(self.arena_mem_area) != MemArea.Sram:
+ raise ConfigOptionError("arena_mem_area", self._mem_port_mapping(self.arena_mem_area).name, "Sram")
+
+ # assign existing memory areas
+ self.permanent_storage_mem_area = self._mem_port_mapping(self.const_mem_area)
+ self.feature_map_storage_mem_area = self._mem_port_mapping(self.arena_mem_area)
+ self.fast_storage_mem_area = self._mem_port_mapping(self.cache_mem_area)
+
+ self.sram_size = self.cache_sram_size if self.is_spilling_enabled() else 9999 * 1024 * 1024
+
+ # display the system configuration and memory mode
+ if verbose_config:
+ print(f"System Configuration ({self.system_config}):")
+ print(f" core_clock = {self.core_clock}")
+ print(f" axi0_port = {self.axi0_port.name}")
+ print(f" axi1_port = {self.axi1_port.name}")
+ for mem in (MemArea.Sram, MemArea.Dram, MemArea.OnChipFlash, MemArea.OffChipFlash):
+ print(f" {mem.name}_clock_scales = {self.memory_clock_scales[mem]}")
+
+ print(f"Memory Mode ({self.memory_mode}):")
+ print(f" const_mem_area = {self.const_mem_area.name}")
+ print(f" arena_mem_area = {self.arena_mem_area.name}")
+ print(f" cache_mem_area = {self.cache_mem_area.name}")
+ print(f" cache_sram_size = {self.cache_sram_size}")
+
+ print("Architecture Settings:")
+ print(f" permanent_storage_mem_area = {self.permanent_storage_mem_area.name}")
+ print(f" feature_map_storage_mem_area = {self.feature_map_storage_mem_area.name}")
+ print(f" fast_storage_mem_area = {self.fast_storage_mem_area.name}")
+ print(f" sram_size = {self.sram_size}")
+
+ def _read_config(self, section, key, current_value):
"""
- Gets the system configuration value with the given key from the vela config file.
+ Reads a given key from a particular section in the Vela config file. If the section contains the 'inherit'
+ option then we recurse into the section specified. If inherited sections result in multiple keys for a
+ particular option then the key from the parent section is used, regardless of the parsing order
"""
- if self.vela_config is None:
- return default_value
- section = "SysConfig." + self.system_config
- result = self.vela_config[section].get(key, None)
- if result is None:
- raise Exception("Error: System Configuration Missing key {} in section [{}] ".format(key, section))
+ if not self.vela_config.has_section(section):
+ raise ConfigOptionError(
+ "section", "{}. The section was not found in the Vela config file(s)".format(section)
+ )
+
+ result = str(current_value)
+ if self.vela_config.has_option(section, "inherit"):
+ inheritance_section = self.vela_config.get(section, "inherit")
+ # check for recursion loop
+ if inheritance_section == section:
+ raise ConfigOptionError(
+ "inherit",
+ "{}. This references its own section and recursion is not allowed".format(inheritance_section),
+ )
+ result = self._read_config(inheritance_section, key, result)
+
+ if self.vela_config.has_option(section, key):
+ result = self.vela_config.get(section, key)
+
return result
diff --git a/ethosu/vela/compiler_driver.py b/ethosu/vela/compiler_driver.py
index 9e1cb3a..0739133 100644
--- a/ethosu/vela/compiler_driver.py
+++ b/ethosu/vela/compiler_driver.py
@@ -225,31 +225,18 @@ def compiler_driver(nng, arch, options, scheduler_options):
root_sg = nng.get_root_subgraph()
alloc_list = []
- feature_maps_in_fast_storage = arch.feature_map_storage_mem_area == arch.fast_storage_mem_area
- if feature_maps_in_fast_storage:
- mem_alloc_scratch = (arch.feature_map_storage_mem_area, set((MemType.Scratch, MemType.Scratch_fast)))
- alloc_list.append(mem_alloc_scratch)
- else:
+ if arch.is_spilling_enabled():
mem_alloc_scratch_fast = (arch.fast_storage_mem_area, set((MemType.Scratch_fast,)))
mem_alloc_scratch = (arch.feature_map_storage_mem_area, set((MemType.Scratch,)))
# Order is important
alloc_list.append(mem_alloc_scratch_fast)
alloc_list.append(mem_alloc_scratch)
+ else:
+ mem_alloc_scratch = (arch.feature_map_storage_mem_area, set((MemType.Scratch, MemType.Scratch_fast)))
+ alloc_list.append(mem_alloc_scratch)
for mem_area, mem_type_set in alloc_list:
- if feature_maps_in_fast_storage or mem_area != arch.fast_storage_mem_area:
- tensor_allocation.allocate_tensors(
- nng,
- root_sg,
- arch,
- mem_area,
- mem_type_set,
- tensor_allocator=options.tensor_allocator,
- verbose_allocation=options.verbose_allocation,
- show_minimum_possible_allocation=options.show_minimum_possible_allocation,
- allocation_alignment=options.allocation_alignment,
- )
- else:
+ if arch.is_spilling_enabled() and mem_area == arch.fast_storage_mem_area:
# For the case where scratch_fast != scratch: attempt to place feature maps used between
# cascaded passes in fast storage. Bisection is used to find the max possible usage of SRAM.
alloc_results = []
@@ -285,6 +272,18 @@ def compiler_driver(nng, arch, options, scheduler_options):
"Increasing the value of --weight-estimation-scaling may help to resolve the issue. "
"See OPTIONS.md for more information.".format(arch.sram_size)
)
+ else:
+ tensor_allocation.allocate_tensors(
+ nng,
+ root_sg,
+ arch,
+ mem_area,
+ mem_type_set,
+ tensor_allocator=options.tensor_allocator,
+ verbose_allocation=options.verbose_allocation,
+ show_minimum_possible_allocation=options.show_minimum_possible_allocation,
+ allocation_alignment=options.allocation_alignment,
+ )
# Generate command streams and serialise Npu-ops into tensors
for sg in nng.subgraphs:
diff --git a/ethosu/vela/errors.py b/ethosu/vela/errors.py
index 1a30d54..2a635d0 100644
--- a/ethosu/vela/errors.py
+++ b/ethosu/vela/errors.py
@@ -15,8 +15,6 @@
# limitations under the License.
# Description:
# Defines custom exceptions.
-import sys
-
from .operation import Operation
from .tensor import Tensor
@@ -25,31 +23,52 @@ class VelaError(Exception):
"""Base class for vela exceptions"""
def __init__(self, data):
- self.data = data
+ self.data = "Error: " + data
def __str__(self):
return repr(self.data)
class InputFileError(VelaError):
- """Raised when reading the input file results in errors"""
+ """Raised when reading an input file results in errors"""
def __init__(self, file_name, msg):
- self.data = "Error reading input file {}: {}".format(file_name, msg)
+ self.data = "Reading input file {}: {}".format(file_name, msg)
class UnsupportedFeatureError(VelaError):
- """Raised when the input file uses non-supported features that cannot be handled"""
+ """Raised when the input network uses non-supported features that cannot be handled"""
def __init__(self, data):
- self.data = "The input file uses a feature that is currently not supported: {}".format(data)
+ self.data = "Input network uses a feature that is currently not supported: {}".format(data)
+
+class CliOptionError(VelaError):
+ """Raised for errors encountered with a command line option
-class OptionError(VelaError):
- """Raised when an incorrect command line option is used"""
+ :param option: str object that contains the name of the command line option
+ :param option_value: the command line option that resulted in the error
+ :param msg: str object that contains a description of the specific error encountered
+ """
def __init__(self, option, option_value, msg):
- self.data = "Incorrect argument to CLI option: {} {}: {}".format(option, option_value, msg)
+ self.data = "Incorrect argument to CLI option: {} = {}: {}".format(option, option_value, msg)
+
+
+class ConfigOptionError(VelaError):
+ """Raised for errors encountered with a configuration option
+
+ :param option: str object that contains the name of the configuration option
+ :param option_value: the configuration option that resulted in the error
+ :param option_valid_values (optional): str object that contains the valid configuration option values
+ """
+
+ def __init__(self, option, option_value, option_valid_values=None):
+ self.data = "Invalid configuration of {} = {}".format(option, option_value)
+ if option_valid_values is not None:
+ self.data += " (must be {}).".format(option_valid_values)
+ else:
+ self.data += "."
class AllocationError(VelaError):
@@ -60,7 +79,12 @@ class AllocationError(VelaError):
def OperatorError(op, msg):
- """Called when parsing an operator results in errors"""
+ """
+ Raises a VelaError exception for errors encountered when parsing an Operation
+
+ :param op: Operation object that resulted in the error
+ :param msg: str object that contains a description of the specific error encountered
+ """
assert isinstance(op, Operation)
@@ -91,12 +115,16 @@ def OperatorError(op, msg):
data = data[:-1] # remove last newline
- print("Error: {}".format(data))
- sys.exit(1)
+ raise VelaError(data)
def TensorError(tens, msg):
- """Called when parsing a tensor results in errors"""
+ """
+ Raises a VelaError exception for errors encountered when parsing a Tensor
+
+ :param tens: Tensor object that resulted in the error
+ :param msg: str object that contains a description of the specific error encountered
+ """
assert isinstance(tens, Tensor)
@@ -126,5 +154,4 @@ def TensorError(tens, msg):
data = data[:-1] # remove last newline
- print("Error: {}".format(data))
- sys.exit(1)
+ raise VelaError(data)
diff --git a/ethosu/vela/high_level_command_to_npu_op.py b/ethosu/vela/high_level_command_to_npu_op.py
index f786444..efd8a03 100644
--- a/ethosu/vela/high_level_command_to_npu_op.py
+++ b/ethosu/vela/high_level_command_to_npu_op.py
@@ -171,20 +171,17 @@ def create_padding(cmd: NpuStripe, primary_op: Operation) -> NpuPadding:
def get_region(tens: Tensor, arch: ArchitectureFeatures) -> int:
- if arch.feature_map_storage_mem_area == arch.fast_storage_mem_area:
- base_ptr_idx_map = {
- MemType.Permanent_NPU: BasePointerIndex.WeightTensor,
- MemType.Permanent_CPU: BasePointerIndex.WeightTensor,
- MemType.Scratch: BasePointerIndex.ScratchTensor,
- MemType.Scratch_fast: BasePointerIndex.ScratchTensor,
- }
+ base_ptr_idx_map = {
+ MemType.Permanent_NPU: BasePointerIndex.WeightTensor,
+ MemType.Permanent_CPU: BasePointerIndex.WeightTensor,
+ MemType.Scratch: BasePointerIndex.ScratchTensor,
+ }
+
+ if arch.is_spilling_enabled():
+ base_ptr_idx_map[MemType.Scratch_fast] = BasePointerIndex.ScratchFastTensor
else:
- base_ptr_idx_map = {
- MemType.Permanent_NPU: BasePointerIndex.WeightTensor,
- MemType.Permanent_CPU: BasePointerIndex.WeightTensor,
- MemType.Scratch: BasePointerIndex.ScratchTensor,
- MemType.Scratch_fast: BasePointerIndex.ScratchFastTensor,
- }
+ base_ptr_idx_map[MemType.Scratch_fast] = BasePointerIndex.ScratchTensor
+
return int(base_ptr_idx_map[tens.mem_type])
diff --git a/ethosu/vela/npu_performance.py b/ethosu/vela/npu_performance.py
index 29e0df9..d1be5a5 100644
--- a/ethosu/vela/npu_performance.py
+++ b/ethosu/vela/npu_performance.py
@@ -60,7 +60,6 @@ def rolling_buffer_dims_from_passes(arch, ps1, block_config_ps1, ps2, block_conf
class PassCycles(IntEnum):
Npu = 0
- Cpu = auto()
SramAccess = auto()
DramAccess = auto()
OnChipFlashAccess = auto()
@@ -69,34 +68,19 @@ class PassCycles(IntEnum):
Size = auto()
def display_name(self):
- return (
- "NPU",
- "CPU",
- "SRAM Access",
- "DRAM Access",
- "On-chip Flash Access",
- "Off-chip Flash Access",
- "Total",
- "Size",
- )[self.value]
+ return ("NPU", "SRAM Access", "DRAM Access", "On-chip Flash Access", "Off-chip Flash Access", "Total", "Size",)[
+ self.value
+ ]
def identifier_name(self):
- return (
- "npu",
- "cpu",
- "sram_access",
- "dram_access",
- "on_chip_flash_access",
- "off_chip_flash_access",
- "total",
- "size",
- )[self.value]
+ return ("npu", "sram_access", "dram_access", "on_chip_flash_access", "off_chip_flash_access", "total", "size",)[
+ self.value
+ ]
@staticmethod
def all():
return (
PassCycles.Npu,
- PassCycles.Cpu,
PassCycles.SramAccess,
PassCycles.DramAccess,
PassCycles.OnChipFlashAccess,
@@ -460,9 +444,7 @@ def performance_metrics_for_pass(arch, ps, block_config=None, rewrite_list=[], f
ofm_block = Block(block_config[1], block_config[0], block_config[3])
ifm_block = Block(block_config[1], block_config[0], block_config[3])
- if ps.placement == PassPlacement.Cpu:
- cycles[PassCycles.Cpu] = arch.cpu_cycle_estimate(ps.ops[0])
- elif primary_op:
+ if ps.placement == PassPlacement.Npu and primary_op:
skirt = primary_op.attrs.get("skirt", skirt)
explicit_padding = primary_op.attrs.get("explicit_padding", explicit_padding)
assert primary_op.type.npu_block_type == ps.npu_block_type
diff --git a/ethosu/vela/register_command_stream_generator.py b/ethosu/vela/register_command_stream_generator.py
index dd63d2e..e612c30 100644
--- a/ethosu/vela/register_command_stream_generator.py
+++ b/ethosu/vela/register_command_stream_generator.py
@@ -1281,14 +1281,15 @@ def generate_register_command_stream(npu_op_list: List[NpuOperation], accelerato
"""
emit = CommandStreamEmitter()
arch = ArchitectureFeatures(
- vela_config=None,
- system_config=None,
+ vela_config_files=None,
accelerator_config=accelerator.value,
+ system_config=ArchitectureFeatures.DEFAULT_CONFIG,
+ memory_mode=ArchitectureFeatures.DEFAULT_CONFIG,
override_block_config=None,
block_config_limit=None,
- global_memory_clock_scale=1.0,
max_blockdep=ArchitectureFeatures.MAX_BLOCKDEP,
weight_estimation_scaling=1.0,
+ verbose_config=False,
)
generate_command_stream(emit, npu_op_list, arch)
return emit.to_list()
diff --git a/ethosu/vela/scheduler.py b/ethosu/vela/scheduler.py
index 4af83a1..889bd06 100644
--- a/ethosu/vela/scheduler.py
+++ b/ethosu/vela/scheduler.py
@@ -249,10 +249,6 @@ class DynamicProgrammingScheduler:
self.n_combinations_searched = 0
- self.feature_maps_not_in_fast_storage = (
- arch.tensor_storage_mem_area[TensorPurpose.FeatureMap] != arch.fast_storage_mem_area
- )
-
self.pareto_max_candidates = 16
self.ifm_stream_npu_blocks = set(
@@ -694,7 +690,7 @@ class DynamicProgrammingScheduler:
all_candidates = []
for pred_pass in pred_pass_list:
# recurse into the next pass
- ifm_strat_data = self.search_ifm_streaming_body(pred_pass, self.feature_maps_not_in_fast_storage)
+ ifm_strat_data = self.search_ifm_streaming_body(pred_pass, self.arch.is_spilling_enabled())
strat_data = self.search_all_but_one_predecessor(ps, pred_pass, ifm_strat_data)
for strat_opt in strat_data:
@@ -1020,7 +1016,7 @@ class DynamicProgrammingScheduler:
output.set_format(TensorFormat.NHCWB16, arch)
for rewrite_op in rewrites:
rewrite_op.outputs[0].set_format(TensorFormat.NHCWB16, arch)
- if self.feature_maps_not_in_fast_storage:
+ if arch.is_spilling_enabled():
# Remember feature maps that can be moved to fast storage for later use
# in use_fast_storage_for_feature_maps
self.sg.scheduling_info["feature_map_rewrites"] = fast_storage_tensor_rewrites
diff --git a/ethosu/vela/stats_writer.py b/ethosu/vela/stats_writer.py
index 3cd769f..e4b8156 100644
--- a/ethosu/vela/stats_writer.py
+++ b/ethosu/vela/stats_writer.py
@@ -46,7 +46,7 @@ def write_summary_metrics_csv(nng, summary_filename, arch):
]
labels += (
- ["accelerator_configuration", "system_config", "npu_clock", "sram_size"]
+ ["accelerator_configuration", "system_config", "memory_mode", "core_clock", "sram_size"]
+ [area.identifier_name() + "_bandwidth" for area in mem_areas]
+ ["weights_storage_area", "feature_map_storage_area"]
)
@@ -83,7 +83,13 @@ def write_summary_metrics_csv(nng, summary_filename, arch):
if arch:
data_items += (
- [arch.accelerator_config, arch.system_config, arch.npu_clock, arch.sram_size / 1024]
+ [
+ arch.accelerator_config.name,
+ arch.system_config,
+ arch.memory_mode,
+ arch.core_clock,
+ arch.sram_size / 1024,
+ ]
+ [arch.memory_bandwidths_per_second[mem_area] / 1000.0 / 1000 / 1000 for mem_area in mem_areas]
+ [
arch.tensor_storage_mem_area[TensorPurpose.Weights].display_name(),
@@ -91,7 +97,7 @@ def write_summary_metrics_csv(nng, summary_filename, arch):
]
)
- midpoint_inference_time = nng.cycles[PassCycles.Total] / arch.npu_clock
+ midpoint_inference_time = nng.cycles[PassCycles.Total] / arch.core_clock
if midpoint_inference_time > 0:
midpoint_fps = 1 / midpoint_inference_time
else:
@@ -162,7 +168,6 @@ def write_pass_metrics_csv(nng, pass_filename):
all_cycles = (
PassCycles.Total,
PassCycles.Npu,
- PassCycles.Cpu,
PassCycles.SramAccess,
PassCycles.DramAccess,
PassCycles.OnChipFlashAccess,
@@ -239,7 +244,7 @@ def print_performance_metrics_for_strat(
orig_mem_areas_labels = [(v, v.display_name()) for v in mem_areas_to_report()]
- midpoint_inference_time = cycles[PassCycles.Total] / arch.npu_clock
+ midpoint_inference_time = cycles[PassCycles.Total] / arch.core_clock
if midpoint_inference_time > 0:
midpoint_fps = 1 / midpoint_inference_time
else:
@@ -252,9 +257,10 @@ def print_performance_metrics_for_strat(
if name:
print("", file=f)
print("Network summary for", name, file=f)
- print("Accelerator configuration {:20}".format(arch.accelerator_config), file=f)
- print("System configuration {:20}".format(arch.system_config), file=f)
- print("Accelerator clock {:12d} MHz".format(int(arch.npu_clock / 1e6)), file=f)
+ print("Accelerator configuration {:>20}".format(arch.accelerator_config.name), file=f)
+ print("System configuration {:>20}".format(arch.system_config), file=f)
+ print("Memory mode {:>20}".format(arch.memory_mode), file=f)
+ print("Accelerator clock {:12d} MHz".format(int(arch.core_clock / 1e6)), file=f)
for mem_area, label in mem_area_labels:
print(
"Design peak {:25} {:12.2f} GB/s".format(
diff --git a/ethosu/vela/test/testutil.py b/ethosu/vela/test/testutil.py
index 8258827..7cdd4f5 100644
--- a/ethosu/vela/test/testutil.py
+++ b/ethosu/vela/test/testutil.py
@@ -28,14 +28,15 @@ from ethosu.vela.tensor import Tensor
def create_arch():
return architecture_features.ArchitectureFeatures(
- vela_config=None,
- system_config=None,
+ vela_config_files=None,
accelerator_config=architecture_features.Accelerator.Ethos_U55_128.value,
+ system_config=architecture_features.ArchitectureFeatures.DEFAULT_CONFIG,
+ memory_mode=architecture_features.ArchitectureFeatures.DEFAULT_CONFIG,
override_block_config=None,
block_config_limit=None,
- global_memory_clock_scale=1.0,
max_blockdep=0,
weight_estimation_scaling=1.0,
+ verbose_config=False,
)
diff --git a/ethosu/vela/vela.py b/ethosu/vela/vela.py
index 6835607..4f632d5 100644
--- a/ethosu/vela/vela.py
+++ b/ethosu/vela/vela.py
@@ -19,8 +19,7 @@
# Provides command line interface, options parsing, and network loading. Before calling the compiler driver.
import argparse
import ast
-import configparser
-import os.path
+import os
import sys
import time
@@ -196,13 +195,13 @@ def main(args=None):
parser.add_argument(
"--supported-ops-report",
action="store_true",
- help="Generate the SUPPORTED_OPS.md file in the current working directory and exits.",
+ help="Generate the SUPPORTED_OPS.md file in the current working directory and exit",
)
+ # set network nargs to be optional to allow the --supported-ops-report CLI option to be used standalone
parser.add_argument(
"network", metavar="NETWORK", type=str, default=None, nargs="?", help="Filename of network to process"
)
-
parser.add_argument(
"--output-dir", type=str, default="output", help="Output directory to write files to (default: %(default)s)"
)
@@ -212,9 +211,10 @@ def main(args=None):
default=None,
help="Enables the calculation and writing of a network debug database to output directory",
)
-
- parser.add_argument("--config", type=str, help="Location of vela configuration file")
-
+ parser.add_argument(
+ "--config", type=str, action="append", help="Vela configuration file(s) in Python ConfigParser .ini file format"
+ )
+ parser.add_argument("--verbose-config", action="store_true", help="Verbose system configuration and memory mode")
parser.add_argument("--verbose-graph", action="store_true", help="Verbose graph rewriter")
parser.add_argument("--verbose-quantization", action="store_true", help="Verbose quantization")
parser.add_argument("--verbose-packing", action="store_true", help="Verbose pass packing")
@@ -263,8 +263,14 @@ def main(args=None):
parser.add_argument(
"--system-config",
type=str,
- default="internal-default",
- help="System configuration to use (default: %(default)s)",
+ default=architecture_features.ArchitectureFeatures.DEFAULT_CONFIG,
+ help="System configuration to select from the Vela configuration file (default: %(default)s)",
+ )
+ parser.add_argument(
+ "--memory-mode",
+ type=str,
+ default=architecture_features.ArchitectureFeatures.DEFAULT_CONFIG,
+ help="Memory mode to select from the Vela configuration file (default: %(default)s)",
)
parser.add_argument(
"--tensor-allocator",
@@ -292,15 +298,6 @@ def main(args=None):
help="Limit block config search space, use zero for unlimited (default: %(default)s)",
)
parser.add_argument(
- "--global-memory-clock-scale",
- type=float,
- default=1.0,
- help=(
- "Performs an additional scaling of the individual memory clock scales specified by the system config "
- "(default: %(default)s)"
- ),
- )
- parser.add_argument(
"--pareto-metric",
default=ParetoMetric.BwCycMem,
type=lambda s: ParetoMetric[s],
@@ -344,14 +341,6 @@ def main(args=None):
)
args = parser.parse_args(args=args)
- # Read configuration file
- config_file = args.config
- config = None
- if config_file is not None:
- with open(config_file) as f:
- config = configparser.ConfigParser()
- config.read_file(f)
-
# Generate the supported ops report and exit
if args.supported_ops_report:
generate_supported_ops()
@@ -360,6 +349,12 @@ def main(args=None):
if args.network is None:
parser.error("the following argument is required: NETWORK")
+ # check all config files exist because they will be read as a group
+ if args.config is not None:
+ for filename in args.config:
+ if not os.access(filename, os.R_OK):
+ raise InputFileError(filename, "File not found or is not readable.")
+
sys.setrecursionlimit(args.recursion_limit)
if args.force_block_config:
@@ -374,14 +369,15 @@ def main(args=None):
parser.error("the following argument needs to be a power of 2: ALLOCATION_ALIGNMENT")
arch = architecture_features.ArchitectureFeatures(
- vela_config=config,
+ vela_config_files=args.config,
system_config=args.system_config,
+ memory_mode=args.memory_mode,
accelerator_config=args.accelerator_config,
override_block_config=force_block_config,
block_config_limit=args.block_config_limit,
- global_memory_clock_scale=args.global_memory_clock_scale,
max_blockdep=args.max_block_dependency,
weight_estimation_scaling=args.weight_estimation_scaling,
+ verbose_config=args.verbose_config,
)
compiler_options = compiler_driver.CompilerOptions(