author     Nathan Bailey <nathan.bailey@arm.com>  2024-01-26 14:19:52 +0000
committer  Nathan Bailey <nathan.bailey@arm.com>  2024-02-23 15:23:06 +0000
commit     e506c7bd0453cb204ec7a59267fe3982492aaed6 (patch)
tree       225b70ede952e6dfe0b33fcc3813106bb8828e33
parent     d10b53a358d7fddc2e5a818d146b71bc5bb5e0ed (diff)
download   mlia-e506c7bd0453cb204ec7a59267fe3982492aaed6.tar.gz
refactor: Migrate from Vela's internal code to CSV summary
Removes Vela defines from the Vela compiler.py and performance.py.
Replaces calls to Vela internal code with data from the Vela summary CSV.

Resolves: MLIA-1024

Signed-off-by: Nathan Bailey <nathan.bailey@arm.com>
Change-Id: I569878f2936767f70c0255919ca40d1969275529
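For context on the change: the compiler wrapper now drives Vela through its command-line entry point and reads the summary CSV report that Vela writes, instead of importing Vela's internal compiler driver. A minimal sketch of that flow, assuming an illustrative model path, output directory and vela.ini location (the option names and the summary CSV naming scheme follow the patch below):

    import csv
    from pathlib import Path

    from ethosu.vela.vela import main  # Vela CLI entry point used by the new wrapper

    model_path = Path("my_model.tflite")  # illustrative input model
    output_dir = Path("output")           # illustrative output directory
    system_config = "Ethos_U55_High_End_Embedded"

    # Compile via the Vela CLI; --verbose-performance makes Vela write CSV reports.
    main(
        [
            "--output-dir", output_dir.as_posix(),
            "--accelerator-config", "ethos-u55-256",
            "--system-config", system_config,
            "--memory-mode", "Shared_Sram",
            "--config", "vela.ini",  # illustrative path to a Vela configuration file
            "--verbose-performance",
            model_path.as_posix(),
        ]
    )

    # Vela names the report <model stem>_summary_<system config>.csv.
    summary_csv = output_dir / f"{model_path.stem}_summary_{system_config}.csv"
    with open(summary_csv, encoding="UTF-8") as csv_file:
        first_row = next(csv.DictReader(csv_file))
    print(first_row["cycles_total"], first_row["sram_memory_used"])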
-rw-r--r--  src/mlia/backend/vela/compiler.py               552
-rw-r--r--  src/mlia/backend/vela/performance.py             85
-rw-r--r--  src/mlia/core/context.py                         11
-rw-r--r--  src/mlia/core/reporting.py                        4
-rw-r--r--  src/mlia/resources/vela/vela.ini                 51
-rw-r--r--  src/mlia/target/ethos_u/advisor.py                4
-rw-r--r--  src/mlia/target/ethos_u/config.py                 5
-rw-r--r--  src/mlia/target/ethos_u/data_analysis.py          6
-rw-r--r--  src/mlia/target/ethos_u/performance.py           48
-rw-r--r--  src/mlia/target/ethos_u/reporters.py             87
-rw-r--r--  tests/conftest.py                                41
-rw-r--r--  tests/test_backend_vela_compiler.py             656
-rw-r--r--  tests/test_backend_vela_performance.py           27
-rw-r--r--  tests/test_cli_commands.py                        2
-rw-r--r--  tests/test_core_context.py                        7
-rw-r--r--  tests/test_target_ethos_u_data_analysis.py       12
-rw-r--r--  tests/test_target_ethos_u_data_collection.py      2
-rw-r--r--  tests/test_target_ethos_u_performance.py         16
-rw-r--r--  tests/test_target_ethos_u_reporters.py           40
19 files changed, 1140 insertions(+), 516 deletions(-)
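Downstream callers switch from optimize_model() to the new compile_model() helper, and resolve_compiler_config() now returns a VelaInitData parsed from vela.ini rather than a dict built from ArchitectureFeatures. A hedged usage sketch based on the updated tests (the target profile name is taken from the tests; the model path and output directory are illustrative):

    from pathlib import Path

    from mlia.backend.vela.compiler import compile_model, resolve_compiler_config
    from mlia.target.ethos_u.config import EthosUConfiguration

    target_config = EthosUConfiguration.load_profile("ethos-u55-256")
    target_config.compiler_options.output_dir = Path("output")  # illustrative

    # compile_model() runs Vela and returns the path of the <model>_vela.tflite output.
    optimized_model_path = compile_model(
        Path("test_model.tflite"),  # illustrative input model
        target_config.compiler_options,
    )

    # resolve_compiler_config() parses the bundled vela.ini into a VelaInitData dataclass.
    init_data = resolve_compiler_config(target_config.compiler_options)
    print(optimized_model_path, init_data.system_config, init_data.memory_mode)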
diff --git a/src/mlia/backend/vela/compiler.py b/src/mlia/backend/vela/compiler.py
index fe9e365..211721a 100644
--- a/src/mlia/backend/vela/compiler.py
+++ b/src/mlia/backend/vela/compiler.py
@@ -3,36 +3,156 @@
"""Vela compiler wrapper module."""
from __future__ import annotations
+import csv
import logging
+import re
import sys
from dataclasses import dataclass
+from dataclasses import fields
from io import StringIO
from pathlib import Path
-from typing import Any
from typing import Literal
-from ethosu.vela.architecture_features import ArchitectureFeatures
-from ethosu.vela.compiler_driver import compiler_driver
-from ethosu.vela.compiler_driver import CompilerOptions
-from ethosu.vela.compiler_driver import TensorAllocator
from ethosu.vela.model_reader import ModelReaderOptions
from ethosu.vela.model_reader import read_model
from ethosu.vela.nn_graph import Graph
from ethosu.vela.nn_graph import NetworkType
from ethosu.vela.operation import CustomType
-from ethosu.vela.scheduler import OptimizationStrategy
-from ethosu.vela.scheduler import SchedulerOptions
-from ethosu.vela.tensor import BandwidthDirection
-from ethosu.vela.tensor import MemArea
-from ethosu.vela.tensor import Tensor
-from ethosu.vela.tflite_writer import write_tflite
+from ethosu.vela.vela import main
+from mlia.utils.filesystem import get_vela_config
from mlia.utils.logging import redirect_output
+from mlia.utils.logging import redirect_raw_output
logger = logging.getLogger(__name__)
@dataclass
+class VelaInitMemoryData:
+ """Memory Data from vela.ini."""
+
+ clock_scale: float | None
+ burst_length: int | None
+ read_latency: int | None
+ write_latency: int | None
+
+
+@dataclass
+class VelaInitData: # pylint: disable=too-many-instance-attributes
+ """Data gathered from the vela.ini file we provide to vela."""
+
+ system_config: str
+ core_clock: float
+ axi0_port: str
+ axi1_port: str
+ sram_memory_data: VelaInitMemoryData
+ dram_memory_data: VelaInitMemoryData
+ off_chip_flash_memory_data: VelaInitMemoryData
+ on_chip_flash_memory_data: VelaInitMemoryData
+ memory_mode: str
+ const_mem_area: str
+ arena_mem_area: str
+ cache_mem_area: str
+ arena_cache_size: int | None
+
+
+@dataclass
+class VelaSummary: # pylint: disable=too-many-instance-attributes
+ """Data gathered from the summary CSV file that Vela produces."""
+
+ cycles_total: float
+ cycles_npu: float
+ cycles_sram_access: float
+ cycles_dram_access: float
+ cycles_on_chip_flash_access: float
+ cycles_off_chip_flash_access: float
+ core_clock: float
+ dram_memory_used: float
+ sram_memory_used: float
+ on_chip_flash_memory_used: float
+ off_chip_flash_memory_used: float
+ batch_size: int
+ memory_mode: str
+ system_config: str
+ accelerator_configuration: str
+ arena_cache_size: float
+
+ def __repr__(self) -> str:
+ """Return String Representation of VelaSummary object."""
+ header_values = dict(summary_metrics)
+ string_to_check = ""
+ for field in fields(self):
+ string_to_check += (
+ f"{header_values[field.name]}: {getattr(self, field.name)}, "
+ )
+ return string_to_check
+
+
+complete_summary_metrics = [
+ ("experiment", "experiment"),
+ ("network", "network"),
+ ("accelerator_configuration", "accelerator_configuration"),
+ ("system_config", "system_config"),
+ ("memory_mode", "memory_mode"),
+ ("core_clock", "core_clock"),
+ ("arena_cache_size", "arena_cache_size"),
+ ("sram_bandwidth", "sram_bandwidth"),
+ ("dram_bandwidth", "dram_bandwidth"),
+ ("on_chip_flash_bandwidth", "on_chip_flash_bandwidth"),
+ ("off_chip_flash_bandwidth", "off_chip_flash_bandwidth"),
+ ("weights_storage_area", "weights_storage_area"),
+ ("feature_map_storage_area", "feature_map_storage_area"),
+ ("inferences_per_second", "inferences_per_second"),
+ ("batch_size", "batch_size"),
+ ("inference_time", "inference_time"),
+ ("passes_before_fusing", "passes_before_fusing"),
+ ("sram_memory_used", "sram_memory_used"),
+ ("dram_memory_used", "dram_memory_used"),
+ (
+ "on_chip_flash_memory_used",
+ "on_chip_flash_memory_used",
+ ),
+ ("off_chip_flash_memory_used", "off_chip_flash_memory_used"),
+ ("total_original_weights", "total_original_weights"),
+ ("total_npu_encoded_weights", "total_npu_encoded_weights"),
+ ("dram_total_bytes", "dram_total_bytes"),
+ (
+ "on_chip_flash_feature_map_read_bytes",
+ "on_chip_flash_feature_map_read_bytes",
+ ),
+ ("on_chip_flash_feature_map_write_bytes", "on_chip_flash_feature_map_write_bytes"),
+ ("on_chip_flash_weight_read_bytes", "on_chip_flash_weight_read_bytes"),
+ ("on_chip_flash_weight_write_bytes", "on_chip_flash_weight_write_bytes"),
+ ("on_chip_flash_total_bytes", "on_chip_flash_total_bytes"),
+ ("off_chip_flash_feature_map_read_bytes", "off_chip_flash_feature_map_read_bytes"),
+ (
+ "off_chip_flash_feature_map_write_bytes",
+ "off_chip_flash_feature_map_write_bytes",
+ ),
+ ("off_chip_flash_weight_read_bytes", "off_chip_flash_weight_read_bytes"),
+ ("off_chip_flash_weight_write_bytes", "off_chip_flash_weight_write_bytes"),
+ ("off_chip_flash_total_bytes", "off_chip_flash_total_bytes"),
+ ("nn_macs", "nn_macs"),
+ ("nn_tops", "nn_tops"),
+ ("cycles_npu", "cycles_npu"),
+ ("cycles_sram_access", "cycles_sram_access"),
+ ("cycles_dram_access", "cycles_dram_access"),
+ ("cycles_on_chip_flash_access", "cycles_on_chip_flash_access"),
+ ("cycles_off_chip_flash_access", "cycles_off_chip_flash_access"),
+ ("cycles_total", "cycles_total"),
+]
+
+OUTPUT_METRICS = [field.name for field in fields(VelaSummary)]
+
+summary_metrics = [
+ summary_metric
+ for summary_metric in complete_summary_metrics
+ if summary_metric[0] in OUTPUT_METRICS
+]
+summary_metrics.sort(key=lambda e: OUTPUT_METRICS.index(e[0]))
+
+
+@dataclass
class Model:
"""Model metadata."""
@@ -49,20 +169,6 @@ class Model:
)
-@dataclass
-class OptimizedModel:
- """Instance of the Vela optimized model."""
-
- nng: Graph
- arch: ArchitectureFeatures
- compiler_options: CompilerOptions
- scheduler_options: SchedulerOptions
-
- def save(self, output_filename: str | Path) -> None:
- """Save instance of the optimized model to the file."""
- write_tflite(self.nng, output_filename)
-
-
AcceleratorConfigType = Literal[
"ethos-u55-32",
"ethos-u55-64",
@@ -82,16 +188,17 @@ class VelaCompilerOptions: # pylint: disable=too-many-instance-attributes
"""Vela compiler options."""
config_files: str | list[str] | None = None
- system_config: str = ArchitectureFeatures.DEFAULT_CONFIG
- memory_mode: str = ArchitectureFeatures.DEFAULT_CONFIG
+ system_config: str = "internal-default"
+ memory_mode: str = "internal-default"
accelerator_config: AcceleratorConfigType | None = None
- max_block_dependency: int = ArchitectureFeatures.MAX_BLOCKDEP
+ max_block_dependency: int = 3
arena_cache_size: int | None = None
tensor_allocator: TensorAllocatorType = "HillClimb"
- cpu_tensor_alignment: int = Tensor.AllocationQuantum
+ cpu_tensor_alignment: int = 16
optimization_strategy: OptimizationStrategyType = "Performance"
output_dir: Path = Path("output")
recursion_limit: int = 1000
+ verbose_performance: bool = True
class VelaCompiler: # pylint: disable=too-many-instance-attributes
@@ -105,13 +212,12 @@ class VelaCompiler: # pylint: disable=too-many-instance-attributes
self.accelerator_config = compiler_options.accelerator_config
self.max_block_dependency = compiler_options.max_block_dependency
self.arena_cache_size = compiler_options.arena_cache_size
- self.tensor_allocator = TensorAllocator[compiler_options.tensor_allocator]
+ self.tensor_allocator = compiler_options.tensor_allocator
self.cpu_tensor_alignment = compiler_options.cpu_tensor_alignment
- self.optimization_strategy = OptimizationStrategy[
- compiler_options.optimization_strategy
- ]
- self.output_dir = compiler_options.output_dir
+ self.optimization_strategy = compiler_options.optimization_strategy
+ self.output_dir = Path(compiler_options.output_dir)
self.recursion_limit = compiler_options.recursion_limit
+ self.verbose_performance = compiler_options.verbose_performance
sys.setrecursionlimit(self.recursion_limit)
@@ -122,36 +228,48 @@ class VelaCompiler: # pylint: disable=too-many-instance-attributes
nng, network_type = self._read_model(model)
return Model(nng, network_type)
- def compile_model(self, model: str | Path | Model) -> OptimizedModel:
+ def compile_model(
+ self, model_path: Path, already_compiled: bool = False
+ ) -> tuple[VelaSummary, Path]:
"""Compile the model."""
- if isinstance(model, (str, Path)):
- nng, network_type = self._read_model(model)
- else:
- nng, network_type = model.nng, NetworkType.TFLite
-
- if not nng:
- raise ValueError("Unable to read model: model.nng is not available")
-
- output_basename = f"{self.output_dir}/{nng.name}"
-
try:
- arch = self._architecture_features()
- compiler_options = self._compiler_options()
- scheduler_options = self._scheduler_options()
-
- with redirect_output(
+ with redirect_raw_output(
logger, stdout_level=logging.DEBUG, stderr_level=logging.DEBUG
):
tmp = sys.stdout
output_message = StringIO()
sys.stdout = output_message
- compiler_driver(
- nng,
- arch,
- compiler_options,
- scheduler_options,
- network_type,
- output_basename,
+ main_args = [
+ "--output-dir",
+ str(self.output_dir.as_posix()),
+ "--tensor-allocator",
+ str(self.tensor_allocator),
+ "--cpu-tensor-alignment",
+ str(self.cpu_tensor_alignment),
+ "--accelerator-config",
+ str(self.accelerator_config),
+ "--system-config",
+ str(self.system_config),
+ "--memory-mode",
+ str(self.memory_mode),
+ "--max-block-dependency",
+ str(self.max_block_dependency),
+ "--optimise",
+ str(self.optimization_strategy),
+ model_path.as_posix(),
+ "--config",
+ str(self.config_files),
+ ]
+ if self.verbose_performance:
+ main_args.append("--verbose-performance")
+ if not already_compiled:
+ main(main_args)
+ optimized_model_path = Path(
+ self.output_dir.as_posix()
+ + "/"
+ + model_path.stem
+ + "_vela"
+ + model_path.suffix
)
sys.stdout = tmp
if (
@@ -159,51 +277,29 @@ class VelaCompiler: # pylint: disable=too-many-instance-attributes
in output_message.getvalue()
):
raise MemoryError("Model is too large and uses too much RAM")
-
- return OptimizedModel(nng, arch, compiler_options, scheduler_options)
+ summary_data = parse_summary_csv_file(
+ Path(
+ self.output_dir.as_posix()
+ + "/"
+ + model_path.stem
+ + "_summary_"
+ + self.system_config
+ + ".csv"
+ )
+ )
+ return summary_data, optimized_model_path
except MemoryError as err:
raise err
except (SystemExit, Exception) as err:
+ if (
+ "Error: Invalid tflite file." in output_message.getvalue()
+ and isinstance(err, SystemExit)
+ ):
+ raise RuntimeError(f"Unable to read model {model_path}") from err
raise RuntimeError(
"Model could not be optimized with Vela compiler."
) from err
- def get_config(self) -> dict[str, Any]:
- """Get compiler configuration."""
- arch = self._architecture_features()
-
- memory_area = {
- mem.name: {
- "clock_scales": arch.memory_clock_scales[mem],
- "burst_length": arch.memory_burst_length[mem],
- "read_latency": arch.memory_latency[mem][BandwidthDirection.Read],
- "write_latency": arch.memory_latency[mem][BandwidthDirection.Write],
- }
- for mem in (
- MemArea.Sram,
- MemArea.Dram,
- MemArea.OnChipFlash,
- MemArea.OffChipFlash,
- )
- }
-
- return {
- "accelerator_config": arch.accelerator_config.value,
- "system_config": arch.system_config,
- "core_clock": arch.core_clock,
- "axi0_port": arch.axi0_port.name,
- "axi1_port": arch.axi1_port.name,
- "memory_mode": arch.memory_mode,
- "const_mem_area": arch.const_mem_area.name,
- "arena_mem_area": arch.arena_mem_area.name,
- "cache_mem_area": arch.cache_mem_area.name,
- "arena_cache_size": arch.arena_cache_size,
- "permanent_storage_mem_area": arch.permanent_storage_mem_area.name,
- "feature_map_storage_mem_area": arch.feature_map_storage_mem_area.name,
- "fast_storage_mem_area": arch.fast_storage_mem_area.name,
- "memory_area": memory_area,
- }
-
@staticmethod
def _read_model(model: str | Path) -> tuple[Graph, NetworkType]:
"""Read TensorFlow Lite model."""
@@ -216,57 +312,10 @@ class VelaCompiler: # pylint: disable=too-many-instance-attributes
except (SystemExit, Exception) as err:
raise RuntimeError(f"Unable to read model {model_path}.") from err
- def _architecture_features(self) -> ArchitectureFeatures:
- """Return ArchitectureFeatures instance."""
- return ArchitectureFeatures(
- vela_config_files=self.config_files,
- accelerator_config=self.accelerator_config,
- system_config=self.system_config,
- memory_mode=self.memory_mode,
- max_blockdep=self.max_block_dependency,
- verbose_config=False,
- arena_cache_size=self.arena_cache_size,
- )
-
- def _scheduler_options(self) -> SchedulerOptions:
- """Return SchedulerOptions instance."""
- arch = self._architecture_features()
-
- return SchedulerOptions(
- optimization_strategy=self.optimization_strategy,
- sram_target=arch.arena_cache_size,
- verbose_schedule=False,
- )
-
- def _compiler_options(self) -> CompilerOptions:
- """Return CompilerOptions instance."""
- return CompilerOptions(
- verbose_graph=False,
- verbose_quantization=False,
- verbose_packing=False,
- verbose_tensor_purpose=False,
- verbose_tensor_format=False,
- verbose_allocation=False,
- verbose_high_level_command_stream=False,
- verbose_register_command_stream=False,
- verbose_operators=False,
- verbose_weights=False,
- verbose_performance=True,
- show_cpu_operations=False,
- tensor_allocator=self.tensor_allocator,
- timing=False,
- output_dir=self.output_dir,
- cpu_tensor_alignment=self.cpu_tensor_alignment,
- )
-
- def return_compiler_options(self) -> CompilerOptions:
- """Return CompilerOptions instance for test purposes."""
- return self._compiler_options()
-
def resolve_compiler_config(
vela_compiler_options: VelaCompilerOptions,
-) -> dict[str, Any]:
+) -> VelaInitData:
"""Resolve passed compiler options.
Vela has number of configuration parameters that being
@@ -278,22 +327,209 @@ def resolve_compiler_config(
In order to get this information we need to create
instance of the Vela compiler first.
"""
- vela_compiler = VelaCompiler(vela_compiler_options)
- return vela_compiler.get_config()
-
-
-def optimize_model(
- model_path: Path, compiler_options: VelaCompilerOptions, output_model_path: Path
-) -> None:
- """Optimize model and return it's path after optimization."""
- logger.debug(
- "Optimize model %s for target %s",
- model_path,
- compiler_options.accelerator_config,
+ return parse_vela_initialisation_file(
+ get_vela_config(),
+ vela_compiler_options.system_config,
+ vela_compiler_options.memory_mode,
)
+
+def compile_model(model_path: Path, compiler_options: VelaCompilerOptions) -> Path:
+ """Compile model."""
vela_compiler = VelaCompiler(compiler_options)
- optimized_model = vela_compiler.compile_model(model_path)
+ # output dir could be a path or str, cast to Path object
+ output_dir = Path(compiler_options.output_dir)
+ if Path(
+ output_dir.as_posix()
+ + "/"
+ + model_path.stem
+ + "_summary_"
+ + compiler_options.system_config
+ + ".csv"
+ ).is_file():
+ _, optimized_model_path = vela_compiler.compile_model(model_path, True)
+ else:
+ _, optimized_model_path = vela_compiler.compile_model(model_path)
+ return optimized_model_path
+
+
+def parse_summary_csv_file(vela_summary_csv_file: Path) -> VelaSummary:
+ """Parse the summary csv file from Vela."""
+ if not vela_summary_csv_file.is_file():
+ raise FileNotFoundError(f"CSV File not found at {vela_summary_csv_file}")
+
+ with open(vela_summary_csv_file, encoding="UTF-8") as csv_file:
+ summary_reader = csv.DictReader(csv_file, delimiter=",")
+ try:
+ row = next(summary_reader)
+ except StopIteration as err:
+ raise RuntimeError("Generated Vela Summary CSV is empty") from err
+ try:
+ # pylint: disable=eval-used
+ key_types = {
+ field.name: eval(field.type) # type: ignore # nosec
+ for field in fields(VelaSummary)
+ }
+ # pylint: enable=eval-used
+ summary_data = VelaSummary(
+ **{key: key_types[key](row[title]) for key, title in summary_metrics}
+ )
+ except KeyError as err:
+ raise KeyError(
+ f"Generated Vela Summary CSV missing expected header: {err.args[0]}."
+ ) from err
+ return summary_data
+
+
+def parse_vela_initialisation_file( # pylint: disable=too-many-locals
+ vela_init_file: Path, system_config: str, memory_mode: str
+) -> VelaInitData:
+ """Parse the vela.ini to retrieve data for the target information table."""
+ if not vela_init_file.is_file():
+ raise FileNotFoundError(
+ f"Vela Initialisation File not found at {vela_init_file}"
+ )
+
+ lines = []
+ with open(vela_init_file, encoding="UTF-8") as init_file:
+ lines = init_file.readlines()
+
+ if len(lines) == 0:
+ raise OSError("vela.ini File Is Empty")
+
+ lines = [line.strip("\n][ ") for line in lines]
+
+ idxs_memory_mode = [
+ idx for idx, item in enumerate(lines) if re.search("^Memory_Mode.*", item)
+ ]
+
+ if len(idxs_memory_mode) == 0:
+ raise IndexError("No memory modes are present in vela.ini file.")
+
+ idxs_system_config = [
+ idx for idx, item in enumerate(lines) if re.search("^System_Config.*", item)
+ ] + [idxs_memory_mode[0]]
+
+ if len(idxs_system_config) <= 1:
+ raise IndexError("No system configs are present in vela.ini file.")
+
+ try:
+ idx_config = lines.index("System_Config." + system_config)
+ except ValueError as err:
+ raise ValueError(
+ f"System Config: {system_config} not present in vela.ini file."
+ ) from err
+
+ lines_to_probe = lines[
+ idx_config : idxs_system_config[ # noqa: E203
+ idxs_system_config.index(idx_config) + 1
+ ]
+ ]
+
+ def collect_memory_mode_lines(memory_mode: str) -> list[str]:
+ try:
+ idx_memory_mode = lines.index("Memory_Mode." + memory_mode)
+ except ValueError as err:
+ raise ValueError(
+ f"Memory Mode: {memory_mode} not present in vela.ini file."
+ ) from err
+ if idxs_memory_mode.index(idx_memory_mode) == len(idxs_memory_mode) - 1:
+ lines_to_probe = lines[idx_memory_mode:]
+ else:
+ lines_to_probe = lines[
+ idx_memory_mode : idxs_memory_mode[ # noqa: E203
+ idxs_memory_mode.index(idx_memory_mode) + 1
+ ]
+ ]
+ return lines_to_probe
+
+ lines_to_probe_memory_mode = collect_memory_mode_lines(memory_mode)
+ extra_memory_mode_lines = []
+ for line in lines_to_probe_memory_mode:
+ if "inherit=Memory_Mode." in line:
+ extra_memory_mode = line[line.rindex(".") + 1 :] # noqa: E203
+ extra_memory_mode_lines = collect_memory_mode_lines(extra_memory_mode)
+
+ lines_to_probe += extra_memory_mode_lines + lines_to_probe_memory_mode
+
+ init_dict = {}
+ for line in lines_to_probe:
+ if "=" in line:
+ init_dict[line[: line.index("=")]] = line[
+ line.index("=") + 1 : # noqa: E203
+ ]
+ try:
+ init_data = VelaInitData(
+ system_config=system_config,
+ core_clock=float(init_dict["core_clock"]),
+ axi0_port=str(init_dict["axi0_port"]),
+ axi1_port=str(init_dict["axi1_port"]),
+ memory_mode=memory_mode,
+ sram_memory_data=VelaInitMemoryData(
+ clock_scale=float(init_dict["Sram_clock_scale"])
+ if "Sram_clock_scale" in init_dict
+ else None,
+ burst_length=int(init_dict["Sram_burst_length"])
+ if "Sram_burst_length" in init_dict
+ else None,
+ read_latency=int(init_dict["Sram_read_latency"])
+ if "Sram_read_latency" in init_dict
+ else None,
+ write_latency=int(init_dict["Sram_write_latency"])
+ if "Sram_write_latency" in init_dict
+ else None,
+ ),
+ dram_memory_data=VelaInitMemoryData(
+ clock_scale=float(init_dict["Dram_clock_scale"])
+ if "Dram_clock_scale" in init_dict
+ else None,
+ burst_length=int(init_dict["Dram_burst_length"])
+ if "Dram_burst_length" in init_dict
+ else None,
+ read_latency=int(init_dict["Dram_read_latency"])
+ if "Dram_read_latency" in init_dict
+ else None,
+ write_latency=int(init_dict["Dram_write_latency"])
+ if "Dram_write_latency" in init_dict
+ else None,
+ ),
+ off_chip_flash_memory_data=VelaInitMemoryData(
+ clock_scale=float(init_dict["OffChipFlash_clock_scale"])
+ if "OffChipFlash_clock_scale" in init_dict
+ else None,
+ burst_length=int(init_dict["OffChipFlash_burst_length"])
+ if "OffChipFlash_burst_length" in init_dict
+ else None,
+ read_latency=int(init_dict["OffChipFlash_read_latency"])
+ if "OffChipFlash_read_latency" in init_dict
+ else None,
+ write_latency=int(init_dict["OffChipFlash_write_latency"])
+ if "OffChipFlash_write_latency" in init_dict
+ else None,
+ ),
+ on_chip_flash_memory_data=VelaInitMemoryData(
+ clock_scale=float(init_dict["OnChipFlash_clock_scale"])
+ if "OnChipFlash_clock_scale" in init_dict
+ else None,
+ burst_length=int(init_dict["OnChipFlash_burst_length"])
+ if "OnChipFlash_burst_length" in init_dict
+ else None,
+ read_latency=int(init_dict["OnChipFlash_read_latency"])
+ if "OnChipFlash_read_latency" in init_dict
+ else None,
+ write_latency=int(init_dict["OnChipFlash_write_latency"])
+ if "OnChipFlash_write_latency" in init_dict
+ else None,
+ ),
+ const_mem_area=str(init_dict["const_mem_area"]),
+ arena_mem_area=str(init_dict["arena_mem_area"]),
+ cache_mem_area=str(init_dict["cache_mem_area"]),
+ arena_cache_size=int(init_dict["arena_cache_size"])
+ if "arena_cache_size" in init_dict
+ else None,
+ )
+
+ except KeyError as err:
+ raise KeyError(f"Vela.ini file missing expected header: {err.args[0]}") from err
- logger.debug("Save optimized model into %s", output_model_path)
- optimized_model.save(output_model_path)
+ return init_data
diff --git a/src/mlia/backend/vela/performance.py b/src/mlia/backend/vela/performance.py
index 72a8ceb..2cf945d 100644
--- a/src/mlia/backend/vela/performance.py
+++ b/src/mlia/backend/vela/performance.py
@@ -10,15 +10,12 @@ from collections import Counter
from dataclasses import dataclass
from dataclasses import fields
from pathlib import Path
-from pydoc import locate
import numpy as np
-from ethosu.vela.npu_performance import PassCycles
-from ethosu.vela.tensor import MemArea
-from mlia.backend.vela.compiler import OptimizedModel
from mlia.backend.vela.compiler import VelaCompiler
from mlia.backend.vela.compiler import VelaCompilerOptions
+from mlia.backend.vela.compiler import VelaSummary
logger = logging.getLogger(__name__)
@@ -37,11 +34,10 @@ class PerformanceMetrics: # pylint: disable=too-many-instance-attributes
batch_inference_time: float
inferences_per_second: float
batch_size: int
- unknown_memory_area_size: int
- sram_memory_area_size: int
- dram_memory_area_size: int
- on_chip_flash_memory_area_size: int
- off_chip_flash_memory_area_size: int
+ sram_memory_area_size: float
+ dram_memory_area_size: float
+ on_chip_flash_memory_area_size: float
+ off_chip_flash_memory_area_size: float
layerwise_performance_info: LayerwisePerfInfo
@@ -145,19 +141,19 @@ def parse_layerwise_perf_csv( # pylint: disable=too-many-locals
if row == headers_to_check_cpu_ops:
continue
try:
+ # pylint: disable=eval-used
key_types = {
- field.name: locate(str(field.type))
+ field.name: eval(field.type) # type: ignore # nosec
for field in fields(LayerPerfInfo)
}
+ # pylint: enable=eval-used
ids_to_metrics = {}
for key, title, _ in metrics:
try:
- ids_to_metrics[key] = key_types[key]( # type: ignore
- row_as_dict[title]
- )
+ ids_to_metrics[key] = key_types[key](row_as_dict[title])
except ValueError as err:
if "invalid literal for int() with base 10" in str(err):
- ids_to_metrics[key] = key_types[key]( # type: ignore
+ ids_to_metrics[key] = key_types[key](
float(row_as_dict[title])
)
else:
@@ -180,17 +176,20 @@ def estimate_performance(
model_path,
compiler_options.accelerator_config,
)
-
vela_compiler = VelaCompiler(compiler_options)
-
- initial_model = vela_compiler.read_model(model_path)
- if initial_model.optimized:
- raise ValueError(
- "Unable to estimate performance for the given optimized model."
- )
-
- optimized_model = vela_compiler.compile_model(initial_model)
- output_dir = optimized_model.compiler_options.output_dir
+ if Path(
+ Path(compiler_options.output_dir).as_posix()
+ + "/"
+ + model_path.stem
+ + "_summary_"
+ + compiler_options.system_config
+ + ".csv"
+ ).is_file():
+ summary_data, _ = vela_compiler.compile_model(model_path, True)
+ else:
+ summary_data, _ = vela_compiler.compile_model(model_path)
+
+ output_dir = compiler_options.output_dir
csv_paths = [entry for entry in os.listdir(output_dir) if "per-layer.csv" in entry]
model_name = str(model_path.stem)
csv_file_found = None
@@ -204,41 +203,31 @@ def estimate_performance(
vela_csv_file=csv_path, metrics=layer_metrics
)
- return _performance_metrics(layerwise_performance_info, optimized_model)
+ return _performance_metrics(layerwise_performance_info, summary_data)
def _performance_metrics(
- layerwise_performance_info: LayerwisePerfInfo, optimized_model: OptimizedModel
+ layerwise_performance_info: LayerwisePerfInfo, summary_data: VelaSummary
) -> PerformanceMetrics:
"""Return performance metrics for optimized model."""
- cycles = optimized_model.nng.cycles
-
- def memory_usage(mem_area: MemArea) -> int:
- """Get memory usage for the proviced memory area type."""
- memory_used: dict[MemArea, int] = optimized_model.nng.memory_used
- bandwidths = optimized_model.nng.bandwidths
-
- return memory_used.get(mem_area, 0) if np.sum(bandwidths[mem_area]) > 0 else 0
-
midpoint_fps = np.nan
- midpoint_inference_time = cycles[PassCycles.Total] / optimized_model.arch.core_clock
+ midpoint_inference_time = summary_data.cycles_total / summary_data.core_clock
if midpoint_inference_time > 0:
midpoint_fps = 1 / midpoint_inference_time
return PerformanceMetrics(
- npu_cycles=int(cycles[PassCycles.Npu]),
- sram_access_cycles=int(cycles[PassCycles.SramAccess]),
- dram_access_cycles=int(cycles[PassCycles.DramAccess]),
- on_chip_flash_access_cycles=int(cycles[PassCycles.OnChipFlashAccess]),
- off_chip_flash_access_cycles=int(cycles[PassCycles.OffChipFlashAccess]),
- total_cycles=int(cycles[PassCycles.Total]),
+ npu_cycles=int(summary_data.cycles_npu),
+ sram_access_cycles=int(summary_data.cycles_sram_access),
+ dram_access_cycles=int(summary_data.cycles_dram_access),
+ on_chip_flash_access_cycles=int(summary_data.cycles_on_chip_flash_access),
+ off_chip_flash_access_cycles=int(summary_data.cycles_off_chip_flash_access),
+ total_cycles=int(summary_data.cycles_total),
batch_inference_time=midpoint_inference_time * 1000,
inferences_per_second=midpoint_fps,
- batch_size=optimized_model.nng.batch_size,
- unknown_memory_area_size=memory_usage(MemArea.Unknown),
- sram_memory_area_size=memory_usage(MemArea.Sram),
- dram_memory_area_size=memory_usage(MemArea.Dram),
- on_chip_flash_memory_area_size=memory_usage(MemArea.OnChipFlash),
- off_chip_flash_memory_area_size=memory_usage(MemArea.OffChipFlash),
+ batch_size=summary_data.batch_size,
+ sram_memory_area_size=float(summary_data.sram_memory_used),
+ dram_memory_area_size=float(summary_data.dram_memory_used),
+ on_chip_flash_memory_area_size=float(summary_data.on_chip_flash_memory_used),
+ off_chip_flash_memory_area_size=float(summary_data.off_chip_flash_memory_used),
layerwise_performance_info=layerwise_performance_info,
)
diff --git a/src/mlia/core/context.py b/src/mlia/core/context.py
index 6e699be..345b668 100644
--- a/src/mlia/core/context.py
+++ b/src/mlia/core/context.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright 2022-2023, Arm Limited and/or its affiliates.
+# SPDX-FileCopyrightText: Copyright 2022-2024, Arm Limited and/or its affiliates.
# SPDX-License-Identifier: Apache-2.0
"""Context module.
@@ -112,7 +112,6 @@ class ExecutionContext(Context):
event_publisher: EventPublisher | None = None,
verbose: bool = False,
logs_dir: str = "logs",
- models_dir: str = "models",
action_resolver: ActionResolver | None = None,
output_format: OutputFormat = "plain_text",
) -> None:
@@ -129,8 +128,6 @@ class ExecutionContext(Context):
:param verbose: enable verbose output
:param logs_dir: name of the directory inside output directory where
log files will be stored
- :param models_dir: name of the directory inside output directory where
- temporary models will be stored
:param action_resolver: instance of the action resolver that could make
advice actionable
:param output_format: format for the application output
@@ -144,7 +141,6 @@ class ExecutionContext(Context):
self._event_publisher = event_publisher or DefaultEventPublisher()
self.verbose = verbose
self.logs_dir = logs_dir
- self.models_dir = models_dir
self._action_resolver = action_resolver or APIActionResolver()
self._output_format = output_format
@@ -195,10 +191,7 @@ class ExecutionContext(Context):
def get_model_path(self, model_filename: str) -> Path:
"""Return path for the model."""
- models_dir_path = self._output_dir_path / self.models_dir
- models_dir_path.mkdir(exist_ok=True)
-
- return models_dir_path / model_filename
+ return self._output_dir_path / model_filename
@property
def logs_path(self) -> Path:
diff --git a/src/mlia/core/reporting.py b/src/mlia/core/reporting.py
index 722adfd..f8ef644 100644
--- a/src/mlia/core/reporting.py
+++ b/src/mlia/core/reporting.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright 2022-2023, Arm Limited and/or its affiliates.
+# SPDX-FileCopyrightText: Copyright 2022-2024, Arm Limited and/or its affiliates.
# SPDX-License-Identifier: Apache-2.0
"""Reporting module."""
from __future__ import annotations
@@ -49,7 +49,7 @@ class ReportItem:
self,
name: str,
alias: str | None = None,
- value: str | int | Cell | None = None,
+ value: str | int | float | Cell | None = None,
nested_items: list[ReportItem] | None = None,
) -> None:
"""Init the report item."""
diff --git a/src/mlia/resources/vela/vela.ini b/src/mlia/resources/vela/vela.ini
index 29a5179..747dc3d 100644
--- a/src/mlia/resources/vela/vela.ini
+++ b/src/mlia/resources/vela/vela.ini
@@ -1,4 +1,4 @@
-; SPDX-FileCopyrightText: Copyright 2020, 2022, Arm Limited and/or its affiliates.
+; SPDX-FileCopyrightText: Copyright 2020, 2022, 2024, Arm Limited and/or its affiliates.
; SPDX-License-Identifier: Apache-2.0
; -----------------------------------------------------------------------------
@@ -6,6 +6,19 @@
; -----------------------------------------------------------------------------
; System Configuration
+; Ethos-U55 Deep Embedded: SRAM (1.6 GB/s) and Flash (0.1 GB/s)
+[System_Config.Ethos_U55_Deep_Embedded]
+core_clock=200e6
+axi0_port=Sram
+axi1_port=OffChipFlash
+Sram_clock_scale=1.0
+Sram_burst_length=32
+Sram_read_latency=32
+Sram_write_latency=32
+OffChipFlash_clock_scale=0.0625
+OffChipFlash_burst_length=128
+OffChipFlash_read_latency=64
+OffChipFlash_write_latency=64
; Ethos-U55 High-End Embedded: SRAM (4 GB/s) and Flash (0.5 GB/s)
[System_Config.Ethos_U55_High_End_Embedded]
@@ -35,6 +48,20 @@ OffChipFlash_burst_length=128
OffChipFlash_read_latency=64
OffChipFlash_write_latency=64
+; Ethos-U65 Mid-End: SRAM (8 GB/s) and DRAM (3.75 GB/s)
+[System_Config.Ethos_U65_Mid_End]
+core_clock=500e6
+axi0_port=Sram
+axi1_port=Dram
+Sram_clock_scale=1.0
+Sram_burst_length=32
+Sram_read_latency=32
+Sram_write_latency=32
+Dram_clock_scale=0.46875
+Dram_burst_length=128
+Dram_read_latency=500
+Dram_write_latency=250
+
; Ethos-U65 High-End: SRAM (16 GB/s) and DRAM (3.75 GB/s)
[System_Config.Ethos_U65_High_End]
core_clock=1e9
@@ -49,6 +76,20 @@ Dram_burst_length=128
Dram_read_latency=500
Dram_write_latency=250
+; Ethos-U65 Client-Server: SRAM (16 GB/s) and DRAM (12 GB/s)
+[System_Config.Ethos_U65_Client_Server]
+core_clock=1e9
+axi0_port=Sram
+axi1_port=Dram
+Sram_clock_scale=1.0
+Sram_burst_length=32
+Sram_read_latency=32
+Sram_write_latency=32
+Dram_clock_scale=0.75
+Dram_burst_length=128
+Dram_read_latency=500
+Dram_write_latency=250
+
; -----------------------------------------------------------------------------
; Memory Mode
@@ -58,7 +99,6 @@ Dram_write_latency=250
const_mem_area=Axi0
arena_mem_area=Axi0
cache_mem_area=Axi0
-arena_cache_size=2096768
; Shared SRAM: the SRAM is shared between the Ethos-U and the Cortex-M software
; The non-SRAM memory is assumed to be read-only
@@ -66,7 +106,6 @@ arena_cache_size=2096768
const_mem_area=Axi1
arena_mem_area=Axi0
cache_mem_area=Axi0
-arena_cache_size=2096768
; Dedicated SRAM: the SRAM (384KB) is only for use by the Ethos-U
; The non-SRAM memory is assumed to be read-writeable
@@ -75,3 +114,9 @@ const_mem_area=Axi1
arena_mem_area=Axi1
cache_mem_area=Axi0
arena_cache_size=393216
+
+; Dedicated SRAM 512KB: the SRAM (512KB) is only for use by the Ethos-U
+; The non-SRAM memory is assumed to be read-writeable
+[Memory_Mode.Dedicated_Sram_512KB]
+inherit=Memory_Mode.Dedicated_Sram
+arena_cache_size=524288
diff --git a/src/mlia/target/ethos_u/advisor.py b/src/mlia/target/ethos_u/advisor.py
index b5932d0..edcfcfc 100644
--- a/src/mlia/target/ethos_u/advisor.py
+++ b/src/mlia/target/ethos_u/advisor.py
@@ -109,7 +109,9 @@ class EthosUInferenceAdvisor(DefaultInferenceAdvisor):
def _get_target_config(self, context: Context) -> EthosUConfiguration:
"""Get target configuration."""
target_profile = self.get_target_profile(context)
- return cast(EthosUConfiguration, profile(target_profile))
+ target_config = cast(EthosUConfiguration, profile(target_profile))
+ target_config.compiler_options.output_dir = context.output_dir # type: ignore
+ return target_config
def _get_optimization_settings(self, context: Context) -> list[list[dict]]:
"""Get optimization settings."""
diff --git a/src/mlia/target/ethos_u/config.py b/src/mlia/target/ethos_u/config.py
index 73baa61..b3416d3 100644
--- a/src/mlia/target/ethos_u/config.py
+++ b/src/mlia/target/ethos_u/config.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright 2022-2023, Arm Limited and/or its affiliates.
+# SPDX-FileCopyrightText: Copyright 2022-2024, Arm Limited and/or its affiliates.
# SPDX-License-Identifier: Apache-2.0
"""Ethos-U configuration."""
from __future__ import annotations
@@ -10,6 +10,7 @@ from mlia.backend.corstone import is_corstone_backend
from mlia.backend.manager import get_available_backends
from mlia.backend.vela.compiler import resolve_compiler_config
from mlia.backend.vela.compiler import VelaCompilerOptions
+from mlia.backend.vela.compiler import VelaInitData
from mlia.target.config import TargetProfile
from mlia.utils.filesystem import get_vela_config
@@ -53,7 +54,7 @@ class EthosUConfiguration(TargetProfile):
)
@property
- def resolved_compiler_config(self) -> dict[str, Any]:
+ def resolved_compiler_config(self) -> VelaInitData:
"""Resolve compiler configuration."""
return resolve_compiler_config(self.compiler_options)
diff --git a/src/mlia/target/ethos_u/data_analysis.py b/src/mlia/target/ethos_u/data_analysis.py
index 5c6080f..d42d82a 100644
--- a/src/mlia/target/ethos_u/data_analysis.py
+++ b/src/mlia/target/ethos_u/data_analysis.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright 2022-2023, Arm Limited and/or its affiliates.
+# SPDX-FileCopyrightText: Copyright 2022-2024, Arm Limited and/or its affiliates.
# SPDX-License-Identifier: Apache-2.0
"""Ethos-U data analysis module."""
from __future__ import annotations
@@ -110,13 +110,13 @@ class EthosUDataAnalyzer(FactExtractor):
if not optimizations:
return
- orig = optimization_results.original_perf_metrics.in_kilobytes()
+ orig = optimization_results.original_perf_metrics
orig_memory = orig.memory_usage
orig_cycles = orig.npu_cycles
diffs: list[OptimizationDiff] = []
for opt_type, opt_perf_metrics in optimizations:
- opt = opt_perf_metrics.in_kilobytes()
+ opt = opt_perf_metrics
opt_memory = opt.memory_usage
opt_cycles = opt.npu_cycles
diff --git a/src/mlia/target/ethos_u/performance.py b/src/mlia/target/ethos_u/performance.py
index 8decb75..1e2a504 100644
--- a/src/mlia/target/ethos_u/performance.py
+++ b/src/mlia/target/ethos_u/performance.py
@@ -54,7 +54,6 @@ class MemoryUsage:
sram_memory_area_size: int | float
dram_memory_area_size: int | float
- unknown_memory_area_size: int | float
on_chip_flash_memory_area_size: int | float
off_chip_flash_memory_area_size: int | float
memory_size_type: MemorySizeType = MemorySizeType.BYTES
@@ -67,27 +66,6 @@ class MemoryUsage:
"Off chip flash used",
]
- def in_kilobytes(self) -> MemoryUsage:
- """Return memory usage with values in kilobytes."""
- if self.memory_size_type == MemorySizeType.KILOBYTES:
- return self
-
- kilobytes = [
- value / BYTES_PER_KILOBYTE
- for value in [
- self.sram_memory_area_size,
- self.dram_memory_area_size,
- self.unknown_memory_area_size,
- self.on_chip_flash_memory_area_size,
- self.off_chip_flash_memory_area_size,
- ]
- ]
-
- return MemoryUsage(
- *kilobytes, # type: ignore
- memory_size_type=MemorySizeType.KILOBYTES,
- )
-
@dataclass
class PerformanceMetrics:
@@ -98,23 +76,6 @@ class PerformanceMetrics:
memory_usage: MemoryUsage | None
layerwise_perf_info: LayerwisePerfInfo | None
- def in_kilobytes(self) -> PerformanceMetrics:
- """Return metrics with memory usage in KiB."""
- if self.memory_usage is None:
- return PerformanceMetrics(
- self.target_config,
- self.npu_cycles,
- self.memory_usage,
- self.layerwise_perf_info,
- )
-
- return PerformanceMetrics(
- self.target_config,
- self.npu_cycles,
- self.memory_usage.in_kilobytes(),
- self.layerwise_perf_info,
- )
-
@dataclass
class OptimizationPerformanceMetrics:
@@ -157,7 +118,6 @@ class VelaPerformanceEstimator(
MemoryUsage(
vela_perf_metrics.sram_memory_area_size,
vela_perf_metrics.dram_memory_area_size,
- vela_perf_metrics.unknown_memory_area_size,
vela_perf_metrics.on_chip_flash_memory_area_size,
vela_perf_metrics.off_chip_flash_memory_area_size,
),
@@ -192,12 +152,8 @@ class CorstonePerformanceEstimator(
else model
)
- optimized_model_path = self.context.get_model_path(
- f"{model_path.stem}_vela.tflite"
- )
-
- vela_comp.optimize_model(
- model_path, self.target_config.compiler_options, optimized_model_path
+ optimized_model_path = vela_comp.compile_model(
+ model_path, self.target_config.compiler_options
)
corstone_perf_metrics = estimate_performance(
diff --git a/src/mlia/target/ethos_u/reporters.py b/src/mlia/target/ethos_u/reporters.py
index b747ce5..384d623 100644
--- a/src/mlia/target/ethos_u/reporters.py
+++ b/src/mlia/target/ethos_u/reporters.py
@@ -4,6 +4,7 @@
from __future__ import annotations
from collections import defaultdict
+from dataclasses import asdict
from dataclasses import fields
from typing import Any
from typing import Callable
@@ -119,29 +120,50 @@ def report_target_details(target_config: EthosUConfiguration) -> Report:
"""Return table representation for the target."""
compiler_config = target_config.resolved_compiler_config
+ memory_dict = dict(
+ zip(
+ ["Sram", "Dram", "OnChipFlash", "OffChipFlash"],
+ [
+ compiler_config.sram_memory_data,
+ compiler_config.dram_memory_data,
+ compiler_config.on_chip_flash_memory_data,
+ compiler_config.off_chip_flash_memory_data,
+ ],
+ )
+ )
+
+ memory_dict = {
+ key: val
+ for key, val in memory_dict.items()
+ if not list(asdict(val).values()).count(None) == len(list(asdict(val).values()))
+ }
+
memory_settings = [
ReportItem(
"Const mem area",
"const_mem_area",
- compiler_config["const_mem_area"],
+ compiler_config.const_mem_area,
),
ReportItem(
"Arena mem area",
"arena_mem_area",
- compiler_config["arena_mem_area"],
+ compiler_config.arena_mem_area,
),
ReportItem(
"Cache mem area",
"cache_mem_area",
- compiler_config["cache_mem_area"],
- ),
- ReportItem(
- "Arena cache size",
- "arena_cache_size",
- BytesCell(compiler_config["arena_cache_size"]),
+ compiler_config.cache_mem_area,
),
]
+ if compiler_config.arena_cache_size is not None:
+ memory_settings.append(
+ ReportItem(
+ "Arena cache size",
+ "arena_cache_size",
+ BytesCell(compiler_config.arena_cache_size),
+ )
+ )
mem_areas_settings = [
ReportItem(
f"{mem_area_name}",
@@ -151,67 +173,48 @@ def report_target_details(target_config: EthosUConfiguration) -> Report:
ReportItem(
"Clock scales",
"clock_scales",
- mem_area_settings["clock_scales"],
+ mem_area_settings.clock_scale,
),
ReportItem(
"Burst length",
"burst_length",
- BytesCell(mem_area_settings["burst_length"]),
+ BytesCell(mem_area_settings.burst_length),
),
ReportItem(
"Read latency",
"read_latency",
- CyclesCell(mem_area_settings["read_latency"]),
+ CyclesCell(mem_area_settings.read_latency),
),
ReportItem(
"Write latency",
"write_latency",
- CyclesCell(mem_area_settings["write_latency"]),
+ CyclesCell(mem_area_settings.write_latency),
),
],
)
- for mem_area_name, mem_area_settings in compiler_config["memory_area"].items()
+ for mem_area_name, mem_area_settings in memory_dict.items()
]
system_settings = [
ReportItem(
"Accelerator clock",
"accelerator_clock",
- ClockCell(compiler_config["core_clock"]),
+ ClockCell(compiler_config.core_clock),
),
ReportItem(
"AXI0 port",
"axi0_port",
- compiler_config["axi0_port"],
+ compiler_config.axi0_port,
),
ReportItem(
"AXI1 port",
"axi1_port",
- compiler_config["axi1_port"],
+ compiler_config.axi1_port,
),
ReportItem(
"Memory area settings", "memory_area", None, nested_items=mem_areas_settings
),
]
-
- arch_settings = [
- ReportItem(
- "Permanent storage mem area",
- "permanent_storage_mem_area",
- compiler_config["permanent_storage_mem_area"],
- ),
- ReportItem(
- "Feature map storage mem area",
- "feature_map_storage_mem_area",
- compiler_config["feature_map_storage_mem_area"],
- ),
- ReportItem(
- "Fast storage mem area",
- "fast_storage_mem_area",
- compiler_config["fast_storage_mem_area"],
- ),
- ]
-
return NestedReport(
"Target information",
"target",
@@ -221,21 +224,15 @@ def report_target_details(target_config: EthosUConfiguration) -> Report:
ReportItem(
"Memory mode",
alias="memory_mode",
- value=compiler_config["memory_mode"],
+ value=compiler_config.memory_mode,
nested_items=memory_settings,
),
ReportItem(
"System config",
alias="system_config",
- value=compiler_config["system_config"],
+ value=compiler_config.system_config,
nested_items=system_settings,
),
- ReportItem(
- "Architecture settings",
- "arch_settings",
- None,
- nested_items=arch_settings,
- ),
],
)
@@ -244,7 +241,6 @@ def metrics_as_records(
perf_metrics: list[PerformanceMetrics],
) -> tuple[list[tuple], list[tuple]]:
"""Convert perf metrics object into list of records."""
- perf_metrics = [item.in_kilobytes() for item in perf_metrics]
def _layerwise_as_metrics(
perf_metrics: list[PerformanceMetrics],
@@ -314,9 +310,6 @@ def metrics_as_records(
return []
metric_map["SRAM used"].append(metrics.memory_usage.sram_memory_area_size)
metric_map["DRAM used"].append(metrics.memory_usage.dram_memory_area_size)
- metric_map["Unknown memory area used"].append(
- metrics.memory_usage.unknown_memory_area_size
- )
metric_map["On-chip flash used"].append(
metrics.memory_usage.on_chip_flash_memory_area_size
)
diff --git a/tests/conftest.py b/tests/conftest.py
index 9dc1d16..1092979 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -11,7 +11,7 @@ import numpy as np
import pytest
import tensorflow as tf
-from mlia.backend.vela.compiler import optimize_model
+from mlia.backend.vela.compiler import compile_model
from mlia.core.context import ExecutionContext
from mlia.nn.rewrite.core.utils.numpy_tfrecord import NumpyTFWriter
from mlia.nn.tensorflow.tflite_convert import convert_to_tflite
@@ -51,7 +51,7 @@ def invalid_input_model_file(test_tflite_invalid_model: Path) -> Path:
@pytest.fixture(scope="session", name="empty_test_csv_file")
-def fixture_empty_test_csv_file( # pylint: disable=too-many-locals
+def fixture_empty_test_csv_file(
test_csv_path: Path,
) -> Path:
"""Return empty test csv file path."""
@@ -59,7 +59,7 @@ def fixture_empty_test_csv_file( # pylint: disable=too-many-locals
@pytest.fixture(scope="session", name="test_csv_file")
-def fixture_test_csv_file( # pylint: disable=too-many-locals
+def fixture_test_csv_file(
test_csv_path: Path,
) -> Path:
"""Return test csv file path."""
@@ -67,7 +67,7 @@ def fixture_test_csv_file( # pylint: disable=too-many-locals
@pytest.fixture(scope="session", name="test_csv_path")
-def fixture_test_csv_path( # pylint: disable=too-many-locals
+def fixture_test_csv_path(
tmp_path_factory: pytest.TempPathFactory,
) -> Generator[Path, None, None]:
"""Return test csv file path."""
@@ -76,6 +76,32 @@ def fixture_test_csv_path( # pylint: disable=too-many-locals
shutil.rmtree(tmp_path)
+@pytest.fixture(scope="session", name="test_vela_path")
+def fixture_test_vela_path(
+ tmp_path_factory: pytest.TempPathFactory,
+) -> Generator[Path, None, None]:
+ """Return test vela file path."""
+ tmp_path = tmp_path_factory.mktemp("vela_file")
+ yield tmp_path
+ shutil.rmtree(tmp_path)
+
+
+@pytest.fixture(scope="session", name="empty_vela_ini_file")
+def fixture_empty_vela_ini_file(
+ test_vela_path: Path,
+) -> Path:
+ """Return empty test vela file path."""
+ return test_vela_path / "empty_vela.ini"
+
+
+@pytest.fixture(scope="session", name="vela_ini_file")
+def fixture_vela_ini_file(
+ test_vela_path: Path,
+) -> Path:
+ """Return empty test vela file path."""
+ return test_vela_path / "vela.ini"
+
+
def get_test_keras_model() -> tf.keras.Model:
"""Return test Keras model."""
model = tf.keras.Sequential(
@@ -130,13 +156,8 @@ def fixture_test_models_path(
convert_to_tflite(keras_model, quantized=True, output_path=tflite_model_path)
# Vela-optimized TensorFlow Lite model (int8)
- tflite_vela_model = tmp_path / TEST_MODEL_TFLITE_VELA_FILE
target_config = EthosUConfiguration.load_profile("ethos-u55-256")
- optimize_model(
- tflite_model_path,
- target_config.compiler_options,
- tflite_vela_model,
- )
+ compile_model(tflite_model_path, target_config.compiler_options)
tf.saved_model.save(keras_model, str(tmp_path / TEST_MODEL_TF_SAVED_MODEL_FILE))
diff --git a/tests/test_backend_vela_compiler.py b/tests/test_backend_vela_compiler.py
index 5554efb..d5dc5cc 100644
--- a/tests/test_backend_vela_compiler.py
+++ b/tests/test_backend_vela_compiler.py
@@ -3,16 +3,22 @@
"""Tests for module vela/compiler."""
from pathlib import Path
from typing import Any
+from unittest.mock import MagicMock
import pytest
-from ethosu.vela.compiler_driver import TensorAllocator
-from ethosu.vela.scheduler import OptimizationStrategy
+from ethosu.vela.vela import main
-from mlia.backend.vela.compiler import optimize_model
-from mlia.backend.vela.compiler import OptimizedModel
+from mlia.backend.vela.compiler import compile_model
+from mlia.backend.vela.compiler import parse_summary_csv_file
+from mlia.backend.vela.compiler import parse_vela_initialisation_file
+from mlia.backend.vela.compiler import resolve_compiler_config
from mlia.backend.vela.compiler import VelaCompiler
from mlia.backend.vela.compiler import VelaCompilerOptions
+from mlia.backend.vela.compiler import VelaInitData
+from mlia.backend.vela.compiler import VelaInitMemoryData
+from mlia.backend.vela.compiler import VelaSummary
from mlia.target.ethos_u.config import EthosUConfiguration
+from mlia.utils.filesystem import recreate_directory
def test_default_vela_compiler() -> None:
@@ -26,52 +32,15 @@ def test_default_vela_compiler() -> None:
assert default_compiler.accelerator_config == "ethos-u55-256"
assert default_compiler.max_block_dependency == 3
assert default_compiler.arena_cache_size is None
- assert default_compiler.tensor_allocator == TensorAllocator.HillClimb
+ assert default_compiler.tensor_allocator == "HillClimb"
assert default_compiler.cpu_tensor_alignment == 16
- assert default_compiler.optimization_strategy == OptimizationStrategy.Performance
+ assert default_compiler.optimization_strategy == "Performance"
assert default_compiler.output_dir == Path("output")
- assert default_compiler.get_config() == {
- "accelerator_config": "ethos-u55-256",
- "system_config": "internal-default",
- "core_clock": 500000000.0,
- "axi0_port": "Sram",
- "axi1_port": "OffChipFlash",
- "memory_mode": "internal-default",
- "const_mem_area": "Axi1",
- "arena_mem_area": "Axi0",
- "cache_mem_area": "Axi0",
- "arena_cache_size": 4294967296,
- "permanent_storage_mem_area": "OffChipFlash",
- "feature_map_storage_mem_area": "Sram",
- "fast_storage_mem_area": "Sram",
- "memory_area": {
- "Sram": {
- "clock_scales": 1.0,
- "burst_length": 32,
- "read_latency": 32,
- "write_latency": 32,
- },
- "Dram": {
- "clock_scales": 1.0,
- "burst_length": 1,
- "read_latency": 0,
- "write_latency": 0,
- },
- "OnChipFlash": {
- "clock_scales": 1.0,
- "burst_length": 1,
- "read_latency": 0,
- "write_latency": 0,
- },
- "OffChipFlash": {
- "clock_scales": 0.125,
- "burst_length": 128,
- "read_latency": 64,
- "write_latency": 64,
- },
- },
- }
+ with pytest.raises(
+ ValueError, match="System Config: internal-default not present in vela.ini file"
+ ):
+ resolve_compiler_config(vela_compiler_options=default_compiler_options)
def test_vela_compiler_with_parameters(test_resources_path: Path) -> None:
@@ -98,52 +67,120 @@ def test_vela_compiler_with_parameters(test_resources_path: Path) -> None:
assert compiler.accelerator_config == "ethos-u65-256"
assert compiler.max_block_dependency == 1
assert compiler.arena_cache_size == 10
- assert compiler.tensor_allocator == TensorAllocator.Greedy
+ assert compiler.tensor_allocator == "Greedy"
assert compiler.cpu_tensor_alignment == 4
- assert compiler.optimization_strategy == OptimizationStrategy.Size
+ assert compiler.optimization_strategy == "Size"
assert compiler.output_dir == Path("custom_output")
- assert compiler.get_config() == {
- "accelerator_config": "ethos-u65-256",
- "system_config": "Ethos_U65_High_End",
- "core_clock": 1000000000.0,
- "axi0_port": "Sram",
- "axi1_port": "Dram",
- "memory_mode": "Shared_Sram",
- "const_mem_area": "Axi1",
- "arena_mem_area": "Axi0",
- "cache_mem_area": "Axi0",
- "arena_cache_size": 10,
- "permanent_storage_mem_area": "Dram",
- "feature_map_storage_mem_area": "Sram",
- "fast_storage_mem_area": "Sram",
- "memory_area": {
- "Sram": {
- "clock_scales": 1.0,
- "burst_length": 32,
- "read_latency": 32,
- "write_latency": 32,
- },
- "Dram": {
- "clock_scales": 0.234375,
- "burst_length": 128,
- "read_latency": 500,
- "write_latency": 250,
- },
- "OnChipFlash": {
- "clock_scales": 1.0,
- "burst_length": 1,
- "read_latency": 0,
- "write_latency": 0,
- },
- "OffChipFlash": {
- "clock_scales": 1.0,
- "burst_length": 1,
- "read_latency": 0,
- "write_latency": 0,
- },
- },
- }
+ assert resolve_compiler_config(
+ vela_compiler_options=compiler_options
+ ) == VelaInitData(
+ system_config="Ethos_U65_High_End",
+ core_clock=1000000000.0,
+ axi0_port="Sram",
+ axi1_port="Dram",
+ memory_mode="Shared_Sram",
+ const_mem_area="Axi1",
+ arena_mem_area="Axi0",
+ cache_mem_area="Axi0",
+ arena_cache_size=None,
+ sram_memory_data=VelaInitMemoryData(
+ clock_scale=1.0,
+ burst_length=32,
+ read_latency=32,
+ write_latency=32,
+ ),
+ dram_memory_data=VelaInitMemoryData(
+ clock_scale=0.234375,
+ burst_length=128,
+ read_latency=500,
+ write_latency=250,
+ ),
+ on_chip_flash_memory_data=VelaInitMemoryData(
+ clock_scale=None,
+ burst_length=None,
+ read_latency=None,
+ write_latency=None,
+ ),
+ off_chip_flash_memory_data=VelaInitMemoryData(
+ clock_scale=None,
+ burst_length=None,
+ read_latency=None,
+ write_latency=None,
+ ),
+ )
+
+
+def test_vela_compiler_with_parameters_inherit_memory_mode(
+ test_resources_path: Path,
+) -> None:
+ """Test creation of Vela compiler instance with non-default params
+ that inherits a memory mode.
+ """
+ vela_ini_path = str(test_resources_path / "vela/sample_vela.ini")
+
+ compiler_options = VelaCompilerOptions(
+ config_files=vela_ini_path,
+ system_config="Ethos_U65_High_End",
+ memory_mode="Dedicated_Sram_512KB",
+ accelerator_config="ethos-u65-256",
+ max_block_dependency=1,
+ arena_cache_size=10,
+ tensor_allocator="Greedy",
+ cpu_tensor_alignment=4,
+ optimization_strategy="Size",
+ output_dir=Path("custom_output"),
+ )
+ compiler = VelaCompiler(compiler_options)
+
+ assert compiler.config_files == vela_ini_path
+ assert compiler.system_config == "Ethos_U65_High_End"
+ assert compiler.memory_mode == "Dedicated_Sram_512KB"
+ assert compiler.accelerator_config == "ethos-u65-256"
+ assert compiler.max_block_dependency == 1
+ assert compiler.arena_cache_size == 10
+ assert compiler.tensor_allocator == "Greedy"
+ assert compiler.cpu_tensor_alignment == 4
+ assert compiler.optimization_strategy == "Size"
+ assert compiler.output_dir == Path("custom_output")
+
+ assert resolve_compiler_config(
+ vela_compiler_options=compiler_options
+ ) == VelaInitData(
+ system_config="Ethos_U65_High_End",
+ core_clock=1000000000.0,
+ axi0_port="Sram",
+ axi1_port="Dram",
+ memory_mode="Dedicated_Sram_512KB",
+ const_mem_area="Axi1",
+ arena_mem_area="Axi1",
+ cache_mem_area="Axi0",
+ arena_cache_size=524288,
+ sram_memory_data=VelaInitMemoryData(
+ clock_scale=1.0,
+ burst_length=32,
+ read_latency=32,
+ write_latency=32,
+ ),
+ dram_memory_data=VelaInitMemoryData(
+ clock_scale=0.234375,
+ burst_length=128,
+ read_latency=500,
+ write_latency=250,
+ ),
+ on_chip_flash_memory_data=VelaInitMemoryData(
+ clock_scale=None,
+ burst_length=None,
+ read_latency=None,
+ write_latency=None,
+ ),
+ off_chip_flash_memory_data=VelaInitMemoryData(
+ clock_scale=None,
+ burst_length=None,
+ read_latency=None,
+ write_latency=None,
+ ),
+ )
def test_compile_model(test_tflite_model: Path) -> None:
@@ -152,8 +189,17 @@ def test_compile_model(test_tflite_model: Path) -> None:
EthosUConfiguration.load_profile("ethos-u55-256").compiler_options
)
- optimized_model = compiler.compile_model(test_tflite_model)
- assert isinstance(optimized_model, OptimizedModel)
+ expected_model_path = Path(
+ compiler.output_dir.as_posix()
+ + "/"
+ + test_tflite_model.stem
+ + "_vela"
+ + test_tflite_model.suffix
+ )
+ vela_summary_data, optimized_model_path = compiler.compile_model(test_tflite_model)
+ assert isinstance(vela_summary_data, VelaSummary)
+ assert isinstance(optimized_model_path, Path)
+ assert expected_model_path == optimized_model_path
def test_csv_file_created(test_tflite_model: Path) -> None:
@@ -172,7 +218,7 @@ def test_verbose_flag_passed() -> None:
compiler = VelaCompiler(
EthosUConfiguration.load_profile("ethos-u55-256").compiler_options
)
- assert compiler.return_compiler_options().verbose_performance
+ assert compiler.verbose_performance
def test_compile_model_fail_sram_exceeded(
@@ -186,7 +232,7 @@ def test_compile_model_fail_sram_exceeded(
def fake_compiler(*_: Any) -> None:
print("Warning: SRAM target for arena memory area exceeded.")
- monkeypatch.setattr("mlia.backend.vela.compiler.compiler_driver", fake_compiler)
+ monkeypatch.setattr("mlia.backend.vela.compiler.main", fake_compiler)
with pytest.raises(Exception) as exc_info:
compiler.compile_model(test_tflite_model)
@@ -195,12 +241,424 @@ def test_compile_model_fail_sram_exceeded(
def test_optimize_model(tmp_path: Path, test_tflite_model: Path) -> None:
"""Test model optimization and saving into file."""
- tmp_file = tmp_path / "temp.tflite"
-
+ tmp_file = tmp_path / "test_model_int8_vela.tflite"
target_config = EthosUConfiguration.load_profile("ethos-u55-256")
- optimize_model(
- test_tflite_model, target_config.compiler_options, tmp_file.absolute()
- )
+ target_config.compiler_options.output_dir = tmp_path
+ compile_model(test_tflite_model, target_config.compiler_options)
assert tmp_file.is_file()
assert tmp_file.stat().st_size > 0
+
+
+SUMMARY_TMP_DATA = """
+experiment,network,accelerator_configuration,system_config,memory_mode,core_clock,arena_cache_size,sram_bandwidth,dram_bandwidth,on_chip_flash_bandwidth,off_chip_flash_bandwidth,weights_storage_area,feature_map_storage_area,inferences_per_second,batch_size,inference_time,passes_before_fusing,passes_after_fusing,sram_memory_used,dram_memory_used,on_chip_flash_memory_used,off_chip_flash_memory_used,total_original_weights,total_npu_encoded_weights,sram_feature_map_read_bytes,sram_feature_map_write_bytes,sram_weight_read_bytes,sram_weight_write_bytes,sram_total_bytes,dram_feature_map_read_bytes,dram_feature_map_write_bytes,dram_weight_read_bytes,dram_weight_write_bytes,dram_total_bytes,on_chip_flash_feature_map_read_bytes,on_chip_flash_feature_map_write_bytes,on_chip_flash_weight_read_bytes,on_chip_flash_weight_write_bytes,on_chip_flash_total_bytes,off_chip_flash_feature_map_read_bytes,off_chip_flash_feature_map_write_bytes,off_chip_flash_weight_read_bytes,off_chip_flash_weight_write_bytes,off_chip_flash_total_bytes,nn_macs,nn_tops,cycles_npu,cycles_sram_access,cycles_dram_access,cycles_on_chip_flash_access,cycles_off_chip_flash_access,cycles_total
+default,test_model_fp32,Ethos_U55_256,Ethos_U55_High_End_Embedded,Shared_Sram,0.0,0.9,4.0,4.0,4.0,0.5,Off-chip Flash,SRAM,0.0,1,12.1e-05,7,2.0,1.5,0.0,0.0,1.4,7,8,6.0,5.0,7552.0,5.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,1.0,2,0.1,23297.0,1.5,0.0,0.0,1.0,2
+""".strip()
+
+SUMMARY_TMP_DATA_MISSING_HEADER = """
+experiment,network,accelerator_configuration,system_config,memory_mode,core_clock,arena_cache_size,sram_bandwidth,dram_bandwidth,on_chip_flash_bandwidth,off_chip_flash_bandwidth,weights_storage_area,feature_map_storage_area,inferences_per_second,batch_size,inference_time,passes_before_fusing,passes_after_fusing,sram_memory_used,dram_memory_used,on_chip_flash_memory_used,off_chip_flash_memory_used,total_original_weights,total_npu_encoded_weights,sram_feature_map_read_bytes,sram_feature_map_write_bytes,sram_weight_read_bytes,sram_weight_write_bytes,sram_total_bytes,dram_feature_map_read_bytes,dram_feature_map_write_bytes,dram_weight_read_bytes,dram_weight_write_bytes,dram_total_bytes,on_chip_flash_feature_map_read_bytes,on_chip_flash_feature_map_write_bytes,on_chip_flash_weight_read_bytes,on_chip_flash_weight_write_bytes,on_chip_flash_total_bytes,off_chip_flash_feature_map_read_bytes,off_chip_flash_feature_map_write_bytes,off_chip_flash_weight_read_bytes,off_chip_flash_weight_write_bytes,off_chip_flash_total_bytes,nn_macs,nn_tops,cycles_npu,cycles_sram_access,cycles_dram_access,cycles_on_chip_flash_access,cycles_off_chip_flash_access
+default,test_model_fp32,Ethos_U55_256,Ethos_U55_High_End_Embedded,Shared_Sram,0.0,0.9,4.0,4.0,4.0,0.5,Off-chip Flash,SRAM,0.0,1,12.1e-05,7,2.0,1.5,0.0,0.0,1.4,7,8,6.0,5.0,7552.0,5.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,1.0,2,0.1,23297.0,1.5,0.0,0.0,1.0
+""".strip()
+
+TMP_DATA_EXPECTED_STRING = "\
+cycles_total: 2.0, \
+cycles_npu: 23297.0, \
+cycles_sram_access: 1.5, \
+cycles_dram_access: 0.0, \
+cycles_on_chip_flash_access: 0.0, \
+cycles_off_chip_flash_access: 1.0, \
+core_clock: 0.0, \
+dram_memory_used: 0.0, \
+sram_memory_used: 1.5, \
+on_chip_flash_memory_used: 0.0, \
+off_chip_flash_memory_used: 1.4, \
+batch_size: 1, \
+memory_mode: Shared_Sram, \
+system_config: Ethos_U55_High_End_Embedded, \
+accelerator_configuration: Ethos_U55_256, \
+arena_cache_size: 0.9, \
+"
+
+
+def test_backend_compiler_parse_summary_csv_file(test_csv_file: Path) -> None:
+ """Test that parsing a csv file produces a LayerwisePerfInfo object."""
+ with open(test_csv_file, "w", encoding="utf8") as csv_file:
+ csv_file.write(SUMMARY_TMP_DATA)
+ summary_object = parse_summary_csv_file(test_csv_file)
+ strings_to_check = repr(summary_object)
+ assert isinstance(summary_object, VelaSummary)
+ assert TMP_DATA_EXPECTED_STRING == strings_to_check
+
+
+def test_backend_compiler_summary_csv_parsed_empty(empty_test_csv_file: Path) -> None:
+ """Test that ensures when we have an empty
+ CSV file we get None as backend data.
+ """
+ empty_test_csv_file.touch()
+ with pytest.raises(RuntimeError, match="Generated Vela Summary CSV is empty"):
+ parse_summary_csv_file(empty_test_csv_file)
+
+
+def test_backend_compiler_summary_csv_parsed_missing_headers(
+ test_csv_file: Path,
+) -> None:
+ """Test that ensures a KeyError
+ is raised when a csv with missing
+ expected headers is parsed.
+ """
+ with open(test_csv_file, "w", encoding="utf8") as csv_file:
+ csv_file.write(SUMMARY_TMP_DATA_MISSING_HEADER)
+ with pytest.raises(
+ KeyError,
+ match="Generated Vela Summary CSV missing expected header: cycles_total.", # pylint: disable=line-too-long
+ ):
+ parse_summary_csv_file(test_csv_file)
+
+
+def test_backend_compiler_summary_csv_parsed_missing_file() -> None:
+ """Test that ensures a FileNotFoundError
+ is raised when a non-existent csv file is parsed.
+ """
+ with pytest.raises(
+ FileNotFoundError, match="CSV File not found at missing_file.csv"
+ ):
+ parse_summary_csv_file(Path("missing_file.csv"))
+
+
+def test_backend_compiler_parsing_vela_ini_file_missing_init_file() -> None:
+ """Test that ensures a FileNotFoundError
+ is raised when a non-existent ini file is parsed.
+ """
+ with pytest.raises(
+ FileNotFoundError,
+ match="Vela Initialisation File not found at missing_init_file.ini",
+ ):
+ parse_vela_initialisation_file(
+ Path("missing_init_file.ini"), "internal-default", "internal-default"
+ )
+
+
+def test_backend_compiler_parsing_vela_ini_file_empty_init_file(
+ empty_vela_ini_file: Path,
+) -> None:
+ """Test that ensures a OSError
+ is raised when an empty vela.ini file is parsed.
+ """
+ empty_vela_ini_file.touch()
+ with pytest.raises(OSError, match="vela.ini File Is Empty"):
+ parse_vela_initialisation_file(
+ empty_vela_ini_file, "internal-default", "internal-default"
+ )
+
+
+@pytest.mark.parametrize(
+ "input_str",
+ [
+ """
+; SPDX-FileCopyrightText: Copyright 2022, Arm Limited and/or its affiliates.
+; SPDX-License-Identifier: Apache-2.0
+; Ethos-U55 High-End Embedded: SRAM (4 GB/s) and Flash (0.5 GB/s)
+[System_Config.Ethos_U55_High_End_Embedded]
+core_clock=500e6
+axi0_port=Sram
+axi1_port=OffChipFlash
+Sram_clock_scale=1.0
+Sram_burst_length=32
+Sram_read_latency=32
+Sram_write_latency=32
+OffChipFlash_clock_scale=0.125
+OffChipFlash_burst_length=128
+OffChipFlash_read_latency=64
+OffChipFlash_write_latency=64
+
+; Ethos-U65 High-End: SRAM (16 GB/s) and DRAM (3.75 GB/s)
+[System_Config.Ethos_U65_High_End]
+core_clock=1e9
+axi0_port=Sram
+axi1_port=Dram
+Sram_clock_scale=1.0
+Sram_burst_length=32
+Sram_read_latency=32
+Sram_write_latency=32
+Dram_clock_scale=0.234375
+Dram_burst_length=128
+Dram_read_latency=500
+Dram_write_latency=250
+"""
+ ],
+)
+def test_backend_compiler_parsing_vela_ini_file_missing_memory_modes(
+ vela_ini_file: Path,
+ input_str: str,
+) -> None:
+ """Test that ensures a IndexError
+ is raised when a vela.ini file with no memory modes
+ is parsed.
+ """
+ with open(vela_ini_file, "w", encoding="utf8") as vela_file:
+ vela_file.write(input_str)
+ with pytest.raises(
+ IndexError, match="No memory modes are present in vela.ini file."
+ ):
+ parse_vela_initialisation_file(
+ vela_ini_file, "Ethos_U65_High_End", "Shared_Sram"
+ )
+
+
+@pytest.mark.parametrize(
+ "input_str",
+ [
+ """
+; -----------------------------------------------------------------------------
+; Memory Mode
+
+; Shared SRAM: the SRAM is shared between the Ethos-U and the Cortex-M software
+; The non-SRAM memory is assumed to be read-only
+[Memory_Mode.Shared_Sram]
+const_mem_area=Axi1
+arena_mem_area=Axi0
+cache_mem_area=Axi0
+
+; The SRAM (384KB) is only for use by the Ethos-U
+; The non-SRAM memory is assumed to be read-writeable
+[Memory_Mode.Dedicated_Sram]
+const_mem_area=Axi1
+arena_mem_area=Axi1
+cache_mem_area=Axi0
+arena_cache_size=393216
+
+"""
+ ],
+)
+def test_backend_compiler_parsing_vela_ini_file_missing_system_configs(
+ vela_ini_file: Path,
+ input_str: str,
+) -> None:
+ """Test that ensures a IndexError
+ is raised when a vela.ini file with no system configs
+ is parsed.
+ """
+ with open(vela_ini_file, "w", encoding="utf8") as vela_file:
+ vela_file.write(input_str)
+ with pytest.raises(
+ IndexError, match="No system configs are present in vela.ini file."
+ ):
+ parse_vela_initialisation_file(
+ vela_ini_file, "Ethos_U65_High_End", "Shared_Sram"
+ )
+
+
+@pytest.mark.parametrize(
+ "input_str",
+ [
+ """
+; SPDX-FileCopyrightText: Copyright 2022, Arm Limited and/or its affiliates.
+; SPDX-License-Identifier: Apache-2.0
+; Ethos-U55 High-End Embedded: SRAM (4 GB/s) and Flash (0.5 GB/s)
+[System_Config.Ethos_U55_High_End_Embedded]
+core_clock=500e6
+axi0_port=Sram
+axi1_port=OffChipFlash
+Sram_clock_scale=1.0
+Sram_burst_length=32
+Sram_read_latency=32
+Sram_write_latency=32
+OffChipFlash_clock_scale=0.125
+OffChipFlash_burst_length=128
+OffChipFlash_read_latency=64
+OffChipFlash_write_latency=64
+
+; Ethos-U65 High-End: SRAM (16 GB/s) and DRAM (3.75 GB/s)
+[System_Config.Ethos_U65_High_End]
+core_clock=1e9
+axi0_port=Sram
+axi1_port=Dram
+Sram_clock_scale=1.0
+Sram_burst_length=32
+Sram_read_latency=32
+Sram_write_latency=32
+Dram_clock_scale=0.234375
+Dram_burst_length=128
+Dram_read_latency=500
+Dram_write_latency=250
+
+; -----------------------------------------------------------------------------
+; Memory Mode
+
+; Shared SRAM: the SRAM is shared between the Ethos-U and the Cortex-M software
+; The non-SRAM memory is assumed to be read-only
+[Memory_Mode.Shared_Sram]
+const_mem_area=Axi1
+arena_mem_area=Axi0
+cache_mem_area=Axi0
+
+"""
+ ],
+)
+def test_backend_compiler_parsing_vela_ini_file_missing_specific_memory_mode(
+ vela_ini_file: Path,
+ input_str: str,
+) -> None:
+ """Test that ensures a ValueError
+ is raised when a vela.ini file with specific missing memory mode
+ is parsed.
+ """
+ with open(vela_ini_file, "w", encoding="utf8") as vela_file:
+ vela_file.write(input_str)
+ with pytest.raises(
+ ValueError, match="Memory Mode: Dedicated_Sram not present in vela.ini file."
+ ):
+ parse_vela_initialisation_file(
+ vela_ini_file, "Ethos_U65_High_End", "Dedicated_Sram"
+ )
+
+
+@pytest.mark.parametrize(
+ "input_str",
+ [
+ """
+; SPDX-FileCopyrightText: Copyright 2022, Arm Limited and/or its affiliates.
+; SPDX-License-Identifier: Apache-2.0
+; Ethos-U55 High-End Embedded: SRAM (4 GB/s) and Flash (0.5 GB/s)
+[System_Config.Ethos_U55_High_End_Embedded]
+core_clock=500e6
+axi0_port=Sram
+axi1_port=OffChipFlash
+Sram_clock_scale=1.0
+Sram_burst_length=32
+Sram_read_latency=32
+Sram_write_latency=32
+OffChipFlash_clock_scale=0.125
+OffChipFlash_burst_length=128
+OffChipFlash_read_latency=64
+OffChipFlash_write_latency=64
+
+; -----------------------------------------------------------------------------
+; Memory Mode
+
+; Shared SRAM: the SRAM is shared between the Ethos-U and the Cortex-M software
+; The non-SRAM memory is assumed to be read-only
+[Memory_Mode.Shared_Sram]
+const_mem_area=Axi1
+arena_mem_area=Axi0
+cache_mem_area=Axi0
+
+; The SRAM (384KB) is only for use by the Ethos-U
+; The non-SRAM memory is assumed to be read-writeable
+[Memory_Mode.Dedicated_Sram]
+const_mem_area=Axi1
+arena_mem_area=Axi1
+cache_mem_area=Axi0
+arena_cache_size=393216
+
+"""
+ ],
+)
+def test_backend_compiler_parsing_vela_ini_file_missing_specific_system_config(
+ vela_ini_file: Path,
+ input_str: str,
+) -> None:
+ """Test that ensures a ValueError
+ is raised when a vela.ini file with specific missing system config
+ is parsed.
+ """
+ with open(vela_ini_file, "w", encoding="utf8") as vela_file:
+ vela_file.write(input_str)
+ with pytest.raises(
+ ValueError,
+ match="System Config: Ethos_U65_High_End not present in vela.ini file.",
+ ):
+ parse_vela_initialisation_file(
+ vela_ini_file, "Ethos_U65_High_End", "Shared_Sram"
+ )
+
+
+@pytest.mark.parametrize(
+ "input_str",
+ [
+ """
+; SPDX-FileCopyrightText: Copyright 2022, Arm Limited and/or its affiliates.
+; SPDX-License-Identifier: Apache-2.0
+; Ethos-U55 High-End Embedded: SRAM (4 GB/s) and Flash (0.5 GB/s)
+[System_Config.Ethos_U55_High_End_Embedded]
+axi0_port=Sram
+axi1_port=OffChipFlash
+Sram_clock_scale=1.0
+Sram_burst_length=32
+Sram_read_latency=32
+Sram_write_latency=32
+OffChipFlash_clock_scale=0.125
+OffChipFlash_burst_length=128
+OffChipFlash_read_latency=64
+OffChipFlash_write_latency=64
+
+; -----------------------------------------------------------------------------
+; Memory Mode
+
+; Shared SRAM: the SRAM is shared between the Ethos-U and the Cortex-M software
+; The non-SRAM memory is assumed to be read-only
+[Memory_Mode.Shared_Sram]
+const_mem_area=Axi1
+arena_mem_area=Axi0
+cache_mem_area=Axi0
+
+; The SRAM (384KB) is only for use by the Ethos-U
+; The non-SRAM memory is assumed to be read-writeable
+[Memory_Mode.Dedicated_Sram]
+const_mem_area=Axi1
+arena_mem_area=Axi1
+cache_mem_area=Axi0
+arena_cache_size=393216
+
+"""
+ ],
+)
+def test_backend_compiler_parsing_vela_ini_file_missing_header(
+ vela_ini_file: Path,
+ input_str: str,
+) -> None:
+ """Test that ensures a KeyError
+ is raised when a vela.ini file with a missing header
+ is parsed.
+ """
+ with open(vela_ini_file, "w", encoding="utf8") as vela_file:
+ vela_file.write(input_str)
+ with pytest.raises(
+ KeyError, match="Vela.ini file missing expected header: core_clock"
+ ):
+ parse_vela_initialisation_file(
+ vela_ini_file, "Ethos_U55_High_End_Embedded", "Shared_Sram"
+ )
+
+
+def test_backend_compiler_model_already_compiled(
+ test_tflite_model: Path, monkeypatch: pytest.MonkeyPatch
+) -> None:
+ """Test that if we try compile a model twice,
+ the correct flag is passed and that main is called only once.
+ """
+ target_config = EthosUConfiguration.load_profile("ethos-u55-256")
+ recreate_directory(Path(target_config.compiler_options.output_dir))
+
+ main_mock = MagicMock(side_effect=main)
+ monkeypatch.setattr("mlia.backend.vela.compiler.main", main_mock)
+ compile_model(test_tflite_model, target_config.compiler_options)
+
+ def vela_compiler_compile_model_mock(
+ model_path: Path, *_: Any
+ ) -> tuple[None, Path]:
+ return None, Path(
+ Path(target_config.compiler_options.output_dir).as_posix()
+ + "/"
+ + model_path.stem
+ + "_vela"
+ + model_path.suffix
+ )
+
+ compiler_mock = MagicMock(side_effect=vela_compiler_compile_model_mock)
+ monkeypatch.setattr(
+ "mlia.backend.vela.compiler.VelaCompiler.compile_model", compiler_mock
+ )
+ compile_model(test_tflite_model, target_config.compiler_options)
+ main_mock.assert_called_once()
+ compiler_mock.assert_called_once_with(test_tflite_model, True)
diff --git a/tests/test_backend_vela_performance.py b/tests/test_backend_vela_performance.py
index 5800630..b4f8d4c 100644
--- a/tests/test_backend_vela_performance.py
+++ b/tests/test_backend_vela_performance.py
@@ -6,13 +6,14 @@ from unittest.mock import MagicMock
import pytest
-from mlia.backend.vela.compiler import optimize_model
+from mlia.backend.vela.compiler import compile_model
from mlia.backend.vela.performance import estimate_performance
from mlia.backend.vela.performance import layer_metrics
from mlia.backend.vela.performance import LayerwisePerfInfo
from mlia.backend.vela.performance import parse_layerwise_perf_csv
from mlia.backend.vela.performance import PerformanceMetrics
from mlia.target.ethos_u.config import EthosUConfiguration
+from mlia.utils.filesystem import recreate_directory
def test_estimate_performance(test_tflite_model: Path) -> None:
@@ -142,24 +143,6 @@ def test_estimate_performance_parse_layerwise_empty_csv_file(
assert len(layerwise_object.layerwise_info) == 0
-def test_estimate_performance_already_optimized(
- tmp_path: Path, test_tflite_model: Path
-) -> None:
- """Test that performance estimation should fail for already optimized model."""
- target_config = EthosUConfiguration.load_profile("ethos-u55-256")
-
- optimized_model_path = tmp_path / "optimized_model.tflite"
-
- optimize_model(
- test_tflite_model, target_config.compiler_options, optimized_model_path
- )
-
- with pytest.raises(
- Exception, match="Unable to estimate performance for the given optimized model"
- ):
- estimate_performance(optimized_model_path, target_config.compiler_options)
-
-
def test_read_invalid_model(test_tflite_invalid_model: Path) -> None:
"""Test that reading invalid model should fail with exception."""
with pytest.raises(
@@ -173,16 +156,18 @@ def test_compile_invalid_model(
test_tflite_model: Path, monkeypatch: pytest.MonkeyPatch, tmp_path: Path
) -> None:
"""Test that if model could not be compiled then correct exception raised."""
+
mock_compiler = MagicMock()
mock_compiler.side_effect = Exception("Bad model!")
- monkeypatch.setattr("mlia.backend.vela.compiler.compiler_driver", mock_compiler)
+ monkeypatch.setattr("mlia.backend.vela.compiler.main", mock_compiler)
model_path = tmp_path / "optimized_model.tflite"
with pytest.raises(
Exception, match="Model could not be optimized with Vela compiler"
):
target_config = EthosUConfiguration.load_profile("ethos-u55-256")
- optimize_model(test_tflite_model, target_config.compiler_options, model_path)
+ recreate_directory(Path(target_config.compiler_options.output_dir))
+ compile_model(test_tflite_model, target_config.compiler_options)
assert not model_path.exists()
diff --git a/tests/test_cli_commands.py b/tests/test_cli_commands.py
index 480e642..1ce793f 100644
--- a/tests/test_cli_commands.py
+++ b/tests/test_cli_commands.py
@@ -207,7 +207,7 @@ def mock_performance_estimation(monkeypatch: pytest.MonkeyPatch) -> None:
metrics = PerformanceMetrics(
EthosUConfiguration.load_profile("ethos-u55-256"),
NPUCycles(1, 2, 3, 4, 5, 6),
- MemoryUsage(1, 2, 3, 4, 5),
+ MemoryUsage(1, 2, 3, 4),
LayerwisePerfInfo(layerwise_info=[]),
)
monkeypatch.setattr(
diff --git a/tests/test_core_context.py b/tests/test_core_context.py
index 0810ad0..9eb3d63 100644
--- a/tests/test_core_context.py
+++ b/tests/test_core_context.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright 2022-2023, Arm Limited and/or its affiliates.
+# SPDX-FileCopyrightText: Copyright 2022-2024, Arm Limited and/or its affiliates.
# SPDX-License-Identifier: Apache-2.0
"""Tests for the module context."""
from __future__ import annotations
@@ -59,7 +59,6 @@ def test_execution_context(tmp_path: Path) -> None:
event_publisher=publisher,
verbose=True,
logs_dir="logs_directory",
- models_dir="models_directory",
output_format="json",
)
@@ -74,7 +73,7 @@ def test_execution_context(tmp_path: Path) -> None:
assert context.event_handlers == []
assert context.event_publisher == publisher
assert context.logs_path == output_dir / "logs_directory"
- expected_model_path = output_dir / "models_directory/sample.model"
+ expected_model_path = output_dir / "sample.model"
assert context.get_model_path("sample.model") == expected_model_path
assert context.verbose is True
assert context.output_format == "json"
@@ -107,7 +106,7 @@ def test_execution_context_with_default_params(tmp_path: Path) -> None:
assert context_with_default_params.logs_path == output_dir / "logs"
default_model_path = context_with_default_params.get_model_path("sample.model")
- expected_default_model_path = output_dir / "models/sample.model"
+ expected_default_model_path = output_dir / "sample.model"
assert default_model_path == expected_default_model_path
assert context_with_default_params.output_format == "plain_text"
diff --git a/tests/test_target_ethos_u_data_analysis.py b/tests/test_target_ethos_u_data_analysis.py
index 3cddf10..0add7c2 100644
--- a/tests/test_target_ethos_u_data_analysis.py
+++ b/tests/test_target_ethos_u_data_analysis.py
@@ -98,7 +98,7 @@ def test_perf_metrics_diff() -> None:
cast(EthosUConfiguration, profile("ethos-u55-256")),
NPUCycles(1, 2, 3, 4, 5, 6),
# memory metrics are in kilobytes
- MemoryUsage(*[i * 1024 for i in range(1, 6)]), # type: ignore
+ MemoryUsage(*list(range(1, 5))), # type: ignore
LayerwisePerfInfo(layerwise_info=[]),
),
[
@@ -110,9 +110,7 @@ def test_perf_metrics_diff() -> None:
cast(EthosUConfiguration, profile("ethos-u55-256")),
NPUCycles(1, 2, 3, 4, 5, 6),
# memory metrics are in kilobytes
- MemoryUsage(
- *[i * 1024 for i in range(1, 6)] # type: ignore
- ),
+ MemoryUsage(*list(range(1, 5))), # type: ignore
LayerwisePerfInfo(layerwise_info=[]),
),
],
@@ -128,8 +126,8 @@ def test_perf_metrics_diff() -> None:
opt_diffs={
"sram": PerfMetricDiff(1.0, 1.0),
"dram": PerfMetricDiff(2.0, 2.0),
- "on_chip_flash": PerfMetricDiff(4.0, 4.0),
- "off_chip_flash": PerfMetricDiff(5.0, 5.0),
+ "on_chip_flash": PerfMetricDiff(3.0, 3.0),
+ "off_chip_flash": PerfMetricDiff(4.0, 4.0),
"npu_total_cycles": PerfMetricDiff(3, 3),
},
)
@@ -143,7 +141,7 @@ def test_perf_metrics_diff() -> None:
cast(EthosUConfiguration, profile("ethos-u55-256")),
NPUCycles(1, 2, 3, 4, 5, 6),
# memory metrics are in kilobytes
- MemoryUsage(*[i * 1024 for i in range(1, 6)]), # type: ignore
+ MemoryUsage(*list(range(1, 5))), # type: ignore
LayerwisePerfInfo(layerwise_info=[]),
),
[],
diff --git a/tests/test_target_ethos_u_data_collection.py b/tests/test_target_ethos_u_data_collection.py
index 3868b95..e034884 100644
--- a/tests/test_target_ethos_u_data_collection.py
+++ b/tests/test_target_ethos_u_data_collection.py
@@ -162,7 +162,7 @@ def mock_performance_estimation(
metrics = PerformanceMetrics(
target,
NPUCycles(1, 2, 3, 4, 5, 6),
- MemoryUsage(1, 2, 3, 4, 5),
+ MemoryUsage(1, 2, 3, 4),
LayerwisePerfInfo(layerwise_info=[]),
)
monkeypatch.setattr(
diff --git a/tests/test_target_ethos_u_performance.py b/tests/test_target_ethos_u_performance.py
index 76860b5..3042265 100644
--- a/tests/test_target_ethos_u_performance.py
+++ b/tests/test_target_ethos_u_performance.py
@@ -1,24 +1,10 @@
-# SPDX-FileCopyrightText: Copyright 2022, Arm Limited and/or its affiliates.
+# SPDX-FileCopyrightText: Copyright 2022-2024, Arm Limited and/or its affiliates.
# SPDX-License-Identifier: Apache-2.0
"""Performance estimation tests."""
from unittest.mock import MagicMock
import pytest
-from mlia.target.ethos_u.performance import MemorySizeType
-from mlia.target.ethos_u.performance import MemoryUsage
-
-
-def test_memory_usage_conversion() -> None:
- """Test MemoryUsage objects conversion."""
- memory_usage_in_kb = MemoryUsage(1, 2, 3, 4, 5, MemorySizeType.KILOBYTES)
- assert memory_usage_in_kb.in_kilobytes() == memory_usage_in_kb
-
- memory_usage_in_bytes = MemoryUsage(
- 1 * 1024, 2 * 1024, 3 * 1024, 4 * 1024, 5 * 1024
- )
- assert memory_usage_in_bytes.in_kilobytes() == memory_usage_in_kb
-
def mock_performance_estimation(monkeypatch: pytest.MonkeyPatch) -> None:
"""Mock performance estimation."""
diff --git a/tests/test_target_ethos_u_reporters.py b/tests/test_target_ethos_u_reporters.py
index 6dff6e1..cfee86d 100644
--- a/tests/test_target_ethos_u_reporters.py
+++ b/tests/test_target_ethos_u_reporters.py
@@ -41,7 +41,6 @@ from mlia.utils.console import remove_ascii_codes
memory_usage=MemoryUsage(
sram_memory_area_size=10,
dram_memory_area_size=0,
- unknown_memory_area_size=0,
on_chip_flash_memory_area_size=0,
off_chip_flash_memory_area_size=20,
memory_size_type=MemorySizeType.KILOBYTES,
@@ -140,7 +139,6 @@ Layer-Wise Metrics:
memory_usage=MemoryUsage(
sram_memory_area_size=10,
dram_memory_area_size=0,
- unknown_memory_area_size=0,
on_chip_flash_memory_area_size=0,
off_chip_flash_memory_area_size=20,
memory_size_type=MemorySizeType.KILOBYTES,
@@ -362,7 +360,6 @@ def test_report_operators(
Const mem area Axi1
Arena mem area Axi0
Cache mem area Axi0
- Arena cache size 2,096,768 bytes
System config Ethos_U55_High_End_Embedded
Accelerator clock 500,000,000 Hz
@@ -376,28 +373,11 @@ def test_report_operators(
Read latency 32 cycles
Write latency 32 cycles
- Dram:
- Clock scales 1.0
- Burst length 1 byte
- Read latency 0 cycles
- Write latency 0 cycles
-
- OnChipFlash:
- Clock scales 1.0
- Burst length 1 byte
- Read latency 0 cycles
- Write latency 0 cycles
-
OffChipFlash:
Clock scales 0.125
Burst length 128 bytes
Read latency 64 cycles
- Write latency 64 cycles
-
- Architecture settings:
- Permanent storage mem area OffChipFlash
- Feature map storage mem area Sram
- Fast storage mem area Sram""",
+ Write latency 64 cycles""",
{
"target": {
"target": "ethos-u55",
@@ -406,7 +386,6 @@ def test_report_operators(
"const_mem_area": "Axi1",
"arena_mem_area": "Axi0",
"cache_mem_area": "Axi0",
- "arena_cache_size": {"value": 2096768, "unit": "bytes"},
},
"system_config": {
"accelerator_clock": {"value": 500000000.0, "unit": "Hz"},
@@ -419,18 +398,6 @@ def test_report_operators(
"read_latency": {"value": 32, "unit": "cycles"},
"write_latency": {"value": 32, "unit": "cycles"},
},
- "Dram": {
- "clock_scales": 1.0,
- "burst_length": {"value": 1, "unit": "byte"},
- "read_latency": {"value": 0, "unit": "cycles"},
- "write_latency": {"value": 0, "unit": "cycles"},
- },
- "OnChipFlash": {
- "clock_scales": 1.0,
- "burst_length": {"value": 1, "unit": "byte"},
- "read_latency": {"value": 0, "unit": "cycles"},
- "write_latency": {"value": 0, "unit": "cycles"},
- },
"OffChipFlash": {
"clock_scales": 0.125,
"burst_length": {"value": 128, "unit": "bytes"},
@@ -439,11 +406,6 @@ def test_report_operators(
},
},
},
- "arch_settings": {
- "permanent_storage_mem_area": "OffChipFlash",
- "feature_map_storage_mem_area": "Sram",
- "fast_storage_mem_area": "Sram",
- },
}
},
],