diff options
author | Nathan Bailey <nathan.bailey@arm.com> | 2024-01-26 14:19:52 +0000 |
---|---|---|
committer | Nathan Bailey <nathan.bailey@arm.com> | 2024-02-23 15:23:06 +0000 |
commit | e506c7bd0453cb204ec7a59267fe3982492aaed6 (patch) | |
tree | 225b70ede952e6dfe0b33fcc3813106bb8828e33 /src | |
parent | d10b53a358d7fddc2e5a818d146b71bc5bb5e0ed (diff) | |
download | mlia-e506c7bd0453cb204ec7a59267fe3982492aaed6.tar.gz |
refactor: Migrate from Vela's internal code to CSV summary
Removes Vela internal definitions from the Vela backend's compiler.py and performance.py
Replaces calls into Vela's internal code with data read from the summary CSV that Vela generates
Resolves: MLIA-1024
Signed-off-by: Nathan Bailey <nathan.bailey@arm.com>
Change-Id: I569878f2936767f70c0255919ca40d1969275529
Diffstat (limited to 'src')
-rw-r--r-- | src/mlia/backend/vela/compiler.py | 552 | ||||
-rw-r--r-- | src/mlia/backend/vela/performance.py | 85 | ||||
-rw-r--r-- | src/mlia/core/context.py | 11 | ||||
-rw-r--r-- | src/mlia/core/reporting.py | 4 | ||||
-rw-r--r-- | src/mlia/resources/vela/vela.ini | 51 | ||||
-rw-r--r-- | src/mlia/target/ethos_u/advisor.py | 4 | ||||
-rw-r--r-- | src/mlia/target/ethos_u/config.py | 5 | ||||
-rw-r--r-- | src/mlia/target/ethos_u/data_analysis.py | 6 | ||||
-rw-r--r-- | src/mlia/target/ethos_u/performance.py | 48 | ||||
-rw-r--r-- | src/mlia/target/ethos_u/reporters.py | 87 |
10 files changed, 534 insertions, 319 deletions
diff --git a/src/mlia/backend/vela/compiler.py b/src/mlia/backend/vela/compiler.py index fe9e365..211721a 100644 --- a/src/mlia/backend/vela/compiler.py +++ b/src/mlia/backend/vela/compiler.py @@ -3,36 +3,156 @@ """Vela compiler wrapper module.""" from __future__ import annotations +import csv import logging +import re import sys from dataclasses import dataclass +from dataclasses import fields from io import StringIO from pathlib import Path -from typing import Any from typing import Literal -from ethosu.vela.architecture_features import ArchitectureFeatures -from ethosu.vela.compiler_driver import compiler_driver -from ethosu.vela.compiler_driver import CompilerOptions -from ethosu.vela.compiler_driver import TensorAllocator from ethosu.vela.model_reader import ModelReaderOptions from ethosu.vela.model_reader import read_model from ethosu.vela.nn_graph import Graph from ethosu.vela.nn_graph import NetworkType from ethosu.vela.operation import CustomType -from ethosu.vela.scheduler import OptimizationStrategy -from ethosu.vela.scheduler import SchedulerOptions -from ethosu.vela.tensor import BandwidthDirection -from ethosu.vela.tensor import MemArea -from ethosu.vela.tensor import Tensor -from ethosu.vela.tflite_writer import write_tflite +from ethosu.vela.vela import main +from mlia.utils.filesystem import get_vela_config from mlia.utils.logging import redirect_output +from mlia.utils.logging import redirect_raw_output logger = logging.getLogger(__name__) @dataclass +class VelaInitMemoryData: + """Memory Data from vela.ini.""" + + clock_scale: float | None + burst_length: int | None + read_latency: int | None + write_latency: int | None + + +@dataclass +class VelaInitData: # pylint: disable=too-many-instance-attributes + """Data gathered from the vela.ini file we provide to vela.""" + + system_config: str + core_clock: float + axi0_port: str + axi1_port: str + sram_memory_data: VelaInitMemoryData + dram_memory_data: VelaInitMemoryData + 
off_chip_flash_memory_data: VelaInitMemoryData + on_chip_flash_memory_data: VelaInitMemoryData + memory_mode: str + const_mem_area: str + arena_mem_area: str + cache_mem_area: str + arena_cache_size: int | None + + +@dataclass +class VelaSummary: # pylint: disable=too-many-instance-attributes + """Data gathered from the summary CSV file that Vela produces.""" + + cycles_total: float + cycles_npu: float + cycles_sram_access: float + cycles_dram_access: float + cycles_on_chip_flash_access: float + cycles_off_chip_flash_access: float + core_clock: float + dram_memory_used: float + sram_memory_used: float + on_chip_flash_memory_used: float + off_chip_flash_memory_used: float + batch_size: int + memory_mode: str + system_config: str + accelerator_configuration: str + arena_cache_size: float + + def __repr__(self) -> str: + """Return String Representation of VelaSummary object.""" + header_values = dict(summary_metrics) + string_to_check = "" + for field in fields(self): + string_to_check += ( + f"{header_values[field.name]}: {getattr(self, field.name)}, " + ) + return string_to_check + + +complete_summary_metrics = [ + ("experiment", "experiment"), + ("network", "network"), + ("accelerator_configuration", "accelerator_configuration"), + ("system_config", "system_config"), + ("memory_mode", "memory_mode"), + ("core_clock", "core_clock"), + ("arena_cache_size", "arena_cache_size"), + ("sram_bandwidth", "sram_bandwidth"), + ("dram_bandwidth", "dram_bandwidth"), + ("on_chip_flash_bandwidth", "on_chip_flash_bandwidth"), + ("off_chip_flash_bandwidth", "off_chip_flash_bandwidth"), + ("weights_storage_area", "weights_storage_area"), + ("feature_map_storage_area", "feature_map_storage_area"), + ("inferences_per_second", "inferences_per_second"), + ("batch_size", "batch_size"), + ("inference_time", "inference_time"), + ("passes_before_fusing", "passes_before_fusing"), + ("sram_memory_used", "sram_memory_used"), + ("dram_memory_used", "dram_memory_used"), + ( + 
"on_chip_flash_memory_used", + "on_chip_flash_memory_used", + ), + ("off_chip_flash_memory_used", "off_chip_flash_memory_used"), + ("total_original_weights", "total_original_weights"), + ("total_npu_encoded_weights", "total_npu_encoded_weights"), + ("dram_total_bytes", "dram_total_bytes"), + ( + "on_chip_flash_feature_map_read_bytes", + "on_chip_flash_feature_map_read_bytes", + ), + ("on_chip_flash_feature_map_write_bytes", "on_chip_flash_feature_map_write_bytes"), + ("on_chip_flash_weight_read_bytes", "on_chip_flash_weight_read_bytes"), + ("on_chip_flash_weight_write_bytes", "on_chip_flash_weight_write_bytes"), + ("on_chip_flash_total_bytes", "on_chip_flash_total_bytes"), + ("off_chip_flash_feature_map_read_bytes", "off_chip_flash_feature_map_read_bytes"), + ( + "off_chip_flash_feature_map_write_bytes", + "off_chip_flash_feature_map_write_bytes", + ), + ("off_chip_flash_weight_read_bytes", "off_chip_flash_weight_read_bytes"), + ("off_chip_flash_weight_write_bytes", "off_chip_flash_weight_write_bytes"), + ("off_chip_flash_total_bytes", "off_chip_flash_total_bytes"), + ("nn_macs", "nn_macs"), + ("nn_tops", "nn_tops"), + ("cycles_npu", "cycles_npu"), + ("cycles_sram_access", "cycles_sram_access"), + ("cycles_dram_access", "cycles_dram_access"), + ("cycles_on_chip_flash_access", "cycles_on_chip_flash_access"), + ("cycles_off_chip_flash_access", "cycles_off_chip_flash_access"), + ("cycles_total", "cycles_total"), +] + +OUTPUT_METRICS = [field.name for field in fields(VelaSummary)] + +summary_metrics = [ + summary_metric + for summary_metric in complete_summary_metrics + if summary_metric[0] in OUTPUT_METRICS +] +summary_metrics.sort(key=lambda e: OUTPUT_METRICS.index(e[0])) + + +@dataclass class Model: """Model metadata.""" @@ -49,20 +169,6 @@ class Model: ) -@dataclass -class OptimizedModel: - """Instance of the Vela optimized model.""" - - nng: Graph - arch: ArchitectureFeatures - compiler_options: CompilerOptions - scheduler_options: SchedulerOptions - - def 
save(self, output_filename: str | Path) -> None: - """Save instance of the optimized model to the file.""" - write_tflite(self.nng, output_filename) - - AcceleratorConfigType = Literal[ "ethos-u55-32", "ethos-u55-64", @@ -82,16 +188,17 @@ class VelaCompilerOptions: # pylint: disable=too-many-instance-attributes """Vela compiler options.""" config_files: str | list[str] | None = None - system_config: str = ArchitectureFeatures.DEFAULT_CONFIG - memory_mode: str = ArchitectureFeatures.DEFAULT_CONFIG + system_config: str = "internal-default" + memory_mode: str = "internal-default" accelerator_config: AcceleratorConfigType | None = None - max_block_dependency: int = ArchitectureFeatures.MAX_BLOCKDEP + max_block_dependency: int = 3 arena_cache_size: int | None = None tensor_allocator: TensorAllocatorType = "HillClimb" - cpu_tensor_alignment: int = Tensor.AllocationQuantum + cpu_tensor_alignment: int = 16 optimization_strategy: OptimizationStrategyType = "Performance" output_dir: Path = Path("output") recursion_limit: int = 1000 + verbose_performance: bool = True class VelaCompiler: # pylint: disable=too-many-instance-attributes @@ -105,13 +212,12 @@ class VelaCompiler: # pylint: disable=too-many-instance-attributes self.accelerator_config = compiler_options.accelerator_config self.max_block_dependency = compiler_options.max_block_dependency self.arena_cache_size = compiler_options.arena_cache_size - self.tensor_allocator = TensorAllocator[compiler_options.tensor_allocator] + self.tensor_allocator = compiler_options.tensor_allocator self.cpu_tensor_alignment = compiler_options.cpu_tensor_alignment - self.optimization_strategy = OptimizationStrategy[ - compiler_options.optimization_strategy - ] - self.output_dir = compiler_options.output_dir + self.optimization_strategy = compiler_options.optimization_strategy + self.output_dir = Path(compiler_options.output_dir) self.recursion_limit = compiler_options.recursion_limit + self.verbose_performance = 
compiler_options.verbose_performance sys.setrecursionlimit(self.recursion_limit) @@ -122,36 +228,48 @@ class VelaCompiler: # pylint: disable=too-many-instance-attributes nng, network_type = self._read_model(model) return Model(nng, network_type) - def compile_model(self, model: str | Path | Model) -> OptimizedModel: + def compile_model( + self, model_path: Path, already_compiled: bool = False + ) -> tuple[VelaSummary, Path]: """Compile the model.""" - if isinstance(model, (str, Path)): - nng, network_type = self._read_model(model) - else: - nng, network_type = model.nng, NetworkType.TFLite - - if not nng: - raise ValueError("Unable to read model: model.nng is not available") - - output_basename = f"{self.output_dir}/{nng.name}" - try: - arch = self._architecture_features() - compiler_options = self._compiler_options() - scheduler_options = self._scheduler_options() - - with redirect_output( + with redirect_raw_output( logger, stdout_level=logging.DEBUG, stderr_level=logging.DEBUG ): tmp = sys.stdout output_message = StringIO() sys.stdout = output_message - compiler_driver( - nng, - arch, - compiler_options, - scheduler_options, - network_type, - output_basename, + main_args = [ + "--output-dir", + str(self.output_dir.as_posix()), + "--tensor-allocator", + str(self.tensor_allocator), + "--cpu-tensor-alignment", + str(self.cpu_tensor_alignment), + "--accelerator-config", + str(self.accelerator_config), + "--system-config", + str(self.system_config), + "--memory-mode", + str(self.memory_mode), + "--max-block-dependency", + str(self.max_block_dependency), + "--optimise", + str(self.optimization_strategy), + model_path.as_posix(), + "--config", + str(self.config_files), + ] + if self.verbose_performance: + main_args.append("--verbose-performance") + if not already_compiled: + main(main_args) + optimized_model_path = Path( + self.output_dir.as_posix() + + "/" + + model_path.stem + + "_vela" + + model_path.suffix ) sys.stdout = tmp if ( @@ -159,51 +277,29 @@ class 
VelaCompiler: # pylint: disable=too-many-instance-attributes in output_message.getvalue() ): raise MemoryError("Model is too large and uses too much RAM") - - return OptimizedModel(nng, arch, compiler_options, scheduler_options) + summary_data = parse_summary_csv_file( + Path( + self.output_dir.as_posix() + + "/" + + model_path.stem + + "_summary_" + + self.system_config + + ".csv" + ) + ) + return summary_data, optimized_model_path except MemoryError as err: raise err except (SystemExit, Exception) as err: + if ( + "Error: Invalid tflite file." in output_message.getvalue() + and isinstance(err, SystemExit) + ): + raise RuntimeError(f"Unable to read model {model_path}") from err raise RuntimeError( "Model could not be optimized with Vela compiler." ) from err - def get_config(self) -> dict[str, Any]: - """Get compiler configuration.""" - arch = self._architecture_features() - - memory_area = { - mem.name: { - "clock_scales": arch.memory_clock_scales[mem], - "burst_length": arch.memory_burst_length[mem], - "read_latency": arch.memory_latency[mem][BandwidthDirection.Read], - "write_latency": arch.memory_latency[mem][BandwidthDirection.Write], - } - for mem in ( - MemArea.Sram, - MemArea.Dram, - MemArea.OnChipFlash, - MemArea.OffChipFlash, - ) - } - - return { - "accelerator_config": arch.accelerator_config.value, - "system_config": arch.system_config, - "core_clock": arch.core_clock, - "axi0_port": arch.axi0_port.name, - "axi1_port": arch.axi1_port.name, - "memory_mode": arch.memory_mode, - "const_mem_area": arch.const_mem_area.name, - "arena_mem_area": arch.arena_mem_area.name, - "cache_mem_area": arch.cache_mem_area.name, - "arena_cache_size": arch.arena_cache_size, - "permanent_storage_mem_area": arch.permanent_storage_mem_area.name, - "feature_map_storage_mem_area": arch.feature_map_storage_mem_area.name, - "fast_storage_mem_area": arch.fast_storage_mem_area.name, - "memory_area": memory_area, - } - @staticmethod def _read_model(model: str | Path) -> tuple[Graph, 
NetworkType]: """Read TensorFlow Lite model.""" @@ -216,57 +312,10 @@ class VelaCompiler: # pylint: disable=too-many-instance-attributes except (SystemExit, Exception) as err: raise RuntimeError(f"Unable to read model {model_path}.") from err - def _architecture_features(self) -> ArchitectureFeatures: - """Return ArchitectureFeatures instance.""" - return ArchitectureFeatures( - vela_config_files=self.config_files, - accelerator_config=self.accelerator_config, - system_config=self.system_config, - memory_mode=self.memory_mode, - max_blockdep=self.max_block_dependency, - verbose_config=False, - arena_cache_size=self.arena_cache_size, - ) - - def _scheduler_options(self) -> SchedulerOptions: - """Return SchedulerOptions instance.""" - arch = self._architecture_features() - - return SchedulerOptions( - optimization_strategy=self.optimization_strategy, - sram_target=arch.arena_cache_size, - verbose_schedule=False, - ) - - def _compiler_options(self) -> CompilerOptions: - """Return CompilerOptions instance.""" - return CompilerOptions( - verbose_graph=False, - verbose_quantization=False, - verbose_packing=False, - verbose_tensor_purpose=False, - verbose_tensor_format=False, - verbose_allocation=False, - verbose_high_level_command_stream=False, - verbose_register_command_stream=False, - verbose_operators=False, - verbose_weights=False, - verbose_performance=True, - show_cpu_operations=False, - tensor_allocator=self.tensor_allocator, - timing=False, - output_dir=self.output_dir, - cpu_tensor_alignment=self.cpu_tensor_alignment, - ) - - def return_compiler_options(self) -> CompilerOptions: - """Return CompilerOptions instance for test purposes.""" - return self._compiler_options() - def resolve_compiler_config( vela_compiler_options: VelaCompilerOptions, -) -> dict[str, Any]: +) -> VelaInitData: """Resolve passed compiler options. 
Vela has number of configuration parameters that being @@ -278,22 +327,209 @@ def resolve_compiler_config( In order to get this information we need to create instance of the Vela compiler first. """ - vela_compiler = VelaCompiler(vela_compiler_options) - return vela_compiler.get_config() - - -def optimize_model( - model_path: Path, compiler_options: VelaCompilerOptions, output_model_path: Path -) -> None: - """Optimize model and return it's path after optimization.""" - logger.debug( - "Optimize model %s for target %s", - model_path, - compiler_options.accelerator_config, + return parse_vela_initialisation_file( + get_vela_config(), + vela_compiler_options.system_config, + vela_compiler_options.memory_mode, ) + +def compile_model(model_path: Path, compiler_options: VelaCompilerOptions) -> Path: + """Compile model.""" vela_compiler = VelaCompiler(compiler_options) - optimized_model = vela_compiler.compile_model(model_path) + # output dir could be a path or str, cast to Path object + output_dir = Path(compiler_options.output_dir) + if Path( + output_dir.as_posix() + + "/" + + model_path.stem + + "_summary_" + + compiler_options.system_config + + ".csv" + ).is_file(): + _, optimized_model_path = vela_compiler.compile_model(model_path, True) + else: + _, optimized_model_path = vela_compiler.compile_model(model_path) + return optimized_model_path + + +def parse_summary_csv_file(vela_summary_csv_file: Path) -> VelaSummary: + """Parse the summary csv file from Vela.""" + if not vela_summary_csv_file.is_file(): + raise FileNotFoundError(f"CSV File not found at {vela_summary_csv_file}") + + with open(vela_summary_csv_file, encoding="UTF-8") as csv_file: + summary_reader = csv.DictReader(csv_file, delimiter=",") + try: + row = next(summary_reader) + except StopIteration as err: + raise RuntimeError("Generated Vela Summary CSV is empty") from err + try: + # pylint: disable=eval-used + key_types = { + field.name: eval(field.type) # type: ignore # nosec + for field in 
fields(VelaSummary) + } + # pylint: enable=eval-used + summary_data = VelaSummary( + **{key: key_types[key](row[title]) for key, title in summary_metrics} + ) + except KeyError as err: + raise KeyError( + f"Generated Vela Summary CSV missing expected header: {err.args[0]}." + ) from err + return summary_data + + +def parse_vela_initialisation_file( # pylint: disable=too-many-locals + vela_init_file: Path, system_config: str, memory_mode: str +) -> VelaInitData: + """Parse the vela.ini to retrieve data for the target information table.""" + if not vela_init_file.is_file(): + raise FileNotFoundError( + f"Vela Initialisation File not found at {vela_init_file}" + ) + + lines = [] + with open(vela_init_file, encoding="UTF-8") as init_file: + lines = init_file.readlines() + + if len(lines) == 0: + raise OSError("vela.ini File Is Empty") + + lines = [line.strip("\n][ ") for line in lines] + + idxs_memory_mode = [ + idx for idx, item in enumerate(lines) if re.search("^Memory_Mode.*", item) + ] + + if len(idxs_memory_mode) == 0: + raise IndexError("No memory modes are present in vela.ini file.") + + idxs_system_config = [ + idx for idx, item in enumerate(lines) if re.search("^System_Config.*", item) + ] + [idxs_memory_mode[0]] + + if len(idxs_system_config) <= 1: + raise IndexError("No system configs are present in vela.ini file.") + + try: + idx_config = lines.index("System_Config." + system_config) + except ValueError as err: + raise ValueError( + f"System Config: {system_config} not present in vela.ini file." + ) from err + + lines_to_probe = lines[ + idx_config : idxs_system_config[ # noqa: E203 + idxs_system_config.index(idx_config) + 1 + ] + ] + + def collect_memory_mode_lines(memory_mode: str) -> list[str]: + try: + idx_memory_mode = lines.index("Memory_Mode." + memory_mode) + except ValueError as err: + raise ValueError( + f"Memory Mode: {memory_mode} not present in vela.ini file." 
+ ) from err + if idxs_memory_mode.index(idx_memory_mode) == len(idxs_memory_mode) - 1: + lines_to_probe = lines[idx_memory_mode:] + else: + lines_to_probe = lines[ + idx_memory_mode : idxs_memory_mode[ # noqa: E203 + idxs_memory_mode.index(idx_memory_mode) + 1 + ] + ] + return lines_to_probe + + lines_to_probe_memory_mode = collect_memory_mode_lines(memory_mode) + extra_memory_mode_lines = [] + for line in lines_to_probe_memory_mode: + if "inherit=Memory_Mode." in line: + extra_memory_mode = line[line.rindex(".") + 1 :] # noqa: E203 + extra_memory_mode_lines = collect_memory_mode_lines(extra_memory_mode) + + lines_to_probe += extra_memory_mode_lines + lines_to_probe_memory_mode + + init_dict = {} + for line in lines_to_probe: + if "=" in line: + init_dict[line[: line.index("=")]] = line[ + line.index("=") + 1 : # noqa: E203 + ] + try: + init_data = VelaInitData( + system_config=system_config, + core_clock=float(init_dict["core_clock"]), + axi0_port=str(init_dict["axi0_port"]), + axi1_port=str(init_dict["axi1_port"]), + memory_mode=memory_mode, + sram_memory_data=VelaInitMemoryData( + clock_scale=float(init_dict["Sram_clock_scale"]) + if "Sram_clock_scale" in init_dict + else None, + burst_length=int(init_dict["Sram_burst_length"]) + if "Sram_burst_length" in init_dict + else None, + read_latency=int(init_dict["Sram_read_latency"]) + if "Sram_read_latency" in init_dict + else None, + write_latency=int(init_dict["Sram_write_latency"]) + if "Sram_write_latency" in init_dict + else None, + ), + dram_memory_data=VelaInitMemoryData( + clock_scale=float(init_dict["Dram_clock_scale"]) + if "Dram_clock_scale" in init_dict + else None, + burst_length=int(init_dict["Dram_burst_length"]) + if "Dram_burst_length" in init_dict + else None, + read_latency=int(init_dict["Dram_read_latency"]) + if "Dram_read_latency" in init_dict + else None, + write_latency=int(init_dict["Dram_write_latency"]) + if "Dram_write_latency" in init_dict + else None, + ), + 
off_chip_flash_memory_data=VelaInitMemoryData( + clock_scale=float(init_dict["OffChipFlash_clock_scale"]) + if "OffChipFlash_clock_scale" in init_dict + else None, + burst_length=int(init_dict["OffChipFlash_burst_length"]) + if "OffChipFlash_burst_length" in init_dict + else None, + read_latency=int(init_dict["OffChipFlash_read_latency"]) + if "OffChipFlash_read_latency" in init_dict + else None, + write_latency=int(init_dict["OffChipFlash_write_latency"]) + if "OffChipFlash_write_latency" in init_dict + else None, + ), + on_chip_flash_memory_data=VelaInitMemoryData( + clock_scale=float(init_dict["OnChipFlash_clock_scale"]) + if "OnChipFlash_clock_scale" in init_dict + else None, + burst_length=int(init_dict["OnChipFlash_burst_length"]) + if "OnChipFlash_burst_length" in init_dict + else None, + read_latency=int(init_dict["OnChipFlash_read_latency"]) + if "OnChipFlash_read_latency" in init_dict + else None, + write_latency=int(init_dict["OnChipFlash_write_latency"]) + if "OnChipFlash_write_latency" in init_dict + else None, + ), + const_mem_area=str(init_dict["const_mem_area"]), + arena_mem_area=str(init_dict["arena_mem_area"]), + cache_mem_area=str(init_dict["cache_mem_area"]), + arena_cache_size=int(init_dict["arena_cache_size"]) + if "arena_cache_size" in init_dict + else None, + ) + + except KeyError as err: + raise KeyError(f"Vela.ini file missing expected header: {err.args[0]}") from err - logger.debug("Save optimized model into %s", output_model_path) - optimized_model.save(output_model_path) + return init_data diff --git a/src/mlia/backend/vela/performance.py b/src/mlia/backend/vela/performance.py index 72a8ceb..2cf945d 100644 --- a/src/mlia/backend/vela/performance.py +++ b/src/mlia/backend/vela/performance.py @@ -10,15 +10,12 @@ from collections import Counter from dataclasses import dataclass from dataclasses import fields from pathlib import Path -from pydoc import locate import numpy as np -from ethosu.vela.npu_performance import PassCycles -from 
ethosu.vela.tensor import MemArea -from mlia.backend.vela.compiler import OptimizedModel from mlia.backend.vela.compiler import VelaCompiler from mlia.backend.vela.compiler import VelaCompilerOptions +from mlia.backend.vela.compiler import VelaSummary logger = logging.getLogger(__name__) @@ -37,11 +34,10 @@ class PerformanceMetrics: # pylint: disable=too-many-instance-attributes batch_inference_time: float inferences_per_second: float batch_size: int - unknown_memory_area_size: int - sram_memory_area_size: int - dram_memory_area_size: int - on_chip_flash_memory_area_size: int - off_chip_flash_memory_area_size: int + sram_memory_area_size: float + dram_memory_area_size: float + on_chip_flash_memory_area_size: float + off_chip_flash_memory_area_size: float layerwise_performance_info: LayerwisePerfInfo @@ -145,19 +141,19 @@ def parse_layerwise_perf_csv( # pylint: disable=too-many-locals if row == headers_to_check_cpu_ops: continue try: + # pylint: disable=eval-used key_types = { - field.name: locate(str(field.type)) + field.name: eval(field.type) # type: ignore # nosec for field in fields(LayerPerfInfo) } + # pylint: enable=eval-used ids_to_metrics = {} for key, title, _ in metrics: try: - ids_to_metrics[key] = key_types[key]( # type: ignore - row_as_dict[title] - ) + ids_to_metrics[key] = key_types[key](row_as_dict[title]) except ValueError as err: if "invalid literal for int() with base 10" in str(err): - ids_to_metrics[key] = key_types[key]( # type: ignore + ids_to_metrics[key] = key_types[key]( float(row_as_dict[title]) ) else: @@ -180,17 +176,20 @@ def estimate_performance( model_path, compiler_options.accelerator_config, ) - vela_compiler = VelaCompiler(compiler_options) - - initial_model = vela_compiler.read_model(model_path) - if initial_model.optimized: - raise ValueError( - "Unable to estimate performance for the given optimized model." 
- ) - - optimized_model = vela_compiler.compile_model(initial_model) - output_dir = optimized_model.compiler_options.output_dir + if Path( + Path(compiler_options.output_dir).as_posix() + + "/" + + model_path.stem + + "_summary_" + + compiler_options.system_config + + ".csv" + ).is_file(): + summary_data, _ = vela_compiler.compile_model(model_path, True) + else: + summary_data, _ = vela_compiler.compile_model(model_path) + + output_dir = compiler_options.output_dir csv_paths = [entry for entry in os.listdir(output_dir) if "per-layer.csv" in entry] model_name = str(model_path.stem) csv_file_found = None @@ -204,41 +203,31 @@ def estimate_performance( vela_csv_file=csv_path, metrics=layer_metrics ) - return _performance_metrics(layerwise_performance_info, optimized_model) + return _performance_metrics(layerwise_performance_info, summary_data) def _performance_metrics( - layerwise_performance_info: LayerwisePerfInfo, optimized_model: OptimizedModel + layerwise_performance_info: LayerwisePerfInfo, summary_data: VelaSummary ) -> PerformanceMetrics: """Return performance metrics for optimized model.""" - cycles = optimized_model.nng.cycles - - def memory_usage(mem_area: MemArea) -> int: - """Get memory usage for the proviced memory area type.""" - memory_used: dict[MemArea, int] = optimized_model.nng.memory_used - bandwidths = optimized_model.nng.bandwidths - - return memory_used.get(mem_area, 0) if np.sum(bandwidths[mem_area]) > 0 else 0 - midpoint_fps = np.nan - midpoint_inference_time = cycles[PassCycles.Total] / optimized_model.arch.core_clock + midpoint_inference_time = summary_data.cycles_total / summary_data.core_clock if midpoint_inference_time > 0: midpoint_fps = 1 / midpoint_inference_time return PerformanceMetrics( - npu_cycles=int(cycles[PassCycles.Npu]), - sram_access_cycles=int(cycles[PassCycles.SramAccess]), - dram_access_cycles=int(cycles[PassCycles.DramAccess]), - on_chip_flash_access_cycles=int(cycles[PassCycles.OnChipFlashAccess]), - 
off_chip_flash_access_cycles=int(cycles[PassCycles.OffChipFlashAccess]), - total_cycles=int(cycles[PassCycles.Total]), + npu_cycles=int(summary_data.cycles_npu), + sram_access_cycles=int(summary_data.cycles_sram_access), + dram_access_cycles=int(summary_data.cycles_dram_access), + on_chip_flash_access_cycles=int(summary_data.cycles_on_chip_flash_access), + off_chip_flash_access_cycles=int(summary_data.cycles_off_chip_flash_access), + total_cycles=int(summary_data.cycles_total), batch_inference_time=midpoint_inference_time * 1000, inferences_per_second=midpoint_fps, - batch_size=optimized_model.nng.batch_size, - unknown_memory_area_size=memory_usage(MemArea.Unknown), - sram_memory_area_size=memory_usage(MemArea.Sram), - dram_memory_area_size=memory_usage(MemArea.Dram), - on_chip_flash_memory_area_size=memory_usage(MemArea.OnChipFlash), - off_chip_flash_memory_area_size=memory_usage(MemArea.OffChipFlash), + batch_size=summary_data.batch_size, + sram_memory_area_size=float(summary_data.sram_memory_used), + dram_memory_area_size=float(summary_data.dram_memory_used), + on_chip_flash_memory_area_size=float(summary_data.on_chip_flash_memory_used), + off_chip_flash_memory_area_size=float(summary_data.off_chip_flash_memory_used), layerwise_performance_info=layerwise_performance_info, ) diff --git a/src/mlia/core/context.py b/src/mlia/core/context.py index 6e699be..345b668 100644 --- a/src/mlia/core/context.py +++ b/src/mlia/core/context.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright 2022-2023, Arm Limited and/or its affiliates. +# SPDX-FileCopyrightText: Copyright 2022-2024, Arm Limited and/or its affiliates. # SPDX-License-Identifier: Apache-2.0 """Context module. 
@@ -112,7 +112,6 @@ class ExecutionContext(Context): event_publisher: EventPublisher | None = None, verbose: bool = False, logs_dir: str = "logs", - models_dir: str = "models", action_resolver: ActionResolver | None = None, output_format: OutputFormat = "plain_text", ) -> None: @@ -129,8 +128,6 @@ class ExecutionContext(Context): :param verbose: enable verbose output :param logs_dir: name of the directory inside output directory where log files will be stored - :param models_dir: name of the directory inside output directory where - temporary models will be stored :param action_resolver: instance of the action resolver that could make advice actionable :param output_format: format for the application output @@ -144,7 +141,6 @@ class ExecutionContext(Context): self._event_publisher = event_publisher or DefaultEventPublisher() self.verbose = verbose self.logs_dir = logs_dir - self.models_dir = models_dir self._action_resolver = action_resolver or APIActionResolver() self._output_format = output_format @@ -195,10 +191,7 @@ class ExecutionContext(Context): def get_model_path(self, model_filename: str) -> Path: """Return path for the model.""" - models_dir_path = self._output_dir_path / self.models_dir - models_dir_path.mkdir(exist_ok=True) - - return models_dir_path / model_filename + return self._output_dir_path / model_filename @property def logs_path(self) -> Path: diff --git a/src/mlia/core/reporting.py b/src/mlia/core/reporting.py index 722adfd..f8ef644 100644 --- a/src/mlia/core/reporting.py +++ b/src/mlia/core/reporting.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright 2022-2023, Arm Limited and/or its affiliates. +# SPDX-FileCopyrightText: Copyright 2022-2024, Arm Limited and/or its affiliates. 
# SPDX-License-Identifier: Apache-2.0 """Reporting module.""" from __future__ import annotations @@ -49,7 +49,7 @@ class ReportItem: self, name: str, alias: str | None = None, - value: str | int | Cell | None = None, + value: str | int | float | Cell | None = None, nested_items: list[ReportItem] | None = None, ) -> None: """Init the report item.""" diff --git a/src/mlia/resources/vela/vela.ini b/src/mlia/resources/vela/vela.ini index 29a5179..747dc3d 100644 --- a/src/mlia/resources/vela/vela.ini +++ b/src/mlia/resources/vela/vela.ini @@ -1,4 +1,4 @@ -; SPDX-FileCopyrightText: Copyright 2020, 2022, Arm Limited and/or its affiliates. +; SPDX-FileCopyrightText: Copyright 2020, 2022, 2024, Arm Limited and/or its affiliates. ; SPDX-License-Identifier: Apache-2.0 ; ----------------------------------------------------------------------------- @@ -6,6 +6,19 @@ ; ----------------------------------------------------------------------------- ; System Configuration +; Ethos-U55 Deep Embedded: SRAM (1.6 GB/s) and Flash (0.1 GB/s) +[System_Config.Ethos_U55_Deep_Embedded] +core_clock=200e6 +axi0_port=Sram +axi1_port=OffChipFlash +Sram_clock_scale=1.0 +Sram_burst_length=32 +Sram_read_latency=32 +Sram_write_latency=32 +OffChipFlash_clock_scale=0.0625 +OffChipFlash_burst_length=128 +OffChipFlash_read_latency=64 +OffChipFlash_write_latency=64 ; Ethos-U55 High-End Embedded: SRAM (4 GB/s) and Flash (0.5 GB/s) [System_Config.Ethos_U55_High_End_Embedded] @@ -35,6 +48,20 @@ OffChipFlash_burst_length=128 OffChipFlash_read_latency=64 OffChipFlash_write_latency=64 +; Ethos-U65 Mid-End: SRAM (8 GB/s) and DRAM (3.75 GB/s) +[System_Config.Ethos_U65_Mid_End] +core_clock=500e6 +axi0_port=Sram +axi1_port=Dram +Sram_clock_scale=1.0 +Sram_burst_length=32 +Sram_read_latency=32 +Sram_write_latency=32 +Dram_clock_scale=0.46875 +Dram_burst_length=128 +Dram_read_latency=500 +Dram_write_latency=250 + ; Ethos-U65 High-End: SRAM (16 GB/s) and DRAM (3.75 GB/s) [System_Config.Ethos_U65_High_End] 
core_clock=1e9 @@ -49,6 +76,20 @@ Dram_burst_length=128 Dram_read_latency=500 Dram_write_latency=250 +; Ethos-U65 Client-Server: SRAM (16 GB/s) and DRAM (12 GB/s) +[System_Config.Ethos_U65_Client_Server] +core_clock=1e9 +axi0_port=Sram +axi1_port=Dram +Sram_clock_scale=1.0 +Sram_burst_length=32 +Sram_read_latency=32 +Sram_write_latency=32 +Dram_clock_scale=0.75 +Dram_burst_length=128 +Dram_read_latency=500 +Dram_write_latency=250 + ; ----------------------------------------------------------------------------- ; Memory Mode @@ -58,7 +99,6 @@ Dram_write_latency=250 const_mem_area=Axi0 arena_mem_area=Axi0 cache_mem_area=Axi0 -arena_cache_size=2096768 ; Shared SRAM: the SRAM is shared between the Ethos-U and the Cortex-M software ; The non-SRAM memory is assumed to be read-only @@ -66,7 +106,6 @@ arena_cache_size=2096768 const_mem_area=Axi1 arena_mem_area=Axi0 cache_mem_area=Axi0 -arena_cache_size=2096768 ; Dedicated SRAM: the SRAM (384KB) is only for use by the Ethos-U ; The non-SRAM memory is assumed to be read-writeable @@ -75,3 +114,9 @@ const_mem_area=Axi1 arena_mem_area=Axi1 cache_mem_area=Axi0 arena_cache_size=393216 + +; Dedicated SRAM 512KB: the SRAM (512KB) is only for use by the Ethos-U +; The non-SRAM memory is assumed to be read-writeable +[Memory_Mode.Dedicated_Sram_512KB] +inherit=Memory_Mode.Dedicated_Sram +arena_cache_size=524288 diff --git a/src/mlia/target/ethos_u/advisor.py b/src/mlia/target/ethos_u/advisor.py index b5932d0..edcfcfc 100644 --- a/src/mlia/target/ethos_u/advisor.py +++ b/src/mlia/target/ethos_u/advisor.py @@ -109,7 +109,9 @@ class EthosUInferenceAdvisor(DefaultInferenceAdvisor): def _get_target_config(self, context: Context) -> EthosUConfiguration: """Get target configuration.""" target_profile = self.get_target_profile(context) - return cast(EthosUConfiguration, profile(target_profile)) + target_config = cast(EthosUConfiguration, profile(target_profile)) + target_config.compiler_options.output_dir = context.output_dir # type: ignore 
+ return target_config def _get_optimization_settings(self, context: Context) -> list[list[dict]]: """Get optimization settings.""" diff --git a/src/mlia/target/ethos_u/config.py b/src/mlia/target/ethos_u/config.py index 73baa61..b3416d3 100644 --- a/src/mlia/target/ethos_u/config.py +++ b/src/mlia/target/ethos_u/config.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright 2022-2023, Arm Limited and/or its affiliates. +# SPDX-FileCopyrightText: Copyright 2022-2024, Arm Limited and/or its affiliates. # SPDX-License-Identifier: Apache-2.0 """Ethos-U configuration.""" from __future__ import annotations @@ -10,6 +10,7 @@ from mlia.backend.corstone import is_corstone_backend from mlia.backend.manager import get_available_backends from mlia.backend.vela.compiler import resolve_compiler_config from mlia.backend.vela.compiler import VelaCompilerOptions +from mlia.backend.vela.compiler import VelaInitData from mlia.target.config import TargetProfile from mlia.utils.filesystem import get_vela_config @@ -53,7 +54,7 @@ class EthosUConfiguration(TargetProfile): ) @property - def resolved_compiler_config(self) -> dict[str, Any]: + def resolved_compiler_config(self) -> VelaInitData: """Resolve compiler configuration.""" return resolve_compiler_config(self.compiler_options) diff --git a/src/mlia/target/ethos_u/data_analysis.py b/src/mlia/target/ethos_u/data_analysis.py index 5c6080f..d42d82a 100644 --- a/src/mlia/target/ethos_u/data_analysis.py +++ b/src/mlia/target/ethos_u/data_analysis.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright 2022-2023, Arm Limited and/or its affiliates. +# SPDX-FileCopyrightText: Copyright 2022-2024, Arm Limited and/or its affiliates. 
# SPDX-License-Identifier: Apache-2.0 """Ethos-U data analysis module.""" from __future__ import annotations @@ -110,13 +110,13 @@ class EthosUDataAnalyzer(FactExtractor): if not optimizations: return - orig = optimization_results.original_perf_metrics.in_kilobytes() + orig = optimization_results.original_perf_metrics orig_memory = orig.memory_usage orig_cycles = orig.npu_cycles diffs: list[OptimizationDiff] = [] for opt_type, opt_perf_metrics in optimizations: - opt = opt_perf_metrics.in_kilobytes() + opt = opt_perf_metrics opt_memory = opt.memory_usage opt_cycles = opt.npu_cycles diff --git a/src/mlia/target/ethos_u/performance.py b/src/mlia/target/ethos_u/performance.py index 8decb75..1e2a504 100644 --- a/src/mlia/target/ethos_u/performance.py +++ b/src/mlia/target/ethos_u/performance.py @@ -54,7 +54,6 @@ class MemoryUsage: sram_memory_area_size: int | float dram_memory_area_size: int | float - unknown_memory_area_size: int | float on_chip_flash_memory_area_size: int | float off_chip_flash_memory_area_size: int | float memory_size_type: MemorySizeType = MemorySizeType.BYTES @@ -67,27 +66,6 @@ class MemoryUsage: "Off chip flash used", ] - def in_kilobytes(self) -> MemoryUsage: - """Return memory usage with values in kilobytes.""" - if self.memory_size_type == MemorySizeType.KILOBYTES: - return self - - kilobytes = [ - value / BYTES_PER_KILOBYTE - for value in [ - self.sram_memory_area_size, - self.dram_memory_area_size, - self.unknown_memory_area_size, - self.on_chip_flash_memory_area_size, - self.off_chip_flash_memory_area_size, - ] - ] - - return MemoryUsage( - *kilobytes, # type: ignore - memory_size_type=MemorySizeType.KILOBYTES, - ) - @dataclass class PerformanceMetrics: @@ -98,23 +76,6 @@ class PerformanceMetrics: memory_usage: MemoryUsage | None layerwise_perf_info: LayerwisePerfInfo | None - def in_kilobytes(self) -> PerformanceMetrics: - """Return metrics with memory usage in KiB.""" - if self.memory_usage is None: - return PerformanceMetrics( - 
self.target_config, - self.npu_cycles, - self.memory_usage, - self.layerwise_perf_info, - ) - - return PerformanceMetrics( - self.target_config, - self.npu_cycles, - self.memory_usage.in_kilobytes(), - self.layerwise_perf_info, - ) - @dataclass class OptimizationPerformanceMetrics: @@ -157,7 +118,6 @@ class VelaPerformanceEstimator( MemoryUsage( vela_perf_metrics.sram_memory_area_size, vela_perf_metrics.dram_memory_area_size, - vela_perf_metrics.unknown_memory_area_size, vela_perf_metrics.on_chip_flash_memory_area_size, vela_perf_metrics.off_chip_flash_memory_area_size, ), @@ -192,12 +152,8 @@ class CorstonePerformanceEstimator( else model ) - optimized_model_path = self.context.get_model_path( - f"{model_path.stem}_vela.tflite" - ) - - vela_comp.optimize_model( - model_path, self.target_config.compiler_options, optimized_model_path + optimized_model_path = vela_comp.compile_model( + model_path, self.target_config.compiler_options ) corstone_perf_metrics = estimate_performance( diff --git a/src/mlia/target/ethos_u/reporters.py b/src/mlia/target/ethos_u/reporters.py index b747ce5..384d623 100644 --- a/src/mlia/target/ethos_u/reporters.py +++ b/src/mlia/target/ethos_u/reporters.py @@ -4,6 +4,7 @@ from __future__ import annotations from collections import defaultdict +from dataclasses import asdict from dataclasses import fields from typing import Any from typing import Callable @@ -119,29 +120,50 @@ def report_target_details(target_config: EthosUConfiguration) -> Report: """Return table representation for the target.""" compiler_config = target_config.resolved_compiler_config + memory_dict = dict( + zip( + ["Sram", "Dram", "OnChipFlash", "OffChipFlash"], + [ + compiler_config.sram_memory_data, + compiler_config.dram_memory_data, + compiler_config.on_chip_flash_memory_data, + compiler_config.off_chip_flash_memory_data, + ], + ) + ) + + memory_dict = { + key: val + for key, val in memory_dict.items() + if not list(asdict(val).values()).count(None) == 
len(list(asdict(val).values())) + } + memory_settings = [ ReportItem( "Const mem area", "const_mem_area", - compiler_config["const_mem_area"], + compiler_config.const_mem_area, ), ReportItem( "Arena mem area", "arena_mem_area", - compiler_config["arena_mem_area"], + compiler_config.arena_mem_area, ), ReportItem( "Cache mem area", "cache_mem_area", - compiler_config["cache_mem_area"], - ), - ReportItem( - "Arena cache size", - "arena_cache_size", - BytesCell(compiler_config["arena_cache_size"]), + compiler_config.cache_mem_area, ), ] + if compiler_config.arena_cache_size is not None: + memory_settings.append( + ReportItem( + "Arena cache size", + "arena_cache_size", + BytesCell(compiler_config.arena_cache_size), + ) + ) mem_areas_settings = [ ReportItem( f"{mem_area_name}", @@ -151,67 +173,48 @@ def report_target_details(target_config: EthosUConfiguration) -> Report: ReportItem( "Clock scales", "clock_scales", - mem_area_settings["clock_scales"], + mem_area_settings.clock_scale, ), ReportItem( "Burst length", "burst_length", - BytesCell(mem_area_settings["burst_length"]), + BytesCell(mem_area_settings.burst_length), ), ReportItem( "Read latency", "read_latency", - CyclesCell(mem_area_settings["read_latency"]), + CyclesCell(mem_area_settings.read_latency), ), ReportItem( "Write latency", "write_latency", - CyclesCell(mem_area_settings["write_latency"]), + CyclesCell(mem_area_settings.write_latency), ), ], ) - for mem_area_name, mem_area_settings in compiler_config["memory_area"].items() + for mem_area_name, mem_area_settings in memory_dict.items() ] system_settings = [ ReportItem( "Accelerator clock", "accelerator_clock", - ClockCell(compiler_config["core_clock"]), + ClockCell(compiler_config.core_clock), ), ReportItem( "AXI0 port", "axi0_port", - compiler_config["axi0_port"], + compiler_config.axi0_port, ), ReportItem( "AXI1 port", "axi1_port", - compiler_config["axi1_port"], + compiler_config.axi1_port, ), ReportItem( "Memory area settings", "memory_area", None, 
nested_items=mem_areas_settings ), ] - - arch_settings = [ - ReportItem( - "Permanent storage mem area", - "permanent_storage_mem_area", - compiler_config["permanent_storage_mem_area"], - ), - ReportItem( - "Feature map storage mem area", - "feature_map_storage_mem_area", - compiler_config["feature_map_storage_mem_area"], - ), - ReportItem( - "Fast storage mem area", - "fast_storage_mem_area", - compiler_config["fast_storage_mem_area"], - ), - ] - return NestedReport( "Target information", "target", @@ -221,21 +224,15 @@ def report_target_details(target_config: EthosUConfiguration) -> Report: ReportItem( "Memory mode", alias="memory_mode", - value=compiler_config["memory_mode"], + value=compiler_config.memory_mode, nested_items=memory_settings, ), ReportItem( "System config", alias="system_config", - value=compiler_config["system_config"], + value=compiler_config.system_config, nested_items=system_settings, ), - ReportItem( - "Architecture settings", - "arch_settings", - None, - nested_items=arch_settings, - ), ], ) @@ -244,7 +241,6 @@ def metrics_as_records( perf_metrics: list[PerformanceMetrics], ) -> tuple[list[tuple], list[tuple]]: """Convert perf metrics object into list of records.""" - perf_metrics = [item.in_kilobytes() for item in perf_metrics] def _layerwise_as_metrics( perf_metrics: list[PerformanceMetrics], @@ -314,9 +310,6 @@ def metrics_as_records( return [] metric_map["SRAM used"].append(metrics.memory_usage.sram_memory_area_size) metric_map["DRAM used"].append(metrics.memory_usage.dram_memory_area_size) - metric_map["Unknown memory area used"].append( - metrics.memory_usage.unknown_memory_area_size - ) metric_map["On-chip flash used"].append( metrics.memory_usage.on_chip_flash_memory_area_size ) |