From e506c7bd0453cb204ec7a59267fe3982492aaed6 Mon Sep 17 00:00:00 2001 From: Nathan Bailey Date: Fri, 26 Jan 2024 14:19:52 +0000 Subject: refactor: Migrate from Vela's internal code to CSV summary Removes vela defines from vela compiler.py and performance.py Replaces calls to vela code with data from vela summary csv Resolves: MLIA-1024 Signed-off-by: Nathan Bailey Change-Id: I569878f2936767f70c0255919ca40d1969275529 --- src/mlia/backend/vela/compiler.py | 552 +++++++++++++++------- src/mlia/backend/vela/performance.py | 85 ++-- src/mlia/core/context.py | 11 +- src/mlia/core/reporting.py | 4 +- src/mlia/resources/vela/vela.ini | 51 ++- src/mlia/target/ethos_u/advisor.py | 4 +- src/mlia/target/ethos_u/config.py | 5 +- src/mlia/target/ethos_u/data_analysis.py | 6 +- src/mlia/target/ethos_u/performance.py | 48 +- src/mlia/target/ethos_u/reporters.py | 87 ++-- tests/conftest.py | 41 +- tests/test_backend_vela_compiler.py | 656 +++++++++++++++++++++++---- tests/test_backend_vela_performance.py | 27 +- tests/test_cli_commands.py | 2 +- tests/test_core_context.py | 7 +- tests/test_target_ethos_u_data_analysis.py | 12 +- tests/test_target_ethos_u_data_collection.py | 2 +- tests/test_target_ethos_u_performance.py | 16 +- tests/test_target_ethos_u_reporters.py | 40 +- 19 files changed, 1140 insertions(+), 516 deletions(-) diff --git a/src/mlia/backend/vela/compiler.py b/src/mlia/backend/vela/compiler.py index fe9e365..211721a 100644 --- a/src/mlia/backend/vela/compiler.py +++ b/src/mlia/backend/vela/compiler.py @@ -3,35 +3,155 @@ """Vela compiler wrapper module.""" from __future__ import annotations +import csv import logging +import re import sys from dataclasses import dataclass +from dataclasses import fields from io import StringIO from pathlib import Path -from typing import Any from typing import Literal -from ethosu.vela.architecture_features import ArchitectureFeatures -from ethosu.vela.compiler_driver import compiler_driver -from ethosu.vela.compiler_driver import 
CompilerOptions -from ethosu.vela.compiler_driver import TensorAllocator from ethosu.vela.model_reader import ModelReaderOptions from ethosu.vela.model_reader import read_model from ethosu.vela.nn_graph import Graph from ethosu.vela.nn_graph import NetworkType from ethosu.vela.operation import CustomType -from ethosu.vela.scheduler import OptimizationStrategy -from ethosu.vela.scheduler import SchedulerOptions -from ethosu.vela.tensor import BandwidthDirection -from ethosu.vela.tensor import MemArea -from ethosu.vela.tensor import Tensor -from ethosu.vela.tflite_writer import write_tflite +from ethosu.vela.vela import main +from mlia.utils.filesystem import get_vela_config from mlia.utils.logging import redirect_output +from mlia.utils.logging import redirect_raw_output logger = logging.getLogger(__name__) +@dataclass +class VelaInitMemoryData: + """Memory Data from vela.ini.""" + + clock_scale: float | None + burst_length: int | None + read_latency: int | None + write_latency: int | None + + +@dataclass +class VelaInitData: # pylint: disable=too-many-instance-attributes + """Data gathered from the vela.ini file we provide to vela.""" + + system_config: str + core_clock: float + axi0_port: str + axi1_port: str + sram_memory_data: VelaInitMemoryData + dram_memory_data: VelaInitMemoryData + off_chip_flash_memory_data: VelaInitMemoryData + on_chip_flash_memory_data: VelaInitMemoryData + memory_mode: str + const_mem_area: str + arena_mem_area: str + cache_mem_area: str + arena_cache_size: int | None + + +@dataclass +class VelaSummary: # pylint: disable=too-many-instance-attributes + """Data gathered from the summary CSV file that Vela produces.""" + + cycles_total: float + cycles_npu: float + cycles_sram_access: float + cycles_dram_access: float + cycles_on_chip_flash_access: float + cycles_off_chip_flash_access: float + core_clock: float + dram_memory_used: float + sram_memory_used: float + on_chip_flash_memory_used: float + off_chip_flash_memory_used: float + 
batch_size: int + memory_mode: str + system_config: str + accelerator_configuration: str + arena_cache_size: float + + def __repr__(self) -> str: + """Return String Representation of VelaSummary object.""" + header_values = dict(summary_metrics) + string_to_check = "" + for field in fields(self): + string_to_check += ( + f"{header_values[field.name]}: {getattr(self, field.name)}, " + ) + return string_to_check + + +complete_summary_metrics = [ + ("experiment", "experiment"), + ("network", "network"), + ("accelerator_configuration", "accelerator_configuration"), + ("system_config", "system_config"), + ("memory_mode", "memory_mode"), + ("core_clock", "core_clock"), + ("arena_cache_size", "arena_cache_size"), + ("sram_bandwidth", "sram_bandwidth"), + ("dram_bandwidth", "dram_bandwidth"), + ("on_chip_flash_bandwidth", "on_chip_flash_bandwidth"), + ("off_chip_flash_bandwidth", "off_chip_flash_bandwidth"), + ("weights_storage_area", "weights_storage_area"), + ("feature_map_storage_area", "feature_map_storage_area"), + ("inferences_per_second", "inferences_per_second"), + ("batch_size", "batch_size"), + ("inference_time", "inference_time"), + ("passes_before_fusing", "passes_before_fusing"), + ("sram_memory_used", "sram_memory_used"), + ("dram_memory_used", "dram_memory_used"), + ( + "on_chip_flash_memory_used", + "on_chip_flash_memory_used", + ), + ("off_chip_flash_memory_used", "off_chip_flash_memory_used"), + ("total_original_weights", "total_original_weights"), + ("total_npu_encoded_weights", "total_npu_encoded_weights"), + ("dram_total_bytes", "dram_total_bytes"), + ( + "on_chip_flash_feature_map_read_bytes", + "on_chip_flash_feature_map_read_bytes", + ), + ("on_chip_flash_feature_map_write_bytes", "on_chip_flash_feature_map_write_bytes"), + ("on_chip_flash_weight_read_bytes", "on_chip_flash_weight_read_bytes"), + ("on_chip_flash_weight_write_bytes", "on_chip_flash_weight_write_bytes"), + ("on_chip_flash_total_bytes", "on_chip_flash_total_bytes"), + 
("off_chip_flash_feature_map_read_bytes", "off_chip_flash_feature_map_read_bytes"), + ( + "off_chip_flash_feature_map_write_bytes", + "off_chip_flash_feature_map_write_bytes", + ), + ("off_chip_flash_weight_read_bytes", "off_chip_flash_weight_read_bytes"), + ("off_chip_flash_weight_write_bytes", "off_chip_flash_weight_write_bytes"), + ("off_chip_flash_total_bytes", "off_chip_flash_total_bytes"), + ("nn_macs", "nn_macs"), + ("nn_tops", "nn_tops"), + ("cycles_npu", "cycles_npu"), + ("cycles_sram_access", "cycles_sram_access"), + ("cycles_dram_access", "cycles_dram_access"), + ("cycles_on_chip_flash_access", "cycles_on_chip_flash_access"), + ("cycles_off_chip_flash_access", "cycles_off_chip_flash_access"), + ("cycles_total", "cycles_total"), +] + +OUTPUT_METRICS = [field.name for field in fields(VelaSummary)] + +summary_metrics = [ + summary_metric + for summary_metric in complete_summary_metrics + if summary_metric[0] in OUTPUT_METRICS +] +summary_metrics.sort(key=lambda e: OUTPUT_METRICS.index(e[0])) + + @dataclass class Model: """Model metadata.""" @@ -49,20 +169,6 @@ class Model: ) -@dataclass -class OptimizedModel: - """Instance of the Vela optimized model.""" - - nng: Graph - arch: ArchitectureFeatures - compiler_options: CompilerOptions - scheduler_options: SchedulerOptions - - def save(self, output_filename: str | Path) -> None: - """Save instance of the optimized model to the file.""" - write_tflite(self.nng, output_filename) - - AcceleratorConfigType = Literal[ "ethos-u55-32", "ethos-u55-64", @@ -82,16 +188,17 @@ class VelaCompilerOptions: # pylint: disable=too-many-instance-attributes """Vela compiler options.""" config_files: str | list[str] | None = None - system_config: str = ArchitectureFeatures.DEFAULT_CONFIG - memory_mode: str = ArchitectureFeatures.DEFAULT_CONFIG + system_config: str = "internal-default" + memory_mode: str = "internal-default" accelerator_config: AcceleratorConfigType | None = None - max_block_dependency: int = 
ArchitectureFeatures.MAX_BLOCKDEP + max_block_dependency: int = 3 arena_cache_size: int | None = None tensor_allocator: TensorAllocatorType = "HillClimb" - cpu_tensor_alignment: int = Tensor.AllocationQuantum + cpu_tensor_alignment: int = 16 optimization_strategy: OptimizationStrategyType = "Performance" output_dir: Path = Path("output") recursion_limit: int = 1000 + verbose_performance: bool = True class VelaCompiler: # pylint: disable=too-many-instance-attributes @@ -105,13 +212,12 @@ class VelaCompiler: # pylint: disable=too-many-instance-attributes self.accelerator_config = compiler_options.accelerator_config self.max_block_dependency = compiler_options.max_block_dependency self.arena_cache_size = compiler_options.arena_cache_size - self.tensor_allocator = TensorAllocator[compiler_options.tensor_allocator] + self.tensor_allocator = compiler_options.tensor_allocator self.cpu_tensor_alignment = compiler_options.cpu_tensor_alignment - self.optimization_strategy = OptimizationStrategy[ - compiler_options.optimization_strategy - ] - self.output_dir = compiler_options.output_dir + self.optimization_strategy = compiler_options.optimization_strategy + self.output_dir = Path(compiler_options.output_dir) self.recursion_limit = compiler_options.recursion_limit + self.verbose_performance = compiler_options.verbose_performance sys.setrecursionlimit(self.recursion_limit) @@ -122,36 +228,48 @@ class VelaCompiler: # pylint: disable=too-many-instance-attributes nng, network_type = self._read_model(model) return Model(nng, network_type) - def compile_model(self, model: str | Path | Model) -> OptimizedModel: + def compile_model( + self, model_path: Path, already_compiled: bool = False + ) -> tuple[VelaSummary, Path]: """Compile the model.""" - if isinstance(model, (str, Path)): - nng, network_type = self._read_model(model) - else: - nng, network_type = model.nng, NetworkType.TFLite - - if not nng: - raise ValueError("Unable to read model: model.nng is not available") - - 
output_basename = f"{self.output_dir}/{nng.name}" - try: - arch = self._architecture_features() - compiler_options = self._compiler_options() - scheduler_options = self._scheduler_options() - - with redirect_output( + with redirect_raw_output( logger, stdout_level=logging.DEBUG, stderr_level=logging.DEBUG ): tmp = sys.stdout output_message = StringIO() sys.stdout = output_message - compiler_driver( - nng, - arch, - compiler_options, - scheduler_options, - network_type, - output_basename, + main_args = [ + "--output-dir", + str(self.output_dir.as_posix()), + "--tensor-allocator", + str(self.tensor_allocator), + "--cpu-tensor-alignment", + str(self.cpu_tensor_alignment), + "--accelerator-config", + str(self.accelerator_config), + "--system-config", + str(self.system_config), + "--memory-mode", + str(self.memory_mode), + "--max-block-dependency", + str(self.max_block_dependency), + "--optimise", + str(self.optimization_strategy), + model_path.as_posix(), + "--config", + str(self.config_files), + ] + if self.verbose_performance: + main_args.append("--verbose-performance") + if not already_compiled: + main(main_args) + optimized_model_path = Path( + self.output_dir.as_posix() + + "/" + + model_path.stem + + "_vela" + + model_path.suffix ) sys.stdout = tmp if ( @@ -159,51 +277,29 @@ class VelaCompiler: # pylint: disable=too-many-instance-attributes in output_message.getvalue() ): raise MemoryError("Model is too large and uses too much RAM") - - return OptimizedModel(nng, arch, compiler_options, scheduler_options) + summary_data = parse_summary_csv_file( + Path( + self.output_dir.as_posix() + + "/" + + model_path.stem + + "_summary_" + + self.system_config + + ".csv" + ) + ) + return summary_data, optimized_model_path except MemoryError as err: raise err except (SystemExit, Exception) as err: + if ( + "Error: Invalid tflite file." 
in output_message.getvalue() + and isinstance(err, SystemExit) + ): + raise RuntimeError(f"Unable to read model {model_path}") from err raise RuntimeError( "Model could not be optimized with Vela compiler." ) from err - def get_config(self) -> dict[str, Any]: - """Get compiler configuration.""" - arch = self._architecture_features() - - memory_area = { - mem.name: { - "clock_scales": arch.memory_clock_scales[mem], - "burst_length": arch.memory_burst_length[mem], - "read_latency": arch.memory_latency[mem][BandwidthDirection.Read], - "write_latency": arch.memory_latency[mem][BandwidthDirection.Write], - } - for mem in ( - MemArea.Sram, - MemArea.Dram, - MemArea.OnChipFlash, - MemArea.OffChipFlash, - ) - } - - return { - "accelerator_config": arch.accelerator_config.value, - "system_config": arch.system_config, - "core_clock": arch.core_clock, - "axi0_port": arch.axi0_port.name, - "axi1_port": arch.axi1_port.name, - "memory_mode": arch.memory_mode, - "const_mem_area": arch.const_mem_area.name, - "arena_mem_area": arch.arena_mem_area.name, - "cache_mem_area": arch.cache_mem_area.name, - "arena_cache_size": arch.arena_cache_size, - "permanent_storage_mem_area": arch.permanent_storage_mem_area.name, - "feature_map_storage_mem_area": arch.feature_map_storage_mem_area.name, - "fast_storage_mem_area": arch.fast_storage_mem_area.name, - "memory_area": memory_area, - } - @staticmethod def _read_model(model: str | Path) -> tuple[Graph, NetworkType]: """Read TensorFlow Lite model.""" @@ -216,57 +312,10 @@ class VelaCompiler: # pylint: disable=too-many-instance-attributes except (SystemExit, Exception) as err: raise RuntimeError(f"Unable to read model {model_path}.") from err - def _architecture_features(self) -> ArchitectureFeatures: - """Return ArchitectureFeatures instance.""" - return ArchitectureFeatures( - vela_config_files=self.config_files, - accelerator_config=self.accelerator_config, - system_config=self.system_config, - memory_mode=self.memory_mode, - 
max_blockdep=self.max_block_dependency, - verbose_config=False, - arena_cache_size=self.arena_cache_size, - ) - - def _scheduler_options(self) -> SchedulerOptions: - """Return SchedulerOptions instance.""" - arch = self._architecture_features() - - return SchedulerOptions( - optimization_strategy=self.optimization_strategy, - sram_target=arch.arena_cache_size, - verbose_schedule=False, - ) - - def _compiler_options(self) -> CompilerOptions: - """Return CompilerOptions instance.""" - return CompilerOptions( - verbose_graph=False, - verbose_quantization=False, - verbose_packing=False, - verbose_tensor_purpose=False, - verbose_tensor_format=False, - verbose_allocation=False, - verbose_high_level_command_stream=False, - verbose_register_command_stream=False, - verbose_operators=False, - verbose_weights=False, - verbose_performance=True, - show_cpu_operations=False, - tensor_allocator=self.tensor_allocator, - timing=False, - output_dir=self.output_dir, - cpu_tensor_alignment=self.cpu_tensor_alignment, - ) - - def return_compiler_options(self) -> CompilerOptions: - """Return CompilerOptions instance for test purposes.""" - return self._compiler_options() - def resolve_compiler_config( vela_compiler_options: VelaCompilerOptions, -) -> dict[str, Any]: +) -> VelaInitData: """Resolve passed compiler options. Vela has number of configuration parameters that being @@ -278,22 +327,209 @@ def resolve_compiler_config( In order to get this information we need to create instance of the Vela compiler first. 
""" - vela_compiler = VelaCompiler(vela_compiler_options) - return vela_compiler.get_config() - - -def optimize_model( - model_path: Path, compiler_options: VelaCompilerOptions, output_model_path: Path -) -> None: - """Optimize model and return it's path after optimization.""" - logger.debug( - "Optimize model %s for target %s", - model_path, - compiler_options.accelerator_config, + return parse_vela_initialisation_file( + get_vela_config(), + vela_compiler_options.system_config, + vela_compiler_options.memory_mode, ) + +def compile_model(model_path: Path, compiler_options: VelaCompilerOptions) -> Path: + """Compile model.""" vela_compiler = VelaCompiler(compiler_options) - optimized_model = vela_compiler.compile_model(model_path) + # output dir could be a path or str, cast to Path object + output_dir = Path(compiler_options.output_dir) + if Path( + output_dir.as_posix() + + "/" + + model_path.stem + + "_summary_" + + compiler_options.system_config + + ".csv" + ).is_file(): + _, optimized_model_path = vela_compiler.compile_model(model_path, True) + else: + _, optimized_model_path = vela_compiler.compile_model(model_path) + return optimized_model_path + + +def parse_summary_csv_file(vela_summary_csv_file: Path) -> VelaSummary: + """Parse the summary csv file from Vela.""" + if not vela_summary_csv_file.is_file(): + raise FileNotFoundError(f"CSV File not found at {vela_summary_csv_file}") + + with open(vela_summary_csv_file, encoding="UTF-8") as csv_file: + summary_reader = csv.DictReader(csv_file, delimiter=",") + try: + row = next(summary_reader) + except StopIteration as err: + raise RuntimeError("Generated Vela Summary CSV is empty") from err + try: + # pylint: disable=eval-used + key_types = { + field.name: eval(field.type) # type: ignore # nosec + for field in fields(VelaSummary) + } + # pylint: enable=eval-used + summary_data = VelaSummary( + **{key: key_types[key](row[title]) for key, title in summary_metrics} + ) + except KeyError as err: + raise KeyError( + 
f"Generated Vela Summary CSV missing expected header: {err.args[0]}." + ) from err + return summary_data + + +def parse_vela_initialisation_file( # pylint: disable=too-many-locals + vela_init_file: Path, system_config: str, memory_mode: str +) -> VelaInitData: + """Parse the vela.ini to retrieve data for the target information table.""" + if not vela_init_file.is_file(): + raise FileNotFoundError( + f"Vela Initialisation File not found at {vela_init_file}" + ) + + lines = [] + with open(vela_init_file, encoding="UTF-8") as init_file: + lines = init_file.readlines() + + if len(lines) == 0: + raise OSError("vela.ini File Is Empty") + + lines = [line.strip("\n][ ") for line in lines] + + idxs_memory_mode = [ + idx for idx, item in enumerate(lines) if re.search("^Memory_Mode.*", item) + ] + + if len(idxs_memory_mode) == 0: + raise IndexError("No memory modes are present in vela.ini file.") + + idxs_system_config = [ + idx for idx, item in enumerate(lines) if re.search("^System_Config.*", item) + ] + [idxs_memory_mode[0]] + + if len(idxs_system_config) <= 1: + raise IndexError("No system configs are present in vela.ini file.") + + try: + idx_config = lines.index("System_Config." + system_config) + except ValueError as err: + raise ValueError( + f"System Config: {system_config} not present in vela.ini file." + ) from err + + lines_to_probe = lines[ + idx_config : idxs_system_config[ # noqa: E203 + idxs_system_config.index(idx_config) + 1 + ] + ] + + def collect_memory_mode_lines(memory_mode: str) -> list[str]: + try: + idx_memory_mode = lines.index("Memory_Mode." + memory_mode) + except ValueError as err: + raise ValueError( + f"Memory Mode: {memory_mode} not present in vela.ini file." 
+ ) from err + if idxs_memory_mode.index(idx_memory_mode) == len(idxs_memory_mode) - 1: + lines_to_probe = lines[idx_memory_mode:] + else: + lines_to_probe = lines[ + idx_memory_mode : idxs_memory_mode[ # noqa: E203 + idxs_memory_mode.index(idx_memory_mode) + 1 + ] + ] + return lines_to_probe + + lines_to_probe_memory_mode = collect_memory_mode_lines(memory_mode) + extra_memory_mode_lines = [] + for line in lines_to_probe_memory_mode: + if "inherit=Memory_Mode." in line: + extra_memory_mode = line[line.rindex(".") + 1 :] # noqa: E203 + extra_memory_mode_lines = collect_memory_mode_lines(extra_memory_mode) + + lines_to_probe += extra_memory_mode_lines + lines_to_probe_memory_mode + + init_dict = {} + for line in lines_to_probe: + if "=" in line: + init_dict[line[: line.index("=")]] = line[ + line.index("=") + 1 : # noqa: E203 + ] + try: + init_data = VelaInitData( + system_config=system_config, + core_clock=float(init_dict["core_clock"]), + axi0_port=str(init_dict["axi0_port"]), + axi1_port=str(init_dict["axi1_port"]), + memory_mode=memory_mode, + sram_memory_data=VelaInitMemoryData( + clock_scale=float(init_dict["Sram_clock_scale"]) + if "Sram_clock_scale" in init_dict + else None, + burst_length=int(init_dict["Sram_burst_length"]) + if "Sram_burst_length" in init_dict + else None, + read_latency=int(init_dict["Sram_read_latency"]) + if "Sram_read_latency" in init_dict + else None, + write_latency=int(init_dict["Sram_write_latency"]) + if "Sram_write_latency" in init_dict + else None, + ), + dram_memory_data=VelaInitMemoryData( + clock_scale=float(init_dict["Dram_clock_scale"]) + if "Dram_clock_scale" in init_dict + else None, + burst_length=int(init_dict["Dram_burst_length"]) + if "Dram_burst_length" in init_dict + else None, + read_latency=int(init_dict["Dram_read_latency"]) + if "Dram_read_latency" in init_dict + else None, + write_latency=int(init_dict["Dram_write_latency"]) + if "Dram_write_latency" in init_dict + else None, + ), + 
off_chip_flash_memory_data=VelaInitMemoryData( + clock_scale=float(init_dict["OffChipFlash_clock_scale"]) + if "OffChipFlash_clock_scale" in init_dict + else None, + burst_length=int(init_dict["OffChipFlash_burst_length"]) + if "OffChipFlash_burst_length" in init_dict + else None, + read_latency=int(init_dict["OffChipFlash_read_latency"]) + if "OffChipFlash_read_latency" in init_dict + else None, + write_latency=int(init_dict["OffChipFlash_write_latency"]) + if "OffChipFlash_write_latency" in init_dict + else None, + ), + on_chip_flash_memory_data=VelaInitMemoryData( + clock_scale=float(init_dict["OnChipFlash_clock_scale"]) + if "OnChipFlash_clock_scale" in init_dict + else None, + burst_length=int(init_dict["OnChipFlash_burst_length"]) + if "OnChipFlash_burst_length" in init_dict + else None, + read_latency=int(init_dict["OnChipFlash_read_latency"]) + if "OnChipFlash_read_latency" in init_dict + else None, + write_latency=int(init_dict["OnChipFlash_write_latency"]) + if "OnChipFlash_write_latency" in init_dict + else None, + ), + const_mem_area=str(init_dict["const_mem_area"]), + arena_mem_area=str(init_dict["arena_mem_area"]), + cache_mem_area=str(init_dict["cache_mem_area"]), + arena_cache_size=int(init_dict["arena_cache_size"]) + if "arena_cache_size" in init_dict + else None, + ) + + except KeyError as err: + raise KeyError(f"Vela.ini file missing expected header: {err.args[0]}") from err - logger.debug("Save optimized model into %s", output_model_path) - optimized_model.save(output_model_path) + return init_data diff --git a/src/mlia/backend/vela/performance.py b/src/mlia/backend/vela/performance.py index 72a8ceb..2cf945d 100644 --- a/src/mlia/backend/vela/performance.py +++ b/src/mlia/backend/vela/performance.py @@ -10,15 +10,12 @@ from collections import Counter from dataclasses import dataclass from dataclasses import fields from pathlib import Path -from pydoc import locate import numpy as np -from ethosu.vela.npu_performance import PassCycles -from 
ethosu.vela.tensor import MemArea -from mlia.backend.vela.compiler import OptimizedModel from mlia.backend.vela.compiler import VelaCompiler from mlia.backend.vela.compiler import VelaCompilerOptions +from mlia.backend.vela.compiler import VelaSummary logger = logging.getLogger(__name__) @@ -37,11 +34,10 @@ class PerformanceMetrics: # pylint: disable=too-many-instance-attributes batch_inference_time: float inferences_per_second: float batch_size: int - unknown_memory_area_size: int - sram_memory_area_size: int - dram_memory_area_size: int - on_chip_flash_memory_area_size: int - off_chip_flash_memory_area_size: int + sram_memory_area_size: float + dram_memory_area_size: float + on_chip_flash_memory_area_size: float + off_chip_flash_memory_area_size: float layerwise_performance_info: LayerwisePerfInfo @@ -145,19 +141,19 @@ def parse_layerwise_perf_csv( # pylint: disable=too-many-locals if row == headers_to_check_cpu_ops: continue try: + # pylint: disable=eval-used key_types = { - field.name: locate(str(field.type)) + field.name: eval(field.type) # type: ignore # nosec for field in fields(LayerPerfInfo) } + # pylint: enable=eval-used ids_to_metrics = {} for key, title, _ in metrics: try: - ids_to_metrics[key] = key_types[key]( # type: ignore - row_as_dict[title] - ) + ids_to_metrics[key] = key_types[key](row_as_dict[title]) except ValueError as err: if "invalid literal for int() with base 10" in str(err): - ids_to_metrics[key] = key_types[key]( # type: ignore + ids_to_metrics[key] = key_types[key]( float(row_as_dict[title]) ) else: @@ -180,17 +176,20 @@ def estimate_performance( model_path, compiler_options.accelerator_config, ) - vela_compiler = VelaCompiler(compiler_options) - - initial_model = vela_compiler.read_model(model_path) - if initial_model.optimized: - raise ValueError( - "Unable to estimate performance for the given optimized model." 
- ) - - optimized_model = vela_compiler.compile_model(initial_model) - output_dir = optimized_model.compiler_options.output_dir + if Path( + Path(compiler_options.output_dir).as_posix() + + "/" + + model_path.stem + + "_summary_" + + compiler_options.system_config + + ".csv" + ).is_file(): + summary_data, _ = vela_compiler.compile_model(model_path, True) + else: + summary_data, _ = vela_compiler.compile_model(model_path) + + output_dir = compiler_options.output_dir csv_paths = [entry for entry in os.listdir(output_dir) if "per-layer.csv" in entry] model_name = str(model_path.stem) csv_file_found = None @@ -204,41 +203,31 @@ def estimate_performance( vela_csv_file=csv_path, metrics=layer_metrics ) - return _performance_metrics(layerwise_performance_info, optimized_model) + return _performance_metrics(layerwise_performance_info, summary_data) def _performance_metrics( - layerwise_performance_info: LayerwisePerfInfo, optimized_model: OptimizedModel + layerwise_performance_info: LayerwisePerfInfo, summary_data: VelaSummary ) -> PerformanceMetrics: """Return performance metrics for optimized model.""" - cycles = optimized_model.nng.cycles - - def memory_usage(mem_area: MemArea) -> int: - """Get memory usage for the proviced memory area type.""" - memory_used: dict[MemArea, int] = optimized_model.nng.memory_used - bandwidths = optimized_model.nng.bandwidths - - return memory_used.get(mem_area, 0) if np.sum(bandwidths[mem_area]) > 0 else 0 - midpoint_fps = np.nan - midpoint_inference_time = cycles[PassCycles.Total] / optimized_model.arch.core_clock + midpoint_inference_time = summary_data.cycles_total / summary_data.core_clock if midpoint_inference_time > 0: midpoint_fps = 1 / midpoint_inference_time return PerformanceMetrics( - npu_cycles=int(cycles[PassCycles.Npu]), - sram_access_cycles=int(cycles[PassCycles.SramAccess]), - dram_access_cycles=int(cycles[PassCycles.DramAccess]), - on_chip_flash_access_cycles=int(cycles[PassCycles.OnChipFlashAccess]), - 
off_chip_flash_access_cycles=int(cycles[PassCycles.OffChipFlashAccess]), - total_cycles=int(cycles[PassCycles.Total]), + npu_cycles=int(summary_data.cycles_npu), + sram_access_cycles=int(summary_data.cycles_sram_access), + dram_access_cycles=int(summary_data.cycles_dram_access), + on_chip_flash_access_cycles=int(summary_data.cycles_on_chip_flash_access), + off_chip_flash_access_cycles=int(summary_data.cycles_off_chip_flash_access), + total_cycles=int(summary_data.cycles_total), batch_inference_time=midpoint_inference_time * 1000, inferences_per_second=midpoint_fps, - batch_size=optimized_model.nng.batch_size, - unknown_memory_area_size=memory_usage(MemArea.Unknown), - sram_memory_area_size=memory_usage(MemArea.Sram), - dram_memory_area_size=memory_usage(MemArea.Dram), - on_chip_flash_memory_area_size=memory_usage(MemArea.OnChipFlash), - off_chip_flash_memory_area_size=memory_usage(MemArea.OffChipFlash), + batch_size=summary_data.batch_size, + sram_memory_area_size=float(summary_data.sram_memory_used), + dram_memory_area_size=float(summary_data.dram_memory_used), + on_chip_flash_memory_area_size=float(summary_data.on_chip_flash_memory_used), + off_chip_flash_memory_area_size=float(summary_data.off_chip_flash_memory_used), layerwise_performance_info=layerwise_performance_info, ) diff --git a/src/mlia/core/context.py b/src/mlia/core/context.py index 6e699be..345b668 100644 --- a/src/mlia/core/context.py +++ b/src/mlia/core/context.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright 2022-2023, Arm Limited and/or its affiliates. +# SPDX-FileCopyrightText: Copyright 2022-2024, Arm Limited and/or its affiliates. # SPDX-License-Identifier: Apache-2.0 """Context module. 
@@ -112,7 +112,6 @@ class ExecutionContext(Context): event_publisher: EventPublisher | None = None, verbose: bool = False, logs_dir: str = "logs", - models_dir: str = "models", action_resolver: ActionResolver | None = None, output_format: OutputFormat = "plain_text", ) -> None: @@ -129,8 +128,6 @@ class ExecutionContext(Context): :param verbose: enable verbose output :param logs_dir: name of the directory inside output directory where log files will be stored - :param models_dir: name of the directory inside output directory where - temporary models will be stored :param action_resolver: instance of the action resolver that could make advice actionable :param output_format: format for the application output @@ -144,7 +141,6 @@ class ExecutionContext(Context): self._event_publisher = event_publisher or DefaultEventPublisher() self.verbose = verbose self.logs_dir = logs_dir - self.models_dir = models_dir self._action_resolver = action_resolver or APIActionResolver() self._output_format = output_format @@ -195,10 +191,7 @@ class ExecutionContext(Context): def get_model_path(self, model_filename: str) -> Path: """Return path for the model.""" - models_dir_path = self._output_dir_path / self.models_dir - models_dir_path.mkdir(exist_ok=True) - - return models_dir_path / model_filename + return self._output_dir_path / model_filename @property def logs_path(self) -> Path: diff --git a/src/mlia/core/reporting.py b/src/mlia/core/reporting.py index 722adfd..f8ef644 100644 --- a/src/mlia/core/reporting.py +++ b/src/mlia/core/reporting.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright 2022-2023, Arm Limited and/or its affiliates. +# SPDX-FileCopyrightText: Copyright 2022-2024, Arm Limited and/or its affiliates. 
# SPDX-License-Identifier: Apache-2.0 """Reporting module.""" from __future__ import annotations @@ -49,7 +49,7 @@ class ReportItem: self, name: str, alias: str | None = None, - value: str | int | Cell | None = None, + value: str | int | float | Cell | None = None, nested_items: list[ReportItem] | None = None, ) -> None: """Init the report item.""" diff --git a/src/mlia/resources/vela/vela.ini b/src/mlia/resources/vela/vela.ini index 29a5179..747dc3d 100644 --- a/src/mlia/resources/vela/vela.ini +++ b/src/mlia/resources/vela/vela.ini @@ -1,4 +1,4 @@ -; SPDX-FileCopyrightText: Copyright 2020, 2022, Arm Limited and/or its affiliates. +; SPDX-FileCopyrightText: Copyright 2020, 2022, 2024, Arm Limited and/or its affiliates. ; SPDX-License-Identifier: Apache-2.0 ; ----------------------------------------------------------------------------- @@ -6,6 +6,19 @@ ; ----------------------------------------------------------------------------- ; System Configuration +; Ethos-U55 Deep Embedded: SRAM (1.6 GB/s) and Flash (0.1 GB/s) +[System_Config.Ethos_U55_Deep_Embedded] +core_clock=200e6 +axi0_port=Sram +axi1_port=OffChipFlash +Sram_clock_scale=1.0 +Sram_burst_length=32 +Sram_read_latency=32 +Sram_write_latency=32 +OffChipFlash_clock_scale=0.0625 +OffChipFlash_burst_length=128 +OffChipFlash_read_latency=64 +OffChipFlash_write_latency=64 ; Ethos-U55 High-End Embedded: SRAM (4 GB/s) and Flash (0.5 GB/s) [System_Config.Ethos_U55_High_End_Embedded] @@ -35,6 +48,20 @@ OffChipFlash_burst_length=128 OffChipFlash_read_latency=64 OffChipFlash_write_latency=64 +; Ethos-U65 Mid-End: SRAM (8 GB/s) and DRAM (3.75 GB/s) +[System_Config.Ethos_U65_Mid_End] +core_clock=500e6 +axi0_port=Sram +axi1_port=Dram +Sram_clock_scale=1.0 +Sram_burst_length=32 +Sram_read_latency=32 +Sram_write_latency=32 +Dram_clock_scale=0.46875 +Dram_burst_length=128 +Dram_read_latency=500 +Dram_write_latency=250 + ; Ethos-U65 High-End: SRAM (16 GB/s) and DRAM (3.75 GB/s) [System_Config.Ethos_U65_High_End] 
core_clock=1e9 @@ -49,6 +76,20 @@ Dram_burst_length=128 Dram_read_latency=500 Dram_write_latency=250 +; Ethos-U65 Client-Server: SRAM (16 GB/s) and DRAM (12 GB/s) +[System_Config.Ethos_U65_Client_Server] +core_clock=1e9 +axi0_port=Sram +axi1_port=Dram +Sram_clock_scale=1.0 +Sram_burst_length=32 +Sram_read_latency=32 +Sram_write_latency=32 +Dram_clock_scale=0.75 +Dram_burst_length=128 +Dram_read_latency=500 +Dram_write_latency=250 + ; ----------------------------------------------------------------------------- ; Memory Mode @@ -58,7 +99,6 @@ Dram_write_latency=250 const_mem_area=Axi0 arena_mem_area=Axi0 cache_mem_area=Axi0 -arena_cache_size=2096768 ; Shared SRAM: the SRAM is shared between the Ethos-U and the Cortex-M software ; The non-SRAM memory is assumed to be read-only @@ -66,7 +106,6 @@ arena_cache_size=2096768 const_mem_area=Axi1 arena_mem_area=Axi0 cache_mem_area=Axi0 -arena_cache_size=2096768 ; Dedicated SRAM: the SRAM (384KB) is only for use by the Ethos-U ; The non-SRAM memory is assumed to be read-writeable @@ -75,3 +114,9 @@ const_mem_area=Axi1 arena_mem_area=Axi1 cache_mem_area=Axi0 arena_cache_size=393216 + +; Dedicated SRAM 512KB: the SRAM (512KB) is only for use by the Ethos-U +; The non-SRAM memory is assumed to be read-writeable +[Memory_Mode.Dedicated_Sram_512KB] +inherit=Memory_Mode.Dedicated_Sram +arena_cache_size=524288 diff --git a/src/mlia/target/ethos_u/advisor.py b/src/mlia/target/ethos_u/advisor.py index b5932d0..edcfcfc 100644 --- a/src/mlia/target/ethos_u/advisor.py +++ b/src/mlia/target/ethos_u/advisor.py @@ -109,7 +109,9 @@ class EthosUInferenceAdvisor(DefaultInferenceAdvisor): def _get_target_config(self, context: Context) -> EthosUConfiguration: """Get target configuration.""" target_profile = self.get_target_profile(context) - return cast(EthosUConfiguration, profile(target_profile)) + target_config = cast(EthosUConfiguration, profile(target_profile)) + target_config.compiler_options.output_dir = context.output_dir # type: ignore 
+ return target_config def _get_optimization_settings(self, context: Context) -> list[list[dict]]: """Get optimization settings.""" diff --git a/src/mlia/target/ethos_u/config.py b/src/mlia/target/ethos_u/config.py index 73baa61..b3416d3 100644 --- a/src/mlia/target/ethos_u/config.py +++ b/src/mlia/target/ethos_u/config.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright 2022-2023, Arm Limited and/or its affiliates. +# SPDX-FileCopyrightText: Copyright 2022-2024, Arm Limited and/or its affiliates. # SPDX-License-Identifier: Apache-2.0 """Ethos-U configuration.""" from __future__ import annotations @@ -10,6 +10,7 @@ from mlia.backend.corstone import is_corstone_backend from mlia.backend.manager import get_available_backends from mlia.backend.vela.compiler import resolve_compiler_config from mlia.backend.vela.compiler import VelaCompilerOptions +from mlia.backend.vela.compiler import VelaInitData from mlia.target.config import TargetProfile from mlia.utils.filesystem import get_vela_config @@ -53,7 +54,7 @@ class EthosUConfiguration(TargetProfile): ) @property - def resolved_compiler_config(self) -> dict[str, Any]: + def resolved_compiler_config(self) -> VelaInitData: """Resolve compiler configuration.""" return resolve_compiler_config(self.compiler_options) diff --git a/src/mlia/target/ethos_u/data_analysis.py b/src/mlia/target/ethos_u/data_analysis.py index 5c6080f..d42d82a 100644 --- a/src/mlia/target/ethos_u/data_analysis.py +++ b/src/mlia/target/ethos_u/data_analysis.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright 2022-2023, Arm Limited and/or its affiliates. +# SPDX-FileCopyrightText: Copyright 2022-2024, Arm Limited and/or its affiliates. 
# SPDX-License-Identifier: Apache-2.0 """Ethos-U data analysis module.""" from __future__ import annotations @@ -110,13 +110,13 @@ class EthosUDataAnalyzer(FactExtractor): if not optimizations: return - orig = optimization_results.original_perf_metrics.in_kilobytes() + orig = optimization_results.original_perf_metrics orig_memory = orig.memory_usage orig_cycles = orig.npu_cycles diffs: list[OptimizationDiff] = [] for opt_type, opt_perf_metrics in optimizations: - opt = opt_perf_metrics.in_kilobytes() + opt = opt_perf_metrics opt_memory = opt.memory_usage opt_cycles = opt.npu_cycles diff --git a/src/mlia/target/ethos_u/performance.py b/src/mlia/target/ethos_u/performance.py index 8decb75..1e2a504 100644 --- a/src/mlia/target/ethos_u/performance.py +++ b/src/mlia/target/ethos_u/performance.py @@ -54,7 +54,6 @@ class MemoryUsage: sram_memory_area_size: int | float dram_memory_area_size: int | float - unknown_memory_area_size: int | float on_chip_flash_memory_area_size: int | float off_chip_flash_memory_area_size: int | float memory_size_type: MemorySizeType = MemorySizeType.BYTES @@ -67,27 +66,6 @@ class MemoryUsage: "Off chip flash used", ] - def in_kilobytes(self) -> MemoryUsage: - """Return memory usage with values in kilobytes.""" - if self.memory_size_type == MemorySizeType.KILOBYTES: - return self - - kilobytes = [ - value / BYTES_PER_KILOBYTE - for value in [ - self.sram_memory_area_size, - self.dram_memory_area_size, - self.unknown_memory_area_size, - self.on_chip_flash_memory_area_size, - self.off_chip_flash_memory_area_size, - ] - ] - - return MemoryUsage( - *kilobytes, # type: ignore - memory_size_type=MemorySizeType.KILOBYTES, - ) - @dataclass class PerformanceMetrics: @@ -98,23 +76,6 @@ class PerformanceMetrics: memory_usage: MemoryUsage | None layerwise_perf_info: LayerwisePerfInfo | None - def in_kilobytes(self) -> PerformanceMetrics: - """Return metrics with memory usage in KiB.""" - if self.memory_usage is None: - return PerformanceMetrics( - 
self.target_config, - self.npu_cycles, - self.memory_usage, - self.layerwise_perf_info, - ) - - return PerformanceMetrics( - self.target_config, - self.npu_cycles, - self.memory_usage.in_kilobytes(), - self.layerwise_perf_info, - ) - @dataclass class OptimizationPerformanceMetrics: @@ -157,7 +118,6 @@ class VelaPerformanceEstimator( MemoryUsage( vela_perf_metrics.sram_memory_area_size, vela_perf_metrics.dram_memory_area_size, - vela_perf_metrics.unknown_memory_area_size, vela_perf_metrics.on_chip_flash_memory_area_size, vela_perf_metrics.off_chip_flash_memory_area_size, ), @@ -192,12 +152,8 @@ class CorstonePerformanceEstimator( else model ) - optimized_model_path = self.context.get_model_path( - f"{model_path.stem}_vela.tflite" - ) - - vela_comp.optimize_model( - model_path, self.target_config.compiler_options, optimized_model_path + optimized_model_path = vela_comp.compile_model( + model_path, self.target_config.compiler_options ) corstone_perf_metrics = estimate_performance( diff --git a/src/mlia/target/ethos_u/reporters.py b/src/mlia/target/ethos_u/reporters.py index b747ce5..384d623 100644 --- a/src/mlia/target/ethos_u/reporters.py +++ b/src/mlia/target/ethos_u/reporters.py @@ -4,6 +4,7 @@ from __future__ import annotations from collections import defaultdict +from dataclasses import asdict from dataclasses import fields from typing import Any from typing import Callable @@ -119,29 +120,50 @@ def report_target_details(target_config: EthosUConfiguration) -> Report: """Return table representation for the target.""" compiler_config = target_config.resolved_compiler_config + memory_dict = dict( + zip( + ["Sram", "Dram", "OnChipFlash", "OffChipFlash"], + [ + compiler_config.sram_memory_data, + compiler_config.dram_memory_data, + compiler_config.on_chip_flash_memory_data, + compiler_config.off_chip_flash_memory_data, + ], + ) + ) + + memory_dict = { + key: val + for key, val in memory_dict.items() + if not list(asdict(val).values()).count(None) == 
len(list(asdict(val).values())) + } + memory_settings = [ ReportItem( "Const mem area", "const_mem_area", - compiler_config["const_mem_area"], + compiler_config.const_mem_area, ), ReportItem( "Arena mem area", "arena_mem_area", - compiler_config["arena_mem_area"], + compiler_config.arena_mem_area, ), ReportItem( "Cache mem area", "cache_mem_area", - compiler_config["cache_mem_area"], - ), - ReportItem( - "Arena cache size", - "arena_cache_size", - BytesCell(compiler_config["arena_cache_size"]), + compiler_config.cache_mem_area, ), ] + if compiler_config.arena_cache_size is not None: + memory_settings.append( + ReportItem( + "Arena cache size", + "arena_cache_size", + BytesCell(compiler_config.arena_cache_size), + ) + ) mem_areas_settings = [ ReportItem( f"{mem_area_name}", @@ -151,67 +173,48 @@ def report_target_details(target_config: EthosUConfiguration) -> Report: ReportItem( "Clock scales", "clock_scales", - mem_area_settings["clock_scales"], + mem_area_settings.clock_scale, ), ReportItem( "Burst length", "burst_length", - BytesCell(mem_area_settings["burst_length"]), + BytesCell(mem_area_settings.burst_length), ), ReportItem( "Read latency", "read_latency", - CyclesCell(mem_area_settings["read_latency"]), + CyclesCell(mem_area_settings.read_latency), ), ReportItem( "Write latency", "write_latency", - CyclesCell(mem_area_settings["write_latency"]), + CyclesCell(mem_area_settings.write_latency), ), ], ) - for mem_area_name, mem_area_settings in compiler_config["memory_area"].items() + for mem_area_name, mem_area_settings in memory_dict.items() ] system_settings = [ ReportItem( "Accelerator clock", "accelerator_clock", - ClockCell(compiler_config["core_clock"]), + ClockCell(compiler_config.core_clock), ), ReportItem( "AXI0 port", "axi0_port", - compiler_config["axi0_port"], + compiler_config.axi0_port, ), ReportItem( "AXI1 port", "axi1_port", - compiler_config["axi1_port"], + compiler_config.axi1_port, ), ReportItem( "Memory area settings", "memory_area", None, 
nested_items=mem_areas_settings ), ] - - arch_settings = [ - ReportItem( - "Permanent storage mem area", - "permanent_storage_mem_area", - compiler_config["permanent_storage_mem_area"], - ), - ReportItem( - "Feature map storage mem area", - "feature_map_storage_mem_area", - compiler_config["feature_map_storage_mem_area"], - ), - ReportItem( - "Fast storage mem area", - "fast_storage_mem_area", - compiler_config["fast_storage_mem_area"], - ), - ] - return NestedReport( "Target information", "target", @@ -221,21 +224,15 @@ def report_target_details(target_config: EthosUConfiguration) -> Report: ReportItem( "Memory mode", alias="memory_mode", - value=compiler_config["memory_mode"], + value=compiler_config.memory_mode, nested_items=memory_settings, ), ReportItem( "System config", alias="system_config", - value=compiler_config["system_config"], + value=compiler_config.system_config, nested_items=system_settings, ), - ReportItem( - "Architecture settings", - "arch_settings", - None, - nested_items=arch_settings, - ), ], ) @@ -244,7 +241,6 @@ def metrics_as_records( perf_metrics: list[PerformanceMetrics], ) -> tuple[list[tuple], list[tuple]]: """Convert perf metrics object into list of records.""" - perf_metrics = [item.in_kilobytes() for item in perf_metrics] def _layerwise_as_metrics( perf_metrics: list[PerformanceMetrics], @@ -314,9 +310,6 @@ def metrics_as_records( return [] metric_map["SRAM used"].append(metrics.memory_usage.sram_memory_area_size) metric_map["DRAM used"].append(metrics.memory_usage.dram_memory_area_size) - metric_map["Unknown memory area used"].append( - metrics.memory_usage.unknown_memory_area_size - ) metric_map["On-chip flash used"].append( metrics.memory_usage.on_chip_flash_memory_area_size ) diff --git a/tests/conftest.py b/tests/conftest.py index 9dc1d16..1092979 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -11,7 +11,7 @@ import numpy as np import pytest import tensorflow as tf -from mlia.backend.vela.compiler import optimize_model 
+from mlia.backend.vela.compiler import compile_model from mlia.core.context import ExecutionContext from mlia.nn.rewrite.core.utils.numpy_tfrecord import NumpyTFWriter from mlia.nn.tensorflow.tflite_convert import convert_to_tflite @@ -51,7 +51,7 @@ def invalid_input_model_file(test_tflite_invalid_model: Path) -> Path: @pytest.fixture(scope="session", name="empty_test_csv_file") -def fixture_empty_test_csv_file( # pylint: disable=too-many-locals +def fixture_empty_test_csv_file( test_csv_path: Path, ) -> Path: """Return empty test csv file path.""" @@ -59,7 +59,7 @@ def fixture_empty_test_csv_file( # pylint: disable=too-many-locals @pytest.fixture(scope="session", name="test_csv_file") -def fixture_test_csv_file( # pylint: disable=too-many-locals +def fixture_test_csv_file( test_csv_path: Path, ) -> Path: """Return test csv file path.""" @@ -67,7 +67,7 @@ def fixture_test_csv_file( # pylint: disable=too-many-locals @pytest.fixture(scope="session", name="test_csv_path") -def fixture_test_csv_path( # pylint: disable=too-many-locals +def fixture_test_csv_path( tmp_path_factory: pytest.TempPathFactory, ) -> Generator[Path, None, None]: """Return test csv file path.""" @@ -76,6 +76,32 @@ def fixture_test_csv_path( # pylint: disable=too-many-locals shutil.rmtree(tmp_path) +@pytest.fixture(scope="session", name="test_vela_path") +def fixture_test_vela_path( + tmp_path_factory: pytest.TempPathFactory, +) -> Generator[Path, None, None]: + """Return test vela file path.""" + tmp_path = tmp_path_factory.mktemp("vela_file") + yield tmp_path + shutil.rmtree(tmp_path) + + +@pytest.fixture(scope="session", name="empty_vela_ini_file") +def fixture_empty_vela_ini_file( + test_vela_path: Path, +) -> Path: + """Return empty test vela file path.""" + return test_vela_path / "empty_vela.ini" + + +@pytest.fixture(scope="session", name="vela_ini_file") +def fixture_vela_ini_file( + test_vela_path: Path, +) -> Path: + """Return test vela.ini file path.""" + return test_vela_path /
"vela.ini" + + def get_test_keras_model() -> tf.keras.Model: """Return test Keras model.""" model = tf.keras.Sequential( @@ -130,13 +156,8 @@ def fixture_test_models_path( convert_to_tflite(keras_model, quantized=True, output_path=tflite_model_path) # Vela-optimized TensorFlow Lite model (int8) - tflite_vela_model = tmp_path / TEST_MODEL_TFLITE_VELA_FILE target_config = EthosUConfiguration.load_profile("ethos-u55-256") - optimize_model( - tflite_model_path, - target_config.compiler_options, - tflite_vela_model, - ) + compile_model(tflite_model_path, target_config.compiler_options) tf.saved_model.save(keras_model, str(tmp_path / TEST_MODEL_TF_SAVED_MODEL_FILE)) diff --git a/tests/test_backend_vela_compiler.py b/tests/test_backend_vela_compiler.py index 5554efb..d5dc5cc 100644 --- a/tests/test_backend_vela_compiler.py +++ b/tests/test_backend_vela_compiler.py @@ -3,16 +3,22 @@ """Tests for module vela/compiler.""" from pathlib import Path from typing import Any +from unittest.mock import MagicMock import pytest -from ethosu.vela.compiler_driver import TensorAllocator -from ethosu.vela.scheduler import OptimizationStrategy +from ethosu.vela.vela import main -from mlia.backend.vela.compiler import optimize_model -from mlia.backend.vela.compiler import OptimizedModel +from mlia.backend.vela.compiler import compile_model +from mlia.backend.vela.compiler import parse_summary_csv_file +from mlia.backend.vela.compiler import parse_vela_initialisation_file +from mlia.backend.vela.compiler import resolve_compiler_config from mlia.backend.vela.compiler import VelaCompiler from mlia.backend.vela.compiler import VelaCompilerOptions +from mlia.backend.vela.compiler import VelaInitData +from mlia.backend.vela.compiler import VelaInitMemoryData +from mlia.backend.vela.compiler import VelaSummary from mlia.target.ethos_u.config import EthosUConfiguration +from mlia.utils.filesystem import recreate_directory def test_default_vela_compiler() -> None: @@ -26,52 +32,15 @@ def 
test_default_vela_compiler() -> None: assert default_compiler.accelerator_config == "ethos-u55-256" assert default_compiler.max_block_dependency == 3 assert default_compiler.arena_cache_size is None - assert default_compiler.tensor_allocator == TensorAllocator.HillClimb + assert default_compiler.tensor_allocator == "HillClimb" assert default_compiler.cpu_tensor_alignment == 16 - assert default_compiler.optimization_strategy == OptimizationStrategy.Performance + assert default_compiler.optimization_strategy == "Performance" assert default_compiler.output_dir == Path("output") - assert default_compiler.get_config() == { - "accelerator_config": "ethos-u55-256", - "system_config": "internal-default", - "core_clock": 500000000.0, - "axi0_port": "Sram", - "axi1_port": "OffChipFlash", - "memory_mode": "internal-default", - "const_mem_area": "Axi1", - "arena_mem_area": "Axi0", - "cache_mem_area": "Axi0", - "arena_cache_size": 4294967296, - "permanent_storage_mem_area": "OffChipFlash", - "feature_map_storage_mem_area": "Sram", - "fast_storage_mem_area": "Sram", - "memory_area": { - "Sram": { - "clock_scales": 1.0, - "burst_length": 32, - "read_latency": 32, - "write_latency": 32, - }, - "Dram": { - "clock_scales": 1.0, - "burst_length": 1, - "read_latency": 0, - "write_latency": 0, - }, - "OnChipFlash": { - "clock_scales": 1.0, - "burst_length": 1, - "read_latency": 0, - "write_latency": 0, - }, - "OffChipFlash": { - "clock_scales": 0.125, - "burst_length": 128, - "read_latency": 64, - "write_latency": 64, - }, - }, - } + with pytest.raises( + ValueError, match="System Config: internal-default not present in vela.ini file" + ): + resolve_compiler_config(vela_compiler_options=default_compiler_options) def test_vela_compiler_with_parameters(test_resources_path: Path) -> None: @@ -98,52 +67,120 @@ def test_vela_compiler_with_parameters(test_resources_path: Path) -> None: assert compiler.accelerator_config == "ethos-u65-256" assert compiler.max_block_dependency == 1 assert 
compiler.arena_cache_size == 10 - assert compiler.tensor_allocator == TensorAllocator.Greedy + assert compiler.tensor_allocator == "Greedy" assert compiler.cpu_tensor_alignment == 4 - assert compiler.optimization_strategy == OptimizationStrategy.Size + assert compiler.optimization_strategy == "Size" assert compiler.output_dir == Path("custom_output") - assert compiler.get_config() == { - "accelerator_config": "ethos-u65-256", - "system_config": "Ethos_U65_High_End", - "core_clock": 1000000000.0, - "axi0_port": "Sram", - "axi1_port": "Dram", - "memory_mode": "Shared_Sram", - "const_mem_area": "Axi1", - "arena_mem_area": "Axi0", - "cache_mem_area": "Axi0", - "arena_cache_size": 10, - "permanent_storage_mem_area": "Dram", - "feature_map_storage_mem_area": "Sram", - "fast_storage_mem_area": "Sram", - "memory_area": { - "Sram": { - "clock_scales": 1.0, - "burst_length": 32, - "read_latency": 32, - "write_latency": 32, - }, - "Dram": { - "clock_scales": 0.234375, - "burst_length": 128, - "read_latency": 500, - "write_latency": 250, - }, - "OnChipFlash": { - "clock_scales": 1.0, - "burst_length": 1, - "read_latency": 0, - "write_latency": 0, - }, - "OffChipFlash": { - "clock_scales": 1.0, - "burst_length": 1, - "read_latency": 0, - "write_latency": 0, - }, - }, - } + assert resolve_compiler_config( + vela_compiler_options=compiler_options + ) == VelaInitData( + system_config="Ethos_U65_High_End", + core_clock=1000000000.0, + axi0_port="Sram", + axi1_port="Dram", + memory_mode="Shared_Sram", + const_mem_area="Axi1", + arena_mem_area="Axi0", + cache_mem_area="Axi0", + arena_cache_size=None, + sram_memory_data=VelaInitMemoryData( + clock_scale=1.0, + burst_length=32, + read_latency=32, + write_latency=32, + ), + dram_memory_data=VelaInitMemoryData( + clock_scale=0.234375, + burst_length=128, + read_latency=500, + write_latency=250, + ), + on_chip_flash_memory_data=VelaInitMemoryData( + clock_scale=None, + burst_length=None, + read_latency=None, + write_latency=None, + ), + 
off_chip_flash_memory_data=VelaInitMemoryData( + clock_scale=None, + burst_length=None, + read_latency=None, + write_latency=None, + ), + ) + + +def test_vela_compiler_with_parameters_inherit_memory_mode( + test_resources_path: Path, +) -> None: + """Test creation of Vela compiler instance with non-default params + that inherits a memory mode. + """ + vela_ini_path = str(test_resources_path / "vela/sample_vela.ini") + + compiler_options = VelaCompilerOptions( + config_files=vela_ini_path, + system_config="Ethos_U65_High_End", + memory_mode="Dedicated_Sram_512KB", + accelerator_config="ethos-u65-256", + max_block_dependency=1, + arena_cache_size=10, + tensor_allocator="Greedy", + cpu_tensor_alignment=4, + optimization_strategy="Size", + output_dir=Path("custom_output"), + ) + compiler = VelaCompiler(compiler_options) + + assert compiler.config_files == vela_ini_path + assert compiler.system_config == "Ethos_U65_High_End" + assert compiler.memory_mode == "Dedicated_Sram_512KB" + assert compiler.accelerator_config == "ethos-u65-256" + assert compiler.max_block_dependency == 1 + assert compiler.arena_cache_size == 10 + assert compiler.tensor_allocator == "Greedy" + assert compiler.cpu_tensor_alignment == 4 + assert compiler.optimization_strategy == "Size" + assert compiler.output_dir == Path("custom_output") + + assert resolve_compiler_config( + vela_compiler_options=compiler_options + ) == VelaInitData( + system_config="Ethos_U65_High_End", + core_clock=1000000000.0, + axi0_port="Sram", + axi1_port="Dram", + memory_mode="Dedicated_Sram_512KB", + const_mem_area="Axi1", + arena_mem_area="Axi1", + cache_mem_area="Axi0", + arena_cache_size=524288, + sram_memory_data=VelaInitMemoryData( + clock_scale=1.0, + burst_length=32, + read_latency=32, + write_latency=32, + ), + dram_memory_data=VelaInitMemoryData( + clock_scale=0.234375, + burst_length=128, + read_latency=500, + write_latency=250, + ), + on_chip_flash_memory_data=VelaInitMemoryData( + clock_scale=None, + 
burst_length=None, + read_latency=None, + write_latency=None, + ), + off_chip_flash_memory_data=VelaInitMemoryData( + clock_scale=None, + burst_length=None, + read_latency=None, + write_latency=None, + ), + ) def test_compile_model(test_tflite_model: Path) -> None: @@ -152,8 +189,17 @@ def test_compile_model(test_tflite_model: Path) -> None: EthosUConfiguration.load_profile("ethos-u55-256").compiler_options ) - optimized_model = compiler.compile_model(test_tflite_model) - assert isinstance(optimized_model, OptimizedModel) + expected_model_path = Path( + compiler.output_dir.as_posix() + + "/" + + test_tflite_model.stem + + "_vela" + + test_tflite_model.suffix + ) + vela_summary_data, optimized_model_path = compiler.compile_model(test_tflite_model) + assert isinstance(vela_summary_data, VelaSummary) + assert isinstance(optimized_model_path, Path) + assert expected_model_path == optimized_model_path def test_csv_file_created(test_tflite_model: Path) -> None: @@ -172,7 +218,7 @@ def test_verbose_flag_passed() -> None: compiler = VelaCompiler( EthosUConfiguration.load_profile("ethos-u55-256").compiler_options ) - assert compiler.return_compiler_options().verbose_performance + assert compiler.verbose_performance def test_compile_model_fail_sram_exceeded( @@ -186,7 +232,7 @@ def test_compile_model_fail_sram_exceeded( def fake_compiler(*_: Any) -> None: print("Warning: SRAM target for arena memory area exceeded.") - monkeypatch.setattr("mlia.backend.vela.compiler.compiler_driver", fake_compiler) + monkeypatch.setattr("mlia.backend.vela.compiler.main", fake_compiler) with pytest.raises(Exception) as exc_info: compiler.compile_model(test_tflite_model) @@ -195,12 +241,424 @@ def test_compile_model_fail_sram_exceeded( def test_optimize_model(tmp_path: Path, test_tflite_model: Path) -> None: """Test model optimization and saving into file.""" - tmp_file = tmp_path / "temp.tflite" - + tmp_file = tmp_path / "test_model_int8_vela.tflite" target_config = 
EthosUConfiguration.load_profile("ethos-u55-256") - optimize_model( - test_tflite_model, target_config.compiler_options, tmp_file.absolute() - ) + target_config.compiler_options.output_dir = tmp_path + compile_model(test_tflite_model, target_config.compiler_options) assert tmp_file.is_file() assert tmp_file.stat().st_size > 0 + + +SUMMARY_TMP_DATA = """ +experiment,network,accelerator_configuration,system_config,memory_mode,core_clock,arena_cache_size,sram_bandwidth,dram_bandwidth,on_chip_flash_bandwidth,off_chip_flash_bandwidth,weights_storage_area,feature_map_storage_area,inferences_per_second,batch_size,inference_time,passes_before_fusing,passes_after_fusing,sram_memory_used,dram_memory_used,on_chip_flash_memory_used,off_chip_flash_memory_used,total_original_weights,total_npu_encoded_weights,sram_feature_map_read_bytes,sram_feature_map_write_bytes,sram_weight_read_bytes,sram_weight_write_bytes,sram_total_bytes,dram_feature_map_read_bytes,dram_feature_map_write_bytes,dram_weight_read_bytes,dram_weight_write_bytes,dram_total_bytes,on_chip_flash_feature_map_read_bytes,on_chip_flash_feature_map_write_bytes,on_chip_flash_weight_read_bytes,on_chip_flash_weight_write_bytes,on_chip_flash_total_bytes,off_chip_flash_feature_map_read_bytes,off_chip_flash_feature_map_write_bytes,off_chip_flash_weight_read_bytes,off_chip_flash_weight_write_bytes,off_chip_flash_total_bytes,nn_macs,nn_tops,cycles_npu,cycles_sram_access,cycles_dram_access,cycles_on_chip_flash_access,cycles_off_chip_flash_access,cycles_total +default,test_model_fp32,Ethos_U55_256,Ethos_U55_High_End_Embedded,Shared_Sram,0.0,0.9,4.0,4.0,4.0,0.5,Off-chip Flash,SRAM,0.0,1,12.1e-05,7,2.0,1.5,0.0,0.0,1.4,7,8,6.0,5.0,7552.0,5.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,1.0,2,0.1,23297.0,1.5,0.0,0.0,1.0,2 +""".strip() + +SUMMARY_TMP_DATA_MISSING_HEADER = """ 
+experiment,network,accelerator_configuration,system_config,memory_mode,core_clock,arena_cache_size,sram_bandwidth,dram_bandwidth,on_chip_flash_bandwidth,off_chip_flash_bandwidth,weights_storage_area,feature_map_storage_area,inferences_per_second,batch_size,inference_time,passes_before_fusing,passes_after_fusing,sram_memory_used,dram_memory_used,on_chip_flash_memory_used,off_chip_flash_memory_used,total_original_weights,total_npu_encoded_weights,sram_feature_map_read_bytes,sram_feature_map_write_bytes,sram_weight_read_bytes,sram_weight_write_bytes,sram_total_bytes,dram_feature_map_read_bytes,dram_feature_map_write_bytes,dram_weight_read_bytes,dram_weight_write_bytes,dram_total_bytes,on_chip_flash_feature_map_read_bytes,on_chip_flash_feature_map_write_bytes,on_chip_flash_weight_read_bytes,on_chip_flash_weight_write_bytes,on_chip_flash_total_bytes,off_chip_flash_feature_map_read_bytes,off_chip_flash_feature_map_write_bytes,off_chip_flash_weight_read_bytes,off_chip_flash_weight_write_bytes,off_chip_flash_total_bytes,nn_macs,nn_tops,cycles_npu,cycles_sram_access,cycles_dram_access,cycles_on_chip_flash_access,cycles_off_chip_flash_access +default,test_model_fp32,Ethos_U55_256,Ethos_U55_High_End_Embedded,Shared_Sram,0.0,0.9,4.0,4.0,4.0,0.5,Off-chip Flash,SRAM,0.0,1,12.1e-05,7,2.0,1.5,0.0,0.0,1.4,7,8,6.0,5.0,7552.0,5.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,1.0,2,0.1,23297.0,1.5,0.0,0.0,1.0 +""".strip() + +TMP_DATA_EXPECTED_STRING = "\ +cycles_total: 2.0, \ +cycles_npu: 23297.0, \ +cycles_sram_access: 1.5, \ +cycles_dram_access: 0.0, \ +cycles_on_chip_flash_access: 0.0, \ +cycles_off_chip_flash_access: 1.0, \ +core_clock: 0.0, \ +dram_memory_used: 0.0, \ +sram_memory_used: 1.5, \ +on_chip_flash_memory_used: 0.0, \ +off_chip_flash_memory_used: 1.4, \ +batch_size: 1, \ +memory_mode: Shared_Sram, \ +system_config: Ethos_U55_High_End_Embedded, \ +accelerator_configuration: Ethos_U55_256, \ +arena_cache_size: 0.9, \ +" + + +def 
test_backend_compiler_parse_summary_csv_file(test_csv_file: Path) -> None: + """Test that parsing a csv file produces a VelaSummary object.""" + with open(test_csv_file, "w", encoding="utf8") as csv_file: + csv_file.write(SUMMARY_TMP_DATA) + summary_object = parse_summary_csv_file(test_csv_file) + strings_to_check = repr(summary_object) + assert isinstance(summary_object, VelaSummary) + assert TMP_DATA_EXPECTED_STRING == strings_to_check + + +def test_backend_compiler_summary_csv_parsed_empty(empty_test_csv_file: Path) -> None: + """Test that ensures a RuntimeError + is raised when an empty CSV file is parsed. + """ + empty_test_csv_file.touch() + with pytest.raises(RuntimeError, match="Generated Vela Summary CSV is empty"): + parse_summary_csv_file(empty_test_csv_file) + + +def test_backend_compiler_summary_csv_parsed_missing_headers( + test_csv_file: Path, +) -> None: + """Test that ensures a KeyError + is raised when a csv with missing + expected headers is parsed. + """ + with open(test_csv_file, "w", encoding="utf8") as csv_file: + csv_file.write(SUMMARY_TMP_DATA_MISSING_HEADER) + with pytest.raises( + KeyError, + match="Generated Vela Summary CSV missing expected header: cycles_total.", # pylint: disable=line-too-long + ): + parse_summary_csv_file(test_csv_file) + + +def test_backend_compiler_summary_csv_parsed_missing_file() -> None: + """Test that ensures a FileNotFoundError + is raised when a non-existent csv file is parsed. + """ + with pytest.raises( + FileNotFoundError, match="CSV File not found at missing_file.csv" + ): + parse_summary_csv_file(Path("missing_file.csv")) + + +def test_backend_compiler_parsing_vela_ini_file_missing_init_file() -> None: + """Test that ensures a FileNotFoundError + is raised when a non-existent ini file is parsed.
+ """ + with pytest.raises( + FileNotFoundError, + match="Vela Initialisation File not found at missing_init_file.ini", + ): + parse_vela_initialisation_file( + Path("missing_init_file.ini"), "internal-default", "internal-default" + ) + + +def test_backend_compiler_parsing_vela_ini_file_empty_init_file( + empty_vela_ini_file: Path, +) -> None: + """Test that ensures an OSError + is raised when an empty vela.ini file is parsed. + """ + empty_vela_ini_file.touch() + with pytest.raises(OSError, match="vela.ini File Is Empty"): + parse_vela_initialisation_file( + empty_vela_ini_file, "internal-default", "internal-default" + ) + + +@pytest.mark.parametrize( + "input_str", + [ + """ +; SPDX-FileCopyrightText: Copyright 2022, Arm Limited and/or its affiliates. +; SPDX-License-Identifier: Apache-2.0 +; Ethos-U55 High-End Embedded: SRAM (4 GB/s) and Flash (0.5 GB/s) +[System_Config.Ethos_U55_High_End_Embedded] +core_clock=500e6 +axi0_port=Sram +axi1_port=OffChipFlash +Sram_clock_scale=1.0 +Sram_burst_length=32 +Sram_read_latency=32 +Sram_write_latency=32 +OffChipFlash_clock_scale=0.125 +OffChipFlash_burst_length=128 +OffChipFlash_read_latency=64 +OffChipFlash_write_latency=64 + +; Ethos-U65 High-End: SRAM (16 GB/s) and DRAM (3.75 GB/s) +[System_Config.Ethos_U65_High_End] +core_clock=1e9 +axi0_port=Sram +axi1_port=Dram +Sram_clock_scale=1.0 +Sram_burst_length=32 +Sram_read_latency=32 +Sram_write_latency=32 +Dram_clock_scale=0.234375 +Dram_burst_length=128 +Dram_read_latency=500 +Dram_write_latency=250 +""" + ], +) +def test_backend_compiler_parsing_vela_ini_file_missing_memory_modes( + vela_ini_file: Path, + input_str: str, +) -> None: + """Test that ensures an IndexError + is raised when a vela.ini file with no memory modes + is parsed. + """ + with open(vela_ini_file, "w", encoding="utf8") as vela_file: + vela_file.write(input_str) + with pytest.raises( + IndexError, match="No memory modes are present in vela.ini file."
+ ): + parse_vela_initialisation_file( + vela_ini_file, "Ethos_U65_High_End", "Shared_Sram" + ) + + +@pytest.mark.parametrize( + "input_str", + [ + """ +; ----------------------------------------------------------------------------- +; Memory Mode + +; Shared SRAM: the SRAM is shared between the Ethos-U and the Cortex-M software +; The non-SRAM memory is assumed to be read-only +[Memory_Mode.Shared_Sram] +const_mem_area=Axi1 +arena_mem_area=Axi0 +cache_mem_area=Axi0 + +; The SRAM (384KB) is only for use by the Ethos-U +; The non-SRAM memory is assumed to be read-writeable +[Memory_Mode.Dedicated_Sram] +const_mem_area=Axi1 +arena_mem_area=Axi1 +cache_mem_area=Axi0 +arena_cache_size=393216 + +""" + ], +) +def test_backend_compiler_parsing_vela_ini_file_missing_system_configs( + vela_ini_file: Path, + input_str: str, +) -> None: + """Test that ensures an IndexError + is raised when a vela.ini file with no system configs + is parsed. + """ + with open(vela_ini_file, "w", encoding="utf8") as vela_file: + vela_file.write(input_str) + with pytest.raises( + IndexError, match="No system configs are present in vela.ini file." + ): + parse_vela_initialisation_file( + vela_ini_file, "Ethos_U65_High_End", "Shared_Sram" + ) + + +@pytest.mark.parametrize( + "input_str", + [ + """ +; SPDX-FileCopyrightText: Copyright 2022, Arm Limited and/or its affiliates.
+; SPDX-License-Identifier: Apache-2.0 +; Ethos-U55 High-End Embedded: SRAM (4 GB/s) and Flash (0.5 GB/s) +[System_Config.Ethos_U55_High_End_Embedded] +core_clock=500e6 +axi0_port=Sram +axi1_port=OffChipFlash +Sram_clock_scale=1.0 +Sram_burst_length=32 +Sram_read_latency=32 +Sram_write_latency=32 +OffChipFlash_clock_scale=0.125 +OffChipFlash_burst_length=128 +OffChipFlash_read_latency=64 +OffChipFlash_write_latency=64 + +; Ethos-U65 High-End: SRAM (16 GB/s) and DRAM (3.75 GB/s) +[System_Config.Ethos_U65_High_End] +core_clock=1e9 +axi0_port=Sram +axi1_port=Dram +Sram_clock_scale=1.0 +Sram_burst_length=32 +Sram_read_latency=32 +Sram_write_latency=32 +Dram_clock_scale=0.234375 +Dram_burst_length=128 +Dram_read_latency=500 +Dram_write_latency=250 + +; ----------------------------------------------------------------------------- +; Memory Mode + +; Shared SRAM: the SRAM is shared between the Ethos-U and the Cortex-M software +; The non-SRAM memory is assumed to be read-only +[Memory_Mode.Shared_Sram] +const_mem_area=Axi1 +arena_mem_area=Axi0 +cache_mem_area=Axi0 + +""" + ], +) +def test_backend_compiler_parsing_vela_ini_file_missing_specific_memory_mode( + vela_ini_file: Path, + input_str: str, +) -> None: + """Test that ensures a ValueError + is raised when a vela.ini file with specific missing memory mode + is parsed. + """ + with open(vela_ini_file, "w", encoding="utf8") as vela_file: + vela_file.write(input_str) + with pytest.raises( + ValueError, match="Memory Mode: Dedicated_Sram not present in vela.ini file." + ): + parse_vela_initialisation_file( + vela_ini_file, "Ethos_U65_High_End", "Dedicated_Sram" + ) + + +@pytest.mark.parametrize( + "input_str", + [ + """ +; SPDX-FileCopyrightText: Copyright 2022, Arm Limited and/or its affiliates. 
+; SPDX-License-Identifier: Apache-2.0 +; Ethos-U55 High-End Embedded: SRAM (4 GB/s) and Flash (0.5 GB/s) +[System_Config.Ethos_U55_High_End_Embedded] +core_clock=500e6 +axi0_port=Sram +axi1_port=OffChipFlash +Sram_clock_scale=1.0 +Sram_burst_length=32 +Sram_read_latency=32 +Sram_write_latency=32 +OffChipFlash_clock_scale=0.125 +OffChipFlash_burst_length=128 +OffChipFlash_read_latency=64 +OffChipFlash_write_latency=64 + +; ----------------------------------------------------------------------------- +; Memory Mode + +; Shared SRAM: the SRAM is shared between the Ethos-U and the Cortex-M software +; The non-SRAM memory is assumed to be read-only +[Memory_Mode.Shared_Sram] +const_mem_area=Axi1 +arena_mem_area=Axi0 +cache_mem_area=Axi0 + +; The SRAM (384KB) is only for use by the Ethos-U +; The non-SRAM memory is assumed to be read-writeable +[Memory_Mode.Dedicated_Sram] +const_mem_area=Axi1 +arena_mem_area=Axi1 +cache_mem_area=Axi0 +arena_cache_size=393216 + +""" + ], +) +def test_backend_compiler_parsing_vela_ini_file_missing_specific_system_config( + vela_ini_file: Path, + input_str: str, +) -> None: + """Test that ensures a ValueError + is raised when a vela.ini file with specific missing system config + is parsed. + """ + with open(vela_ini_file, "w", encoding="utf8") as vela_file: + vela_file.write(input_str) + with pytest.raises( + ValueError, + match="System Config: Ethos_U65_High_End not present in vela.ini file.", + ): + parse_vela_initialisation_file( + vela_ini_file, "Ethos_U65_High_End", "Shared_Sram" + ) + + +@pytest.mark.parametrize( + "input_str", + [ + """ +; SPDX-FileCopyrightText: Copyright 2022, Arm Limited and/or its affiliates. 
+; SPDX-License-Identifier: Apache-2.0 +; Ethos-U55 High-End Embedded: SRAM (4 GB/s) and Flash (0.5 GB/s) +[System_Config.Ethos_U55_High_End_Embedded] +axi0_port=Sram +axi1_port=OffChipFlash +Sram_clock_scale=1.0 +Sram_burst_length=32 +Sram_read_latency=32 +Sram_write_latency=32 +OffChipFlash_clock_scale=0.125 +OffChipFlash_burst_length=128 +OffChipFlash_read_latency=64 +OffChipFlash_write_latency=64 + +; ----------------------------------------------------------------------------- +; Memory Mode + +; Shared SRAM: the SRAM is shared between the Ethos-U and the Cortex-M software +; The non-SRAM memory is assumed to be read-only +[Memory_Mode.Shared_Sram] +const_mem_area=Axi1 +arena_mem_area=Axi0 +cache_mem_area=Axi0 + +; The SRAM (384KB) is only for use by the Ethos-U +; The non-SRAM memory is assumed to be read-writeable +[Memory_Mode.Dedicated_Sram] +const_mem_area=Axi1 +arena_mem_area=Axi1 +cache_mem_area=Axi0 +arena_cache_size=393216 + +""" + ], +) +def test_backend_compiler_parsing_vela_ini_file_missing_header( + vela_ini_file: Path, + input_str: str, +) -> None: + """Test that ensures a KeyError + is raised when a vela.ini file with a missing header + is parsed. + """ + with open(vela_ini_file, "w", encoding="utf8") as vela_file: + vela_file.write(input_str) + with pytest.raises( + KeyError, match="Vela.ini file missing expected header: core_clock" + ): + parse_vela_initialisation_file( + vela_ini_file, "Ethos_U55_High_End_Embedded", "Shared_Sram" + ) + + +def test_backend_compiler_model_already_compiled( + test_tflite_model: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + """Test that if we try to compile a model twice, + the correct flag is passed and that main is called only once.
+ """ + target_config = EthosUConfiguration.load_profile("ethos-u55-256") + recreate_directory(Path(target_config.compiler_options.output_dir)) + + main_mock = MagicMock(side_effect=main) + monkeypatch.setattr("mlia.backend.vela.compiler.main", main_mock) + compile_model(test_tflite_model, target_config.compiler_options) + + def vela_compiler_compile_model_mock( + model_path: Path, *_: Any + ) -> tuple[None, Path]: + return None, Path( + Path(target_config.compiler_options.output_dir).as_posix() + + "/" + + model_path.stem + + "_vela" + + model_path.suffix + ) + + compiler_mock = MagicMock(side_effect=vela_compiler_compile_model_mock) + monkeypatch.setattr( + "mlia.backend.vela.compiler.VelaCompiler.compile_model", compiler_mock + ) + compile_model(test_tflite_model, target_config.compiler_options) + main_mock.assert_called_once() + compiler_mock.assert_called_once_with(test_tflite_model, True) diff --git a/tests/test_backend_vela_performance.py b/tests/test_backend_vela_performance.py index 5800630..b4f8d4c 100644 --- a/tests/test_backend_vela_performance.py +++ b/tests/test_backend_vela_performance.py @@ -6,13 +6,14 @@ from unittest.mock import MagicMock import pytest -from mlia.backend.vela.compiler import optimize_model +from mlia.backend.vela.compiler import compile_model from mlia.backend.vela.performance import estimate_performance from mlia.backend.vela.performance import layer_metrics from mlia.backend.vela.performance import LayerwisePerfInfo from mlia.backend.vela.performance import parse_layerwise_perf_csv from mlia.backend.vela.performance import PerformanceMetrics from mlia.target.ethos_u.config import EthosUConfiguration +from mlia.utils.filesystem import recreate_directory def test_estimate_performance(test_tflite_model: Path) -> None: @@ -142,24 +143,6 @@ def test_estimate_performance_parse_layerwise_empty_csv_file( assert len(layerwise_object.layerwise_info) == 0 -def test_estimate_performance_already_optimized( - tmp_path: Path, 
test_tflite_model: Path -) -> None: - """Test that performance estimation should fail for already optimized model.""" - target_config = EthosUConfiguration.load_profile("ethos-u55-256") - - optimized_model_path = tmp_path / "optimized_model.tflite" - - optimize_model( - test_tflite_model, target_config.compiler_options, optimized_model_path - ) - - with pytest.raises( - Exception, match="Unable to estimate performance for the given optimized model" - ): - estimate_performance(optimized_model_path, target_config.compiler_options) - - def test_read_invalid_model(test_tflite_invalid_model: Path) -> None: """Test that reading invalid model should fail with exception.""" with pytest.raises( @@ -173,16 +156,18 @@ def test_compile_invalid_model( test_tflite_model: Path, monkeypatch: pytest.MonkeyPatch, tmp_path: Path ) -> None: """Test that if model could not be compiled then correct exception raised.""" + mock_compiler = MagicMock() mock_compiler.side_effect = Exception("Bad model!") - monkeypatch.setattr("mlia.backend.vela.compiler.compiler_driver", mock_compiler) + monkeypatch.setattr("mlia.backend.vela.compiler.main", mock_compiler) model_path = tmp_path / "optimized_model.tflite" with pytest.raises( Exception, match="Model could not be optimized with Vela compiler" ): target_config = EthosUConfiguration.load_profile("ethos-u55-256") - optimize_model(test_tflite_model, target_config.compiler_options, model_path) + recreate_directory(Path(target_config.compiler_options.output_dir)) + compile_model(test_tflite_model, target_config.compiler_options) assert not model_path.exists() diff --git a/tests/test_cli_commands.py b/tests/test_cli_commands.py index 480e642..1ce793f 100644 --- a/tests/test_cli_commands.py +++ b/tests/test_cli_commands.py @@ -207,7 +207,7 @@ def mock_performance_estimation(monkeypatch: pytest.MonkeyPatch) -> None: metrics = PerformanceMetrics( EthosUConfiguration.load_profile("ethos-u55-256"), NPUCycles(1, 2, 3, 4, 5, 6), - MemoryUsage(1, 2, 3, 4, 5), 
+ MemoryUsage(1, 2, 3, 4), LayerwisePerfInfo(layerwise_info=[]), ) monkeypatch.setattr( diff --git a/tests/test_core_context.py b/tests/test_core_context.py index 0810ad0..9eb3d63 100644 --- a/tests/test_core_context.py +++ b/tests/test_core_context.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright 2022-2023, Arm Limited and/or its affiliates. +# SPDX-FileCopyrightText: Copyright 2022-2024, Arm Limited and/or its affiliates. # SPDX-License-Identifier: Apache-2.0 """Tests for the module context.""" from __future__ import annotations @@ -59,7 +59,6 @@ def test_execution_context(tmp_path: Path) -> None: event_publisher=publisher, verbose=True, logs_dir="logs_directory", - models_dir="models_directory", output_format="json", ) @@ -74,7 +73,7 @@ def test_execution_context(tmp_path: Path) -> None: assert context.event_handlers == [] assert context.event_publisher == publisher assert context.logs_path == output_dir / "logs_directory" - expected_model_path = output_dir / "models_directory/sample.model" + expected_model_path = output_dir / "sample.model" assert context.get_model_path("sample.model") == expected_model_path assert context.verbose is True assert context.output_format == "json" @@ -107,7 +106,7 @@ def test_execution_context_with_default_params(tmp_path: Path) -> None: assert context_with_default_params.logs_path == output_dir / "logs" default_model_path = context_with_default_params.get_model_path("sample.model") - expected_default_model_path = output_dir / "models/sample.model" + expected_default_model_path = output_dir / "sample.model" assert default_model_path == expected_default_model_path assert context_with_default_params.output_format == "plain_text" diff --git a/tests/test_target_ethos_u_data_analysis.py b/tests/test_target_ethos_u_data_analysis.py index 3cddf10..0add7c2 100644 --- a/tests/test_target_ethos_u_data_analysis.py +++ b/tests/test_target_ethos_u_data_analysis.py @@ -98,7 +98,7 @@ def test_perf_metrics_diff() -> None: 
cast(EthosUConfiguration, profile("ethos-u55-256")), NPUCycles(1, 2, 3, 4, 5, 6), # memory metrics are in kilobytes - MemoryUsage(*[i * 1024 for i in range(1, 6)]), # type: ignore + MemoryUsage(*list(range(1, 5))), # type: ignore LayerwisePerfInfo(layerwise_info=[]), ), [ @@ -110,9 +110,7 @@ def test_perf_metrics_diff() -> None: cast(EthosUConfiguration, profile("ethos-u55-256")), NPUCycles(1, 2, 3, 4, 5, 6), # memory metrics are in kilobytes - MemoryUsage( - *[i * 1024 for i in range(1, 6)] # type: ignore - ), + MemoryUsage(*list(range(1, 5))), # type: ignore LayerwisePerfInfo(layerwise_info=[]), ), ], @@ -128,8 +126,8 @@ def test_perf_metrics_diff() -> None: opt_diffs={ "sram": PerfMetricDiff(1.0, 1.0), "dram": PerfMetricDiff(2.0, 2.0), - "on_chip_flash": PerfMetricDiff(4.0, 4.0), - "off_chip_flash": PerfMetricDiff(5.0, 5.0), + "on_chip_flash": PerfMetricDiff(3.0, 3.0), + "off_chip_flash": PerfMetricDiff(4.0, 4.0), "npu_total_cycles": PerfMetricDiff(3, 3), }, ) @@ -143,7 +141,7 @@ def test_perf_metrics_diff() -> None: cast(EthosUConfiguration, profile("ethos-u55-256")), NPUCycles(1, 2, 3, 4, 5, 6), # memory metrics are in kilobytes - MemoryUsage(*[i * 1024 for i in range(1, 6)]), # type: ignore + MemoryUsage(*list(range(1, 5))), # type: ignore LayerwisePerfInfo(layerwise_info=[]), ), [], diff --git a/tests/test_target_ethos_u_data_collection.py b/tests/test_target_ethos_u_data_collection.py index 3868b95..e034884 100644 --- a/tests/test_target_ethos_u_data_collection.py +++ b/tests/test_target_ethos_u_data_collection.py @@ -162,7 +162,7 @@ def mock_performance_estimation( metrics = PerformanceMetrics( target, NPUCycles(1, 2, 3, 4, 5, 6), - MemoryUsage(1, 2, 3, 4, 5), + MemoryUsage(1, 2, 3, 4), LayerwisePerfInfo(layerwise_info=[]), ) monkeypatch.setattr( diff --git a/tests/test_target_ethos_u_performance.py b/tests/test_target_ethos_u_performance.py index 76860b5..3042265 100644 --- a/tests/test_target_ethos_u_performance.py +++ 
b/tests/test_target_ethos_u_performance.py @@ -1,24 +1,10 @@ -# SPDX-FileCopyrightText: Copyright 2022, Arm Limited and/or its affiliates. +# SPDX-FileCopyrightText: Copyright 2022-2024, Arm Limited and/or its affiliates. # SPDX-License-Identifier: Apache-2.0 """Performance estimation tests.""" from unittest.mock import MagicMock import pytest -from mlia.target.ethos_u.performance import MemorySizeType -from mlia.target.ethos_u.performance import MemoryUsage - - -def test_memory_usage_conversion() -> None: - """Test MemoryUsage objects conversion.""" - memory_usage_in_kb = MemoryUsage(1, 2, 3, 4, 5, MemorySizeType.KILOBYTES) - assert memory_usage_in_kb.in_kilobytes() == memory_usage_in_kb - - memory_usage_in_bytes = MemoryUsage( - 1 * 1024, 2 * 1024, 3 * 1024, 4 * 1024, 5 * 1024 - ) - assert memory_usage_in_bytes.in_kilobytes() == memory_usage_in_kb - def mock_performance_estimation(monkeypatch: pytest.MonkeyPatch) -> None: """Mock performance estimation.""" diff --git a/tests/test_target_ethos_u_reporters.py b/tests/test_target_ethos_u_reporters.py index 6dff6e1..cfee86d 100644 --- a/tests/test_target_ethos_u_reporters.py +++ b/tests/test_target_ethos_u_reporters.py @@ -41,7 +41,6 @@ from mlia.utils.console import remove_ascii_codes memory_usage=MemoryUsage( sram_memory_area_size=10, dram_memory_area_size=0, - unknown_memory_area_size=0, on_chip_flash_memory_area_size=0, off_chip_flash_memory_area_size=20, memory_size_type=MemorySizeType.KILOBYTES, @@ -140,7 +139,6 @@ Layer-Wise Metrics: memory_usage=MemoryUsage( sram_memory_area_size=10, dram_memory_area_size=0, - unknown_memory_area_size=0, on_chip_flash_memory_area_size=0, off_chip_flash_memory_area_size=20, memory_size_type=MemorySizeType.KILOBYTES, @@ -362,7 +360,6 @@ def test_report_operators( Const mem area Axi1 Arena mem area Axi0 Cache mem area Axi0 - Arena cache size 2,096,768 bytes System config Ethos_U55_High_End_Embedded Accelerator clock 500,000,000 Hz @@ -376,28 +373,11 @@ def test_report_operators( 
Read latency 32 cycles Write latency 32 cycles - Dram: - Clock scales 1.0 - Burst length 1 byte - Read latency 0 cycles - Write latency 0 cycles - - OnChipFlash: - Clock scales 1.0 - Burst length 1 byte - Read latency 0 cycles - Write latency 0 cycles - OffChipFlash: Clock scales 0.125 Burst length 128 bytes Read latency 64 cycles - Write latency 64 cycles - - Architecture settings: - Permanent storage mem area OffChipFlash - Feature map storage mem area Sram - Fast storage mem area Sram""", + Write latency 64 cycles""", { "target": { "target": "ethos-u55", @@ -406,7 +386,6 @@ def test_report_operators( "const_mem_area": "Axi1", "arena_mem_area": "Axi0", "cache_mem_area": "Axi0", - "arena_cache_size": {"value": 2096768, "unit": "bytes"}, }, "system_config": { "accelerator_clock": {"value": 500000000.0, "unit": "Hz"}, @@ -419,18 +398,6 @@ def test_report_operators( "read_latency": {"value": 32, "unit": "cycles"}, "write_latency": {"value": 32, "unit": "cycles"}, }, - "Dram": { - "clock_scales": 1.0, - "burst_length": {"value": 1, "unit": "byte"}, - "read_latency": {"value": 0, "unit": "cycles"}, - "write_latency": {"value": 0, "unit": "cycles"}, - }, - "OnChipFlash": { - "clock_scales": 1.0, - "burst_length": {"value": 1, "unit": "byte"}, - "read_latency": {"value": 0, "unit": "cycles"}, - "write_latency": {"value": 0, "unit": "cycles"}, - }, "OffChipFlash": { "clock_scales": 0.125, "burst_length": {"value": 128, "unit": "bytes"}, @@ -439,11 +406,6 @@ def test_report_operators( }, }, }, - "arch_settings": { - "permanent_storage_mem_area": "OffChipFlash", - "feature_map_storage_mem_area": "Sram", - "fast_storage_mem_area": "Sram", - }, } }, ], -- cgit v1.2.1