From d08513a72e7fbf0626c3d69b9c4cc7056b3da4ae Mon Sep 17 00:00:00 2001 From: Nathan Bailey Date: Tue, 16 Jan 2024 16:39:06 +0000 Subject: feat: Integrate Vela's per-layer performance estimates Resolves: MLIA-1055, MLIA-1056, MLIA-1057 Signed-off-by: Nathan Bailey Change-Id: Id573cec94e4a69117051dcd5175f383c0955d890 --- src/mlia/backend/vela/compiler.py | 9 +- src/mlia/backend/vela/performance.py | 151 +++++++++++++++++- src/mlia/core/workflow.py | 3 +- src/mlia/target/ethos_u/advisor.py | 3 +- src/mlia/target/ethos_u/performance.py | 46 ++++-- src/mlia/target/ethos_u/reporters.py | 109 +++++++++++-- tests/conftest.py | 31 +++- tests/test_backend_vela_compiler.py | 27 +++- tests/test_backend_vela_performance.py | 122 +++++++++++++- tests/test_cli_commands.py | 4 +- tests/test_target_ethos_u_config.py | 3 +- tests/test_target_ethos_u_data_analysis.py | 6 +- tests/test_target_ethos_u_data_collection.py | 4 +- tests/test_target_ethos_u_reporters.py | 230 ++++++++++++++++++++++++++- 14 files changed, 701 insertions(+), 47 deletions(-) diff --git a/src/mlia/backend/vela/compiler.py b/src/mlia/backend/vela/compiler.py index b591056..fe9e365 100644 --- a/src/mlia/backend/vela/compiler.py +++ b/src/mlia/backend/vela/compiler.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright 2022-2023, Arm Limited and/or its affiliates. +# SPDX-FileCopyrightText: Copyright 2022-2024, Arm Limited and/or its affiliates. 
# SPDX-License-Identifier: Apache-2.0 """Vela compiler wrapper module.""" from __future__ import annotations @@ -90,7 +90,7 @@ class VelaCompilerOptions: # pylint: disable=too-many-instance-attributes tensor_allocator: TensorAllocatorType = "HillClimb" cpu_tensor_alignment: int = Tensor.AllocationQuantum optimization_strategy: OptimizationStrategyType = "Performance" - output_dir: str = "output" + output_dir: Path = Path("output") recursion_limit: int = 1000 @@ -251,6 +251,7 @@ class VelaCompiler: # pylint: disable=too-many-instance-attributes verbose_register_command_stream=False, verbose_operators=False, verbose_weights=False, + verbose_performance=True, show_cpu_operations=False, tensor_allocator=self.tensor_allocator, timing=False, @@ -258,6 +259,10 @@ class VelaCompiler: # pylint: disable=too-many-instance-attributes cpu_tensor_alignment=self.cpu_tensor_alignment, ) + def return_compiler_options(self) -> CompilerOptions: + """Return CompilerOptions instance for test purposes.""" + return self._compiler_options() + def resolve_compiler_config( vela_compiler_options: VelaCompilerOptions, diff --git a/src/mlia/backend/vela/performance.py b/src/mlia/backend/vela/performance.py index a548b26..72a8ceb 100644 --- a/src/mlia/backend/vela/performance.py +++ b/src/mlia/backend/vela/performance.py @@ -1,11 +1,16 @@ -# SPDX-FileCopyrightText: Copyright 2022-2023, Arm Limited and/or its affiliates. +# SPDX-FileCopyrightText: Copyright 2022-2024, Arm Limited and/or its affiliates. 
# SPDX-License-Identifier: Apache-2.0 """Vela performance module.""" from __future__ import annotations +import csv import logging +import os +from collections import Counter from dataclasses import dataclass +from dataclasses import fields from pathlib import Path +from pydoc import locate import numpy as np from ethosu.vela.npu_performance import PassCycles @@ -37,6 +42,130 @@ class PerformanceMetrics: # pylint: disable=too-many-instance-attributes dram_memory_area_size: int on_chip_flash_memory_area_size: int off_chip_flash_memory_area_size: int + layerwise_performance_info: LayerwisePerfInfo + + +@dataclass +class LayerPerfInfo: # pylint: disable=too-many-instance-attributes + """Contains metrics from a row from the per-layer csv file from Vela.""" + + name: str + tflite_operator: str + sram_usage: int + op_cycles: int + npu_cycles: int + sram_access_cycles: int + dram_access_cycles: int + on_chip_flash_access_cycles: int + off_chip_flash_access_cycles: int + mac_count: int + util_mac_percentage: float + + def __repr__(self) -> str: + """Return String Representation of LayerPerfInfo object.""" + header_values = {key: value for key, value, _ in layer_metrics} + string_to_check = "" + for field in fields(self): + string_to_check += ( + f"{header_values[field.name]}: {getattr(self, field.name)}, " + ) + return string_to_check + + +@dataclass +class LayerwisePerfInfo: + """Contains all the per-layer metrics from the per-layer csv file from Vela.""" + + layerwise_info: list[LayerPerfInfo] + + def __repr__(self) -> str: + """Return String Representation of LayerwisePerfInfo object.""" + strings_to_check_layerwise_object = "" + for layer in self.layerwise_info: + string_to_check = repr(layer) + strings_to_check_layerwise_object += string_to_check + return strings_to_check_layerwise_object + + +complete_layer_metrics = [ + ("tflite_operator", "TFLite_operator", "TFLite Operator"), + ("nng_operator", "NNG Operator", "NNG Operator"), + ("sram_usage", "SRAM Usage", "SRAM 
Usage"), + ("peak_percentage", "Peak%", "Peak SRAM Usage (%)"), + ("op_cycles", "Op Cycles", "OP Cycles"), + ("network_percentage_1", "Network%", "OP Cycles in Network (%)"), + ("npu_cycles", "NPU", "NPU Cycles"), + ("sram_access_cycles", "SRAM AC", "SRAM AC"), + ("dram_access_cycles", "DRAM AC", "DRAM AC"), + ("on_chip_flash_access_cycles", "OnFlash AC", "OnFlash AC"), + ("off_chip_flash_access_cycles", "OffFlash AC", "OffFlash AC"), + ("mac_count", "MAC Count", "MAC Count"), + ("network_percentage_2", "Network% (1)", "MAC Count in Network (%)"), + ("util_mac_percentage", "Util%", "MAC Util (%)"), + ("name", "Name", "Layer Name"), +] + +OUTPUT_METRICS = [field.name for field in fields(LayerPerfInfo)] + +layer_metrics = [ + layer_metric + for layer_metric in complete_layer_metrics + if layer_metric[0] in OUTPUT_METRICS +] +layer_metrics.sort(key=lambda e: OUTPUT_METRICS.index(e[0])) + + +def parse_layerwise_perf_csv( # pylint: disable=too-many-locals + vela_csv_file: Path, metrics: list +) -> LayerwisePerfInfo: + """Parse the per-layer csv file from backend vela.""" + if not vela_csv_file.is_file(): + raise FileNotFoundError(f"CSV File not found at {vela_csv_file}\n") + layerwise_info = [] # type: list[LayerPerfInfo] + with open(vela_csv_file, encoding="UTF-8") as csv_file: + layerwise_reader = csv.reader(csv_file, delimiter=",") + try: + headers = list(next(layerwise_reader)) + except StopIteration: + return LayerwisePerfInfo(layerwise_info=layerwise_info) + headers_to_check_cpu_ops = headers.copy() + multiple_header_count = Counter(headers) + # Deal with multiple of the same values in CSV header. 
+ for idx, header in enumerate(reversed(headers)): + if multiple_header_count[header] > 1: + headers[len(headers) - idx - 1] = ( + headers[len(headers) - idx - 1] + + " (" + + str(multiple_header_count[header] - 1) + + ")" + ) + multiple_header_count[header] -= 1 + for row in layerwise_reader: + row_as_dict = dict(zip(headers, row)) + if row == headers_to_check_cpu_ops: + continue + try: + key_types = { + field.name: locate(str(field.type)) + for field in fields(LayerPerfInfo) + } + ids_to_metrics = {} + for key, title, _ in metrics: + try: + ids_to_metrics[key] = key_types[key]( # type: ignore + row_as_dict[title] + ) + except ValueError as err: + if "invalid literal for int() with base 10" in str(err): + ids_to_metrics[key] = key_types[key]( # type: ignore + float(row_as_dict[title]) + ) + else: + raise + layerwise_info.append(LayerPerfInfo(**ids_to_metrics)) + except KeyError as err: + raise KeyError("Generated CSV missing expected headers") from err + return LayerwisePerfInfo(layerwise_info=layerwise_info) def estimate_performance( @@ -61,11 +190,26 @@ def estimate_performance( ) optimized_model = vela_compiler.compile_model(initial_model) + output_dir = optimized_model.compiler_options.output_dir + csv_paths = [entry for entry in os.listdir(output_dir) if "per-layer.csv" in entry] + model_name = str(model_path.stem) + csv_file_found = None + for path in csv_paths: + if model_name in path: + csv_file_found = path + if csv_file_found is None: + raise FileNotFoundError("Vela per-layer CSV file not found") + csv_path = Path(output_dir) / csv_file_found + layerwise_performance_info = parse_layerwise_perf_csv( + vela_csv_file=csv_path, metrics=layer_metrics + ) - return _performance_metrics(optimized_model) + return _performance_metrics(layerwise_performance_info, optimized_model) -def _performance_metrics(optimized_model: OptimizedModel) -> PerformanceMetrics: +def _performance_metrics( + layerwise_performance_info: LayerwisePerfInfo, optimized_model: 
OptimizedModel +) -> PerformanceMetrics: """Return performance metrics for optimized model.""" cycles = optimized_model.nng.cycles @@ -96,4 +240,5 @@ def _performance_metrics(optimized_model: OptimizedModel) -> PerformanceMetrics: dram_memory_area_size=memory_usage(MemArea.Dram), on_chip_flash_memory_area_size=memory_usage(MemArea.OnChipFlash), off_chip_flash_memory_area_size=memory_usage(MemArea.OffChipFlash), + layerwise_performance_info=layerwise_performance_info, ) diff --git a/src/mlia/core/workflow.py b/src/mlia/core/workflow.py index 9f8ac83..c645857 100644 --- a/src/mlia/core/workflow.py +++ b/src/mlia/core/workflow.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright 2022-2023, Arm Limited and/or its affiliates. +# SPDX-FileCopyrightText: Copyright 2022-2024, Arm Limited and/or its affiliates. # SPDX-License-Identifier: Apache-2.0 """Module for executors. @@ -114,6 +114,7 @@ class DefaultWorkflowExecutor(WorkflowExecutor): self.before_start() collected_data = self.collect_data() + analyzed_data = self.analyze_data(collected_data) self.produce_advice(analyzed_data) diff --git a/src/mlia/target/ethos_u/advisor.py b/src/mlia/target/ethos_u/advisor.py index 9f5b3a6..b5932d0 100644 --- a/src/mlia/target/ethos_u/advisor.py +++ b/src/mlia/target/ethos_u/advisor.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright 2022-2023, Arm Limited and/or its affiliates. +# SPDX-FileCopyrightText: Copyright 2022-2024, Arm Limited and/or its affiliates. 
# SPDX-License-Identifier: Apache-2.0 """Ethos-U MLIA module.""" from __future__ import annotations @@ -44,6 +44,7 @@ class EthosUInferenceAdvisor(DefaultInferenceAdvisor): """Return list of the data collectors.""" model = self.get_model(context) target_config = self._get_target_config(context) + target_config.compiler_options.output_dir = context.output_dir # type: ignore backends = self._get_backends(context) collectors: list[DataCollector] = [] diff --git a/src/mlia/target/ethos_u/performance.py b/src/mlia/target/ethos_u/performance.py index a0526e4..8decb75 100644 --- a/src/mlia/target/ethos_u/performance.py +++ b/src/mlia/target/ethos_u/performance.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright 2022-2023, Arm Limited and/or its affiliates. +# SPDX-FileCopyrightText: Copyright 2022-2024, Arm Limited and/or its affiliates. # SPDX-License-Identifier: Apache-2.0 """Performance estimation.""" from __future__ import annotations @@ -13,6 +13,7 @@ import mlia.backend.vela.compiler as vela_comp import mlia.backend.vela.performance as vela_perf from mlia.backend.corstone import is_corstone_backend from mlia.backend.corstone.performance import estimate_performance +from mlia.backend.vela.performance import LayerwisePerfInfo from mlia.core.context import Context from mlia.core.performance import PerformanceEstimator from mlia.nn.select import OptimizationSettings @@ -95,16 +96,23 @@ class PerformanceMetrics: target_config: EthosUConfiguration npu_cycles: NPUCycles | None memory_usage: MemoryUsage | None + layerwise_perf_info: LayerwisePerfInfo | None def in_kilobytes(self) -> PerformanceMetrics: """Return metrics with memory usage in KiB.""" if self.memory_usage is None: return PerformanceMetrics( - self.target_config, self.npu_cycles, self.memory_usage + self.target_config, + self.npu_cycles, + self.memory_usage, + self.layerwise_perf_info, ) return PerformanceMetrics( - self.target_config, self.npu_cycles, self.memory_usage.in_kilobytes() + self.target_config, 
+ self.npu_cycles, + self.memory_usage.in_kilobytes(), + self.layerwise_perf_info, ) @@ -119,7 +127,9 @@ class OptimizationPerformanceMetrics: class VelaPerformanceEstimator( - PerformanceEstimator[Union[Path, ModelConfiguration], MemoryUsage] + PerformanceEstimator[ + Union[Path, ModelConfiguration], tuple[MemoryUsage, LayerwisePerfInfo] + ] ): """Vela based performance estimator.""" @@ -128,7 +138,9 @@ class VelaPerformanceEstimator( self.context = context self.target = target_config - def estimate(self, model: Path | ModelConfiguration) -> MemoryUsage: + def estimate( + self, model: Path | ModelConfiguration + ) -> tuple[MemoryUsage, LayerwisePerfInfo]: """Estimate performance.""" with log_action("Getting the memory usage metrics ..."): model_path = ( @@ -141,12 +153,15 @@ class VelaPerformanceEstimator( model_path, self.target.compiler_options ) - return MemoryUsage( - vela_perf_metrics.sram_memory_area_size, - vela_perf_metrics.dram_memory_area_size, - vela_perf_metrics.unknown_memory_area_size, - vela_perf_metrics.on_chip_flash_memory_area_size, - vela_perf_metrics.off_chip_flash_memory_area_size, + return ( + MemoryUsage( + vela_perf_metrics.sram_memory_area_size, + vela_perf_metrics.dram_memory_area_size, + vela_perf_metrics.unknown_memory_area_size, + vela_perf_metrics.on_chip_flash_memory_area_size, + vela_perf_metrics.off_chip_flash_memory_area_size, + ), + vela_perf_metrics.layerwise_performance_info, ) @@ -238,12 +253,15 @@ class EthosUPerformanceEstimator( memory_usage = None npu_cycles = None + layerwise_perf_info = None for backend in self.backends: if backend == "vela": vela_estimator = VelaPerformanceEstimator( self.context, self.target_config ) - memory_usage = vela_estimator.estimate(tflite_model) + memory_usage, layerwise_perf_info = vela_estimator.estimate( + tflite_model + ) elif is_corstone_backend(backend): corstone_estimator = CorstonePerformanceEstimator( self.context, self.target_config, backend @@ -256,4 +274,6 @@ class 
EthosUPerformanceEstimator( backend, ) - return PerformanceMetrics(self.target_config, npu_cycles, memory_usage) + return PerformanceMetrics( + self.target_config, npu_cycles, memory_usage, layerwise_perf_info + ) diff --git a/src/mlia/target/ethos_u/reporters.py b/src/mlia/target/ethos_u/reporters.py index 711f036..b747ce5 100644 --- a/src/mlia/target/ethos_u/reporters.py +++ b/src/mlia/target/ethos_u/reporters.py @@ -1,14 +1,16 @@ -# SPDX-FileCopyrightText: Copyright 2022-2023, Arm Limited and/or its affiliates. +# SPDX-FileCopyrightText: Copyright 2022-2024, Arm Limited and/or its affiliates. # SPDX-License-Identifier: Apache-2.0 """Reports module.""" from __future__ import annotations from collections import defaultdict +from dataclasses import fields from typing import Any from typing import Callable from mlia.backend.vela.compat import Operator from mlia.backend.vela.compat import Operators +from mlia.backend.vela.performance import layer_metrics from mlia.core.advice_generation import Advice from mlia.core.reporters import report_advice from mlia.core.reporting import BytesCell @@ -16,6 +18,7 @@ from mlia.core.reporting import Cell from mlia.core.reporting import ClockCell from mlia.core.reporting import Column from mlia.core.reporting import CompoundFormatter +from mlia.core.reporting import CompoundReport from mlia.core.reporting import CyclesCell from mlia.core.reporting import Format from mlia.core.reporting import NestedReport @@ -237,10 +240,59 @@ def report_target_details(target_config: EthosUConfiguration) -> Report: ) -def metrics_as_records(perf_metrics: list[PerformanceMetrics]) -> list[tuple]: +def metrics_as_records( + perf_metrics: list[PerformanceMetrics], +) -> tuple[list[tuple], list[tuple]]: """Convert perf metrics object into list of records.""" perf_metrics = [item.in_kilobytes() for item in perf_metrics] + def _layerwise_as_metrics( + perf_metrics: list[PerformanceMetrics], + ) -> list[tuple]: + metric_map = defaultdict(list) # type: 
dict[str, list] + format_types = {int: "12,d", str: "", float: "12.2f"} + rows = [] + for perf_metric in perf_metrics: + if perf_metric.layerwise_perf_info: + for layerwise_metric in perf_metric.layerwise_perf_info.layerwise_info: + field_names = [ + field.name + for field in fields(layerwise_metric) + if field.name != "name" + ] + duplicate_idx = 1 + dict_key = getattr(layerwise_metric, "name") + while dict_key in metric_map: + dict_key = ( + getattr(layerwise_metric, "name") + + " (" + + str(duplicate_idx) + + ")" + ) + duplicate_idx += 1 + for field_name in field_names: + metric_map[dict_key].append( + getattr(layerwise_metric, field_name) + ) + rows = [ + ( + name, + *( + Cell( + value, + Format( + str_fmt=format_types[type(value)] + if type(value) in format_types + else "" + ), + ) + for value in values + ), + ) + for name, values in metric_map.items() + ] + return rows + def _cycles_as_records(perf_metrics: list[PerformanceMetrics]) -> list[tuple]: metric_map = defaultdict(list) for metrics in perf_metrics: @@ -306,7 +358,7 @@ def metrics_as_records(perf_metrics: list[PerformanceMetrics]) -> list[tuple]: _data_beats_as_records, ) for metrics in metrics_func(perf_metrics) - ] + ], _layerwise_as_metrics(perf_metrics) def report_perf_metrics( @@ -315,9 +367,9 @@ def report_perf_metrics( """Return comparison table for the performance metrics.""" if isinstance(perf_metrics, PerformanceMetrics): perf_metrics = [perf_metrics] + rows, layerwise_rows = metrics_as_records(perf_metrics) - rows = metrics_as_records(perf_metrics) - + # Create a separate table for layerwise data if len(perf_metrics) == 2: return Table( columns=[ @@ -349,17 +401,42 @@ def report_perf_metrics( alias="performance_metrics", notes="IMPORTANT: The performance figures above refer to NPU only", ) - - return Table( - columns=[ - Column("Metric", alias="metric", fmt=Format(wrap_width=30)), - Column("Value", alias="value", fmt=Format(wrap_width=15)), - Column("Unit", alias="unit",
fmt=Format(wrap_width=15)), - ], - rows=rows, - name="Performance metrics", - alias="performance_metrics", - notes="IMPORTANT: The performance figures above refer to NPU only", + if layerwise_rows == []: + return Table( + columns=[ + Column("Metric", alias="metric", fmt=Format(wrap_width=30)), + Column("Value", alias="value", fmt=Format(wrap_width=15)), + Column("Unit", alias="unit", fmt=Format(wrap_width=15)), + ], + rows=rows, + name="Performance metrics", + alias="performance_metrics", + notes="IMPORTANT: The performance figures above refer to NPU only", + ) + return CompoundReport( + [ + Table( + columns=[ + Column("Metric", alias="metric", fmt=Format(wrap_width=30)), + Column("Value", alias="value", fmt=Format(wrap_width=15)), + Column("Unit", alias="unit", fmt=Format(wrap_width=15)), + ], + rows=rows, + name="Performance metrics", + alias="performance_metrics", + notes="IMPORTANT: The performance figures above refer to NPU only", + ), + Table( + columns=[ + Column(name, alias=alias, fmt=Format(wrap_width=30)) + for alias, _, name in layer_metrics + ], + rows=layerwise_rows, + name="Layer-Wise Metrics", + alias="layerwise_metrics", + notes="", + ), + ] ) diff --git a/tests/conftest.py b/tests/conftest.py index 345eb8d..9dc1d16 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright 2022-2023, Arm Limited and/or its affiliates. +# SPDX-FileCopyrightText: Copyright 2022-2024, Arm Limited and/or its affiliates. 
# SPDX-License-Identifier: Apache-2.0 """Pytest conf module.""" import shutil @@ -50,6 +50,32 @@ def invalid_input_model_file(test_tflite_invalid_model: Path) -> Path: return test_tflite_invalid_model +@pytest.fixture(scope="session", name="empty_test_csv_file") +def fixture_empty_test_csv_file( # pylint: disable=too-many-locals + test_csv_path: Path, +) -> Path: + """Return empty test csv file path.""" + return test_csv_path / "empty_test_csv_file.csv" + + +@pytest.fixture(scope="session", name="test_csv_file") +def fixture_test_csv_file( # pylint: disable=too-many-locals + test_csv_path: Path, +) -> Path: + """Return test csv file path.""" + return test_csv_path / "test_csv_file.csv" + + +@pytest.fixture(scope="session", name="test_csv_path") +def fixture_test_csv_path( # pylint: disable=too-many-locals + tmp_path_factory: pytest.TempPathFactory, +) -> Generator[Path, None, None]: + """Return path to the temporary directory for test csv files.""" + tmp_path = tmp_path_factory.mktemp("csv_files") + yield tmp_path + shutil.rmtree(tmp_path) + + def get_test_keras_model() -> tf.keras.Model: """Return test Keras model.""" model = tf.keras.Sequential( @@ -87,6 +113,9 @@ def fixture_test_models_path( """Provide path to the test models.""" tmp_path = tmp_path_factory.mktemp("models") + # Need an output directory for verbose performance + Path("output").mkdir(exist_ok=True) + # Keras Model keras_model = get_test_keras_model() save_keras_model(keras_model, tmp_path / TEST_MODEL_KERAS_FILE) diff --git a/tests/test_backend_vela_compiler.py b/tests/test_backend_vela_compiler.py index 9f09efb..5554efb 100644 --- a/tests/test_backend_vela_compiler.py +++ b/tests/test_backend_vela_compiler.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright 2022-2023, Arm Limited and/or its affiliates. +# SPDX-FileCopyrightText: Copyright 2022-2024, Arm Limited and/or its affiliates.
# SPDX-License-Identifier: Apache-2.0 """Tests for module vela/compiler.""" from pathlib import Path @@ -29,7 +29,7 @@ def test_default_vela_compiler() -> None: assert default_compiler.tensor_allocator == TensorAllocator.HillClimb assert default_compiler.cpu_tensor_alignment == 16 assert default_compiler.optimization_strategy == OptimizationStrategy.Performance - assert default_compiler.output_dir == "output" + assert default_compiler.output_dir == Path("output") assert default_compiler.get_config() == { "accelerator_config": "ethos-u55-256", @@ -88,7 +88,7 @@ def test_vela_compiler_with_parameters(test_resources_path: Path) -> None: tensor_allocator="Greedy", cpu_tensor_alignment=4, optimization_strategy="Size", - output_dir="custom_output", + output_dir=Path("custom_output"), ) compiler = VelaCompiler(compiler_options) @@ -101,7 +101,7 @@ def test_vela_compiler_with_parameters(test_resources_path: Path) -> None: assert compiler.tensor_allocator == TensorAllocator.Greedy assert compiler.cpu_tensor_alignment == 4 assert compiler.optimization_strategy == OptimizationStrategy.Size - assert compiler.output_dir == "custom_output" + assert compiler.output_dir == Path("custom_output") assert compiler.get_config() == { "accelerator_config": "ethos-u65-256", @@ -156,6 +156,25 @@ def test_compile_model(test_tflite_model: Path) -> None: assert isinstance(optimized_model, OptimizedModel) +def test_csv_file_created(test_tflite_model: Path) -> None: + """Test that a csv file is created by the vela backend""" + compiler = VelaCompiler( + EthosUConfiguration.load_profile("ethos-u55-256").compiler_options + ) + csv_file_name = test_tflite_model.stem + "_per-layer.csv" + compiler.compile_model(test_tflite_model) + assert (compiler.output_dir / csv_file_name).is_file() + + +# Test to see if the new flag is passed to Vela +def test_verbose_flag_passed() -> None: + """Test that the verbose_performance flag is passed to vela backend""" + compiler = VelaCompiler( + 
EthosUConfiguration.load_profile("ethos-u55-256").compiler_options + ) + assert compiler.return_compiler_options().verbose_performance + + def test_compile_model_fail_sram_exceeded( test_tflite_model: Path, monkeypatch: pytest.MonkeyPatch ) -> None: diff --git a/tests/test_backend_vela_performance.py b/tests/test_backend_vela_performance.py index df2ce08..5800630 100644 --- a/tests/test_backend_vela_performance.py +++ b/tests/test_backend_vela_performance.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright 2022-2023, Arm Limited and/or its affiliates. +# SPDX-FileCopyrightText: Copyright 2022-2024, Arm Limited and/or its affiliates. # SPDX-License-Identifier: Apache-2.0 """Tests for module vela/performance.""" from pathlib import Path @@ -8,6 +8,9 @@ import pytest from mlia.backend.vela.compiler import optimize_model from mlia.backend.vela.performance import estimate_performance +from mlia.backend.vela.performance import layer_metrics +from mlia.backend.vela.performance import LayerwisePerfInfo +from mlia.backend.vela.performance import parse_layerwise_perf_csv from mlia.backend.vela.performance import PerformanceMetrics from mlia.target.ethos_u.config import EthosUConfiguration @@ -22,6 +25,123 @@ def test_estimate_performance(test_tflite_model: Path) -> None: assert isinstance(perf_metrics, PerformanceMetrics) +def test_estimate_performance_csv_parser_called( + monkeypatch: pytest.MonkeyPatch, test_tflite_model: Path +) -> None: + """Test that estimate_performance from backend.vela.performance is called.""" + target_config = EthosUConfiguration.load_profile("ethos-u55-256") + csv_file_name = target_config.compiler_options.output_dir / ( + test_tflite_model.stem + "_per-layer.csv" + ) + mock = MagicMock() + monkeypatch.setattr("mlia.backend.vela.performance.parse_layerwise_perf_csv", mock) + estimate_performance(test_tflite_model, target_config.compiler_options) + mock.assert_called_with(vela_csv_file=csv_file_name, metrics=layer_metrics) + + 
+LAYERWISE_TMP_DATA_STR = """ +TFLite_operator,NNG Operator,SRAM Usage,Peak%,Op Cycles,Network%,NPU,SRAM AC,DRAM AC,OnFlash AC,OffFlash AC,MAC Count,Network%,Util%,Name +CONV_2D,Conv2DBias,11936,54.65201465201465,7312.0,17.648194632168373,7312.0,2000.0,0.0,0.0,0.0,73008,8.653353814644136,3.9002666849015313,sequential/conv1/Relu;sequential/conv1/Conv2D +MAX_POOL_2D,MaxPool,10944,50.10989010989011,2992.0,7.22147132651091,1330.0,2992.0,0.0,0.0,0.0,6912,0.819252432155658,0.9024064171122994,sequential/max_pooling2d/MaxPool +""".strip() + +LAYERWISE_TMP_DATA_MISSING_HEADER_STR = """ +TFLite_operator,NNG Operator,Peak%,Op Cycles,Network%,NPU,SRAM AC,DRAM AC,OnFlash AC,OffFlash AC,MAC Count,Network%,Util%,Name +CONV_2D,Conv2DBias,54.65201465201465,7312.0,17.648194632168373,7312.0,2000.0,0.0,0.0,0.0,73008,8.653353814644136,3.9002666849015313,sequential/conv1/Relu;sequential/conv1/Conv2D +MAX_POOL_2D,MaxPool,50.10989010989011,2992.0,7.22147132651091,1330.0,2992.0,0.0,0.0,0.0,6912,0.819252432155658,0.9024064171122994,sequential/max_pooling2d/MaxPool +""".strip() + +LAYERWISE_MULTI_HEADER_TMP_DATA_STR = """ +TFLite_operator,NNG Operator,SRAM Usage,Peak%,Op Cycles,Network%,NPU,SRAM AC,DRAM AC,OnFlash AC,OffFlash AC,MAC Count,Network%,Util%,Name +CONV_2D,Conv2DBias,11936,54.65201465201465,7312.0,17.648194632168373,7312.0,2000.0,0.0,0.0,0.0,73008,8.653353814644136,3.9002666849015313,sequential/conv1/Relu;sequential/conv1/Conv2D +TFLite_operator,NNG Operator,SRAM Usage,Peak%,Op Cycles,Network%,NPU,SRAM AC,DRAM AC,OnFlash AC,OffFlash AC,MAC Count,Network%,Util%,Name +MAX_POOL_2D,MaxPool,10944,50.10989010989011,2992.0,7.22147132651091,1330.0,2992.0,0.0,0.0,0.0,6912,0.819252432155658,0.9024064171122994,sequential/max_pooling2d/MaxPool +""".strip() + + +TMP_DATA_EXPECTED_STRING = "\ +Name: sequential/conv1/Relu;sequential/conv1/Conv2D, \ +TFLite_operator: CONV_2D, \ +SRAM Usage: 11936, \ +Op Cycles: 7312, \ +NPU: 7312, \ +SRAM AC: 2000, \ +DRAM AC: 0, \ +OnFlash AC: 0, \ +OffFlash AC: 
0, \ +MAC Count: 73008, \ +Util%: 3.9002666849015313, \ +\ +Name: sequential/max_pooling2d/MaxPool, \ +TFLite_operator: MAX_POOL_2D, \ +SRAM Usage: 10944, \ +Op Cycles: 2992, \ +NPU: 1330, \ +SRAM AC: 2992, \ +DRAM AC: 0, \ +OnFlash AC: 0, \ +OffFlash AC: 0, \ +MAC Count: 6912, \ +Util%: 0.9024064171122994, \ +" + + +@pytest.mark.parametrize( + "input_csv_content, expected_output", + [ + (LAYERWISE_TMP_DATA_STR, TMP_DATA_EXPECTED_STRING), + ( + LAYERWISE_MULTI_HEADER_TMP_DATA_STR, + TMP_DATA_EXPECTED_STRING, + ), + ], +) +def test_estimate_performance_parse_layerwise_csv_file( + test_csv_file: Path, input_csv_content: str, expected_output: str +) -> None: + """Test that parsing a csv file produces a LayerwisePerfInfo object.""" + with open(test_csv_file, "w", encoding="utf8") as csv_file: + csv_file.write(input_csv_content) + layerwise_object = parse_layerwise_perf_csv(test_csv_file, layer_metrics) + strings_to_check_layerwise_object = repr(layerwise_object) + assert isinstance(layerwise_object, LayerwisePerfInfo) + assert expected_output == strings_to_check_layerwise_object + + +def test_estimate_performance_parse_layerwise_csv_file_with_missing_headers( + test_csv_file: Path, +) -> None: + """Test that ensures a KeyError + is raised when a csv file is parsed with missing headers. + """ + with open(test_csv_file, "w", encoding="utf8") as csv_file: + csv_file.write(LAYERWISE_TMP_DATA_MISSING_HEADER_STR) + with pytest.raises(KeyError, match="Generated CSV missing expected headers"): + parse_layerwise_perf_csv(test_csv_file, layer_metrics) + + +def test_estimate_performance_parse_layerwise_csv_file_missing_file() -> None: + """Test that ensures a FileNotFoundError + is raised when a non-existent csv file is parsed. 
+ """ + with pytest.raises( + FileNotFoundError, match="CSV File not found at missing_file.csv" + ): + parse_layerwise_perf_csv(Path("missing_file.csv"), layer_metrics) + + +def test_estimate_performance_parse_layerwise_empty_csv_file( + empty_test_csv_file: Path, +) -> None: + """Test that ensures that if an empty csv file + is parsed, we return an empty layerwise object. + """ + empty_test_csv_file.touch() + layerwise_object = parse_layerwise_perf_csv(empty_test_csv_file, layer_metrics) + assert isinstance(layerwise_object, LayerwisePerfInfo) + assert len(layerwise_object.layerwise_info) == 0 + + def test_estimate_performance_already_optimized( tmp_path: Path, test_tflite_model: Path ) -> None: diff --git a/tests/test_cli_commands.py b/tests/test_cli_commands.py index 6b1f19d..480e642 100644 --- a/tests/test_cli_commands.py +++ b/tests/test_cli_commands.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright 2022-2023, Arm Limited and/or its affiliates. +# SPDX-FileCopyrightText: Copyright 2022-2024, Arm Limited and/or its affiliates. 
# SPDX-License-Identifier: Apache-2.0 """Tests for cli.commands module.""" from __future__ import annotations @@ -13,6 +13,7 @@ from unittest.mock import MagicMock import pytest from mlia.backend.manager import DefaultInstallationManager +from mlia.backend.vela.performance import LayerwisePerfInfo from mlia.cli.commands import backend_install from mlia.cli.commands import backend_list from mlia.cli.commands import backend_uninstall @@ -207,6 +208,7 @@ def mock_performance_estimation(monkeypatch: pytest.MonkeyPatch) -> None: EthosUConfiguration.load_profile("ethos-u55-256"), NPUCycles(1, 2, 3, 4, 5, 6), MemoryUsage(1, 2, 3, 4, 5), + LayerwisePerfInfo(layerwise_info=[]), ) monkeypatch.setattr( "mlia.target.ethos_u.data_collection.EthosUPerformanceEstimator.estimate", diff --git a/tests/test_target_ethos_u_config.py b/tests/test_target_ethos_u_config.py index 49e7a40..1025827 100644 --- a/tests/test_target_ethos_u_config.py +++ b/tests/test_target_ethos_u_config.py @@ -4,6 +4,7 @@ from __future__ import annotations from contextlib import ExitStack as does_not_raise +from pathlib import Path from typing import Any import pytest @@ -25,7 +26,7 @@ def test_compiler_options_default_init() -> None: assert opts.tensor_allocator == "HillClimb" assert opts.cpu_tensor_alignment == 16 assert opts.optimization_strategy == "Performance" - assert opts.output_dir == "output" + assert opts.output_dir == Path("output") def test_ethosu_target() -> None: diff --git a/tests/test_target_ethos_u_data_analysis.py b/tests/test_target_ethos_u_data_analysis.py index 713e8ef..3cddf10 100644 --- a/tests/test_target_ethos_u_data_analysis.py +++ b/tests/test_target_ethos_u_data_analysis.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright 2022-2023, Arm Limited and/or its affiliates. +# SPDX-FileCopyrightText: Copyright 2022-2024, Arm Limited and/or its affiliates. 
# SPDX-License-Identifier: Apache-2.0 """Tests for Ethos-U data analysis module.""" from __future__ import annotations @@ -10,6 +10,7 @@ import pytest from mlia.backend.vela.compat import NpuSupported from mlia.backend.vela.compat import Operator from mlia.backend.vela.compat import Operators +from mlia.backend.vela.performance import LayerwisePerfInfo from mlia.core.common import DataItem from mlia.core.data_analysis import Fact from mlia.nn.select import OptimizationSettings @@ -98,6 +99,7 @@ def test_perf_metrics_diff() -> None: NPUCycles(1, 2, 3, 4, 5, 6), # memory metrics are in kilobytes MemoryUsage(*[i * 1024 for i in range(1, 6)]), # type: ignore + LayerwisePerfInfo(layerwise_info=[]), ), [ [ @@ -111,6 +113,7 @@ def test_perf_metrics_diff() -> None: MemoryUsage( *[i * 1024 for i in range(1, 6)] # type: ignore ), + LayerwisePerfInfo(layerwise_info=[]), ), ], ], @@ -141,6 +144,7 @@ def test_perf_metrics_diff() -> None: NPUCycles(1, 2, 3, 4, 5, 6), # memory metrics are in kilobytes MemoryUsage(*[i * 1024 for i in range(1, 6)]), # type: ignore + LayerwisePerfInfo(layerwise_info=[]), ), [], ), diff --git a/tests/test_target_ethos_u_data_collection.py b/tests/test_target_ethos_u_data_collection.py index be93c26..3868b95 100644 --- a/tests/test_target_ethos_u_data_collection.py +++ b/tests/test_target_ethos_u_data_collection.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright 2022-2023, Arm Limited and/or its affiliates. +# SPDX-FileCopyrightText: Copyright 2022-2024, Arm Limited and/or its affiliates. 
# SPDX-License-Identifier: Apache-2.0 """Tests for the data collection module for Ethos-U.""" from pathlib import Path @@ -7,6 +7,7 @@ from unittest.mock import MagicMock import pytest from mlia.backend.vela.compat import Operators +from mlia.backend.vela.performance import LayerwisePerfInfo from mlia.core.context import Context from mlia.core.context import ExecutionContext from mlia.core.data_collection import DataCollector @@ -162,6 +163,7 @@ def mock_performance_estimation( target, NPUCycles(1, 2, 3, 4, 5, 6), MemoryUsage(1, 2, 3, 4, 5), + LayerwisePerfInfo(layerwise_info=[]), ) monkeypatch.setattr( "mlia.target.ethos_u.data_collection.EthosUPerformanceEstimator.estimate", diff --git a/tests/test_target_ethos_u_reporters.py b/tests/test_target_ethos_u_reporters.py index debeeb2..6dff6e1 100644 --- a/tests/test_target_ethos_u_reporters.py +++ b/tests/test_target_ethos_u_reporters.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright 2022-2023, Arm Limited and/or its affiliates. +# SPDX-FileCopyrightText: Copyright 2022-2024, Arm Limited and/or its affiliates. 
# SPDX-License-Identifier: Apache-2.0 """Tests for reports module.""" from __future__ import annotations @@ -10,18 +10,246 @@ import pytest from mlia.backend.vela.compat import NpuSupported from mlia.backend.vela.compat import Operator +from mlia.backend.vela.performance import LayerPerfInfo +from mlia.backend.vela.performance import LayerwisePerfInfo +from mlia.core.reporting import CompoundReport from mlia.core.reporting import Report from mlia.core.reporting import Table from mlia.nn.tensorflow.tflite_compat import TFLiteCompatibilityInfo from mlia.nn.tensorflow.tflite_compat import TFLiteCompatibilityStatus from mlia.target.ethos_u.config import EthosUConfiguration +from mlia.target.ethos_u.performance import MemorySizeType +from mlia.target.ethos_u.performance import MemoryUsage +from mlia.target.ethos_u.performance import PerformanceMetrics from mlia.target.ethos_u.reporters import ethos_u_formatters from mlia.target.ethos_u.reporters import report_operators +from mlia.target.ethos_u.reporters import report_perf_metrics from mlia.target.ethos_u.reporters import report_target_details from mlia.target.registry import profile from mlia.utils.console import remove_ascii_codes +# pylint: disable=line-too-long +@pytest.mark.parametrize( + "perf_metrics, expected_plain_text, expected_json_dict", + [ + ( + [ + PerformanceMetrics( + target_config=EthosUConfiguration.load_profile("ethos-u55-256"), + npu_cycles=None, + memory_usage=MemoryUsage( + sram_memory_area_size=10, + dram_memory_area_size=0, + unknown_memory_area_size=0, + on_chip_flash_memory_area_size=0, + off_chip_flash_memory_area_size=20, + memory_size_type=MemorySizeType.KILOBYTES, + ), + layerwise_perf_info=LayerwisePerfInfo( + layerwise_info=[ + LayerPerfInfo( + name="Test Layer", + tflite_operator="test_operator", + sram_usage=0, + op_cycles=0.0, + npu_cycles=0.0, + sram_access_cycles=0.0, + dram_access_cycles=0.0, + on_chip_flash_access_cycles=0.0, + off_chip_flash_access_cycles=0.0, + mac_count=0, + 
util_mac_percentage=0.0, + ), + LayerPerfInfo( + name="Test Layer 1", + tflite_operator="test_operator", + sram_usage=0, + op_cycles=0.0, + npu_cycles=0.0, + sram_access_cycles=0.0, + dram_access_cycles=0.0, + on_chip_flash_access_cycles=0.0, + off_chip_flash_access_cycles=0.0, + mac_count=0, + util_mac_percentage=0.0, + ), + ] + ), + ) + ], + """ +Performance metrics: +┌─────────────────────┬──────────────┬──────┐ +│ Metric │ Value │ Unit │ +╞═════════════════════╪══════════════╪══════╡ +│ SRAM used │ 10.00 │ KiB │ +├─────────────────────┼──────────────┼──────┤ +│ Off-chip flash used │ 20.00 │ KiB │ +└─────────────────────┴──────────────┴──────┘ +IMPORTANT: The performance figures above refer to NPU only +Layer-Wise Metrics: +┌──────────────┬─────────────────┬──────────────┬──────────────┬──────────────┬──────────────┬──────────────┬──────────────┬──────────────┬──────────────┬──────────────┐ +│ Layer Name │ TFLite Operator │ SRAM Usage │ OP Cycles │ NPU Cycles │ SRAM AC │ DRAM AC │ OnFlash AC │ OffFlash AC │ MAC Count │ MAC Util (%) │ +╞══════════════╪═════════════════╪══════════════╪══════════════╪══════════════╪══════════════╪══════════════╪══════════════╪══════════════╪══════════════╪══════════════╡ +│ Test Layer │ test_operator │ 0 │ 0.00 │ 0.00 │ 0.00 │ 0.00 │ 0.00 │ 0.00 │ 0 │ 0.00 │ +├──────────────┼─────────────────┼──────────────┼──────────────┼──────────────┼──────────────┼──────────────┼──────────────┼──────────────┼──────────────┼──────────────┤ +│ Test Layer 1 │ test_operator │ 0 │ 0.00 │ 0.00 │ 0.00 │ 0.00 │ 0.00 │ 0.00 │ 0 │ 0.00 │ +└──────────────┴─────────────────┴──────────────┴──────────────┴──────────────┴──────────────┴──────────────┴──────────────┴──────────────┴──────────────┴──────────────┘ +""".strip(), + { + "performance_metrics": [ + {"metric": "SRAM used", "value": 10, "unit": "KiB"}, + {"metric": "Off-chip flash used", "value": 20, "unit": "KiB"}, + ], + "layerwise_metrics": [ + { + "name": "Test Layer", + "tflite_operator": 
"test_operator", + "sram_usage": 0, + "op_cycles": 0.0, + "npu_cycles": 0.0, + "sram_access_cycles": 0.0, + "dram_access_cycles": 0.0, + "on_chip_flash_access_cycles": 0.0, + "off_chip_flash_access_cycles": 0.0, + "mac_count": 0, + "util_mac_percentage": 0.0, + }, + { + "name": "Test Layer 1", + "tflite_operator": "test_operator", + "sram_usage": 0, + "op_cycles": 0.0, + "npu_cycles": 0.0, + "sram_access_cycles": 0.0, + "dram_access_cycles": 0.0, + "on_chip_flash_access_cycles": 0.0, + "off_chip_flash_access_cycles": 0.0, + "mac_count": 0, + "util_mac_percentage": 0.0, + }, + ], + }, + ), + ( + [ + PerformanceMetrics( + target_config=EthosUConfiguration.load_profile("ethos-u55-256"), + npu_cycles=None, + memory_usage=MemoryUsage( + sram_memory_area_size=10, + dram_memory_area_size=0, + unknown_memory_area_size=0, + on_chip_flash_memory_area_size=0, + off_chip_flash_memory_area_size=20, + memory_size_type=MemorySizeType.KILOBYTES, + ), + layerwise_perf_info=LayerwisePerfInfo( + layerwise_info=[ + LayerPerfInfo( + name="Test Layer", + tflite_operator="test_operator", + sram_usage=0, + op_cycles=0.0, + npu_cycles=0.0, + sram_access_cycles=0.0, + dram_access_cycles=0.0, + on_chip_flash_access_cycles=0.0, + off_chip_flash_access_cycles=0.0, + mac_count=0, + util_mac_percentage=0.0, + ), + LayerPerfInfo( + name="Test Layer", + tflite_operator="test_operator", + sram_usage=0, + op_cycles=0.0, + npu_cycles=0.0, + sram_access_cycles=0.0, + dram_access_cycles=0.0, + on_chip_flash_access_cycles=0.0, + off_chip_flash_access_cycles=0.0, + mac_count=0, + util_mac_percentage=0.0, + ), + ] + ), + ) + ], + """ +Performance metrics: +┌─────────────────────┬──────────────┬──────┐ +│ Metric │ Value │ Unit │ +╞═════════════════════╪══════════════╪══════╡ +│ SRAM used │ 10.00 │ KiB │ +├─────────────────────┼──────────────┼──────┤ +│ Off-chip flash used │ 20.00 │ KiB │ +└─────────────────────┴──────────────┴──────┘ +IMPORTANT: The performance figures above refer to NPU only +Layer-Wise 
Metrics: +┌────────────────┬─────────────────┬──────────────┬──────────────┬──────────────┬──────────────┬──────────────┬──────────────┬──────────────┬──────────────┬──────────────┐ +│ Layer Name │ TFLite Operator │ SRAM Usage │ OP Cycles │ NPU Cycles │ SRAM AC │ DRAM AC │ OnFlash AC │ OffFlash AC │ MAC Count │ MAC Util (%) │ +╞════════════════╪═════════════════╪══════════════╪══════════════╪══════════════╪══════════════╪══════════════╪══════════════╪══════════════╪══════════════╪══════════════╡ +│ Test Layer │ test_operator │ 0 │ 0.00 │ 0.00 │ 0.00 │ 0.00 │ 0.00 │ 0.00 │ 0 │ 0.00 │ +├────────────────┼─────────────────┼──────────────┼──────────────┼──────────────┼──────────────┼──────────────┼──────────────┼──────────────┼──────────────┼──────────────┤ +│ Test Layer (1) │ test_operator │ 0 │ 0.00 │ 0.00 │ 0.00 │ 0.00 │ 0.00 │ 0.00 │ 0 │ 0.00 │ +└────────────────┴─────────────────┴──────────────┴──────────────┴──────────────┴──────────────┴──────────────┴──────────────┴──────────────┴──────────────┴──────────────┘ +""".strip(), + { + "performance_metrics": [ + {"metric": "SRAM used", "value": 10, "unit": "KiB"}, + {"metric": "Off-chip flash used", "value": 20, "unit": "KiB"}, + ], + "layerwise_metrics": [ + { + "name": "Test Layer", + "tflite_operator": "test_operator", + "sram_usage": 0, + "op_cycles": 0.0, + "npu_cycles": 0.0, + "sram_access_cycles": 0.0, + "dram_access_cycles": 0.0, + "on_chip_flash_access_cycles": 0.0, + "off_chip_flash_access_cycles": 0.0, + "mac_count": 0, + "util_mac_percentage": 0.0, + }, + { + "name": "Test Layer (1)", + "tflite_operator": "test_operator", + "sram_usage": 0, + "op_cycles": 0.0, + "npu_cycles": 0.0, + "sram_access_cycles": 0.0, + "dram_access_cycles": 0.0, + "on_chip_flash_access_cycles": 0.0, + "off_chip_flash_access_cycles": 0.0, + "mac_count": 0, + "util_mac_percentage": 0.0, + }, + ], + }, + ), + ], +) +# pylint: enable=line-too-long +def test_report_perf_metrics( + perf_metrics: PerformanceMetrics, + 
expected_plain_text: str, + expected_json_dict: dict, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Test report_perf_metrics formatter.""" + monkeypatch.setenv("COLUMNS", "5000") + report = report_perf_metrics(perf_metrics) + assert isinstance(report, CompoundReport) + plain_text = remove_ascii_codes(report.to_plain_text()) + assert plain_text == expected_plain_text + json_dict = report.to_json() + assert json_dict == expected_json_dict + + @pytest.mark.parametrize( "ops, expected_plain_text, expected_json_dict", [ -- cgit v1.2.1