diff options
Diffstat (limited to 'src/mlia/backend')
-rw-r--r-- | src/mlia/backend/vela/compiler.py | 9 | ||||
-rw-r--r-- | src/mlia/backend/vela/performance.py | 151 |
2 files changed, 155 insertions, 5 deletions
diff --git a/src/mlia/backend/vela/compiler.py b/src/mlia/backend/vela/compiler.py index b591056..fe9e365 100644 --- a/src/mlia/backend/vela/compiler.py +++ b/src/mlia/backend/vela/compiler.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright 2022-2023, Arm Limited and/or its affiliates. +# SPDX-FileCopyrightText: Copyright 2022-2024, Arm Limited and/or its affiliates. # SPDX-License-Identifier: Apache-2.0 """Vela compiler wrapper module.""" from __future__ import annotations @@ -90,7 +90,7 @@ class VelaCompilerOptions: # pylint: disable=too-many-instance-attributes tensor_allocator: TensorAllocatorType = "HillClimb" cpu_tensor_alignment: int = Tensor.AllocationQuantum optimization_strategy: OptimizationStrategyType = "Performance" - output_dir: str = "output" + output_dir: Path = Path("output") recursion_limit: int = 1000 @@ -251,6 +251,7 @@ class VelaCompiler: # pylint: disable=too-many-instance-attributes verbose_register_command_stream=False, verbose_operators=False, verbose_weights=False, + verbose_performance=True, show_cpu_operations=False, tensor_allocator=self.tensor_allocator, timing=False, @@ -258,6 +259,10 @@ class VelaCompiler: # pylint: disable=too-many-instance-attributes cpu_tensor_alignment=self.cpu_tensor_alignment, ) + def return_compiler_options(self) -> CompilerOptions: + """Return CompilerOptions instance for test purposes.""" + return self._compiler_options() + def resolve_compiler_config( vela_compiler_options: VelaCompilerOptions, diff --git a/src/mlia/backend/vela/performance.py b/src/mlia/backend/vela/performance.py index a548b26..72a8ceb 100644 --- a/src/mlia/backend/vela/performance.py +++ b/src/mlia/backend/vela/performance.py @@ -1,11 +1,16 @@ -# SPDX-FileCopyrightText: Copyright 2022-2023, Arm Limited and/or its affiliates. +# SPDX-FileCopyrightText: Copyright 2022-2024, Arm Limited and/or its affiliates. # SPDX-License-Identifier: Apache-2.0 """Vela performance module.""" from __future__ import annotations +import csv import logging +import os +from collections import Counter from dataclasses import dataclass +from dataclasses import fields from pathlib import Path +from pydoc import locate import numpy as np from ethosu.vela.npu_performance import PassCycles @@ -37,6 +42,130 @@ class PerformanceMetrics: # pylint: disable=too-many-instance-attributes dram_memory_area_size: int on_chip_flash_memory_area_size: int off_chip_flash_memory_area_size: int + layerwise_performance_info: LayerwisePerfInfo + + +@dataclass +class LayerPerfInfo: # pylint: disable=too-many-instance-attributes + """Contains metrics from a row from the per-layer csv file from Vela.""" + + name: str + tflite_operator: str + sram_usage: int + op_cycles: int + npu_cycles: int + sram_access_cycles: int + dram_access_cycles: int + on_chip_flash_access_cycles: int + off_chip_flash_access_cycles: int + mac_count: int + util_mac_percentage: float + + def __repr__(self) -> str: + """Return String Representation of LayerPerfInfo object.""" + header_values = {key: value for key, value, _ in layer_metrics} + string_to_check = "" + for field in fields(self): + string_to_check += ( + f"{header_values[field.name]}: {getattr(self, field.name)}, " + ) + return string_to_check + + +@dataclass +class LayerwisePerfInfo: + """Contains all the per-layer metrics from the per-layer csv file from Vela.""" + + layerwise_info: list[LayerPerfInfo] + + def __repr__(self) -> str: + """Return String Representation of LayerwisePerfInfo object.""" + strings_to_check_layerwise_object = "" + for layer in self.layerwise_info: + string_to_check = repr(layer) + strings_to_check_layerwise_object += string_to_check + return strings_to_check_layerwise_object + + +complete_layer_metrics = [ + ("tflite_operator", "TFLite_operator", "TFLite Operator"), + ("nng_operator", "NNG Operator", "NNG Operator"), + ("sram_usage", "SRAM Usage", "SRAM Usage"), + ("peak_percentage", "Peak%", "Peak SRAM Usage (%)"), + ("op_cycles", "Op Cycles", "OP Cycles"), + ("network_percentage_1", "Network%", "OP Cycles in Network (%)"), + ("npu_cycles", "NPU", "NPU Cycles"), + ("sram_access_cycles", "SRAM AC", "SRAM AC"), + ("dram_access_cycles", "DRAM AC", "DRAM AC"), + ("on_chip_flash_access_cycles", "OnFlash AC", "OnFlash AC"), + ("off_chip_flash_access_cycles", "OffFlash AC", "OffFlash AC"), + ("mac_count", "MAC Count", "MAC Count"), + ("network_percentage_2", "Network% (1)", "MAC Count in Network (%)"), + ("util_mac_percentage", "Util%", "MAC Util (%)"), + ("name", "Name", "Layer Name"), +] + +OUTPUT_METRICS = [field.name for field in fields(LayerPerfInfo)] + +layer_metrics = [ + layer_metric + for layer_metric in complete_layer_metrics + if layer_metric[0] in OUTPUT_METRICS +] +layer_metrics.sort(key=lambda e: OUTPUT_METRICS.index(e[0])) + + +def parse_layerwise_perf_csv( # pylint: disable=too-many-locals + vela_csv_file: Path, metrics: list +) -> LayerwisePerfInfo: + """Parse the per-layer csv file from backend vela.""" + if not vela_csv_file.is_file(): + raise FileNotFoundError(f"CSV File not found at {vela_csv_file}\n") + layerwise_info = [] # type: list[LayerPerfInfo] + with open(vela_csv_file, encoding="UTF-8") as csv_file: + layerwise_reader = csv.reader(csv_file, delimiter=",") + try: + headers = list(next(layerwise_reader)) + except StopIteration: + return LayerwisePerfInfo(layerwise_info=layerwise_info) + headers_to_check_cpu_ops = headers.copy() + multiple_header_count = Counter(headers) + # Deal with multiple of the same values in CSV header. + for idx, header in enumerate(reversed(headers)): + if multiple_header_count[header] > 1: + headers[len(headers) - idx - 1] = ( + headers[len(headers) - idx - 1] + + " (" + + str(multiple_header_count[header] - 1) + + ")" + ) + multiple_header_count[header] -= 1 + for row in layerwise_reader: + row_as_dict = dict(zip(headers, row)) + if row == headers_to_check_cpu_ops: + continue + try: + key_types = { + field.name: locate(str(field.type)) + for field in fields(LayerPerfInfo) + } + ids_to_metrics = {} + for key, title, _ in metrics: + try: + ids_to_metrics[key] = key_types[key]( # type: ignore + row_as_dict[title] + ) + except ValueError as err: + if "invalid literal for int() with base 10" in str(err): + ids_to_metrics[key] = key_types[key]( # type: ignore + float(row_as_dict[title]) + ) + else: + raise + layerwise_info.append(LayerPerfInfo(**ids_to_metrics)) + except KeyError as err: + raise KeyError("Generated CSV missing expected headers") from err + return LayerwisePerfInfo(layerwise_info=layerwise_info) def estimate_performance( @@ -61,11 +190,26 @@ def estimate_performance( ) optimized_model = vela_compiler.compile_model(initial_model) + output_dir = optimized_model.compiler_options.output_dir + csv_paths = [entry for entry in os.listdir(output_dir) if "per-layer.csv" in entry] + model_name = str(model_path.stem) + csv_file_found = None + for path in csv_paths: + if model_name in path: + csv_file_found = path + if csv_file_found is None: + raise FileNotFoundError("Vela per-layer CSV file not found") + csv_path = Path(output_dir) / csv_file_found + layerwise_performance_info = parse_layerwise_perf_csv( + vela_csv_file=csv_path, metrics=layer_metrics + ) - return _performance_metrics(optimized_model) + return _performance_metrics(layerwise_performance_info, optimized_model) -def _performance_metrics(optimized_model: OptimizedModel) -> PerformanceMetrics: +def _performance_metrics( + layerwise_performance_info: LayerwisePerfInfo, optimized_model: OptimizedModel +) -> PerformanceMetrics: """Return performance metrics for optimized model.""" cycles = optimized_model.nng.cycles @@ -96,4 +240,5 @@ def _performance_metrics(optimized_model: OptimizedModel) -> PerformanceMetrics: dram_memory_area_size=memory_usage(MemArea.Dram), on_chip_flash_memory_area_size=memory_usage(MemArea.OnChipFlash), off_chip_flash_memory_area_size=memory_usage(MemArea.OffChipFlash), + layerwise_performance_info=layerwise_performance_info, ) |