author:     Dmitrii Agibov <dmitrii.agibov@arm.com>   2022-11-18 16:34:03 +0000
committer:  Dmitrii Agibov <dmitrii.agibov@arm.com>   2022-11-29 14:44:13 +0000
commit:     37959522a805a5e23c930ed79aac84920c3cb208 (patch)
tree:       484af1240a93c955a72ce2e452432383b6704b56 /src/mlia/tools/vela_wrapper.py
parent:     5568f9f000d673ac53e710dcc8991fec6e8a5488 (diff)
download:   mlia-37959522a805a5e23c930ed79aac84920c3cb208.tar.gz
Move backends functionality into separate modules
- Move backend management/executor code into module backend_core
- Create separate module for each backend in "backend" module
- Move each backend into corresponding module
- Split Vela wrapper into several submodules
Change-Id: If01b6774aab6501951212541cc5d7f5aa7c97e95
Diffstat (limited to 'src/mlia/tools/vela_wrapper.py')
-rw-r--r--  src/mlia/tools/vela_wrapper.py | 497
1 file changed, 0 insertions, 497 deletions
diff --git a/src/mlia/tools/vela_wrapper.py b/src/mlia/tools/vela_wrapper.py
deleted file mode 100644
index 00d2f2c..0000000
--- a/src/mlia/tools/vela_wrapper.py
+++ /dev/null
@@ -1,497 +0,0 @@
-# SPDX-FileCopyrightText: Copyright 2022, Arm Limited and/or its affiliates.
-# SPDX-License-Identifier: Apache-2.0
-"""Vela wrapper module."""
-from __future__ import annotations
-
-import itertools
-import logging
-import sys
-from dataclasses import dataclass
-from pathlib import Path
-from typing import Any
-from typing import Literal
-
-import numpy as np
-from ethosu.vela.architecture_features import ArchitectureFeatures
-from ethosu.vela.compiler_driver import compiler_driver
-from ethosu.vela.compiler_driver import CompilerOptions
-from ethosu.vela.compiler_driver import TensorAllocator
-from ethosu.vela.model_reader import ModelReaderOptions
-from ethosu.vela.model_reader import read_model
-from ethosu.vela.nn_graph import Graph
-from ethosu.vela.nn_graph import NetworkType
-from ethosu.vela.npu_performance import PassCycles
-from ethosu.vela.operation import CustomType
-from ethosu.vela.operation import Op
-from ethosu.vela.scheduler import OptimizationStrategy
-from ethosu.vela.scheduler import SchedulerOptions
-from ethosu.vela.tensor import BandwidthDirection
-from ethosu.vela.tensor import MemArea
-from ethosu.vela.tensor import Tensor
-from ethosu.vela.tflite_mapping import optype_to_builtintype
-from ethosu.vela.tflite_model_semantic import TFLiteSemantic
-from ethosu.vela.tflite_supported_operators import TFLiteSupportedOperators
-from ethosu.vela.tflite_writer import write_tflite
-from ethosu.vela.vela import generate_supported_ops
-
-from mlia.utils.logging import redirect_output
-
-
-logger = logging.getLogger(__name__)
-
-VELA_INTERNAL_OPS = (Op.Placeholder, Op.SubgraphInput, Op.Const)
-
-
-@dataclass
-class PerformanceMetrics:  # pylint: disable=too-many-instance-attributes
-    """Contains all the performance metrics Vela generates in a run."""
-
-    npu_cycles: int
-    sram_access_cycles: int
-    dram_access_cycles: int
-    on_chip_flash_access_cycles: int
-    off_chip_flash_access_cycles: int
-    total_cycles: int
-    batch_inference_time: float
-    inferences_per_second: float
-    batch_size: int
-    unknown_memory_area_size: int
-    sram_memory_area_size: int
-    dram_memory_area_size: int
-    on_chip_flash_memory_area_size: int
-    off_chip_flash_memory_area_size: int
-
-
-@dataclass
-class NpuSupported:
-    """Operator's npu supported attribute."""
-
-    supported: bool
-    reasons: list[tuple[str, str]]
-
-
-@dataclass
-class Operator:
-    """Model operator."""
-
-    name: str
-    op_type: str
-    run_on_npu: NpuSupported
-
-    @property
-    def cpu_only(self) -> bool:
-        """Return true if operator is CPU only."""
-        cpu_only_reasons = [("CPU only operator", "")]
-        return (
-            not self.run_on_npu.supported
-            and self.run_on_npu.reasons == cpu_only_reasons
-        )
-
-
-@dataclass
-class Operators:
-    """Model's operators."""
-
-    ops: list[Operator]
-
-    @property
-    def npu_supported_ratio(self) -> float:
-        """Return NPU supported ratio."""
-        total = self.total_number
-        npu_supported = self.npu_supported_number
-
-        if total == 0 or npu_supported == 0:
-            return 0
-
-        return npu_supported / total
-
-    @property
-    def npu_unsupported_ratio(self) -> float:
-        """Return NPU unsupported ratio."""
-        return 1 - self.npu_supported_ratio
-
-    @property
-    def total_number(self) -> int:
-        """Return total number of operators."""
-        return len(self.ops)
-
-    @property
-    def npu_supported_number(self) -> int:
-        """Return number of npu supported operators."""
-        return sum(op.run_on_npu.supported for op in self.ops)
-
-
-@dataclass
-class Model:
-    """Model metadata."""
-
-    nng: Graph
-    network_type: NetworkType
-
-    @property
-    def optimized(self) -> bool:
-        """Return true if model is already optimized."""
-        return any(
-            op.attrs.get("custom_type") == CustomType.ExistingNpuOp
-            for sg in self.nng.subgraphs
-            for op in sg.get_all_ops()
-        )
-
-
-@dataclass
-class OptimizedModel:
-    """Instance of the Vela optimized model."""
-
-    nng: Graph
-    arch: ArchitectureFeatures
-    compiler_options: CompilerOptions
-    scheduler_options: SchedulerOptions
-
-    def save(self, output_filename: str | Path) -> None:
-        """Save instance of the optimized model to the file."""
-        write_tflite(self.nng, output_filename)
-
-
-AcceleratorConfigType = Literal[
-    "ethos-u55-32",
-    "ethos-u55-64",
-    "ethos-u55-128",
-    "ethos-u55-256",
-    "ethos-u65-256",
-    "ethos-u65-512",
-]
-
-TensorAllocatorType = Literal["LinearAlloc", "Greedy", "HillClimb"]
-
-OptimizationStrategyType = Literal["Performance", "Size"]
-
-
-@dataclass
-class VelaCompilerOptions:  # pylint: disable=too-many-instance-attributes
-    """Vela compiler options."""
-
-    config_files: str | list[str] | None = None
-    system_config: str = ArchitectureFeatures.DEFAULT_CONFIG
-    memory_mode: str = ArchitectureFeatures.DEFAULT_CONFIG
-    accelerator_config: AcceleratorConfigType | None = None
-    max_block_dependency: int = ArchitectureFeatures.MAX_BLOCKDEP
-    arena_cache_size: int | None = None
-    tensor_allocator: TensorAllocatorType = "HillClimb"
-    cpu_tensor_alignment: int = Tensor.AllocationQuantum
-    optimization_strategy: OptimizationStrategyType = "Performance"
-    output_dir: str | None = None
-    recursion_limit: int = 1000
-
-
-class VelaCompiler:  # pylint: disable=too-many-instance-attributes
-    """Vela compiler wrapper."""
-
-    def __init__(self, compiler_options: VelaCompilerOptions):
-        """Init Vela wrapper instance."""
-        self.config_files = compiler_options.config_files
-        self.system_config = compiler_options.system_config
-        self.memory_mode = compiler_options.memory_mode
-        self.accelerator_config = compiler_options.accelerator_config
-        self.max_block_dependency = compiler_options.max_block_dependency
-        self.arena_cache_size = compiler_options.arena_cache_size
-        self.tensor_allocator = TensorAllocator[compiler_options.tensor_allocator]
-        self.cpu_tensor_alignment = compiler_options.cpu_tensor_alignment
-        self.optimization_strategy = OptimizationStrategy[
-            compiler_options.optimization_strategy
-        ]
-        self.output_dir = compiler_options.output_dir
-        self.recursion_limit = compiler_options.recursion_limit
-
-        sys.setrecursionlimit(self.recursion_limit)
-
-    def read_model(self, model: str | Path) -> Model:
-        """Read model."""
-        logger.debug("Read model %s", model)
-
-        nng, network_type = self._read_model(model)
-        return Model(nng, network_type)
-
-    def compile_model(self, model: str | Path | Model) -> OptimizedModel:
-        """Compile the model."""
-        if isinstance(model, (str, Path)):
-            nng, network_type = self._read_model(model)
-        else:
-            nng, network_type = model.nng, NetworkType.TFLite
-
-        if not nng:
-            raise Exception("Unable to read model")
-
-        try:
-            arch = self._architecture_features()
-            compiler_options = self._compiler_options()
-            scheduler_options = self._scheduler_options()
-
-            with redirect_output(
-                logger, stdout_level=logging.DEBUG, stderr_level=logging.DEBUG
-            ):
-                compiler_driver(
-                    nng, arch, compiler_options, scheduler_options, network_type
-                )
-
-            return OptimizedModel(nng, arch, compiler_options, scheduler_options)
-        except (SystemExit, Exception) as err:
-            raise Exception("Model could not be optimized with Vela compiler") from err
-
-    def get_config(self) -> dict[str, Any]:
-        """Get compiler configuration."""
-        arch = self._architecture_features()
-
-        memory_area = {
-            mem.name: {
-                "clock_scales": arch.memory_clock_scales[mem],
-                "burst_length": arch.memory_burst_length[mem],
-                "read_latency": arch.memory_latency[mem][BandwidthDirection.Read],
-                "write_latency": arch.memory_latency[mem][BandwidthDirection.Write],
-            }
-            for mem in (
-                MemArea.Sram,
-                MemArea.Dram,
-                MemArea.OnChipFlash,
-                MemArea.OffChipFlash,
-            )
-        }
-
-        return {
-            "accelerator_config": arch.accelerator_config.value,
-            "system_config": arch.system_config,
-            "core_clock": arch.core_clock,
-            "axi0_port": arch.axi0_port.name,
-            "axi1_port": arch.axi1_port.name,
-            "memory_mode": arch.memory_mode,
-            "const_mem_area": arch.const_mem_area.name,
-            "arena_mem_area": arch.arena_mem_area.name,
-            "cache_mem_area": arch.cache_mem_area.name,
-            "arena_cache_size": arch.arena_cache_size,
-            "permanent_storage_mem_area": arch.permanent_storage_mem_area.name,
-            "feature_map_storage_mem_area": arch.feature_map_storage_mem_area.name,
-            "fast_storage_mem_area": arch.fast_storage_mem_area.name,
-            "memory_area": memory_area,
-        }
-
-    @staticmethod
-    def _read_model(model: str | Path) -> tuple[Graph, NetworkType]:
-        """Read TensorFlow Lite model."""
-        try:
-            model_path = str(model) if isinstance(model, Path) else model
-
-            with redirect_output(
-                logger, stdout_level=logging.DEBUG, stderr_level=logging.DEBUG
-            ):
-                return read_model(model_path, ModelReaderOptions())  # type: ignore
-        except (SystemExit, Exception) as err:
-            raise Exception(f"Unable to read model {model_path}") from err
-
-    def _architecture_features(self) -> ArchitectureFeatures:
-        """Return ArchitectureFeatures instance."""
-        return ArchitectureFeatures(
-            vela_config_files=self.config_files,
-            accelerator_config=self.accelerator_config,
-            system_config=self.system_config,
-            memory_mode=self.memory_mode,
-            max_blockdep=self.max_block_dependency,
-            verbose_config=False,
-            arena_cache_size=self.arena_cache_size,
-        )
-
-    def _scheduler_options(self) -> SchedulerOptions:
-        """Return SchedulerOptions instance."""
-        arch = self._architecture_features()
-
-        return SchedulerOptions(
-            optimization_strategy=self.optimization_strategy,
-            sram_target=arch.arena_cache_size,
-            verbose_schedule=False,
-        )
-
-    def _compiler_options(self) -> CompilerOptions:
-        """Return CompilerOptions instance."""
-        return CompilerOptions(
-            verbose_graph=False,
-            verbose_quantization=False,
-            verbose_packing=False,
-            verbose_tensor_purpose=False,
-            verbose_tensor_format=False,
-            verbose_allocation=False,
-            verbose_high_level_command_stream=False,
-            verbose_register_command_stream=False,
-            verbose_operators=False,
-            verbose_weights=False,
-            show_cpu_operations=False,
-            tensor_allocator=self.tensor_allocator,
-            timing=False,
-            output_dir=self.output_dir,
-            cpu_tensor_alignment=self.cpu_tensor_alignment,
-        )
-
-
-def resolve_compiler_config(
-    vela_compiler_options: VelaCompilerOptions,
-) -> dict[str, Any]:
-    """Resolve passed compiler options.
-
-    Vela has a number of configuration parameters that are
-    resolved while the compiler options are parsed. E.g. Vela
-    reads configuration parameters from vela.ini and fills
-    its internal structures with the resolved values (memory
-    mode, system config, etc.).
-
-    In order to get this information we need to create an
-    instance of the Vela compiler first.
-    """
-    vela_compiler = VelaCompiler(vela_compiler_options)
-    return vela_compiler.get_config()
-
-
-def estimate_performance(
-    model_path: Path, compiler_options: VelaCompilerOptions
-) -> PerformanceMetrics:
-    """Return performance estimations for the model/device.
-
-    Logic for this function comes from Vela module stats_writer.py
-    """
-    logger.debug(
-        "Estimate performance for the model %s on %s",
-        model_path,
-        compiler_options.accelerator_config,
-    )
-
-    vela_compiler = VelaCompiler(compiler_options)
-
-    initial_model = vela_compiler.read_model(model_path)
-    if initial_model.optimized:
-        raise Exception("Unable to estimate performance for the given optimized model")
-
-    optimized_model = vela_compiler.compile_model(initial_model)
-
-    return _performance_metrics(optimized_model)
-
-
-def optimize_model(
-    model_path: Path, compiler_options: VelaCompilerOptions, output_model_path: Path
-) -> None:
-    """Optimize the model and save it to the provided output path."""
-    logger.debug(
-        "Optimize model %s for device %s",
-        model_path,
-        compiler_options.accelerator_config,
-    )
-
-    vela_compiler = VelaCompiler(compiler_options)
-    optimized_model = vela_compiler.compile_model(model_path)
-
-    logger.debug("Save optimized model into %s", output_model_path)
-    optimized_model.save(output_model_path)
-
-
-def _performance_metrics(optimized_model: OptimizedModel) -> PerformanceMetrics:
-    """Return performance metrics for optimized model."""
-    cycles = optimized_model.nng.cycles
-
-    def memory_usage(mem_area: MemArea) -> int:
-        """Get memory usage for the provided memory area type."""
-        memory_used: dict[MemArea, int] = optimized_model.nng.memory_used
-        bandwidths = optimized_model.nng.bandwidths
-
-        return memory_used.get(mem_area, 0) if np.sum(bandwidths[mem_area]) > 0 else 0
-
-    midpoint_fps = np.nan
-    midpoint_inference_time = cycles[PassCycles.Total] / optimized_model.arch.core_clock
-    if midpoint_inference_time > 0:
-        midpoint_fps = 1 / midpoint_inference_time
-
-    return PerformanceMetrics(
-        npu_cycles=int(cycles[PassCycles.Npu]),
-        sram_access_cycles=int(cycles[PassCycles.SramAccess]),
-        dram_access_cycles=int(cycles[PassCycles.DramAccess]),
-        on_chip_flash_access_cycles=int(cycles[PassCycles.OnChipFlashAccess]),
-        off_chip_flash_access_cycles=int(cycles[PassCycles.OffChipFlashAccess]),
-        total_cycles=int(cycles[PassCycles.Total]),
-        batch_inference_time=midpoint_inference_time * 1000,
-        inferences_per_second=midpoint_fps,
-        batch_size=optimized_model.nng.batch_size,
-        unknown_memory_area_size=memory_usage(MemArea.Unknown),
-        sram_memory_area_size=memory_usage(MemArea.Sram),
-        dram_memory_area_size=memory_usage(MemArea.Dram),
-        on_chip_flash_memory_area_size=memory_usage(MemArea.OnChipFlash),
-        off_chip_flash_memory_area_size=memory_usage(MemArea.OffChipFlash),
-    )
-
-
-def supported_operators(
    model_path: Path, compiler_options: VelaCompilerOptions
-) -> Operators:
-    """Return list of model's operators."""
-    logger.debug("Check supported operators for the model %s", model_path)
-
-    vela_compiler = VelaCompiler(compiler_options)
-    initial_model = vela_compiler.read_model(model_path)
-
-    return Operators(
-        [
-            Operator(op.name, optype_to_builtintype(op.type), run_on_npu(op))
-            for sg in initial_model.nng.subgraphs
-            for op in sg.get_all_ops()
-            if op.type not in VELA_INTERNAL_OPS
-        ]
-    )
-
-
-def run_on_npu(operator: Op) -> NpuSupported:
-    """Return information about whether the operator can run on NPU.
-
-    Vela does a number of checks that can help establish whether
-    a particular operator is supported to run on NPU.
-
-    There are two groups of checks:
-      - general TensorFlow Lite constraints
-      - operator specific constraints
-
-    If an operator is not supported on NPU then this function
-    will return the reason for that.
-
-    The reason is split in two parts:
-      - general description of why the operator cannot be placed on NPU
-      - details on the particular operator
-    """
-    semantic_checker = TFLiteSemantic()
-    semantic_constraints = itertools.chain(
-        semantic_checker.generic_constraints,
-        semantic_checker.specific_constraints[operator.type],
-    )
-
-    for constraint in semantic_constraints:
-        op_valid, op_reason = constraint(operator)
-        if not op_valid:
-            return NpuSupported(False, [(constraint.__doc__, op_reason)])
-
-    if operator.type not in TFLiteSupportedOperators.supported_operators:
-        reasons = (
-            [("CPU only operator", "")]
-            if operator.type not in VELA_INTERNAL_OPS
-            else []
-        )
-
-        return NpuSupported(False, reasons)
-
-    tflite_supported_operators = TFLiteSupportedOperators()
-    operation_constraints = itertools.chain(
-        tflite_supported_operators.generic_constraints,
-        tflite_supported_operators.specific_constraints[operator.type],
-    )
-    for constraint in operation_constraints:
-        op_valid, op_reason = constraint(operator)
-        if not op_valid:
-            return NpuSupported(False, [(constraint.__doc__, op_reason)])
-
-    return NpuSupported(True, [])
-
-
-def generate_supported_operators_report() -> None:
-    """Generate supported operators report in current working directory."""
-    with redirect_output(logger):
-        generate_supported_ops()
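
For context when reviewing the removal: the sketch below shows roughly how this module's entry points were driven before the split. It is a minimal, illustrative example only — the model path and accelerator value are hypothetical — but the imported names (VelaCompilerOptions, supported_operators, estimate_performance, optimize_model) are the ones defined in the deleted file above, at the pre-refactor import path.

    # Illustrative usage of the deleted module (pre-refactor import path).
    from pathlib import Path

    from mlia.tools.vela_wrapper import (
        VelaCompilerOptions,
        estimate_performance,
        optimize_model,
        supported_operators,
    )

    # Accelerator value is one of the AcceleratorConfigType literals above;
    # the model path is hypothetical.
    options = VelaCompilerOptions(accelerator_config="ethos-u55-256")
    model = Path("model.tflite")

    # Operator compatibility: which ops Vela can place on the NPU.
    ops = supported_operators(model, options)
    print(f"NPU supported operators: {ops.npu_supported_number}/{ops.total_number}")

    # Cycle counts and memory usage from Vela's performance estimator.
    metrics = estimate_performance(model, options)
    print(f"Total cycles: {metrics.total_cycles}")

    # Compile with Vela and write the optimized model to disk.
    optimize_model(model, options, Path("model_vela.tflite"))

After this change, the same functionality lives in the per-backend modules described in the commit message rather than in this single wrapper file.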