From 37959522a805a5e23c930ed79aac84920c3cb208 Mon Sep 17 00:00:00 2001
From: Dmitrii Agibov
Date: Fri, 18 Nov 2022 16:34:03 +0000
Subject: Move backends functionality into separate modules

- Move backend management/executor code into module backend_core
- Create separate module for each backend in "backend" module
- Move each backend into corresponding module
- Split Vela wrapper into several submodules

Change-Id: If01b6774aab6501951212541cc5d7f5aa7c97e95
---
 src/mlia/backend/vela/__init__.py    |   3 +
 src/mlia/backend/vela/compat.py      | 158 ++++++++++++++++++++
 src/mlia/backend/vela/compiler.py    | 274 ++++++++++++++++++++++++++++++++++
 src/mlia/backend/vela/performance.py |  97 +++++++++++++
 4 files changed, 532 insertions(+)
 create mode 100644 src/mlia/backend/vela/__init__.py
 create mode 100644 src/mlia/backend/vela/compat.py
 create mode 100644 src/mlia/backend/vela/compiler.py
 create mode 100644 src/mlia/backend/vela/performance.py

(limited to 'src/mlia/backend/vela')

diff --git a/src/mlia/backend/vela/__init__.py b/src/mlia/backend/vela/__init__.py
new file mode 100644
index 0000000..6ea0c21
--- /dev/null
+++ b/src/mlia/backend/vela/__init__.py
@@ -0,0 +1,3 @@
+# SPDX-FileCopyrightText: Copyright 2022, Arm Limited and/or its affiliates.
+# SPDX-License-Identifier: Apache-2.0
+"""Vela backend module."""
diff --git a/src/mlia/backend/vela/compat.py b/src/mlia/backend/vela/compat.py
new file mode 100644
index 0000000..3ec42d1
--- /dev/null
+++ b/src/mlia/backend/vela/compat.py
@@ -0,0 +1,158 @@
+# SPDX-FileCopyrightText: Copyright 2022, Arm Limited and/or its affiliates.
+# SPDX-License-Identifier: Apache-2.0
+"""Vela operator compatibility module."""
+from __future__ import annotations
+
+import itertools
+import logging
+from dataclasses import dataclass
+from pathlib import Path
+
+from ethosu.vela.operation import Op
+from ethosu.vela.tflite_mapping import optype_to_builtintype
+from ethosu.vela.tflite_model_semantic import TFLiteSemantic
+from ethosu.vela.tflite_supported_operators import TFLiteSupportedOperators
+from ethosu.vela.vela import generate_supported_ops
+
+from mlia.backend.vela.compiler import VelaCompiler
+from mlia.backend.vela.compiler import VelaCompilerOptions
+from mlia.utils.logging import redirect_output
+
+
+logger = logging.getLogger(__name__)
+
+VELA_INTERNAL_OPS = (Op.Placeholder, Op.SubgraphInput, Op.Const)
+
+
+@dataclass
+class NpuSupported:
+    """Operator's npu supported attribute."""
+
+    supported: bool
+    reasons: list[tuple[str, str]]
+
+
+@dataclass
+class Operator:
+    """Model operator."""
+
+    name: str
+    op_type: str
+    run_on_npu: NpuSupported
+
+    @property
+    def cpu_only(self) -> bool:
+        """Return true if operator is CPU only."""
+        cpu_only_reasons = [("CPU only operator", "")]
+        return (
+            not self.run_on_npu.supported
+            and self.run_on_npu.reasons == cpu_only_reasons
+        )
+
+
+@dataclass
+class Operators:
+    """Model's operators."""
+
+    ops: list[Operator]
+
+    @property
+    def npu_supported_ratio(self) -> float:
+        """Return NPU supported ratio."""
+        total = self.total_number
+        npu_supported = self.npu_supported_number
+
+        if total == 0 or npu_supported == 0:
+            return 0
+
+        return npu_supported / total
+
+    @property
+    def npu_unsupported_ratio(self) -> float:
+        """Return NPU unsupported ratio."""
+        return 1 - self.npu_supported_ratio
+
+    @property
+    def total_number(self) -> int:
+        """Return total number of operators."""
+        return len(self.ops)
+
+    @property
+    def npu_supported_number(self) -> int:
+        """Return number of npu supported operators."""
+        return sum(op.run_on_npu.supported for op in self.ops)
+
+
+def supported_operators(
+    model_path: Path, compiler_options: VelaCompilerOptions
+) -> Operators:
+    """Return list of model's operators."""
+    logger.debug("Check supported operators for the model %s", model_path)
+
+    vela_compiler = VelaCompiler(compiler_options)
+    initial_model = vela_compiler.read_model(model_path)
+
+    return Operators(
+        [
+            Operator(op.name, optype_to_builtintype(op.type), run_on_npu(op))
+            for sg in initial_model.nng.subgraphs
+            for op in sg.get_all_ops()
+            if op.type not in VELA_INTERNAL_OPS
+        ]
+    )
+
+
+def run_on_npu(operator: Op) -> NpuSupported:
+    """Return information about whether the operator can run on the NPU.
+
+    Vela does a number of checks that help establish whether a
+    particular operator is supported to run on the NPU.
+
+    There are two groups of checks:
+      - general TensorFlow Lite constraints
+      - operator specific constraints
+
+    If an operator is not supported on the NPU then this function
+    returns the reasons why.
+
+    Each reason has two parts:
+      - general description of why the operator cannot be placed on the NPU
+      - details on the particular operator
+    """
+    semantic_checker = TFLiteSemantic()
+    semantic_constraints = itertools.chain(
+        semantic_checker.generic_constraints,
+        semantic_checker.specific_constraints[operator.type],
+    )
+
+    for constraint in semantic_constraints:
+        op_valid, op_reason = constraint(operator)
+        if not op_valid:
+            return NpuSupported(False, [(constraint.__doc__, op_reason)])
+
+    if operator.type not in TFLiteSupportedOperators.supported_operators:
+        reasons = (
+            [("CPU only operator", "")]
+            if operator.type not in VELA_INTERNAL_OPS
+            else []
+        )
+
+        return NpuSupported(False, reasons)
+
+    tflite_supported_operators = TFLiteSupportedOperators()
+    operation_constraints = itertools.chain(
+        tflite_supported_operators.generic_constraints,
+        tflite_supported_operators.specific_constraints[operator.type],
+    )
+    for constraint in operation_constraints:
+        op_valid, op_reason = constraint(operator)
+        if not op_valid:
+            return NpuSupported(False, [(constraint.__doc__, op_reason)])
+
+    return NpuSupported(True, [])
+
+
+def generate_supported_operators_report() -> None:
+    """Generate supported operators report in current working directory."""
+    with redirect_output(logger):
+        generate_supported_ops()
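
Illustrative usage (not part of the patch): a minimal sketch of how the new compat module could be driven once this change lands. The model path "model.tflite" and the "ethos-u55-128" target are placeholder values, not taken from this commit.

    from pathlib import Path

    from mlia.backend.vela.compat import supported_operators
    from mlia.backend.vela.compiler import VelaCompilerOptions

    # Placeholder inputs: any quantized TFLite model and supported accelerator.
    options = VelaCompilerOptions(accelerator_config="ethos-u55-128")
    operators = supported_operators(Path("model.tflite"), options)

    # Operators holds one Operator entry per (non Vela-internal) op in the model.
    print(f"NPU supported ratio: {operators.npu_supported_ratio:.2f}")
    for operator in operators.ops:
        if operator.cpu_only:
            print(f"{operator.name} ({operator.op_type}) runs on the CPU only")
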
diff --git a/src/mlia/backend/vela/compiler.py b/src/mlia/backend/vela/compiler.py
new file mode 100644
index 0000000..3d3847a
--- /dev/null
+++ b/src/mlia/backend/vela/compiler.py
@@ -0,0 +1,274 @@
+# SPDX-FileCopyrightText: Copyright 2022, Arm Limited and/or its affiliates.
+# SPDX-License-Identifier: Apache-2.0
+"""Vela compiler wrapper module."""
+from __future__ import annotations
+
+import logging
+import sys
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any
+from typing import Literal
+
+from ethosu.vela.architecture_features import ArchitectureFeatures
+from ethosu.vela.compiler_driver import compiler_driver
+from ethosu.vela.compiler_driver import CompilerOptions
+from ethosu.vela.compiler_driver import TensorAllocator
+from ethosu.vela.model_reader import ModelReaderOptions
+from ethosu.vela.model_reader import read_model
+from ethosu.vela.nn_graph import Graph
+from ethosu.vela.nn_graph import NetworkType
+from ethosu.vela.operation import CustomType
+from ethosu.vela.scheduler import OptimizationStrategy
+from ethosu.vela.scheduler import SchedulerOptions
+from ethosu.vela.tensor import BandwidthDirection
+from ethosu.vela.tensor import MemArea
+from ethosu.vela.tensor import Tensor
+from ethosu.vela.tflite_writer import write_tflite
+
+from mlia.utils.logging import redirect_output
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class Model:
+    """Model metadata."""
+
+    nng: Graph
+    network_type: NetworkType
+
+    @property
+    def optimized(self) -> bool:
+        """Return true if model is already optimized."""
+        return any(
+            op.attrs.get("custom_type") == CustomType.ExistingNpuOp
+            for sg in self.nng.subgraphs
+            for op in sg.get_all_ops()
+        )
+
+
+@dataclass
+class OptimizedModel:
+    """Instance of the Vela optimized model."""
+
+    nng: Graph
+    arch: ArchitectureFeatures
+    compiler_options: CompilerOptions
+    scheduler_options: SchedulerOptions
+
+    def save(self, output_filename: str | Path) -> None:
+        """Save instance of the optimized model to the file."""
+        write_tflite(self.nng, output_filename)
+
+
+AcceleratorConfigType = Literal[
+    "ethos-u55-32",
+    "ethos-u55-64",
+    "ethos-u55-128",
+    "ethos-u55-256",
+    "ethos-u65-256",
+    "ethos-u65-512",
+]
+
+TensorAllocatorType = Literal["LinearAlloc", "Greedy", "HillClimb"]
+
+OptimizationStrategyType = Literal["Performance", "Size"]
+
+
+@dataclass
+class VelaCompilerOptions:  # pylint: disable=too-many-instance-attributes
+    """Vela compiler options."""
+
+    config_files: str | list[str] | None = None
+    system_config: str = ArchitectureFeatures.DEFAULT_CONFIG
+    memory_mode: str = ArchitectureFeatures.DEFAULT_CONFIG
+    accelerator_config: AcceleratorConfigType | None = None
+    max_block_dependency: int = ArchitectureFeatures.MAX_BLOCKDEP
+    arena_cache_size: int | None = None
+    tensor_allocator: TensorAllocatorType = "HillClimb"
+    cpu_tensor_alignment: int = Tensor.AllocationQuantum
+    optimization_strategy: OptimizationStrategyType = "Performance"
+    output_dir: str | None = None
+    recursion_limit: int = 1000
+
+
+class VelaCompiler:  # pylint: disable=too-many-instance-attributes
+    """Vela compiler wrapper."""
+
+    def __init__(self, compiler_options: VelaCompilerOptions):
+        """Init Vela wrapper instance."""
+        self.config_files = compiler_options.config_files
+        self.system_config = compiler_options.system_config
+        self.memory_mode = compiler_options.memory_mode
+        self.accelerator_config = compiler_options.accelerator_config
+        self.max_block_dependency = compiler_options.max_block_dependency
+        self.arena_cache_size = compiler_options.arena_cache_size
+        self.tensor_allocator = TensorAllocator[compiler_options.tensor_allocator]
+        self.cpu_tensor_alignment = compiler_options.cpu_tensor_alignment
+        self.optimization_strategy = OptimizationStrategy[
+            compiler_options.optimization_strategy
+        ]
+        self.output_dir = compiler_options.output_dir
+        self.recursion_limit = compiler_options.recursion_limit
+
+        sys.setrecursionlimit(self.recursion_limit)
+
+    def read_model(self, model: str | Path) -> Model:
+        """Read model."""
+        logger.debug("Read model %s", model)
+
+        nng, network_type = self._read_model(model)
+        return Model(nng, network_type)
+
+    def compile_model(self, model: str | Path | Model) -> OptimizedModel:
+        """Compile the model."""
+        if isinstance(model, (str, Path)):
+            nng, network_type = self._read_model(model)
+        else:
+            nng, network_type = model.nng, NetworkType.TFLite
+
+        if not nng:
+            raise Exception("Unable to read model")
+
+        try:
+            arch = self._architecture_features()
+            compiler_options = self._compiler_options()
+            scheduler_options = self._scheduler_options()
+
+            with redirect_output(
+                logger, stdout_level=logging.DEBUG, stderr_level=logging.DEBUG
+            ):
+                compiler_driver(
+                    nng, arch, compiler_options, scheduler_options, network_type
+                )
+
+            return OptimizedModel(nng, arch, compiler_options, scheduler_options)
+        except (SystemExit, Exception) as err:
+            raise Exception("Model could not be optimized with Vela compiler") from err
+
+    def get_config(self) -> dict[str, Any]:
+        """Get compiler configuration."""
+        arch = self._architecture_features()
+
+        memory_area = {
+            mem.name: {
+                "clock_scales": arch.memory_clock_scales[mem],
+                "burst_length": arch.memory_burst_length[mem],
+                "read_latency": arch.memory_latency[mem][BandwidthDirection.Read],
+                "write_latency": arch.memory_latency[mem][BandwidthDirection.Write],
+            }
+            for mem in (
+                MemArea.Sram,
+                MemArea.Dram,
+                MemArea.OnChipFlash,
+                MemArea.OffChipFlash,
+            )
+        }
+
+        return {
+            "accelerator_config": arch.accelerator_config.value,
+            "system_config": arch.system_config,
+            "core_clock": arch.core_clock,
+            "axi0_port": arch.axi0_port.name,
+            "axi1_port": arch.axi1_port.name,
+            "memory_mode": arch.memory_mode,
+            "const_mem_area": arch.const_mem_area.name,
+            "arena_mem_area": arch.arena_mem_area.name,
+            "cache_mem_area": arch.cache_mem_area.name,
+            "arena_cache_size": arch.arena_cache_size,
+            "permanent_storage_mem_area": arch.permanent_storage_mem_area.name,
+            "feature_map_storage_mem_area": arch.feature_map_storage_mem_area.name,
+            "fast_storage_mem_area": arch.fast_storage_mem_area.name,
+            "memory_area": memory_area,
+        }
+
+    @staticmethod
+    def _read_model(model: str | Path) -> tuple[Graph, NetworkType]:
+        """Read TensorFlow Lite model."""
+        try:
+            model_path = str(model) if isinstance(model, Path) else model
+
+            with redirect_output(
+                logger, stdout_level=logging.DEBUG, stderr_level=logging.DEBUG
+            ):
+                return read_model(model_path, ModelReaderOptions())  # type: ignore
+        except (SystemExit, Exception) as err:
+            raise Exception(f"Unable to read model {model_path}") from err
+
+    def _architecture_features(self) -> ArchitectureFeatures:
+        """Return ArchitectureFeatures instance."""
+        return ArchitectureFeatures(
+            vela_config_files=self.config_files,
+            accelerator_config=self.accelerator_config,
+            system_config=self.system_config,
+            memory_mode=self.memory_mode,
+            max_blockdep=self.max_block_dependency,
+            verbose_config=False,
+            arena_cache_size=self.arena_cache_size,
+        )
+
+    def _scheduler_options(self) -> SchedulerOptions:
+        """Return SchedulerOptions instance."""
+        arch = self._architecture_features()
+
+        return SchedulerOptions(
+            optimization_strategy=self.optimization_strategy,
+            sram_target=arch.arena_cache_size,
+            verbose_schedule=False,
+        )
+
+    def _compiler_options(self) -> CompilerOptions:
+        """Return CompilerOptions instance."""
+        return CompilerOptions(
+            verbose_graph=False,
+            verbose_quantization=False,
+            verbose_packing=False,
+            verbose_tensor_purpose=False,
+            verbose_tensor_format=False,
+            verbose_allocation=False,
+            verbose_high_level_command_stream=False,
+            verbose_register_command_stream=False,
+            verbose_operators=False,
+            verbose_weights=False,
+            show_cpu_operations=False,
+            tensor_allocator=self.tensor_allocator,
+            timing=False,
+            output_dir=self.output_dir,
+            cpu_tensor_alignment=self.cpu_tensor_alignment,
+        )
+
+
+def resolve_compiler_config(
+    vela_compiler_options: VelaCompilerOptions,
+) -> dict[str, Any]:
+    """Resolve the passed compiler options.
+
+    Vela has a number of configuration parameters that are resolved
+    while the compiler options are processed. E.g. Vela reads
+    configuration parameters from vela.ini and fills its internal
+    structures with the resolved values (memory mode, system config,
+    etc.).
+
+    In order to get this information we need to create an instance
+    of the Vela compiler first.
+    """
+    vela_compiler = VelaCompiler(vela_compiler_options)
+    return vela_compiler.get_config()
+
+
+def optimize_model(
+    model_path: Path, compiler_options: VelaCompilerOptions, output_model_path: Path
+) -> None:
+    """Optimize the model and save it to the given output path."""
+    logger.debug(
+        "Optimize model %s for device %s",
+        model_path,
+        compiler_options.accelerator_config,
+    )
+
+    vela_compiler = VelaCompiler(compiler_options)
+    optimized_model = vela_compiler.compile_model(model_path)
+
+    logger.debug("Save optimized model into %s", output_model_path)
+    optimized_model.save(output_model_path)
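
Illustrative usage (not part of the patch): a sketch of the two module-level helpers added above. As before, the model paths and the "ethos-u55-128" target are placeholder values.

    from pathlib import Path

    from mlia.backend.vela.compiler import VelaCompilerOptions
    from mlia.backend.vela.compiler import optimize_model
    from mlia.backend.vela.compiler import resolve_compiler_config

    options = VelaCompilerOptions(accelerator_config="ethos-u55-128")

    # Inspect the resolved Vela configuration (system config, clocks, memory areas).
    config = resolve_compiler_config(options)
    print(config["system_config"], config["core_clock"])

    # Compile the model with Vela and write the optimized TFLite file.
    optimize_model(Path("model.tflite"), options, Path("model_vela.tflite"))
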
diff --git a/src/mlia/backend/vela/performance.py b/src/mlia/backend/vela/performance.py
new file mode 100644
index 0000000..ccd2f6f
--- /dev/null
+++ b/src/mlia/backend/vela/performance.py
@@ -0,0 +1,97 @@
+# SPDX-FileCopyrightText: Copyright 2022, Arm Limited and/or its affiliates.
+# SPDX-License-Identifier: Apache-2.0
+"""Vela performance module."""
+from __future__ import annotations
+
+import logging
+from dataclasses import dataclass
+from pathlib import Path
+
+import numpy as np
+from ethosu.vela.npu_performance import PassCycles
+from ethosu.vela.tensor import MemArea
+
+from mlia.backend.vela.compiler import OptimizedModel
+from mlia.backend.vela.compiler import VelaCompiler
+from mlia.backend.vela.compiler import VelaCompilerOptions
+
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class PerformanceMetrics:  # pylint: disable=too-many-instance-attributes
+    """Contains all the performance metrics Vela generates in a run."""
+
+    npu_cycles: int
+    sram_access_cycles: int
+    dram_access_cycles: int
+    on_chip_flash_access_cycles: int
+    off_chip_flash_access_cycles: int
+    total_cycles: int
+    batch_inference_time: float
+    inferences_per_second: float
+    batch_size: int
+    unknown_memory_area_size: int
+    sram_memory_area_size: int
+    dram_memory_area_size: int
+    on_chip_flash_memory_area_size: int
+    off_chip_flash_memory_area_size: int
+
+
+def estimate_performance(
+    model_path: Path, compiler_options: VelaCompilerOptions
+) -> PerformanceMetrics:
+    """Return performance estimations for the model/device.
+
+    Logic for this function comes from the Vela module stats_writer.py.
+    """
+    logger.debug(
+        "Estimate performance for the model %s on %s",
+        model_path,
+        compiler_options.accelerator_config,
+    )
+
+    vela_compiler = VelaCompiler(compiler_options)
+
+    initial_model = vela_compiler.read_model(model_path)
+    if initial_model.optimized:
+        raise Exception("Unable to estimate performance for the given optimized model")
+
+    optimized_model = vela_compiler.compile_model(initial_model)
+
+    return _performance_metrics(optimized_model)
+
+
+def _performance_metrics(optimized_model: OptimizedModel) -> PerformanceMetrics:
+    """Return performance metrics for optimized model."""
+    cycles = optimized_model.nng.cycles
+
+    def memory_usage(mem_area: MemArea) -> int:
+        """Get memory usage for the provided memory area type."""
+        memory_used: dict[MemArea, int] = optimized_model.nng.memory_used
+        bandwidths = optimized_model.nng.bandwidths
+
+        return memory_used.get(mem_area, 0) if np.sum(bandwidths[mem_area]) > 0 else 0
+
+    midpoint_fps = np.nan
+    midpoint_inference_time = cycles[PassCycles.Total] / optimized_model.arch.core_clock
+    if midpoint_inference_time > 0:
+        midpoint_fps = 1 / midpoint_inference_time
+
+    return PerformanceMetrics(
+        npu_cycles=int(cycles[PassCycles.Npu]),
+        sram_access_cycles=int(cycles[PassCycles.SramAccess]),
+        dram_access_cycles=int(cycles[PassCycles.DramAccess]),
+        on_chip_flash_access_cycles=int(cycles[PassCycles.OnChipFlashAccess]),
+        off_chip_flash_access_cycles=int(cycles[PassCycles.OffChipFlashAccess]),
+        total_cycles=int(cycles[PassCycles.Total]),
+        batch_inference_time=midpoint_inference_time * 1000,
+        inferences_per_second=midpoint_fps,
+        batch_size=optimized_model.nng.batch_size,
+        unknown_memory_area_size=memory_usage(MemArea.Unknown),
+        sram_memory_area_size=memory_usage(MemArea.Sram),
+        dram_memory_area_size=memory_usage(MemArea.Dram),
+        on_chip_flash_memory_area_size=memory_usage(MemArea.OnChipFlash),
+        off_chip_flash_memory_area_size=memory_usage(MemArea.OffChipFlash),
+    )
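
Illustrative usage (not part of the patch): a sketch of the performance entry point, again with a placeholder model path and target. estimate_performance() compiles the model with Vela and fills in the PerformanceMetrics dataclass defined above.

    from pathlib import Path

    from mlia.backend.vela.compiler import VelaCompilerOptions
    from mlia.backend.vela.performance import estimate_performance

    # Placeholder inputs: an unoptimized, quantized TFLite model and a target config.
    options = VelaCompilerOptions(accelerator_config="ethos-u55-128")
    metrics = estimate_performance(Path("model.tflite"), options)

    print(f"NPU cycles:        {metrics.npu_cycles}")
    print(f"Total cycles:      {metrics.total_cycles}")
    print(f"Inferences/second: {metrics.inferences_per_second:.2f}")
    print(f"SRAM used (bytes): {metrics.sram_memory_area_size}")
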