From d08513a72e7fbf0626c3d69b9c4cc7056b3da4ae Mon Sep 17 00:00:00 2001
From: Nathan Bailey <nathan.bailey@arm.com>
Date: Tue, 16 Jan 2024 16:39:06 +0000
Subject: feat: Integrate Vela's per-layer performance estimates

Resolves: MLIA-1055, MLIA-1056, MLIA-1057

Signed-off-by: Nathan Bailey <nathan.bailey@arm.com>
Change-Id: Id573cec94e4a69117051dcd5175f383c0955d890
---
 tests/conftest.py                            |  31 +++-
 tests/test_backend_vela_compiler.py          |  27 +++-
 tests/test_backend_vela_performance.py       | 122 +++++++++++++-
 tests/test_cli_commands.py                   |   4 +-
 tests/test_target_ethos_u_config.py          |   3 +-
 tests/test_target_ethos_u_data_analysis.py   |   6 +-
 tests/test_target_ethos_u_data_collection.py |   4 +-
 tests/test_target_ethos_u_reporters.py       | 230 ++++++++++++++++++++++++++-
 8 files changed, 416 insertions(+), 11 deletions(-)

(limited to 'tests')

diff --git a/tests/conftest.py b/tests/conftest.py
index 345eb8d..9dc1d16 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright 2022-2023, Arm Limited and/or its affiliates.
+# SPDX-FileCopyrightText: Copyright 2022-2024, Arm Limited and/or its affiliates.
 # SPDX-License-Identifier: Apache-2.0
 """Pytest conf module."""
 import shutil
@@ -50,6 +50,32 @@ def invalid_input_model_file(test_tflite_invalid_model: Path) -> Path:
     return test_tflite_invalid_model
 
 
+@pytest.fixture(scope="session", name="empty_test_csv_file")
+def fixture_empty_test_csv_file(  # pylint: disable=too-many-locals
+    test_csv_path: Path,
+) -> Path:
+    """Return the path for an empty test CSV file (the file is not created here)."""
+    return test_csv_path / "empty_test_csv_file.csv"
+
+
+@pytest.fixture(scope="session", name="test_csv_file")
+def fixture_test_csv_file(  # pylint: disable=too-many-locals
+    test_csv_path: Path,
+) -> Path:
+    """Return the path of the scratch CSV file that tests write their input to."""
+    return test_csv_path / "test_csv_file.csv"
+
+
+@pytest.fixture(scope="session", name="test_csv_path")
+def fixture_test_csv_path(  # pylint: disable=too-many-locals
+    tmp_path_factory: pytest.TempPathFactory,
+) -> Generator[Path, None, None]:
+    """Yield a temporary directory for test csv files and remove it afterwards."""
+    tmp_path = tmp_path_factory.mktemp("csv_files")
+    yield tmp_path
+    shutil.rmtree(tmp_path)
+
+
 def get_test_keras_model() -> tf.keras.Model:
     """Return test Keras model."""
     model = tf.keras.Sequential(
@@ -87,6 +113,9 @@ def fixture_test_models_path(
     """Provide path to the test models."""
     tmp_path = tmp_path_factory.mktemp("models")
 
+    # Need an output directory for verbose performance
+    Path("output").mkdir(exist_ok=True)
+
     # Keras Model
     keras_model = get_test_keras_model()
     save_keras_model(keras_model, tmp_path / TEST_MODEL_KERAS_FILE)
diff --git a/tests/test_backend_vela_compiler.py b/tests/test_backend_vela_compiler.py
index 9f09efb..5554efb 100644
--- a/tests/test_backend_vela_compiler.py
+++ b/tests/test_backend_vela_compiler.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright 2022-2023, Arm Limited and/or its affiliates.
+# SPDX-FileCopyrightText: Copyright 2022-2024, Arm Limited and/or its affiliates.
 # SPDX-License-Identifier: Apache-2.0
 """Tests for module vela/compiler."""
 from pathlib import Path
@@ -29,7 +29,7 @@ def test_default_vela_compiler() -> None:
     assert default_compiler.tensor_allocator == TensorAllocator.HillClimb
     assert default_compiler.cpu_tensor_alignment == 16
     assert default_compiler.optimization_strategy == OptimizationStrategy.Performance
-    assert default_compiler.output_dir == "output"
+    assert default_compiler.output_dir == Path("output")
 
     assert default_compiler.get_config() == {
         "accelerator_config": "ethos-u55-256",
@@ -88,7 +88,7 @@ def test_vela_compiler_with_parameters(test_resources_path: Path) -> None:
         tensor_allocator="Greedy",
         cpu_tensor_alignment=4,
         optimization_strategy="Size",
-        output_dir="custom_output",
+        output_dir=Path("custom_output"),
     )
     compiler = VelaCompiler(compiler_options)
 
@@ -101,7 +101,7 @@ def test_vela_compiler_with_parameters(test_resources_path: Path) -> None:
     assert compiler.tensor_allocator == TensorAllocator.Greedy
     assert compiler.cpu_tensor_alignment == 4
     assert compiler.optimization_strategy == OptimizationStrategy.Size
-    assert compiler.output_dir == "custom_output"
+    assert compiler.output_dir == Path("custom_output")
 
     assert compiler.get_config() == {
         "accelerator_config": "ethos-u65-256",
@@ -156,6 +156,25 @@ def test_compile_model(test_tflite_model: Path) -> None:
     assert isinstance(optimized_model, OptimizedModel)
 
 
+def test_csv_file_created(test_tflite_model: Path) -> None:
+    """Test that compiling a model writes the per-layer csv file to output_dir."""
+    compiler = VelaCompiler(
+        EthosUConfiguration.load_profile("ethos-u55-256").compiler_options
+    )
+    csv_file_name = test_tflite_model.stem + "_per-layer.csv"
+    compiler.compile_model(test_tflite_model)
+    assert (compiler.output_dir / csv_file_name).is_file()
+
+
+# Flag under test: verbose_performance (presumably enables per-layer output).
+def test_verbose_flag_passed() -> None:
+    """Test that verbose_performance is enabled in the Vela compiler options."""
+    compiler = VelaCompiler(
+        EthosUConfiguration.load_profile("ethos-u55-256").compiler_options
+    )
+    assert compiler.return_compiler_options().verbose_performance
+
+
 def test_compile_model_fail_sram_exceeded(
     test_tflite_model: Path, monkeypatch: pytest.MonkeyPatch
 ) -> None:
diff --git a/tests/test_backend_vela_performance.py b/tests/test_backend_vela_performance.py
index df2ce08..5800630 100644
--- a/tests/test_backend_vela_performance.py
+++ b/tests/test_backend_vela_performance.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright 2022-2023, Arm Limited and/or its affiliates.
+# SPDX-FileCopyrightText: Copyright 2022-2024, Arm Limited and/or its affiliates.
 # SPDX-License-Identifier: Apache-2.0
 """Tests for module vela/performance."""
 from pathlib import Path
@@ -8,6 +8,9 @@ import pytest
 
 from mlia.backend.vela.compiler import optimize_model
 from mlia.backend.vela.performance import estimate_performance
+from mlia.backend.vela.performance import layer_metrics
+from mlia.backend.vela.performance import LayerwisePerfInfo
+from mlia.backend.vela.performance import parse_layerwise_perf_csv
 from mlia.backend.vela.performance import PerformanceMetrics
 from mlia.target.ethos_u.config import EthosUConfiguration
 
@@ -22,6 +25,123 @@ def test_estimate_performance(test_tflite_model: Path) -> None:
     assert isinstance(perf_metrics, PerformanceMetrics)
 
 
+def test_estimate_performance_csv_parser_called(
+    monkeypatch: pytest.MonkeyPatch, test_tflite_model: Path
+) -> None:
+    """Test that estimate_performance calls parse_layerwise_perf_csv as expected."""
+    target_config = EthosUConfiguration.load_profile("ethos-u55-256")
+    csv_file_name = target_config.compiler_options.output_dir / (
+        test_tflite_model.stem + "_per-layer.csv"
+    )
+    mock = MagicMock()
+    monkeypatch.setattr("mlia.backend.vela.performance.parse_layerwise_perf_csv", mock)
+    estimate_performance(test_tflite_model, target_config.compiler_options)
+    mock.assert_called_with(vela_csv_file=csv_file_name, metrics=layer_metrics)
+
+
+LAYERWISE_TMP_DATA_STR = """
+TFLite_operator,NNG Operator,SRAM Usage,Peak%,Op Cycles,Network%,NPU,SRAM AC,DRAM AC,OnFlash AC,OffFlash AC,MAC Count,Network%,Util%,Name
+CONV_2D,Conv2DBias,11936,54.65201465201465,7312.0,17.648194632168373,7312.0,2000.0,0.0,0.0,0.0,73008,8.653353814644136,3.9002666849015313,sequential/conv1/Relu;sequential/conv1/Conv2D
+MAX_POOL_2D,MaxPool,10944,50.10989010989011,2992.0,7.22147132651091,1330.0,2992.0,0.0,0.0,0.0,6912,0.819252432155658,0.9024064171122994,sequential/max_pooling2d/MaxPool
+""".strip()
+
+LAYERWISE_TMP_DATA_MISSING_HEADER_STR = """
+TFLite_operator,NNG Operator,Peak%,Op Cycles,Network%,NPU,SRAM AC,DRAM AC,OnFlash AC,OffFlash AC,MAC Count,Network%,Util%,Name
+CONV_2D,Conv2DBias,54.65201465201465,7312.0,17.648194632168373,7312.0,2000.0,0.0,0.0,0.0,73008,8.653353814644136,3.9002666849015313,sequential/conv1/Relu;sequential/conv1/Conv2D
+MAX_POOL_2D,MaxPool,50.10989010989011,2992.0,7.22147132651091,1330.0,2992.0,0.0,0.0,0.0,6912,0.819252432155658,0.9024064171122994,sequential/max_pooling2d/MaxPool
+""".strip()
+
+LAYERWISE_MULTI_HEADER_TMP_DATA_STR = """
+TFLite_operator,NNG Operator,SRAM Usage,Peak%,Op Cycles,Network%,NPU,SRAM AC,DRAM AC,OnFlash AC,OffFlash AC,MAC Count,Network%,Util%,Name
+CONV_2D,Conv2DBias,11936,54.65201465201465,7312.0,17.648194632168373,7312.0,2000.0,0.0,0.0,0.0,73008,8.653353814644136,3.9002666849015313,sequential/conv1/Relu;sequential/conv1/Conv2D
+TFLite_operator,NNG Operator,SRAM Usage,Peak%,Op Cycles,Network%,NPU,SRAM AC,DRAM AC,OnFlash AC,OffFlash AC,MAC Count,Network%,Util%,Name
+MAX_POOL_2D,MaxPool,10944,50.10989010989011,2992.0,7.22147132651091,1330.0,2992.0,0.0,0.0,0.0,6912,0.819252432155658,0.9024064171122994,sequential/max_pooling2d/MaxPool
+""".strip()
+
+
+TMP_DATA_EXPECTED_STRING = "\
+Name: sequential/conv1/Relu;sequential/conv1/Conv2D, \
+TFLite_operator: CONV_2D, \
+SRAM Usage: 11936, \
+Op Cycles: 7312, \
+NPU: 7312, \
+SRAM AC: 2000, \
+DRAM AC: 0, \
+OnFlash AC: 0, \
+OffFlash AC: 0, \
+MAC Count: 73008, \
+Util%: 3.9002666849015313, \
+\
+Name: sequential/max_pooling2d/MaxPool, \
+TFLite_operator: MAX_POOL_2D, \
+SRAM Usage: 10944, \
+Op Cycles: 2992, \
+NPU: 1330, \
+SRAM AC: 2992, \
+DRAM AC: 0, \
+OnFlash AC: 0, \
+OffFlash AC: 0, \
+MAC Count: 6912, \
+Util%: 0.9024064171122994, \
+"
+
+
+@pytest.mark.parametrize(
+    "input_csv_content, expected_output",
+    [
+        (LAYERWISE_TMP_DATA_STR, TMP_DATA_EXPECTED_STRING),
+        (
+            LAYERWISE_MULTI_HEADER_TMP_DATA_STR,
+            TMP_DATA_EXPECTED_STRING,
+        ),
+    ],
+)
+def test_estimate_performance_parse_layerwise_csv_file(
+    test_csv_file: Path, input_csv_content: str, expected_output: str
+) -> None:
+    """Test that a parsed csv file yields a LayerwisePerfInfo with expected repr."""
+    with open(test_csv_file, "w", encoding="utf8") as csv_file:
+        csv_file.write(input_csv_content)
+    layerwise_object = parse_layerwise_perf_csv(test_csv_file, layer_metrics)
+    strings_to_check_layerwise_object = repr(layerwise_object)
+    assert isinstance(layerwise_object, LayerwisePerfInfo)
+    assert expected_output == strings_to_check_layerwise_object
+
+
+def test_estimate_performance_parse_layerwise_csv_file_with_missing_headers(
+    test_csv_file: Path,
+) -> None:
+    """Test parsing a csv file that lacks an expected header.
+    A KeyError reporting the missing expected headers must be raised.
+    """
+    with open(test_csv_file, "w", encoding="utf8") as csv_file:
+        csv_file.write(LAYERWISE_TMP_DATA_MISSING_HEADER_STR)
+    with pytest.raises(KeyError, match="Generated CSV missing expected headers"):
+        parse_layerwise_perf_csv(test_csv_file, layer_metrics)
+
+
+def test_estimate_performance_parse_layerwise_csv_file_missing_file() -> None:
+    """Test parsing a csv file path that does not exist.
+    A FileNotFoundError naming the missing path must be raised.
+    """
+    with pytest.raises(
+        FileNotFoundError, match="CSV File not found at missing_file.csv"
+    ):
+        parse_layerwise_perf_csv(Path("missing_file.csv"), layer_metrics)
+
+
+def test_estimate_performance_parse_layerwise_empty_csv_file(
+    empty_test_csv_file: Path,
+) -> None:
+    """Test parsing a csv file that has no content.
+    The result must be a LayerwisePerfInfo whose layerwise_info is empty.
+    """
+    empty_test_csv_file.touch()
+    layerwise_object = parse_layerwise_perf_csv(empty_test_csv_file, layer_metrics)
+    assert isinstance(layerwise_object, LayerwisePerfInfo)
+    assert len(layerwise_object.layerwise_info) == 0
+
+
 def test_estimate_performance_already_optimized(
     tmp_path: Path, test_tflite_model: Path
 ) -> None:
diff --git a/tests/test_cli_commands.py b/tests/test_cli_commands.py
index 6b1f19d..480e642 100644
--- a/tests/test_cli_commands.py
+++ b/tests/test_cli_commands.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright 2022-2023, Arm Limited and/or its affiliates.
+# SPDX-FileCopyrightText: Copyright 2022-2024, Arm Limited and/or its affiliates.
 # SPDX-License-Identifier: Apache-2.0
 """Tests for cli.commands module."""
 from __future__ import annotations
@@ -13,6 +13,7 @@ from unittest.mock import MagicMock
 import pytest
 
 from mlia.backend.manager import DefaultInstallationManager
+from mlia.backend.vela.performance import LayerwisePerfInfo
 from mlia.cli.commands import backend_install
 from mlia.cli.commands import backend_list
 from mlia.cli.commands import backend_uninstall
@@ -207,6 +208,7 @@ def mock_performance_estimation(monkeypatch: pytest.MonkeyPatch) -> None:
         EthosUConfiguration.load_profile("ethos-u55-256"),
         NPUCycles(1, 2, 3, 4, 5, 6),
         MemoryUsage(1, 2, 3, 4, 5),
+        LayerwisePerfInfo(layerwise_info=[]),
     )
     monkeypatch.setattr(
         "mlia.target.ethos_u.data_collection.EthosUPerformanceEstimator.estimate",
diff --git a/tests/test_target_ethos_u_config.py b/tests/test_target_ethos_u_config.py
index 49e7a40..1025827 100644
--- a/tests/test_target_ethos_u_config.py
+++ b/tests/test_target_ethos_u_config.py
@@ -4,6 +4,7 @@
 from __future__ import annotations
 
 from contextlib import ExitStack as does_not_raise
+from pathlib import Path
 from typing import Any
 
 import pytest
@@ -25,7 +26,7 @@ def test_compiler_options_default_init() -> None:
     assert opts.tensor_allocator == "HillClimb"
     assert opts.cpu_tensor_alignment == 16
     assert opts.optimization_strategy == "Performance"
-    assert opts.output_dir == "output"
+    assert opts.output_dir == Path("output")
 
 
 def test_ethosu_target() -> None:
diff --git a/tests/test_target_ethos_u_data_analysis.py b/tests/test_target_ethos_u_data_analysis.py
index 713e8ef..3cddf10 100644
--- a/tests/test_target_ethos_u_data_analysis.py
+++ b/tests/test_target_ethos_u_data_analysis.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright 2022-2023, Arm Limited and/or its affiliates.
+# SPDX-FileCopyrightText: Copyright 2022-2024, Arm Limited and/or its affiliates.
 # SPDX-License-Identifier: Apache-2.0
 """Tests for Ethos-U data analysis module."""
 from __future__ import annotations
@@ -10,6 +10,7 @@ import pytest
 from mlia.backend.vela.compat import NpuSupported
 from mlia.backend.vela.compat import Operator
 from mlia.backend.vela.compat import Operators
+from mlia.backend.vela.performance import LayerwisePerfInfo
 from mlia.core.common import DataItem
 from mlia.core.data_analysis import Fact
 from mlia.nn.select import OptimizationSettings
@@ -98,6 +99,7 @@ def test_perf_metrics_diff() -> None:
                     NPUCycles(1, 2, 3, 4, 5, 6),
                     # memory metrics are in kilobytes
                     MemoryUsage(*[i * 1024 for i in range(1, 6)]),  # type: ignore
+                    LayerwisePerfInfo(layerwise_info=[]),
                 ),
                 [
                     [
@@ -111,6 +113,7 @@ def test_perf_metrics_diff() -> None:
                             MemoryUsage(
                                 *[i * 1024 for i in range(1, 6)]  # type: ignore
                             ),
+                            LayerwisePerfInfo(layerwise_info=[]),
                         ),
                     ],
                 ],
@@ -141,6 +144,7 @@ def test_perf_metrics_diff() -> None:
                     NPUCycles(1, 2, 3, 4, 5, 6),
                     # memory metrics are in kilobytes
                     MemoryUsage(*[i * 1024 for i in range(1, 6)]),  # type: ignore
+                    LayerwisePerfInfo(layerwise_info=[]),
                 ),
                 [],
             ),
diff --git a/tests/test_target_ethos_u_data_collection.py b/tests/test_target_ethos_u_data_collection.py
index be93c26..3868b95 100644
--- a/tests/test_target_ethos_u_data_collection.py
+++ b/tests/test_target_ethos_u_data_collection.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright 2022-2023, Arm Limited and/or its affiliates.
+# SPDX-FileCopyrightText: Copyright 2022-2024, Arm Limited and/or its affiliates.
 # SPDX-License-Identifier: Apache-2.0
 """Tests for the data collection module for Ethos-U."""
 from pathlib import Path
@@ -7,6 +7,7 @@ from unittest.mock import MagicMock
 import pytest
 
 from mlia.backend.vela.compat import Operators
+from mlia.backend.vela.performance import LayerwisePerfInfo
 from mlia.core.context import Context
 from mlia.core.context import ExecutionContext
 from mlia.core.data_collection import DataCollector
@@ -162,6 +163,7 @@ def mock_performance_estimation(
         target,
         NPUCycles(1, 2, 3, 4, 5, 6),
         MemoryUsage(1, 2, 3, 4, 5),
+        LayerwisePerfInfo(layerwise_info=[]),
     )
     monkeypatch.setattr(
         "mlia.target.ethos_u.data_collection.EthosUPerformanceEstimator.estimate",
diff --git a/tests/test_target_ethos_u_reporters.py b/tests/test_target_ethos_u_reporters.py
index debeeb2..6dff6e1 100644
--- a/tests/test_target_ethos_u_reporters.py
+++ b/tests/test_target_ethos_u_reporters.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright 2022-2023, Arm Limited and/or its affiliates.
+# SPDX-FileCopyrightText: Copyright 2022-2024, Arm Limited and/or its affiliates.
 # SPDX-License-Identifier: Apache-2.0
 """Tests for reports module."""
 from __future__ import annotations
@@ -10,18 +10,246 @@ import pytest
 
 from mlia.backend.vela.compat import NpuSupported
 from mlia.backend.vela.compat import Operator
+from mlia.backend.vela.performance import LayerPerfInfo
+from mlia.backend.vela.performance import LayerwisePerfInfo
+from mlia.core.reporting import CompoundReport
 from mlia.core.reporting import Report
 from mlia.core.reporting import Table
 from mlia.nn.tensorflow.tflite_compat import TFLiteCompatibilityInfo
 from mlia.nn.tensorflow.tflite_compat import TFLiteCompatibilityStatus
 from mlia.target.ethos_u.config import EthosUConfiguration
+from mlia.target.ethos_u.performance import MemorySizeType
+from mlia.target.ethos_u.performance import MemoryUsage
+from mlia.target.ethos_u.performance import PerformanceMetrics
 from mlia.target.ethos_u.reporters import ethos_u_formatters
 from mlia.target.ethos_u.reporters import report_operators
+from mlia.target.ethos_u.reporters import report_perf_metrics
 from mlia.target.ethos_u.reporters import report_target_details
 from mlia.target.registry import profile
 from mlia.utils.console import remove_ascii_codes
 
 
+# pylint: disable=line-too-long
+@pytest.mark.parametrize(
+    "perf_metrics, expected_plain_text, expected_json_dict",
+    [
+        (
+            [
+                PerformanceMetrics(
+                    target_config=EthosUConfiguration.load_profile("ethos-u55-256"),
+                    npu_cycles=None,
+                    memory_usage=MemoryUsage(
+                        sram_memory_area_size=10,
+                        dram_memory_area_size=0,
+                        unknown_memory_area_size=0,
+                        on_chip_flash_memory_area_size=0,
+                        off_chip_flash_memory_area_size=20,
+                        memory_size_type=MemorySizeType.KILOBYTES,
+                    ),
+                    layerwise_perf_info=LayerwisePerfInfo(
+                        layerwise_info=[
+                            LayerPerfInfo(
+                                name="Test Layer",
+                                tflite_operator="test_operator",
+                                sram_usage=0,
+                                op_cycles=0.0,
+                                npu_cycles=0.0,
+                                sram_access_cycles=0.0,
+                                dram_access_cycles=0.0,
+                                on_chip_flash_access_cycles=0.0,
+                                off_chip_flash_access_cycles=0.0,
+                                mac_count=0,
+                                util_mac_percentage=0.0,
+                            ),
+                            LayerPerfInfo(
+                                name="Test Layer 1",
+                                tflite_operator="test_operator",
+                                sram_usage=0,
+                                op_cycles=0.0,
+                                npu_cycles=0.0,
+                                sram_access_cycles=0.0,
+                                dram_access_cycles=0.0,
+                                on_chip_flash_access_cycles=0.0,
+                                off_chip_flash_access_cycles=0.0,
+                                mac_count=0,
+                                util_mac_percentage=0.0,
+                            ),
+                        ]
+                    ),
+                )
+            ],
+            """
+Performance metrics:
+┌─────────────────────┬──────────────┬──────┐
+│ Metric              │ Value        │ Unit │
+╞═════════════════════╪══════════════╪══════╡
+│ SRAM used           │        10.00 │ KiB  │
+├─────────────────────┼──────────────┼──────┤
+│ Off-chip flash used │        20.00 │ KiB  │
+└─────────────────────┴──────────────┴──────┘
+IMPORTANT: The performance figures above refer to NPU only
+Layer-Wise Metrics:
+┌──────────────┬─────────────────┬──────────────┬──────────────┬──────────────┬──────────────┬──────────────┬──────────────┬──────────────┬──────────────┬──────────────┐
+│ Layer Name   │ TFLite Operator │ SRAM Usage   │ OP Cycles    │ NPU Cycles   │ SRAM AC      │ DRAM AC      │ OnFlash AC   │ OffFlash AC  │ MAC Count    │ MAC Util (%) │
+╞══════════════╪═════════════════╪══════════════╪══════════════╪══════════════╪══════════════╪══════════════╪══════════════╪══════════════╪══════════════╪══════════════╡
+│ Test Layer   │ test_operator   │            0 │         0.00 │         0.00 │         0.00 │         0.00 │         0.00 │         0.00 │            0 │         0.00 │
+├──────────────┼─────────────────┼──────────────┼──────────────┼──────────────┼──────────────┼──────────────┼──────────────┼──────────────┼──────────────┼──────────────┤
+│ Test Layer 1 │ test_operator   │            0 │         0.00 │         0.00 │         0.00 │         0.00 │         0.00 │         0.00 │            0 │         0.00 │
+└──────────────┴─────────────────┴──────────────┴──────────────┴──────────────┴──────────────┴──────────────┴──────────────┴──────────────┴──────────────┴──────────────┘
+""".strip(),
+            {
+                "performance_metrics": [
+                    {"metric": "SRAM used", "value": 10, "unit": "KiB"},
+                    {"metric": "Off-chip flash used", "value": 20, "unit": "KiB"},
+                ],
+                "layerwise_metrics": [
+                    {
+                        "name": "Test Layer",
+                        "tflite_operator": "test_operator",
+                        "sram_usage": 0,
+                        "op_cycles": 0.0,
+                        "npu_cycles": 0.0,
+                        "sram_access_cycles": 0.0,
+                        "dram_access_cycles": 0.0,
+                        "on_chip_flash_access_cycles": 0.0,
+                        "off_chip_flash_access_cycles": 0.0,
+                        "mac_count": 0,
+                        "util_mac_percentage": 0.0,
+                    },
+                    {
+                        "name": "Test Layer 1",
+                        "tflite_operator": "test_operator",
+                        "sram_usage": 0,
+                        "op_cycles": 0.0,
+                        "npu_cycles": 0.0,
+                        "sram_access_cycles": 0.0,
+                        "dram_access_cycles": 0.0,
+                        "on_chip_flash_access_cycles": 0.0,
+                        "off_chip_flash_access_cycles": 0.0,
+                        "mac_count": 0,
+                        "util_mac_percentage": 0.0,
+                    },
+                ],
+            },
+        ),
+        (
+            [
+                PerformanceMetrics(
+                    target_config=EthosUConfiguration.load_profile("ethos-u55-256"),
+                    npu_cycles=None,
+                    memory_usage=MemoryUsage(
+                        sram_memory_area_size=10,
+                        dram_memory_area_size=0,
+                        unknown_memory_area_size=0,
+                        on_chip_flash_memory_area_size=0,
+                        off_chip_flash_memory_area_size=20,
+                        memory_size_type=MemorySizeType.KILOBYTES,
+                    ),
+                    layerwise_perf_info=LayerwisePerfInfo(
+                        layerwise_info=[
+                            LayerPerfInfo(
+                                name="Test Layer",
+                                tflite_operator="test_operator",
+                                sram_usage=0,
+                                op_cycles=0.0,
+                                npu_cycles=0.0,
+                                sram_access_cycles=0.0,
+                                dram_access_cycles=0.0,
+                                on_chip_flash_access_cycles=0.0,
+                                off_chip_flash_access_cycles=0.0,
+                                mac_count=0,
+                                util_mac_percentage=0.0,
+                            ),
+                            LayerPerfInfo(
+                                name="Test Layer",
+                                tflite_operator="test_operator",
+                                sram_usage=0,
+                                op_cycles=0.0,
+                                npu_cycles=0.0,
+                                sram_access_cycles=0.0,
+                                dram_access_cycles=0.0,
+                                on_chip_flash_access_cycles=0.0,
+                                off_chip_flash_access_cycles=0.0,
+                                mac_count=0,
+                                util_mac_percentage=0.0,
+                            ),
+                        ]
+                    ),
+                )
+            ],
+            """
+Performance metrics:
+┌─────────────────────┬──────────────┬──────┐
+│ Metric              │ Value        │ Unit │
+╞═════════════════════╪══════════════╪══════╡
+│ SRAM used           │        10.00 │ KiB  │
+├─────────────────────┼──────────────┼──────┤
+│ Off-chip flash used │        20.00 │ KiB  │
+└─────────────────────┴──────────────┴──────┘
+IMPORTANT: The performance figures above refer to NPU only
+Layer-Wise Metrics:
+┌────────────────┬─────────────────┬──────────────┬──────────────┬──────────────┬──────────────┬──────────────┬──────────────┬──────────────┬──────────────┬──────────────┐
+│ Layer Name     │ TFLite Operator │ SRAM Usage   │ OP Cycles    │ NPU Cycles   │ SRAM AC      │ DRAM AC      │ OnFlash AC   │ OffFlash AC  │ MAC Count    │ MAC Util (%) │
+╞════════════════╪═════════════════╪══════════════╪══════════════╪══════════════╪══════════════╪══════════════╪══════════════╪══════════════╪══════════════╪══════════════╡
+│ Test Layer     │ test_operator   │            0 │         0.00 │         0.00 │         0.00 │         0.00 │         0.00 │         0.00 │            0 │         0.00 │
+├────────────────┼─────────────────┼──────────────┼──────────────┼──────────────┼──────────────┼──────────────┼──────────────┼──────────────┼──────────────┼──────────────┤
+│ Test Layer (1) │ test_operator   │            0 │         0.00 │         0.00 │         0.00 │         0.00 │         0.00 │         0.00 │            0 │         0.00 │
+└────────────────┴─────────────────┴──────────────┴──────────────┴──────────────┴──────────────┴──────────────┴──────────────┴──────────────┴──────────────┴──────────────┘
+""".strip(),
+            {
+                "performance_metrics": [
+                    {"metric": "SRAM used", "value": 10, "unit": "KiB"},
+                    {"metric": "Off-chip flash used", "value": 20, "unit": "KiB"},
+                ],
+                "layerwise_metrics": [
+                    {
+                        "name": "Test Layer",
+                        "tflite_operator": "test_operator",
+                        "sram_usage": 0,
+                        "op_cycles": 0.0,
+                        "npu_cycles": 0.0,
+                        "sram_access_cycles": 0.0,
+                        "dram_access_cycles": 0.0,
+                        "on_chip_flash_access_cycles": 0.0,
+                        "off_chip_flash_access_cycles": 0.0,
+                        "mac_count": 0,
+                        "util_mac_percentage": 0.0,
+                    },
+                    {
+                        "name": "Test Layer (1)",
+                        "tflite_operator": "test_operator",
+                        "sram_usage": 0,
+                        "op_cycles": 0.0,
+                        "npu_cycles": 0.0,
+                        "sram_access_cycles": 0.0,
+                        "dram_access_cycles": 0.0,
+                        "on_chip_flash_access_cycles": 0.0,
+                        "off_chip_flash_access_cycles": 0.0,
+                        "mac_count": 0,
+                        "util_mac_percentage": 0.0,
+                    },
+                ],
+            },
+        ),
+    ],
+)
+# pylint: enable=line-too-long
+def test_report_perf_metrics(
+    perf_metrics: PerformanceMetrics,
+    expected_plain_text: str,
+    expected_json_dict: dict,
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """Check report_perf_metrics plain-text and JSON output for layer-wise data."""
+    monkeypatch.setenv("COLUMNS", "5000")  # presumably avoids table wrapping
+    report = report_perf_metrics(perf_metrics)
+    assert isinstance(report, CompoundReport)
+    plain_text = remove_ascii_codes(report.to_plain_text())
+    assert plain_text == expected_plain_text
+    json_dict = report.to_json()
+    assert json_dict == expected_json_dict
+
+
 @pytest.mark.parametrize(
     "ops, expected_plain_text, expected_json_dict",
     [
-- 
cgit v1.2.1