# SPDX-FileCopyrightText: Copyright 2022, Arm Limited and/or its affiliates. # SPDX-License-Identifier: Apache-2.0 """Tests for the output parsing.""" import base64 import json from typing import Any from typing import Dict import pytest from aiet.backend.output_parser import Base64OutputParser from aiet.backend.output_parser import OutputParser from aiet.backend.output_parser import RegexOutputParser OUTPUT_MATCH_ALL = bytearray( """ String1: My awesome string! String2: STRINGS_ARE_GREAT!!! Int: 12 Float: 3.14 """, encoding="utf-8", ) OUTPUT_NO_MATCH = bytearray( """ This contains no matches... Test1234567890!"£$%^&*()_+@~{}[]/.,<>?| """, encoding="utf-8", ) OUTPUT_PARTIAL_MATCH = bytearray( "String1: My awesome string!", encoding="utf-8", ) REGEX_CONFIG = { "FirstString": {"pattern": r"String1.*: (.*)", "type": "str"}, "SecondString": {"pattern": r"String2.*: (.*)!!!", "type": "str"}, "IntegerValue": {"pattern": r"Int.*: (.*)", "type": "int"}, "FloatValue": {"pattern": r"Float.*: (.*)", "type": "float"}, } EMPTY_REGEX_CONFIG: Dict[str, Dict[str, Any]] = {} EXPECTED_METRICS_ALL = { "FirstString": "My awesome string!", "SecondString": "STRINGS_ARE_GREAT", "IntegerValue": 12, "FloatValue": 3.14, } EXPECTED_METRICS_PARTIAL = { "FirstString": "My awesome string!", } class TestRegexOutputParser: """Collect tests for the RegexOutputParser.""" @staticmethod @pytest.mark.parametrize( ["output", "config", "expected_metrics"], [ (OUTPUT_MATCH_ALL, REGEX_CONFIG, EXPECTED_METRICS_ALL), (OUTPUT_MATCH_ALL + OUTPUT_NO_MATCH, REGEX_CONFIG, EXPECTED_METRICS_ALL), (OUTPUT_MATCH_ALL + OUTPUT_NO_MATCH, REGEX_CONFIG, EXPECTED_METRICS_ALL), ( OUTPUT_MATCH_ALL + OUTPUT_PARTIAL_MATCH, REGEX_CONFIG, EXPECTED_METRICS_ALL, ), (OUTPUT_NO_MATCH, REGEX_CONFIG, {}), (OUTPUT_MATCH_ALL, EMPTY_REGEX_CONFIG, {}), (bytearray(), EMPTY_REGEX_CONFIG, {}), (bytearray(), REGEX_CONFIG, {}), ], ) def test_parsing(output: bytearray, config: Dict, expected_metrics: Dict) -> None: """ Make sure the RegexOutputParser yields valid results. I.e. return an empty dict if either the input or the config is empty and return the parsed metrics otherwise. """ parser = RegexOutputParser(name="Test", regex_config=config) assert parser.name == "Test" assert isinstance(parser, OutputParser) res = parser(output) assert res == expected_metrics @staticmethod def test_unsupported_type() -> None: """An unsupported type in the regex_config must raise an exception.""" config = {"BrokenMetric": {"pattern": "(.*)", "type": "UNSUPPORTED_TYPE"}} with pytest.raises(TypeError): RegexOutputParser(name="Test", regex_config=config) @staticmethod @pytest.mark.parametrize( "config", ( {"TooManyGroups": {"pattern": r"(\w)(\d)", "type": "str"}}, {"NoGroups": {"pattern": r"\W", "type": "str"}}, ), ) def test_invalid_pattern(config: Dict) -> None: """Exactly one capturing parenthesis is allowed in the regex pattern.""" with pytest.raises(ValueError): RegexOutputParser(name="Test", regex_config=config) @pytest.mark.parametrize( "expected_metrics", [ EXPECTED_METRICS_ALL, EXPECTED_METRICS_PARTIAL, ], ) def test_base64_output_parser(expected_metrics: Dict) -> None: """ Make sure the Base64OutputParser yields valid results. I.e. return an empty dict if either the input or the config is empty and return the parsed metrics otherwise. """ parser = Base64OutputParser(name="Test") assert parser.name == "Test" assert isinstance(parser, OutputParser) def create_base64_output(expected_metrics: Dict) -> bytearray: json_str = json.dumps(expected_metrics, indent=4) json_b64 = base64.b64encode(json_str.encode("utf-8")) return ( OUTPUT_MATCH_ALL # Should not be matched by the Base64OutputParser + f"<{Base64OutputParser.TAG_NAME}>".encode("utf-8") + bytearray(json_b64) + f"".encode("utf-8") + OUTPUT_NO_MATCH # Just to add some difficulty... ) output = create_base64_output(expected_metrics) res = parser(output) assert len(res) == 1 assert isinstance(res, dict) for val in res.values(): assert val == expected_metrics output = parser.filter_out_parsed_content(output) assert output == (OUTPUT_MATCH_ALL + OUTPUT_NO_MATCH)