From 6e13113b5d1a1d8afe00c8a577a014db7df5b0a4 Mon Sep 17 00:00:00 2001 From: Kaushik Varadharajan Date: Fri, 14 Jun 2024 00:05:37 +0000 Subject: Add Pytest unit tests for serialization library Tests run with `pytest` command in repository's root directory. Signed-off-by: Kaushik Varadharajan Change-Id: I9d8a13b233ffafc9119ef503d46f7808461a194f --- python/pytests/conftest.py | 51 +++- .../examples/test_conv2d_256x256x3_f32.json | 233 ++++++++++++++++ .../examples/test_select_2048x2048x3_i8.json | 86 ++++++ python/pytests/test_example.py | 144 +++++----- python/pytests/test_single_attr.py | 225 ++++++++++++++++ python/pytests/test_single_op.py | 104 ++++++++ python/pytests/test_single_tensor.py | 293 +++++++++++++++++++++ 7 files changed, 1069 insertions(+), 67 deletions(-) create mode 100644 python/pytests/examples/test_conv2d_256x256x3_f32.json create mode 100644 python/pytests/examples/test_select_2048x2048x3_i8.json create mode 100644 python/pytests/test_single_attr.py create mode 100644 python/pytests/test_single_op.py create mode 100644 python/pytests/test_single_tensor.py (limited to 'python') diff --git a/python/pytests/conftest.py b/python/pytests/conftest.py index b595a01..1cb0857 100644 --- a/python/pytests/conftest.py +++ b/python/pytests/conftest.py @@ -16,6 +16,8 @@ import pathlib import shutil +import subprocess +import pytest def pytest_sessionstart(): @@ -24,13 +26,50 @@ def pytest_sessionstart(): base_dir = (pathlib.Path(__file__).parent / "../..").resolve() tmp_dir = base_dir / "python/pytests/tmp" - tmp_dir.mkdir(exist_ok=True) + if tmp_dir.exists(): + shutil.rmtree(tmp_dir) + tmp_dir.mkdir() + # Using flatc and flatbuffers' reflection feature to convert tosa.fbs to + # json for easier reading + flatbuffers_dir = base_dir / "third_party/flatbuffers" + flatc = flatbuffers_dir / "flatc" + reflection_fbs = flatbuffers_dir / "reflection/reflection.fbs" + tosa_fbs = base_dir / "schema/tosa.fbs" -def pytest_sessionfinish(): - """Cleaning up temporary files.""" + # Using flatbuffers reflection to serialize the TOSA flatbuffers schema + # into binary + _ = subprocess.run( + [flatc, "--binary", "--schema", "-o", tmp_dir, tosa_fbs], check=True + ) - base_dir = (pathlib.Path(__file__).parent / "../..").resolve() - tmp_dir = base_dir / "python/pytests/tmp" + # This file is generated by the previous command + tosa_bfbs = tmp_dir / "tosa.bfbs" + + # Deserializing the binary into JSON using the reflection schema + _ = subprocess.run( + [ + flatc, + "--json", + "--strict-json", + "-o", + tmp_dir, + reflection_fbs, + "--", + tosa_bfbs, + ], + check=True, + ) + + +def pytest_addoption(parser: pytest.Parser): + parser.addoption("--leave-tmp", dest="leave_tmp", action="store_true") + + +def pytest_sessionfinish(session: pytest.Session): + """Cleaning up temporary files, unless the --leave-tmp flag is set""" - shutil.rmtree(tmp_dir) + if not session.config.option.leave_tmp: + base_dir = (pathlib.Path(__file__).parent / "../..").resolve() + tmp_dir = base_dir / "python/pytests/tmp" + shutil.rmtree(tmp_dir) diff --git a/python/pytests/examples/test_conv2d_256x256x3_f32.json b/python/pytests/examples/test_conv2d_256x256x3_f32.json new file mode 100644 index 0000000..5db0e03 --- /dev/null +++ b/python/pytests/examples/test_conv2d_256x256x3_f32.json @@ -0,0 +1,233 @@ +{ + "version": { + "_major": 1, + "_minor": 1, + "_patch": 0, + "_draft": true + }, + "regions": [ + { + "name": "main", + "blocks": [ + { + "name": "main", + "operators": [ + { + "op": "CONST", + "attribute_type": "NONE", + 
"inputs": [], + "outputs": [ + "const-1" + ] + }, + { + "op": "CONST", + "attribute_type": "NONE", + "inputs": [], + "outputs": [ + "const-2" + ] + }, + { + "op": "CONV2D", + "attribute_type": "ConvAttribute", + "attribute": { + "pad": [ + 2, + 2, + 2, + 2 + ], + "stride": [ + 1, + 1 + ], + "dilation": [ + 1, + 1 + ], + "input_zp": 0, + "weight_zp": 0, + "local_bound": false, + "acc_type": "FP32" + }, + "inputs": [ + "input-0", + "const-1", + "const-2" + ], + "outputs": [ + "result-0" + ] + } + ], + "tensors": [ + { + "name": "const-1", + "shape": [ + 5, + 5 + ], + "type": "FP32", + "data": [ + 0, + 0, + 128, + 63, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 128, + 63, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 128, + 63, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 128, + 63, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 128, + 63 + ], + "variable": false, + "is_unranked": false + }, + { + "name": "const-2", + "shape": [ + 3 + ], + "type": "FP32", + "data": [ + 0, + 0, + 128, + 63, + 0, + 0, + 128, + 63, + 0, + 0, + 128, + 63 + ], + "variable": false, + "is_unranked": false + }, + { + "name": "input-0", + "shape": [ + 256, + 256, + 3 + ], + "type": "FP32", + "variable": false, + "is_unranked": false + }, + { + "name": "result-0", + "shape": [ + 256, + 256, + 3 + ], + "type": "FP32", + "variable": false, + "is_unranked": false + } + ], + "inputs": [ + "input-0" + ], + "outputs": [ + "result-0" + ] + } + ] + } + ] +} \ No newline at end of file diff --git a/python/pytests/examples/test_select_2048x2048x3_i8.json b/python/pytests/examples/test_select_2048x2048x3_i8.json new file mode 100644 index 0000000..510e5f6 --- /dev/null +++ b/python/pytests/examples/test_select_2048x2048x3_i8.json @@ -0,0 +1,86 @@ +{ + "version": { + "_major": 1, + "_minor": 1, + "_patch": 0, + "_draft": true + }, + "regions": [ + { + "name": "main", + "blocks": [ + { + "name": "main", + "operators": [ + { + "op": "SELECT", + "attribute_type": "NONE", + "inputs": [ + "input-0", + "input-1", + "input-2" + ], + "outputs": [ + "result-0" + ] + } + ], + "tensors": [ + { + "name": "input-0", + "shape": [ + 2048, + 2048, + 1 + ], + "type": "BOOL", + "variable": false, + "is_unranked": false + }, + { + "name": "input-1", + "shape": [ + 2048, + 2048, + 3 + ], + "type": "INT8", + "variable": false, + "is_unranked": false + }, + { + "name": "input-2", + "shape": [ + 2048, + 2048, + 3 + ], + "type": "INT8", + "variable": false, + "is_unranked": false + }, + { + "name": "result-0", + "shape": [ + 2048, + 2048, + 3 + ], + "type": "INT8", + "variable": false, + "is_unranked": false + } + ], + "inputs": [ + "input-0", + "input-1", + "input-2" + ], + "outputs": [ + "result-0" + ] + } + ] + } + ] +} \ No newline at end of file diff --git a/python/pytests/test_example.py b/python/pytests/test_example.py index e03997b..2b4de9f 100644 --- a/python/pytests/test_example.py +++ b/python/pytests/test_example.py @@ -18,78 +18,100 @@ import json import pathlib import subprocess import serializer.tosa_serializer as ts +import numpy as np -def test_example(request): - """Testing that pytest and the Python serialization library work""" +def serialize_and_load_json(ser: ts.TosaSerializer, tosa_filename) -> dict: + base_dir = (pathlib.Path(__file__).parent / "../..").resolve() + 
flatc = base_dir / "third_party/flatbuffers/flatc" + tosa_fbs = base_dir / "schema/tosa.fbs" - # Creating an example TOSA region - ser = ts.TosaSerializer("ser") - ser.currRegion.currBasicBlock.addTensor("t1", [3, 4, 5], ts.DType.FP16) - ser.currRegion.currBasicBlock.addTensor("t2", [2, 2], ts.DType.INT32) - ser.currRegion.currBasicBlock.addInput("t1") - ser.currRegion.currBasicBlock.addOutput("t2") + # Serializing to flatbuffer and writing to a temporary file + with open(tosa_filename, "wb") as f: + f.write(ser.serialize()) - attr = ts.TosaSerializerAttribute() - attr.ConvAttribute([1, 1], [2, 2], [3, 3], 4, 5, True, ts.DType.FP32) - ser.currRegion.currBasicBlock.addOperator( - ts.TosaOp.Op().CONV2D, ["t1"], ["t2"], attr + # Using flatc to convert the flatbuffer to strict json + _ = subprocess.run( + [ + flatc, + "--json", + "--strict-json", + "--defaults-json", + "-o", + tosa_filename.parent, + tosa_fbs, + "--", + tosa_filename, + ], + check=True, ) - # Defining filepaths + assert str(tosa_filename).endswith(".tosa") + json_filename = str(tosa_filename).removesuffix(".tosa") + ".json" + + with open(json_filename, encoding="utf-8") as f: + return json.load(f) + + +def test_example_select(request): + + base_dir = (pathlib.Path(__file__).parent / "../..").resolve() + tmp_dir = base_dir / "python/pytests/tmp" testname = request.node.name + ser = ts.TosaSerializer(tmp_dir / testname) + (tmp_dir / testname).mkdir(exist_ok=True) + + input_0 = ts.TosaSerializerTensor("input-0", [2048, 2048, 1], ts.DType.BOOL) + input_1 = ts.TosaSerializerTensor("input-1", [2048, 2048, 3], ts.DType.INT8) + input_2 = ts.TosaSerializerTensor("input-2", [2048, 2048, 3], ts.DType.INT8) + + ser.addInputTensor(input_0) + ser.addInputTensor(input_1) + ser.addInputTensor(input_2) + + result_0 = ser.addOutput([2048, 2048, 3], ts.DType.INT8) + + ser.addOperator( + ts.TosaOp.Op().SELECT, ["input-0", "input-1", "input-2"], result_0.name + ) + + serialized = serialize_and_load_json(ser, tmp_dir / testname / f"{testname}.tosa") + + with open( + base_dir / "python/pytests/examples/test_select_2048x2048x3_i8.json" + ) as f: + expected = json.load(f) + + assert serialized["regions"] == expected["regions"] + + +def test_example_conv2d(request): + """Testing that pytest and the Python serialization library work""" + + # Defining filepaths base_dir = (pathlib.Path(__file__).parent / "../..").resolve() tmp_dir = base_dir / "python/pytests/tmp" - tosa_file = tmp_dir / f"{testname}.tosa" - schema_file = base_dir / "schema/tosa.fbs" - flatc = base_dir / "third_party/flatbuffers/flatc" + testname = request.node.name - # Serializing to flatbuffer and writing to a temporary file - with open(tosa_file, "wb") as f: - f.write(ser.serialize()) + ser = ts.TosaSerializer(tmp_dir / testname) + (tmp_dir / testname).mkdir(exist_ok=True) - # Using flatc to convert the flatbuffer to strict json - _ = subprocess.run( - [flatc, "--json", "--strict-json", "-o", tmp_dir, schema_file, "--", tosa_file], - check=True, + # Creating an example TOSA region + ser.addConst([5, 5], ts.DType.FP32, np.eye(5, 5), "const-1") + ser.addConst([3], ts.DType.FP32, np.ones(3), "const-2") + ser.addInputTensor(ts.TosaSerializerTensor("input-0", [256, 256, 3], ts.DType.FP32)) + ser.addOutput([256, 256, 3], ts.DType.FP32) + + attr = ts.TosaSerializerAttribute() + attr.ConvAttribute([2, 2, 2, 2], [1, 1], [1, 1], 0, 0, False, ts.DType.FP32) + ser.addOperator( + ts.TosaOp.Op().CONV2D, ["input-0", "const-1", "const-2"], ["result-0"], attr ) - # Opening json file generated by 
previous command - json_file = tmp_dir / f"{testname}.json" - with open(json_file, encoding="utf-8") as f: - serialized = json.load(f) - - assert serialized["regions"] == [ - { - "name": "main", - "blocks": [ - { - "name": "main", - "inputs": ["t1"], - "outputs": ["t2"], - "operators": [ - { - "op": "CONV2D", - "attribute_type": "ConvAttribute", - "attribute": { - "pad": [1, 1], - "stride": [2, 2], - "dilation": [3, 3], - "input_zp": 4, - "weight_zp": 5, - "local_bound": True, - "acc_type": "FP32", - }, - "inputs": ["t1"], - "outputs": ["t2"], - } - ], - "tensors": [ - {"name": "t1", "shape": [3, 4, 5], "type": "FP16"}, - {"name": "t2", "shape": [2, 2], "type": "INT32"}, - ], - } - ], - } - ] + serialized = serialize_and_load_json(ser, tmp_dir / testname / f"{testname}.tosa") + + with open(base_dir / "python/pytests/examples/test_conv2d_256x256x3_f32.json") as f: + expected = json.load(f) + + assert serialized["regions"] == expected["regions"] diff --git a/python/pytests/test_single_attr.py b/python/pytests/test_single_attr.py new file mode 100644 index 0000000..f126be3 --- /dev/null +++ b/python/pytests/test_single_attr.py @@ -0,0 +1,225 @@ +#!/usr/bin/env python3 + +# Copyright (c) 2024, ARM Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import pathlib +import random +import subprocess +import string +import serializer.tosa_serializer as ts +import pytest + + +# In some instances, the Python parameter names in TosaSerializerAttribute +# differ from the argument names in the schema. Here are the discrepancies. +# ('schema': 'python') +FIELD_NAME_REPLACEMENTS = { + # spelling differences + ("TransposeConvAttribute", "out_pad"): "outpad", + ("MatMulAttribute", "a_zp"): "A_zp", + ("MatMulAttribute", "b_zp"): "B_zp", + # these are for a reason; PadAttribute and ClampAttribute have + # inputs that are byte arrays, and the param names reflect this + ("PadAttribute", "pad_const"): "pad_const_val_as_bytes", + ("ClampAttribute", "min_val"): "min_val_as_bytes", + ("ClampAttribute", "max_val"): "max_val_as_bytes", +} + +# When converting the tosa schema to json, the enums are lost and +# replaced with UInt, so the enum names are hard-coded here. +ENUM_FIELDS = { + ("ConvAttribute", "acc_type"): "DType", + ("PoolAttribute", "acc_type"): "DType", + ("TransposeConvAttribute", "acc_type"): "DType", + ("ResizeAttribute", "mode"): "ResizeMode", +} + + +def get_attributes(): + base_dir = (pathlib.Path(__file__).parent / "../..").resolve() + tmp_dir = base_dir / "python/pytests/tmp" + + # Helper function for querying lists of dictionaries for a value + def select(data: list[dict], key, value) -> dict: + return next(filter(lambda item: item[key] == value, data), None) + + with open(tmp_dir / "tosa.json", encoding="utf-8") as f: + tosa_schema = json.load(f) + + attribute_info = select( + tosa_schema["enums"], + "name", + "tosa.Attribute", + )["values"] + + for i in attribute_info: + # The library doesn't support custom or none attributes. 
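        # (NONE has no attribute table to construct, so there is nothing
        # to round-trip for it.)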
+ if i["name"] not in ["NONE", "CustomAttribute"]: + yield i["name"] + + +@pytest.mark.parametrize("attribute_name", get_attributes()) +def test_single_attr(request, attribute_name): + """ + Creating an attribute of each type, serializing, deserializing, and + checking that arguments are preserved. + """ + + base_dir = (pathlib.Path(__file__).parent / "../..").resolve() + tmp_dir = base_dir / "python/pytests/tmp" + testname = request.node.name + (tmp_dir / testname).mkdir(exist_ok=True) + + flatc = base_dir / "third_party/flatbuffers/flatc" + tosa_fbs = base_dir / "schema/tosa.fbs" + + with open(tmp_dir / "tosa.json", encoding="utf-8") as f: + tosa_schema = json.load(f) + + # Defining temp filepaths + testname = request.node.name + tosa_file = tmp_dir / testname / f"{testname}.tosa" + json_file = tmp_dir / testname / f"{testname}.json" + + # Helper function for querying lists of dictionaries for a value + def select(data: list[dict], key, value) -> dict: + return next(filter(lambda item: item[key] == value, data), None) + + # Creating example tensors to reference in the operator + ser = ts.TosaSerializer(tmp_dir / testname) + ser.currRegion.currBasicBlock.addTensor("t1", [1], ts.DType.INT32) + ser.currRegion.currBasicBlock.addTensor("t2", [1], ts.DType.INT32) + ser.currRegion.currBasicBlock.addInput("t1") + ser.currRegion.currBasicBlock.addOutput("t2") + + # 'py_kwargs' is what we will pass to the Python function to + # create the attribute, while 'expected' is what we expect + # to see from the graph serialized as json. + # So 'py_kwargs' needs to worry about the FIELD_NAME_REPLACEMENTS, + # but we use the unaltered field names from the schema for 'expected'. + expected = {} + py_kwargs = {} + + if attribute_name in ["PadAttribute", "ClampAttribute"]: + py_kwargs["serializer_builder"] = ser.builder + + # Getting the fields of the attribute from the schema + fields = select( + tosa_schema["objects"], + "name", + f"tosa.{attribute_name}", + )["fields"] + for field in fields: + if field.get("deprecated", False): + continue + + field_name = field["name"] + kwarg = FIELD_NAME_REPLACEMENTS.get( + (attribute_name, field_name), + field_name, + ) + + # Randomly generating the field based on type + + if (attribute_name, field_name) in ENUM_FIELDS: + enum_name = ENUM_FIELDS[(attribute_name, field_name)] + enum = select( + tosa_schema["enums"], + "name", + f"tosa.{enum_name}", + )["values"] + choice = random.choice(enum) + + py_kwargs[kwarg] = choice["value"] + expected[field_name] = choice["name"] + continue + + field_type = field["type"] + + if field_type["base_type"] == "Vector" and field_type["element"] == "UByte": + py_kwargs[kwarg] = random.randbytes(random.randint(1, 16)) + # json stores bytes as list[uint8] + expected[field_name] = list(py_kwargs[kwarg]) + elif field_type["base_type"] == "Vector" and field_type["element"] == "Int": + expected[field_name] = py_kwargs[kwarg] = random.sample( + range(-(2**31), 2**31), random.randint(1, 16) + ) + elif field_type["base_type"] == "Vector" and field_type["element"] == "Short": + expected[field_name] = py_kwargs[kwarg] = random.sample( + range(-(2**15), 2**15), random.randint(1, 16) + ) + elif field_type["base_type"] == "Int": + expected[field_name] = py_kwargs[kwarg] = random.randint( + -(2**31), 2**31 - 1 + ) + elif field_type["base_type"] == "Bool": + expected[field_name] = py_kwargs[kwarg] = random.choice( + [True, False], + ) + elif field_type["base_type"] == "String": + expected[field_name] = py_kwargs[kwarg] = "".join( + random.choices( + 
string.ascii_uppercase + string.digits, + k=random.randint(1, 16), + ) + ) + else: + raise NotImplementedError( + f"{attribute_name}.{field_name} is of an unknown type and random " + "arguments couldn't be generated for testing. If it uses an enum, " + f"consider adding to ENUM_FIELDS. {field_type}" + ) + + # Creating the attribute and adding it to the serializer + attr = ts.TosaSerializerAttribute() + + # This line calls the attribute function, + # e.g. attr.ConvAttribute(pad=[...], ...) + getattr(attr, attribute_name)(**py_kwargs) + + ser.currRegion.currBasicBlock.addOperator( + ts.TosaOp.Op().UNKNOWN, ["t1"], ["t2"], attr + ) + # TODO: we use Op.UNKNOWN since there's no easy mapping + # for attribute <-> operator. Op is just a uint so we're + # not losing much coverage, but this would be useful + + # Serializing to flatbuffer and writing to a temporary file + with open(tosa_file, "wb") as f: + f.write(ser.serialize()) + + # Using flatc to convert the flatbuffer to strict json + _ = subprocess.run( + [ + flatc, + "--json", + "--strict-json", + "--defaults-json", + "-o", + tosa_file.parent, + tosa_fbs, + "--", + tosa_file, + ], + check=True, + ) + + with open(json_file, encoding="utf-8") as f: + serialized = json.load(f) + + # Getting the arguments of the attribute that we serialized + new_attr = serialized["regions"][0]["blocks"][0]["operators"][0]["attribute"] + assert expected == new_attr diff --git a/python/pytests/test_single_op.py b/python/pytests/test_single_op.py new file mode 100644 index 0000000..81ee094 --- /dev/null +++ b/python/pytests/test_single_op.py @@ -0,0 +1,104 @@ +#!/usr/bin/env python3 + +# Copyright (c) 2024, ARM Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import pathlib +import subprocess +import serializer.tosa_serializer as ts +import pytest + + +def get_ops(): + base_dir = (pathlib.Path(__file__).parent / "../..").resolve() + tmp_dir = base_dir / "python/pytests/tmp" + + # Helper function for querying lists of dictionaries for a value + def select(data: list[dict], key, value) -> dict: + return next(filter(lambda item: item[key] == value, data), None) + + with open(tmp_dir / "tosa.json", encoding="utf-8") as f: + tosa_schema = json.load(f) + + op_info = select(tosa_schema["enums"], "name", "tosa.Op")["values"] + + for i in op_info: + yield i["name"] + + +@pytest.mark.parametrize("op_name", get_ops()) +def test_single_op(request, op_name): + """ + Creating an operator of each type with empty input and output tensors + and an empty attribute, serializing, deserializing, and checking that + arguments are preserved. 
+ """ + + base_dir = (pathlib.Path(__file__).parent / "../..").resolve() + tmp_dir = base_dir / "python/pytests/tmp" + testname = request.node.name + (tmp_dir / testname).mkdir(exist_ok=True) + + flatc = base_dir / "third_party/flatbuffers/flatc" + tosa_fbs = base_dir / "schema/tosa.fbs" + + # Defining temp filepaths + tosa_file = tmp_dir / testname / f"{testname}.tosa" + json_file = tmp_dir / testname / f"{testname}.json" + + # Creating example tensors to reference in the operator + ser = ts.TosaSerializer(tmp_dir) + ser.currRegion.currBasicBlock.addTensor("t1", [1], ts.DType.INT32) + ser.currRegion.currBasicBlock.addTensor("t2", [1], ts.DType.INT32) + ser.currRegion.currBasicBlock.addInput("t1") + ser.currRegion.currBasicBlock.addOutput("t2") + + # Adding an operator of the given op_name. + ser.currRegion.currBasicBlock.addOperator( + getattr(ts.TosaOp.Op(), op_name), ["t1"], ["t2"], None + ) + + # Serializing to flatbuffer and writing to a temporary file + with open(tosa_file, "wb") as f: + f.write(ser.serialize()) + + # Using flatc to convert the flatbuffer to strict json + _ = subprocess.run( + [ + flatc, + "--json", + "--strict-json", + "--defaults-json", + "-o", + tosa_file.parent, + tosa_fbs, + "--", + tosa_file, + ], + check=True, + ) + + with open(json_file, encoding="utf-8") as f: + serialized = json.load(f) + + # Getting the arguments of the operator that we serialized + new_op = serialized["regions"][0]["blocks"][0]["operators"][0] + + assert new_op == { + "attribute_type": "NONE", + "inputs": ["t1"], + "outputs": ["t2"], + "op": op_name, + } diff --git a/python/pytests/test_single_tensor.py b/python/pytests/test_single_tensor.py new file mode 100644 index 0000000..f665161 --- /dev/null +++ b/python/pytests/test_single_tensor.py @@ -0,0 +1,293 @@ +#!/usr/bin/env python3 + +# Copyright (c) 2024, ARM Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import pathlib +import random +import subprocess +import serializer.tosa_serializer as ts +import pytest +import numpy as np +from ml_dtypes import bfloat16, float8_e4m3fn, float8_e5m2, int4, finfo, iinfo + + +def generate_random_data(dtype_str): + # Creating the random data. + + shape = random.sample(range(1, 16), random.randint(1, 3)) + + FLOAT_TYPES = { + "FP32": np.float32, + "FP16": np.float16, + "BF16": bfloat16, + "FP8E4M3": float8_e4m3fn, + "FP8E5M2": float8_e5m2, + } + INT_TYPES = { + "INT4": int4, + "INT8": np.int8, + "INT16": np.int16, + "INT32": np.int32, + "UINT16": np.uint16, + "UINT8": np.uint8, + } + + if dtype_str in FLOAT_TYPES: + py_dtype = FLOAT_TYPES[dtype_str] + data = np.random.uniform( + finfo(py_dtype).min, finfo(py_dtype).max, shape + ).astype(py_dtype) + + # Generating -inf, inf, -nan, nan with a 10% chance each. 
+ # Note that fp8e4m3 doesn't have infinities so they become NaN + mask = np.random.rand(*shape) + data = np.select( + [mask < 0.1, mask < 0.2, mask < 0.3, mask < 0.4], + np.array([-np.inf, np.inf, -np.nan, np.nan]).astype(py_dtype), + data, + ) + + elif dtype_str in INT_TYPES: + py_dtype = INT_TYPES[dtype_str] + data = np.random.uniform( + iinfo(py_dtype).min, iinfo(py_dtype).max, shape + ).astype(py_dtype) + elif dtype_str == "BOOL": + py_dtype = bool + data = (np.random.rand(*shape) >= 0.5).astype(bool) + elif dtype_str == "INT48": + py_dtype = np.int64 + data = np.random.uniform(-(2**47), 2**47 - 1, shape).astype(py_dtype) + elif dtype_str == "SHAPE": + py_dtype = np.int64 + data = np.random.uniform( + iinfo(py_dtype).min, iinfo(py_dtype).max, shape + ).astype(py_dtype) + else: + raise NotImplementedError( + f"Random tensor generation for type {dtype_str} not implemented" + ) + + return data, shape, py_dtype + + +def serialize_and_load_json(ser: ts.TosaSerializer, tosa_filename) -> dict: + base_dir = (pathlib.Path(__file__).parent / "../..").resolve() + flatc = base_dir / "third_party/flatbuffers/flatc" + tosa_fbs = base_dir / "schema/tosa.fbs" + + # Serializing to flatbuffer and writing to a temporary file + with open(tosa_filename, "wb") as f: + f.write(ser.serialize()) + + # Using flatc to convert the flatbuffer to strict json + _ = subprocess.run( + [ + flatc, + "--json", + "--strict-json", + "--defaults-json", + "-o", + tosa_filename.parent, + tosa_fbs, + "--", + tosa_filename, + ], + check=True, + ) + + assert str(tosa_filename).endswith(".tosa") + json_filename = str(tosa_filename).removesuffix(".tosa") + ".json" + + with open(json_filename, encoding="utf-8") as f: + return json.load(f) + + +@pytest.mark.parametrize("dtype_str", ts.DTypeNames) +def test_single_intermediate(request, dtype_str): + """ + Creating an intermediate tensor of each dtype + """ + + base_dir = (pathlib.Path(__file__).parent / "../..").resolve() + tmp_dir = base_dir / "python/pytests/tmp" + testname = request.node.name + + # Creating a new folder for each test case to dump numpy files + ser = ts.TosaSerializer(tmp_dir / testname) + (tmp_dir / testname).mkdir(exist_ok=True) + + shape = random.sample(range(1, 2**31), random.randint(1, 16)) + tensor = ser.addIntermediate(shape, ts.dtype_str_to_val(dtype_str)) + + assert tensor.dtype == ts.dtype_str_to_val(dtype_str) + assert tensor.shape == shape + + serialized = serialize_and_load_json(ser, tmp_dir / testname / f"{testname}.tosa") + + tensor_serialized = serialized["regions"][0]["blocks"][0]["tensors"][0] + + assert ( + tensor_serialized.items() + >= { + "is_unranked": False, + "shape": shape, + "type": dtype_str, + "variable": False, + }.items() + ) + + +def placeholder_cases(): + for dtype_str in ts.DTypeNames: + # The ml_dtypes library has issues with serializing FP8E5M2 to .npy + # files, so we don't test it. + if dtype_str in ["UNKNOWN", "FP8E5M2"]: + continue + yield dtype_str + + +@pytest.mark.parametrize("dtype_str", placeholder_cases()) +def test_single_placeholder(request, dtype_str): + """ + Creating a placeholder tensor of each dtype. The data of these placeholder + tensors is saved in .npy files. 
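    The .npy file is reloaded afterwards and compared element-wise
    (NaNs included) against the generated data.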
+ """ + + base_dir = (pathlib.Path(__file__).parent / "../..").resolve() + tmp_dir = base_dir / "python/pytests/tmp" + testname: str = request.node.name + + data, shape, py_dtype = generate_random_data(dtype_str) + + # Creating a new folder for each test case to dump numpy files + ser = ts.TosaSerializer(tmp_dir / testname) + (tmp_dir / testname).mkdir(exist_ok=True) + + tensor = ser.addPlaceholder(shape, ts.dtype_str_to_val(dtype_str), data) + + serialized = serialize_and_load_json(ser, tmp_dir / testname / f"{testname}.tosa") + + tensor_serialized = serialized["regions"][0]["blocks"][0]["tensors"][0] + + assert ( + tensor_serialized.items() + >= { + "is_unranked": False, + "shape": shape, + "type": dtype_str, + "variable": False, + }.items() + ) + + npy_data = np.load( + ser.currRegion.pathPrefix / tensor.placeholderFilename, + ).view(py_dtype) + + assert np.array_equal(npy_data, data, equal_nan=True) + + +def const_cases(): + for dtype_str in ts.DTypeNames: + for const_mode in ts.ConstMode.__members__.values(): + # We don't support uint8 or uint16 serialization to flatbuffer; + # see convertDataToUint8Vec + if dtype_str in ["UNKNOWN", "UINT8", "UINT16"]: + continue + # The ml_dtypes library has issues with serializing FP8E5M2 to + # .npy files, so we don't test it. + if dtype_str == "FP8E5M2" and const_mode != ts.ConstMode.EMBED: + continue + yield dtype_str, const_mode + + +@pytest.mark.parametrize("dtype_str,const_mode", const_cases()) +def test_single_const(request, dtype_str, const_mode): + """ + Creating a const tensor of each dtype. The data of these placeholder + tensors is saved in .npy files and/or the flatbuffer itself, depending + on the const mode. + """ + + base_dir = (pathlib.Path(__file__).parent / "../..").resolve() + tmp_dir = base_dir / "python/pytests/tmp" + testname = request.node.name + + data, shape, py_dtype = generate_random_data(dtype_str) + + # Creating a new folder for each test case to dump numpy files + ser = ts.TosaSerializer(tmp_dir / testname, constMode=const_mode) + (tmp_dir / testname).mkdir(exist_ok=True) + + tensor = ser.addConst(shape, ts.dtype_str_to_val(dtype_str), data) + + serialized = serialize_and_load_json(ser, tmp_dir / testname / f"{testname}.tosa") + + tensor_serialized = serialized["regions"][0]["blocks"][0]["tensors"][0] + + assert ( + tensor_serialized.items() + >= { + "is_unranked": False, + "shape": shape, + "type": dtype_str, + "variable": False, + }.items() + ) + + # Testing if data is correctly serialized to .npy + if const_mode in [ts.ConstMode.INPUTS, ts.ConstMode.EMBED_DUMP]: + npy_data = np.load( + ser.currRegion.pathPrefix / f"{tensor.name}.npy", + ).view(py_dtype) + assert np.array_equal(npy_data, data, equal_nan=True) + + # Testing if data is correctly serialized as bytes to flatbuffer. + if const_mode in [ts.ConstMode.EMBED, ts.ConstMode.EMBED_DUMP]: + u8_data = np.array(tensor_serialized["data"], dtype=np.uint8) + + # Note that TOSA flatbuffer INT/SHAPE serialization is ALWAYS + # little-endian regardless of the system byteorder; see + # TosaSerializer.convertDataToUint8Vec. So all + # uses of .view() here are forced little-endian. 
        if dtype_str == "INT48":
            assert np.array_equal(
                np.bitwise_and(data, 0x0000_FFFF_FFFF_FFFF),
                np.pad(u8_data.reshape(-1, 6), ((0, 0), (0, 2)))
                .view(np.dtype("<i8"))
                .reshape(shape),
            )
        elif dtype_str == "INT4":
            first = np.bitwise_and(u8_data, 0x0F).astype(int4)
            second = (u8_data >> 4).astype(int4)
            alternating = np.ravel((first, second), order="F").copy()

            # There could be an extra int4 added for padding, so we check
            # that the flatbuffer array's size is correct and then force
            # it to the shape we want
            assert alternating.size == (np.prod(shape) + 1) // 2 * 2
            assert np.array_equal(data, np.resize(alternating, shape))
        else:
            assert np.array_equal(
                data,
                u8_data.view(
                    np.dtype(py_dtype).newbyteorder("<")
                    # forced little-endian
                ).reshape(shape),
                equal_nan=True,
            )
--
cgit v1.2.1