From 65ba809d7a8b4ddd0a51f6c76ad0afc5f417de07 Mon Sep 17 00:00:00 2001 From: Jeremy Johnson Date: Mon, 9 Oct 2023 16:31:13 +0100 Subject: Data generator library python interface added Added support for using generate library in tosa_verif_build_tests and tosa_verif_run_tests tosa tool scripts. Reduced scope of compliance test creation and verification to the supported type of FP32. Fix missing virtual destructor warning in generate_dot_product.h and add config file for generate library. Simple pytests included to check python interface. Signed-off-by: Jeremy Johnson Change-Id: I6cdad9b00660d6ddc8bd07fdea813937fb48626a --- verif/generator/datagenerator.py | 196 ++++++++++++++++++++++++++++++ verif/generator/tosa_arg_gen.py | 23 ++-- verif/generator/tosa_test_gen.py | 43 ++++--- verif/generator/tosa_utils.py | 5 +- verif/generator/tosa_verif_build_tests.py | 20 +++ 5 files changed, 260 insertions(+), 27 deletions(-) create mode 100644 verif/generator/datagenerator.py (limited to 'verif/generator') diff --git a/verif/generator/datagenerator.py b/verif/generator/datagenerator.py new file mode 100644 index 0000000..408c83e --- /dev/null +++ b/verif/generator/datagenerator.py @@ -0,0 +1,196 @@ +# Copyright (c) 2023, ARM Limited. 
+# SPDX-License-Identifier: Apache-2.0 +"""Calls the data generation library to create the test data.""" +import ctypes as ct +import json +from pathlib import Path + +import numpy as np +from schemavalidation import schemavalidation + + +class GenerateError(Exception): + """Exception raised for errors performing data generation.""" + + +class GenerateLibrary: + """Python interface to the C generate library.""" + + def __init__(self, generate_lib_path): + """Find the library and set up the interface.""" + self.lib_path = generate_lib_path + if not self.lib_path.is_file(): + raise GenerateError(f"Could not find generate library - {self.lib_path}") + + self.test_desc = None + self.json_config = None + self.lib = ct.cdll.LoadLibrary(self.lib_path) + + self.tgd_generate_data = self.lib.tgd_generate_data + self.tgd_generate_data.argtypes = [ + ct.c_char_p, + ct.c_char_p, + ct.c_void_p, + ct.c_size_t, + ] + self.tgd_generate_data.restype = ct.c_bool + + def check_config(self, test_desc: dict): + """Quick check that the config supports data generation.""" + return ("meta" in test_desc) and ("data_gen" in test_desc["meta"]) + + def set_config(self, test_desc: dict): + """Set the test config in the library. 
+ + test_desc - the test desc.json file + """ + self.test_desc = None + self.json_config = None + + if not self.check_config(test_desc): + raise GenerateError("No meta/data_gen section found in desc.json") + + # Validate the config versus the schema + tdsv = schemavalidation.TestDescSchemaValidator() + tdsv.validate_config(test_desc) + + self.test_desc = test_desc + self.json_config = test_desc["meta"]["data_gen"] + + def _create_buffer(self, dtype: str, shape: tuple): + """Helper to create a buffer of the required type.""" + size = 1 + for dim in shape: + size *= dim + + if dtype == "FP32": + # Create buffer and initialize to zero + buffer = (ct.c_float * size)(0) + size_bytes = size * 4 + else: + raise GenerateError(f"Unsupported data type {dtype}") + + return buffer, size_bytes + + def _data_gen_write( + self, test_path: Path, json_bytes: bytes, ifm_name: str, ifm_file: str + ): + """Generate the named tensor data and save it in numpy format.""" + try: + tensor = self.json_config["tensors"][ifm_name] + dtype = tensor["data_type"] + shape = tuple(tensor["shape"]) + except KeyError as e: + raise GenerateError( + f"Missing data in desc.json for input {ifm_name} - {repr(e)}" + ) + + buffer, size_bytes = self._create_buffer(dtype, shape) + buffer_ptr = ct.cast(buffer, ct.c_void_p) + + result = self.tgd_generate_data( + ct.c_char_p(json_bytes), + ct.c_char_p(bytes(ifm_name, "utf8")), + buffer_ptr, + ct.c_size_t(size_bytes), + ) + if not result: + raise GenerateError("Data generate failed") + + arr = np.ctypeslib.as_array(buffer) + arr = np.reshape(arr, shape) + + file_name = test_path / ifm_file + np.save(file_name, arr) + + def write_numpy_files(self, test_path: Path): + """Write out all the specified tensors to numpy data files.""" + if self.test_desc is None or self.json_config is None: + raise GenerateError("Cannot write numpy files as no config set up") + + try: + ifm_names = self.test_desc["ifm_name"] + ifm_files = self.test_desc["ifm_file"] + except KeyError as 
e: + raise GenerateError(f"Missing data in desc.json - {repr(e)}") + + json_bytes = bytes(json.dumps(self.json_config), "utf8") + + failures = [] + for iname, ifile in zip(ifm_names, ifm_files): + try: + self._data_gen_write(test_path, json_bytes, iname, ifile) + except GenerateError as e: + failures.append( + f"ERROR: Failed to create data for tensor {iname} - {repr(e)}" + ) + + if len(failures) > 0: + raise GenerateError("\n".join(failures)) + + +def main(argv=None): + """Simple command line interface for the data generator.""" + import argparse + import conformance.model_files as cmf + + parser = argparse.ArgumentParser() + parser.add_argument( + "--generate-lib-path", + type=Path, + help="Path to TOSA generate lib", + ) + parser.add_argument( + "path", type=Path, help="the path to the test directory to generate data for" + ) + args = parser.parse_args(argv) + test_path = args.path + + if args.generate_lib_path is None: + # Try to work out ref model directory and find the generate library + # but this default only works for the python developer environment + # i.e. 
when using the scripts/py-dev-env.* scripts + # otherwise use the command line option --generate-lib-path to specify path + ref_model_dir = Path(__file__).absolute().parents[2] + args.generate_lib_path = cmf.find_tosa_file( + cmf.TosaFileType.GENERATE_LIBRARY, ref_model_dir, False + ) + + if not test_path.is_dir(): + print(f"ERROR: Invalid directory - {test_path}") + return 2 + + test_desc_path = test_path / "desc.json" + + if not test_desc_path.is_file(): + print(f"ERROR: No test description found: {test_desc_path}") + return 2 + + # Load the JSON desc.json + try: + with test_desc_path.open("r") as fd: + test_desc = json.load(fd) + except Exception as e: + print(f"ERROR: Loading {test_desc_path} - {repr(e)}") + return 2 + + try: + dgl = GenerateLibrary(args.generate_lib_path) + if not dgl.check_config(test_desc): + print(f"WARNING: No data generation supported for {test_path}") + return 2 + + dgl.set_config(test_desc) + except GenerateError as e: + print(f"ERROR: Initializing generate library - {repr(e)}") + return 1 + + try: + dgl.write_numpy_files(test_path) + except GenerateError as e: + print(f"ERROR: Writing out data files to {test_path}\n{repr(e)}") + return 1 + + +if __name__ == "__main__": + exit(main()) diff --git a/verif/generator/tosa_arg_gen.py b/verif/generator/tosa_arg_gen.py index de882ca..3b5d458 100644 --- a/verif/generator/tosa_arg_gen.py +++ b/verif/generator/tosa_arg_gen.py @@ -635,15 +635,13 @@ class TosaTensorValuesGen: # Variable inputs versus constants pCount, cCount = testGen.TOSA_OP_LIST[opName]["operands"] - overrideLazy = False - if not gtu.dtypeIsFloat(dtypeList[0]) and testGen.args.lazy_data_gen: - # TEMPORARY OVERRIDE for integer types - overrideLazy = True + if error_name is not None or not gtu.dtypeIsSupportedByCompliance(dtypeList[0]): + # Fall back to original path when dealing with unsupported types + + # First turn off lazy data gen so we always produce data + lazy_data_gen = testGen.args.lazy_data_gen 
testGen.args.lazy_data_gen = False - # TODO - Change to generation of data using library! - # For now - we fall back to original path (or when dealing with non-floats) - if not testGen.args.lazy_data_gen: tens_ser_list = TosaTensorValuesGen.tvgDefault( testGen, testGen.TOSA_OP_LIST[opName], @@ -652,9 +650,8 @@ class TosaTensorValuesGen: [], error_name, ) - if overrideLazy: - # Return to lazy mode - testGen.args.lazy_data_gen = True + # Restore lazy data gen setting + testGen.args.lazy_data_gen = lazy_data_gen return TosaTensorValuesGen.TVGInfo(tens_ser_list, None) # Create data generator meta-data @@ -1112,7 +1109,11 @@ class TosaArgGen: @staticmethod def _add_data_generators(testGen, opName, dtype, arg_list, error_name, **kwargs): """Add extra tests for each type of data generator for this op.""" - if error_name is None and "data_gen" in testGen.TOSA_OP_LIST[opName]: + if ( + error_name is None + and "data_gen" in testGen.TOSA_OP_LIST[opName] + and gtu.dtypeIsSupportedByCompliance(dtype) + ): if dtype in [DType.FP16, DType.FP32, DType.BF16]: dataGenTypesList = testGen.TOSA_OP_LIST[opName]["data_gen"]["fp"] else: diff --git a/verif/generator/tosa_test_gen.py b/verif/generator/tosa_test_gen.py index 8beb2ae..8fcea29 100644 --- a/verif/generator/tosa_test_gen.py +++ b/verif/generator/tosa_test_gen.py @@ -9,6 +9,7 @@ from pathlib import Path import generator.tosa_utils as gtu import numpy as np import serializer.tosa_serializer as ts +from generator.datagenerator import GenerateLibrary from generator.tosa_arg_gen import TosaArgGen from generator.tosa_arg_gen import TosaQuantGen from generator.tosa_arg_gen import TosaTensorGen @@ -55,6 +56,11 @@ class TosaTestGen: self.random_fp_high = max(args.tensor_fp_value_range) # JSON schema validation self.descSchemaValidator = TestDescSchemaValidator() + # Data generator library when not generating the data later + if not args.lazy_data_gen: + self.dgl = GenerateLibrary(args.generate_lib_path) + else: + self.dgl = None def 
createSerializer(self, opName, testPath): self.testPath = os.path.join(opName, testPath) @@ -92,15 +98,21 @@ class TosaTestGen: self.descSchemaValidator.validate_config(desc) if metaData: - if self.args.lazy_data_gen and "data_gen" in metaData: - # Output datagen meta data as CPP data - path_md = path / f"{testName}_meta_data_gen.cpp" - with path_md.open("w") as fd: - fd.write(TOSA_AUTOGENERATED_HEADER) - fd.write("// Test meta data for data generation setup\n\n") - fd.write(f'const char* json_tdg_config_{path.stem} = R"(') - json.dump(metaData["data_gen"], fd) - fd.write(')";\n\n') + if "data_gen" in metaData: + if self.args.lazy_data_gen: + # Output datagen meta data as CPP data + path_md = path / f"{testName}_meta_data_gen.cpp" + with path_md.open("w") as fd: + fd.write(TOSA_AUTOGENERATED_HEADER) + fd.write("// Test meta data for data generation setup\n\n") + fd.write(f'const char* json_tdg_config_{path.stem} = R"(') + json.dump(metaData["data_gen"], fd) + fd.write(')";\n\n') + else: + # Generate the data + self.dgl.set_config(desc) + self.dgl.write_numpy_files(path) + if "compliance" in metaData: # Output datagen meta data as CPP data path_md = path / f"{testName}_meta_compliance.cpp" @@ -282,8 +294,8 @@ class TosaTestGen: ) def tensorComplianceMetaData(self, op, argsDict, outputTensor, errorName): - if errorName or not gtu.dtypeIsFloat(outputTensor.dtype): - # No compliance for error tests or integer tests currently + if errorName or not gtu.dtypeIsSupportedByCompliance(outputTensor.dtype): + # No compliance for error tests or other data types currently return None # Create compliance meta data for expected output tensor @@ -1099,9 +1111,12 @@ class TosaTestGen: self.ser.addOperator(op["op"], input_list, output_list, attr) - compliance = self.tensorComplianceMetaData( - op, args_dict, result_tensor, error_name - ) + if gtu.dtypeIsSupportedByCompliance(a.dtype): + compliance = self.tensorComplianceMetaData( + op, args_dict, result_tensor, error_name + ) + else: 
+ compliance = None return TosaTestGen.BuildInfo(result_tensor, compliance) diff --git a/verif/generator/tosa_utils.py b/verif/generator/tosa_utils.py index dddc320..14afaa7 100644 --- a/verif/generator/tosa_utils.py +++ b/verif/generator/tosa_utils.py @@ -55,8 +55,9 @@ class DataGenType(IntEnum): DG_DOT_PRODUCT_OPTIONAL_INFO = ("acc_type", "kernel", "axis") -def dtypeIsFloat(dtype): - return dtype in (DType.FP16, DType.BF16, DType.FP32) +def dtypeIsSupportedByCompliance(dtype): + """Types supported by the new data generation and compliance flow.""" + return dtype in (DType.FP32,) def valueToName(item, value): diff --git a/verif/generator/tosa_verif_build_tests.py b/verif/generator/tosa_verif_build_tests.py index 0d532c0..954c6e9 100644 --- a/verif/generator/tosa_verif_build_tests.py +++ b/verif/generator/tosa_verif_build_tests.py @@ -3,7 +3,9 @@ import argparse import re import sys +from pathlib import Path +import conformance.model_files as cmf from generator.tosa_test_gen import TosaTestGen from serializer.tosa_serializer import dtype_str_to_val from serializer.tosa_serializer import DTypeNames @@ -86,6 +88,13 @@ def parseArgs(argv): help="Tensor data generation is delayed til test running", ) + parser.add_argument( + "--generate-lib-path", + dest="generate_lib_path", + type=Path, + help="Path to TOSA generate library.", + ) + # Constraints on tests tens_group.add_argument( "--tensor-dim-range", @@ -268,6 +277,17 @@ def main(argv=None): args = parseArgs(argv) + if not args.lazy_data_gen: + if args.generate_lib_path is None: + args.generate_lib_path = cmf.find_tosa_file( + cmf.TosaFileType.GENERATE_LIBRARY, Path("reference_model"), False + ) + if not args.generate_lib_path.is_file(): + print( + f"Argument error: Generate library (--generate-lib-path) not found - {str(args.generate_lib_path)}" + ) + exit(2) + ttg = TosaTestGen(args) if args.test_type == "both": -- cgit v1.2.1