diff options
Diffstat (limited to 'scripts/json2numpy/json2numpy.py')
-rw-r--r-- | scripts/json2numpy/json2numpy.py | 180 |
1 files changed, 180 insertions, 0 deletions
"""Conversion utility from binary numpy files to JSON and the reverse."""
# Copyright (c) 2021-2022, ARM Limited.
# SPDX-License-Identifier: Apache-2.0
import json
from pathlib import Path
from typing import Optional
from typing import Union

import numpy as np


class NumpyArrayEncoder(json.JSONEncoder):
    """A JSON encoder for Numpy data types.

    Pass as ``cls=NumpyArrayEncoder`` to ``json.dump``/``json.dumps`` so that
    numpy scalars and arrays nested in a structure are serialised as the
    equivalent plain Python values.
    """

    def default(self, obj):
        """Convert numpy scalars/arrays to plain Python values.

        Anything that is not a numpy bool, integer, float or array is handed
        to the base class, which raises ``TypeError``.
        """
        # np.bool_ is neither a Python bool nor an np.integer, so it needs
        # its own branch or the base class would reject it.
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)


def get_shape(t: Union[list, tuple]) -> list:
    """Get the shape of an N-Dimensional tensor held as nested lists/tuples.

    The shape is read by following the first element of each nesting level.
    A non-sequence (a scalar) has shape ``[]``; an empty sequence contributes
    a dimension of length 0, e.g. ``[]`` -> ``[0]`` and ``[[], []]`` ->
    ``[2, 0]``.
    """
    # TODO: validate shape is consistent for all rows and columns
    if not isinstance(t, (list, tuple)):
        return []
    if not t:
        # An empty sequence is still a dimension (of length 0); reporting no
        # dimension here would make the reshape in json_to_npy fail.
        return [0]
    return [len(t)] + get_shape(t[0])


def npy_to_json(n_path: Path, j_path: Optional[Path] = None):
    """Load a numpy data file and save it as a JSON file.

    The JSON document is an object with two keys: "type" (the numpy dtype
    name) and "data" (the array contents as nested lists).

    n_path: the Path to the numpy file
    j_path: the Path to the JSON file, if None, it is derived from n_path
    """
    if not j_path:
        j_path = n_path.parent / (n_path.stem + ".json")
    with open(n_path, "rb") as fd:
        data = np.load(fd)
    jdata = {
        "type": data.dtype.name,
        "data": data.tolist(),
    }
    with open(j_path, "w", encoding="utf-8") as fp:
        json.dump(jdata, fp, indent=2)


def json_to_npy(j_path: Path, n_path: Optional[Path] = None):
    """Load a JSON file and save it as a numpy data file.

    The JSON document must be an object with a "type" key (numpy dtype name)
    and a "data" key (nested lists), as written by npy_to_json.

    j_path: the Path to the JSON file
    n_path: the Path to the numpy file, if None, it is derived from j_path
    """
    if not n_path:
        n_path = j_path.parent / (j_path.stem + ".npy")
    with open(j_path, "rb") as fd:
        jdata = json.load(fd)
    raw_data = jdata["data"]
    raw_type = jdata["type"]
    shape = get_shape(raw_data)
    data = np.asarray(raw_data).reshape(shape).astype(raw_type)
    with open(n_path, "wb") as fd:
        np.save(fd, data)


# ------------------------------------------------------------------------------


def test():
    """Test conversion routines.

    Round-trips a small tensor of every supported dtype through
    numpy -> JSON -> numpy using files in the current working directory.
    Returns 0 on success; a failed check raises AssertionError.
    """
    shape = [2, 3, 4]
    elements = 1
    for i in shape:
        elements *= i

    # file names
    n_path = Path("data.npy")
    j_path = Path("data.json")
    j2n_path = Path("data_j2n.npy")

    datatypes = [
        np.bool_,
        np.int8,
        np.int16,
        np.int32,
        np.int64,
        np.uint8,
        np.uint16,
        np.uint32,
        np.uint64,
        np.float16,
        np.float32,
        np.float64,
        # np.float128,
        # np.complex64,
        # np.complex128,
        # np.complex256,
        # np.datetime64,
        # np.str,
    ]

    for data_type in datatypes:
        dt = np.dtype(data_type)
        print(data_type, dt, dt.char, dt.num, dt.name, dt.str)

        # create a tensor of the given shape
        tensor = np.arange(elements).reshape(shape).astype(data_type)
        # print(tensor)

        # save the tensor in a binary numpy file
        with open(n_path, "wb") as fd:
            np.save(fd, tensor)

        # read back the numpy file for verification
        with open(n_path, "rb") as fd:
            tensor1 = np.load(fd)

        # confirm the loaded tensor matches the original
        assert tensor.shape == tensor1.shape
        assert tensor.dtype == tensor1.dtype
        assert (tensor == tensor1).all()

        # convert the numpy file to json
        npy_to_json(n_path, j_path)

        # convert the json file to numpy
        json_to_npy(j_path, j2n_path)

        # read back the json-to-numpy file for verification
        with open(j2n_path, "rb") as fd:
            tensor1 = np.load(fd)

        # confirm the loaded tensor matches the original
        assert tensor.shape == tensor1.shape
        assert tensor.dtype == tensor1.dtype
        assert (tensor == tensor1).all()

    # delete the files, if no problems were found
    # they are left for debugging if any of the asserts failed
    n_path.unlink()
    j_path.unlink()
    j2n_path.unlink()
    return 0


def main(argv=None):
    """Load and convert supplied file based on file suffix.

    argv: the argument list to parse; None means use sys.argv[1:]

    Returns the process exit status: 0 on success, 2 when the path is not a
    file or has a suffix other than ".npy" or ".json".
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "path", type=Path, help="the path to the file to convert, or 'test'"
    )
    args = parser.parse_args(argv)
    path = args.path
    if str(path) == "test":
        print("test")
        return test()

    if not path.is_file():
        print(f"Invalid file - {path}")
        return 2

    if path.suffix == ".npy":
        npy_to_json(path)
    elif path.suffix == ".json":
        json_to_npy(path)
    else:
        print(f"Unknown file type - {path.suffix}")
        return 2

    return 0


if __name__ == "__main__":
    # SystemExit works even where the site-provided exit() helper is absent.
    raise SystemExit(main())