diff options
-rw-r--r-- | ethosu/vela/tflite_reader.py | 73 | ||||
-rw-r--r-- | ethosu/vela/vela.py | 502 |
2 files changed, 303 insertions, 272 deletions
diff --git a/ethosu/vela/tflite_reader.py b/ethosu/vela/tflite_reader.py index daea1bf8..b47177f7 100644 --- a/ethosu/vela/tflite_reader.py +++ b/ethosu/vela/tflite_reader.py @@ -16,6 +16,8 @@ # Description: # Functions used to read from a TensorFlow Lite format file. import os.path +import struct +import sys import numpy as np @@ -235,34 +237,49 @@ class TFLiteGraph: with open(filename, "rb") as f: buf = bytearray(f.read()) - model = Model.GetRootAsModel(buf, 0) - - self.buffers = [] - for idx in range(model.BuffersLength()): - self.buffers.append(self.parse_buffer(model.Buffers(idx))) - - self.operator_codes = [] - for idx in range(model.OperatorCodesLength()): - self.operator_codes.append(self.parse_operator_code(model.OperatorCodes(idx))) - - self.subgraphs = [] - for idx in range(model.SubgraphsLength()): - self.subgraphs.append(TFLiteSubgraph(self, model.Subgraphs(idx))) - - self.nng = Graph(self.name, self.batch_size) - for tflite_sg in self.subgraphs: - sg = Subgraph(tflite_sg.name) - sg.original_inputs = tflite_sg.inputs # Preserve the original input order - sg.output_tensors = tflite_sg.outputs - self.nng.subgraphs.append(sg) - - # Preserve the original metadata - for idx in range(model.MetadataLength()): - meta = model.Metadata(idx) - name = meta.Name() - if name is not None: - buf_data = self.buffers[meta.Buffer()] - self.nng.metadata.append((name, buf_data)) + try: + parsing_step = "parsing root" + model = Model.GetRootAsModel(buf, 0) + + parsing_step = "parsing buffers length" + self.buffers = [] + for idx in range(model.BuffersLength()): + parsing_step = f"parsing buffer {idx}" + self.buffers.append(self.parse_buffer(model.Buffers(idx))) + + parsing_step = "parsing operator codes length" + self.operator_codes = [] + for idx in range(model.OperatorCodesLength()): + parsing_step = f"parsing operator code {idx}" + self.operator_codes.append(self.parse_operator_code(model.OperatorCodes(idx))) + + parsing_step = "parsing subgraphs length" + self.subgraphs = [] + for idx in range(model.SubgraphsLength()): + parsing_step = f"parsing subgraph {idx}" + self.subgraphs.append(TFLiteSubgraph(self, model.Subgraphs(idx))) + + self.nng = Graph(self.name, self.batch_size) + for tflite_sg in self.subgraphs: + sg = Subgraph(tflite_sg.name) + sg.original_inputs = tflite_sg.inputs # Preserve the original input order + sg.output_tensors = tflite_sg.outputs + self.nng.subgraphs.append(sg) + + parsing_step = "parsing metadata length" + # Preserve the original metadata + for idx in range(model.MetadataLength()): + parsing_step = f"parsing metadata {idx}" + meta = model.Metadata(idx) + parsing_step = f"parsing metadata name of metadata {idx}" + name = meta.Name() + if name is not None: + parsing_step = f"parsing metadata {idx} ({name})" + buf_data = self.buffers[meta.Buffer()] + self.nng.metadata.append((name, buf_data)) + except (struct.error, TypeError, RuntimeError) as e: + print(f'Error: Invalid tflite file. Got "{e}" while {parsing_step}.') + sys.exit(1) def parse_buffer(self, buf_data): if buf_data.DataLength() == 0: diff --git a/ethosu/vela/vela.py b/ethosu/vela/vela.py index 64a3381a..c9551861 100644 --- a/ethosu/vela/vela.py +++ b/ethosu/vela/vela.py @@ -35,6 +35,7 @@ from ._version import __version__ from .api import API_VERSION from .debug_database import DebugDatabase from .errors import InputFileError +from .errors import VelaError from .nn_graph import PassPlacement from .nn_graph import TensorAllocator from .scheduler import ParetoMetric @@ -221,254 +222,267 @@ def generate_supported_ops(): def main(args=None): - if args is None: - args = sys.argv[1:] - - parser = argparse.ArgumentParser(prog="vela", description="Neural network model compiler for Arm Ethos-U NPUs") - parser.add_argument("--version", action="version", version=__version__) - parser.add_argument( - "--api-version", action="version", version=API_VERSION, help="Displays the version of the external API." - ) - parser.add_argument( - "--supported-ops-report", - action="store_true", - help="Generate the SUPPORTED_OPS.md file in the current working directory and exit", - ) - - # set network nargs to be optional to allow the support-ops-report CLI option to be used standalone - parser.add_argument( - "network", - metavar="NETWORK", - type=str, - default=None, - nargs="?", - help="Filename of the input TensorFlow Lite for Microcontrollers network", - ) - parser.add_argument( - "--output-dir", type=str, default="output", help="Output directory to write files to (default: %(default)s)" - ) - parser.add_argument( - "--enable-debug-db", - action="store_true", - default=None, - help="Enables the calculation and writing of a network debug database to output directory", - ) - parser.add_argument( - "--config", type=str, action="append", help="Vela configuration file(s) in Python ConfigParser .ini file format" - ) - parser.add_argument("--verbose-all", action="store_true", help="Enable all verbose options") - parser.add_argument("--verbose-config", action="store_true", help="Verbose system configuration and memory mode") - parser.add_argument("--verbose-graph", action="store_true", help="Verbose graph rewriter") - parser.add_argument("--verbose-quantization", action="store_true", help="Verbose quantization") - parser.add_argument("--verbose-packing", action="store_true", help="Verbose pass packing") - parser.add_argument("--verbose-tensor-purpose", action="store_true", help="Verbose tensor purpose") - parser.add_argument("--verbose-tensor-format", action="store_true", help="Verbose tensor format") - parser.add_argument("--verbose-schedule", action="store_true", help="Verbose schedule") - parser.add_argument( - "--verbose-pareto-frontier-schedules", - action="store_true", - help="Show all schedules along the pareto frontier of optimisation criteria", - ) - parser.add_argument("--verbose-allocation", action="store_true", help="Verbose tensor allocation") - parser.add_argument( - "--verbose-high-level-command-stream", action="store_true", help="Verbose high level command stream" - ) - parser.add_argument( - "--verbose-register-command-stream", action="store_true", help="Verbose register command stream" - ) - parser.add_argument("--verbose-operators", action="store_true", help="Verbose operator list") - parser.add_argument( - "--show-cpu-operations", action="store_true", help="Show the operations that fall back to the CPU" - ) - parser.add_argument( - "--cache-bias-scale-tensor", - type=ast.literal_eval, - default=True, - choices=[True, False], - help="Controls the caching of the bias & scale tensors in SRAM (default: %(default)s)", - ) - parser.add_argument( - "--cascading", - type=ast.literal_eval, - default=True, - choices=[True, False], - help="Controls the packing of multiple passes into a cascade (default: %(default)s)", - ) - parser.add_argument("--force-block-config", type=str, default="", help="Force a specific block configuration WxHxC") - parser.add_argument("--timing", action="store_true", help="Time the compiler doing operations") - parser.add_argument( - "--accelerator-config", - type=str, - default="ethos-u55-256", - choices=list(architecture_features.Accelerator.member_list()), - help="Accelerator configuration to use (default: %(default)s)", - ) - parser.add_argument( - "--system-config", - type=str, - default=architecture_features.ArchitectureFeatures.DEFAULT_CONFIG, - help="System configuration to select from the Vela configuration file (default: %(default)s)", - ) - parser.add_argument( - "--memory-mode", - type=str, - default=architecture_features.ArchitectureFeatures.DEFAULT_CONFIG, - help="Memory mode to select from the Vela configuration file (default: %(default)s)", - ) - parser.add_argument( - "--tensor-allocator", - default=TensorAllocator.HillClimb, - type=lambda s: TensorAllocator[s], - choices=list(TensorAllocator), - help="Tensor Allocator algorithm (default: %(default)s)", - ) - parser.add_argument( - "--show-subgraph-io-summary", - action="store_true", - help="Shows a summary of all the subgraphs and their inputs and outputs", - ) - parser.add_argument( - "--ifm-streaming", - type=ast.literal_eval, - default=True, - choices=[True, False], - help="Controls scheduler IFM streaming search (default: %(default)s)", - ) - parser.add_argument( - "--block-config-limit", - type=int, - default=16, - help="Limit block config search space, use zero for unlimited (default: %(default)s)", - ) - parser.add_argument( - "--pareto-metric", - default=ParetoMetric.BwCycMem, - type=lambda s: ParetoMetric[s], - choices=list(ParetoMetric), - help="Controls the calculation of the pareto metric (default: %(default)s)", - ) - parser.add_argument( - "--recursion-limit", - type=int, - default=10000, - help="Set the recursion depth limit, may result in RecursionError if too low (default: %(default)s)", - ) - parser.add_argument( - "--max-block-dependency", - type=int, - default=architecture_features.ArchitectureFeatures.MAX_BLOCKDEP, - choices=range(0, architecture_features.ArchitectureFeatures.MAX_BLOCKDEP + 1), - help=( - "Set the maximum value that can be used for the block dependency between npu kernel operations" - " (default: %(default)s)" - ), - ) - parser.add_argument( - "--nhcwb16-between-cascaded-passes", - type=ast.literal_eval, - default=True, - choices=[True, False], - help="Control if NHCWB16 or NHWC should be used in between cascaded passes (default: %(default)s)", - ) - parser.add_argument( - "--weight-estimation-scaling", - type=float, - default=1.0, - help=("Performs an additional scaling of weight compression scale estimate (default: %(default)s)"), - ) - parser.add_argument( - "--cpu-tensor-alignment", - type=int, - default=Tensor.AllocationQuantum, - help=( - "Controls the allocation byte alignment of cpu tensors including Ethos-U Custom operator inputs and outputs" - " (default: %(default)s)" - ), - ) - args = parser.parse_args(args=args) - - # Generate the supported ops report and exit - if args.supported_ops_report: - generate_supported_ops() - return 0 + try: + if args is None: + args = sys.argv[1:] + + parser = argparse.ArgumentParser(prog="vela", description="Neural network model compiler for Arm Ethos-U NPUs") + parser.add_argument("--version", action="version", version=__version__) + parser.add_argument( + "--api-version", action="version", version=API_VERSION, help="Displays the version of the external API." + ) + parser.add_argument( + "--supported-ops-report", + action="store_true", + help="Generate the SUPPORTED_OPS.md file in the current working directory and exit", + ) - if args.network is None: - parser.error("the following argument is required: NETWORK") + # set network nargs to be optional to allow the support-ops-report CLI option to be used standalone + parser.add_argument( + "network", + metavar="NETWORK", + type=str, + default=None, + nargs="?", + help="Filename of the input TensorFlow Lite for Microcontrollers network", + ) + parser.add_argument( + "--output-dir", type=str, default="output", help="Output directory to write files to (default: %(default)s)" + ) + parser.add_argument( + "--enable-debug-db", + action="store_true", + default=None, + help="Enables the calculation and writing of a network debug database to output directory", + ) + parser.add_argument( + "--config", + type=str, + action="append", + help="Vela configuration file(s) in Python ConfigParser .ini file format", + ) + parser.add_argument("--verbose-all", action="store_true", help="Enable all verbose options") + parser.add_argument( + "--verbose-config", action="store_true", help="Verbose system configuration and memory mode" + ) + parser.add_argument("--verbose-graph", action="store_true", help="Verbose graph rewriter") + parser.add_argument("--verbose-quantization", action="store_true", help="Verbose quantization") + parser.add_argument("--verbose-packing", action="store_true", help="Verbose pass packing") + parser.add_argument("--verbose-tensor-purpose", action="store_true", help="Verbose tensor purpose") + parser.add_argument("--verbose-tensor-format", action="store_true", help="Verbose tensor format") + parser.add_argument("--verbose-schedule", action="store_true", help="Verbose schedule") + parser.add_argument( + "--verbose-pareto-frontier-schedules", + action="store_true", + help="Show all schedules along the pareto frontier of optimisation criteria", + ) + parser.add_argument("--verbose-allocation", action="store_true", help="Verbose tensor allocation") + parser.add_argument( + "--verbose-high-level-command-stream", action="store_true", help="Verbose high level command stream" + ) + parser.add_argument( + "--verbose-register-command-stream", action="store_true", help="Verbose register command stream" + ) + parser.add_argument("--verbose-operators", action="store_true", help="Verbose operator list") + parser.add_argument( + "--show-cpu-operations", action="store_true", help="Show the operations that fall back to the CPU" + ) + parser.add_argument( + "--cache-bias-scale-tensor", + type=ast.literal_eval, + default=True, + choices=[True, False], + help="Controls the caching of the bias & scale tensors in SRAM (default: %(default)s)", + ) + parser.add_argument( + "--cascading", + type=ast.literal_eval, + default=True, + choices=[True, False], + help="Controls the packing of multiple passes into a cascade (default: %(default)s)", + ) + parser.add_argument( + "--force-block-config", type=str, default="", help="Force a specific block configuration WxHxC" + ) + parser.add_argument("--timing", action="store_true", help="Time the compiler doing operations") + parser.add_argument( + "--accelerator-config", + type=str, + default="ethos-u55-256", + choices=list(architecture_features.Accelerator.member_list()), + help="Accelerator configuration to use (default: %(default)s)", + ) + parser.add_argument( + "--system-config", + type=str, + default=architecture_features.ArchitectureFeatures.DEFAULT_CONFIG, + help="System configuration to select from the Vela configuration file (default: %(default)s)", + ) + parser.add_argument( + "--memory-mode", + type=str, + default=architecture_features.ArchitectureFeatures.DEFAULT_CONFIG, + help="Memory mode to select from the Vela configuration file (default: %(default)s)", + ) + parser.add_argument( + "--tensor-allocator", + default=TensorAllocator.HillClimb, + type=lambda s: TensorAllocator[s], + choices=list(TensorAllocator), + help="Tensor Allocator algorithm (default: %(default)s)", + ) + parser.add_argument( + "--show-subgraph-io-summary", + action="store_true", + help="Shows a summary of all the subgraphs and their inputs and outputs", + ) + parser.add_argument( + "--ifm-streaming", + type=ast.literal_eval, + default=True, + choices=[True, False], + help="Controls scheduler IFM streaming search (default: %(default)s)", + ) + parser.add_argument( + "--block-config-limit", + type=int, + default=16, + help="Limit block config search space, use zero for unlimited (default: %(default)s)", + ) + parser.add_argument( + "--pareto-metric", + default=ParetoMetric.BwCycMem, + type=lambda s: ParetoMetric[s], + choices=list(ParetoMetric), + help="Controls the calculation of the pareto metric (default: %(default)s)", + ) + parser.add_argument( + "--recursion-limit", + type=int, + default=10000, + help="Set the recursion depth limit, may result in RecursionError if too low (default: %(default)s)", + ) + parser.add_argument( + "--max-block-dependency", + type=int, + default=architecture_features.ArchitectureFeatures.MAX_BLOCKDEP, + choices=range(0, architecture_features.ArchitectureFeatures.MAX_BLOCKDEP + 1), + help=( + "Set the maximum value that can be used for the block dependency between npu kernel operations" + " (default: %(default)s)" + ), + ) + parser.add_argument( + "--nhcwb16-between-cascaded-passes", + type=ast.literal_eval, + default=True, + choices=[True, False], + help="Control if NHCWB16 or NHWC should be used in between cascaded passes (default: %(default)s)", + ) + parser.add_argument( + "--weight-estimation-scaling", + type=float, + default=1.0, + help=("Performs an additional scaling of weight compression scale estimate (default: %(default)s)"), + ) + parser.add_argument( + "--cpu-tensor-alignment", + type=int, + default=Tensor.AllocationQuantum, + help=( + "Controls the allocation byte alignment of cpu tensors including Ethos-U Custom" + " operator inputs and outputs (default: %(default)s)" + ), + ) + args = parser.parse_args(args=args) + + # Generate the supported ops report and exit + if args.supported_ops_report: + generate_supported_ops() + return 0 + + if args.network is None: + parser.error("the following argument is required: NETWORK") + + # check all config files exist because they will be read as a group + if args.config is not None: + for filename in args.config: + if not os.access(filename, os.R_OK): + raise InputFileError(filename, "File not found or is not readable") + + sys.setrecursionlimit(args.recursion_limit) + + if args.force_block_config: + force_block_config = architecture_features.Block.from_string(args.force_block_config) + else: + force_block_config = None + + if args.cpu_tensor_alignment < 16 or args.cpu_tensor_alignment & (args.cpu_tensor_alignment - 1) != 0: + parser.error( + "Invalid argument to --cpu-tensor-alignment = {} (must be greater than or equal to 16 and a power of 2)" + "".format(args.cpu_tensor_alignment) + ) + + if args.system_config == ArchitectureFeatures.DEFAULT_CONFIG: + print(f"Warning: Using {ArchitectureFeatures.DEFAULT_CONFIG} values for system configuration") + + if args.memory_mode == ArchitectureFeatures.DEFAULT_CONFIG: + print(f"Warning: Using {ArchitectureFeatures.DEFAULT_CONFIG} values for memory mode") + + if args.verbose_all: + for v in vars(args): + if v.startswith("verbose") and v != "verbose_all": + setattr(args, v, True) + + arch = architecture_features.ArchitectureFeatures( + vela_config_files=args.config, + system_config=args.system_config, + memory_mode=args.memory_mode, + accelerator_config=args.accelerator_config, + override_block_config=force_block_config, + block_config_limit=args.block_config_limit, + max_blockdep=args.max_block_dependency, + weight_estimation_scaling=args.weight_estimation_scaling, + verbose_config=args.verbose_config, + ) - # check all config files exist because they will be read as a group - if args.config is not None: - for filename in args.config: - if not os.access(filename, os.R_OK): - raise InputFileError(filename, "File not found or is not readable") + compiler_options = compiler_driver.CompilerOptions( + verbose_graph=args.verbose_graph, + verbose_quantization=args.verbose_quantization, + verbose_packing=args.verbose_packing, + verbose_tensor_purpose=args.verbose_tensor_purpose, + verbose_tensor_format=args.verbose_tensor_format, + verbose_allocation=args.verbose_allocation, + verbose_high_level_command_stream=args.verbose_high_level_command_stream, + verbose_register_command_stream=args.verbose_register_command_stream, + verbose_operators=args.verbose_operators, + show_cpu_operations=args.show_cpu_operations, + tensor_allocator=args.tensor_allocator, + timing=args.timing, + output_dir=args.output_dir, + cpu_tensor_alignment=args.cpu_tensor_alignment, + ) - sys.setrecursionlimit(args.recursion_limit) + scheduler_options = scheduler.SchedulerOptions( + use_cascading=args.cascading, + verbose_schedule=args.verbose_schedule, + verbose_pareto_frontier_schedules=args.verbose_pareto_frontier_schedules, + use_ifm_streaming=args.ifm_streaming, + pareto_metric=args.pareto_metric, + use_nhcwb16_between_cascaded_passes=args.nhcwb16_between_cascaded_passes, + cache_bias_scale_tensor=args.cache_bias_scale_tensor, + ) - if args.force_block_config: - force_block_config = architecture_features.Block.from_string(args.force_block_config) - else: - force_block_config = None + model_reader_options = model_reader.ModelReaderOptions() - if args.cpu_tensor_alignment < 16 or args.cpu_tensor_alignment & (args.cpu_tensor_alignment - 1) != 0: - parser.error( - "Invalid argument to --cpu-tensor-alignment = {} (must be greater than or equal to 16 and a power of 2)" - "".format(args.cpu_tensor_alignment) + nng = process( + args.network, args.enable_debug_db, arch, model_reader_options, compiler_options, scheduler_options ) - if args.system_config == ArchitectureFeatures.DEFAULT_CONFIG: - print(f"Warning: Using {ArchitectureFeatures.DEFAULT_CONFIG} values for system configuration") - - if args.memory_mode == ArchitectureFeatures.DEFAULT_CONFIG: - print(f"Warning: Using {ArchitectureFeatures.DEFAULT_CONFIG} values for memory mode") - - if args.verbose_all: - for v in vars(args): - if v.startswith("verbose") and v != "verbose_all": - setattr(args, v, True) - - arch = architecture_features.ArchitectureFeatures( - vela_config_files=args.config, - system_config=args.system_config, - memory_mode=args.memory_mode, - accelerator_config=args.accelerator_config, - override_block_config=force_block_config, - block_config_limit=args.block_config_limit, - max_blockdep=args.max_block_dependency, - weight_estimation_scaling=args.weight_estimation_scaling, - verbose_config=args.verbose_config, - ) - - compiler_options = compiler_driver.CompilerOptions( - verbose_graph=args.verbose_graph, - verbose_quantization=args.verbose_quantization, - verbose_packing=args.verbose_packing, - verbose_tensor_purpose=args.verbose_tensor_purpose, - verbose_tensor_format=args.verbose_tensor_format, - verbose_allocation=args.verbose_allocation, - verbose_high_level_command_stream=args.verbose_high_level_command_stream, - verbose_register_command_stream=args.verbose_register_command_stream, - verbose_operators=args.verbose_operators, - show_cpu_operations=args.show_cpu_operations, - tensor_allocator=args.tensor_allocator, - timing=args.timing, - output_dir=args.output_dir, - cpu_tensor_alignment=args.cpu_tensor_alignment, - ) - - scheduler_options = scheduler.SchedulerOptions( - use_cascading=args.cascading, - verbose_schedule=args.verbose_schedule, - verbose_pareto_frontier_schedules=args.verbose_pareto_frontier_schedules, - use_ifm_streaming=args.ifm_streaming, - pareto_metric=args.pareto_metric, - use_nhcwb16_between_cascaded_passes=args.nhcwb16_between_cascaded_passes, - cache_bias_scale_tensor=args.cache_bias_scale_tensor, - ) - - model_reader_options = model_reader.ModelReaderOptions() - - nng = process(args.network, args.enable_debug_db, arch, model_reader_options, compiler_options, scheduler_options) - - if args.show_subgraph_io_summary: - print_subgraph_io_summary(nng) - - return 0 + if args.show_subgraph_io_summary: + print_subgraph_io_summary(nng) + + return 0 + except VelaError as e: + print(e.data) + return 1 |