-rw-r--r--  ethosu/vela/tflite_reader.py |  73
-rw-r--r--  ethosu/vela/vela.py          | 502
2 files changed, 303 insertions, 272 deletions
diff --git a/ethosu/vela/tflite_reader.py b/ethosu/vela/tflite_reader.py
index daea1bf8..b47177f7 100644
--- a/ethosu/vela/tflite_reader.py
+++ b/ethosu/vela/tflite_reader.py
@@ -16,6 +16,8 @@
# Description:
# Functions used to read from a TensorFlow Lite format file.
import os.path
+import struct
+import sys
import numpy as np
@@ -235,34 +237,49 @@ class TFLiteGraph:
with open(filename, "rb") as f:
buf = bytearray(f.read())
- model = Model.GetRootAsModel(buf, 0)
-
- self.buffers = []
- for idx in range(model.BuffersLength()):
- self.buffers.append(self.parse_buffer(model.Buffers(idx)))
-
- self.operator_codes = []
- for idx in range(model.OperatorCodesLength()):
- self.operator_codes.append(self.parse_operator_code(model.OperatorCodes(idx)))
-
- self.subgraphs = []
- for idx in range(model.SubgraphsLength()):
- self.subgraphs.append(TFLiteSubgraph(self, model.Subgraphs(idx)))
-
- self.nng = Graph(self.name, self.batch_size)
- for tflite_sg in self.subgraphs:
- sg = Subgraph(tflite_sg.name)
- sg.original_inputs = tflite_sg.inputs # Preserve the original input order
- sg.output_tensors = tflite_sg.outputs
- self.nng.subgraphs.append(sg)
-
- # Preserve the original metadata
- for idx in range(model.MetadataLength()):
- meta = model.Metadata(idx)
- name = meta.Name()
- if name is not None:
- buf_data = self.buffers[meta.Buffer()]
- self.nng.metadata.append((name, buf_data))
+ try:
+ parsing_step = "parsing root"
+ model = Model.GetRootAsModel(buf, 0)
+
+ parsing_step = "parsing buffers length"
+ self.buffers = []
+ for idx in range(model.BuffersLength()):
+ parsing_step = f"parsing buffer {idx}"
+ self.buffers.append(self.parse_buffer(model.Buffers(idx)))
+
+ parsing_step = "parsing operator codes length"
+ self.operator_codes = []
+ for idx in range(model.OperatorCodesLength()):
+ parsing_step = f"parsing operator code {idx}"
+ self.operator_codes.append(self.parse_operator_code(model.OperatorCodes(idx)))
+
+ parsing_step = "parsing subgraphs length"
+ self.subgraphs = []
+ for idx in range(model.SubgraphsLength()):
+ parsing_step = f"parsing subgraph {idx}"
+ self.subgraphs.append(TFLiteSubgraph(self, model.Subgraphs(idx)))
+
+ self.nng = Graph(self.name, self.batch_size)
+ for tflite_sg in self.subgraphs:
+ sg = Subgraph(tflite_sg.name)
+ sg.original_inputs = tflite_sg.inputs # Preserve the original input order
+ sg.output_tensors = tflite_sg.outputs
+ self.nng.subgraphs.append(sg)
+
+ parsing_step = "parsing metadata length"
+ # Preserve the original metadata
+ for idx in range(model.MetadataLength()):
+ parsing_step = f"parsing metadata {idx}"
+ meta = model.Metadata(idx)
+ parsing_step = f"parsing metadata name of metadata {idx}"
+ name = meta.Name()
+ if name is not None:
+ parsing_step = f"parsing metadata {idx} ({name})"
+ buf_data = self.buffers[meta.Buffer()]
+ self.nng.metadata.append((name, buf_data))
+ except (struct.error, TypeError, RuntimeError) as e:
+ print(f'Error: Invalid tflite file. Got "{e}" while {parsing_step}.')
+ sys.exit(1)
def parse_buffer(self, buf_data):
if buf_data.DataLength() == 0:
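
Note: the hunk above keeps a parsing_step string that is updated immediately before every FlatBuffer access, so a truncated or otherwise invalid .tflite file produces a one-line error naming the stage that failed instead of an unhandled struct.error/TypeError/RuntimeError traceback. A minimal, self-contained sketch of the same pattern follows; parse_with_context and the placeholder steps are hypothetical stand-ins for the Model.* accesses above, not part of the Vela code.

import struct
import sys

def parse_with_context(steps):
    # steps is a list of (description, zero-argument callable); the last
    # description assigned before an exception tells us where parsing failed.
    parsing_step = "starting"
    try:
        for parsing_step, action in steps:
            action()
    except (struct.error, TypeError, RuntimeError) as e:
        print(f'Error: Invalid tflite file. Got "{e}" while {parsing_step}.')
        sys.exit(1)

# Hypothetical usage mirroring the hunk above: one step per FlatBuffer access.
parse_with_context(
    [
        ("parsing root", lambda: None),
        ("parsing buffers length", lambda: None),
        ("parsing buffer 0", lambda: None),
    ]
)
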
diff --git a/ethosu/vela/vela.py b/ethosu/vela/vela.py
index 64a3381a..c9551861 100644
--- a/ethosu/vela/vela.py
+++ b/ethosu/vela/vela.py
@@ -35,6 +35,7 @@ from ._version import __version__
from .api import API_VERSION
from .debug_database import DebugDatabase
from .errors import InputFileError
+from .errors import VelaError
from .nn_graph import PassPlacement
from .nn_graph import TensorAllocator
from .scheduler import ParetoMetric
@@ -221,254 +222,267 @@ def generate_supported_ops():
def main(args=None):
- if args is None:
- args = sys.argv[1:]
-
- parser = argparse.ArgumentParser(prog="vela", description="Neural network model compiler for Arm Ethos-U NPUs")
- parser.add_argument("--version", action="version", version=__version__)
- parser.add_argument(
- "--api-version", action="version", version=API_VERSION, help="Displays the version of the external API."
- )
- parser.add_argument(
- "--supported-ops-report",
- action="store_true",
- help="Generate the SUPPORTED_OPS.md file in the current working directory and exit",
- )
-
- # set network nargs to be optional to allow the support-ops-report CLI option to be used standalone
- parser.add_argument(
- "network",
- metavar="NETWORK",
- type=str,
- default=None,
- nargs="?",
- help="Filename of the input TensorFlow Lite for Microcontrollers network",
- )
- parser.add_argument(
- "--output-dir", type=str, default="output", help="Output directory to write files to (default: %(default)s)"
- )
- parser.add_argument(
- "--enable-debug-db",
- action="store_true",
- default=None,
- help="Enables the calculation and writing of a network debug database to output directory",
- )
- parser.add_argument(
- "--config", type=str, action="append", help="Vela configuration file(s) in Python ConfigParser .ini file format"
- )
- parser.add_argument("--verbose-all", action="store_true", help="Enable all verbose options")
- parser.add_argument("--verbose-config", action="store_true", help="Verbose system configuration and memory mode")
- parser.add_argument("--verbose-graph", action="store_true", help="Verbose graph rewriter")
- parser.add_argument("--verbose-quantization", action="store_true", help="Verbose quantization")
- parser.add_argument("--verbose-packing", action="store_true", help="Verbose pass packing")
- parser.add_argument("--verbose-tensor-purpose", action="store_true", help="Verbose tensor purpose")
- parser.add_argument("--verbose-tensor-format", action="store_true", help="Verbose tensor format")
- parser.add_argument("--verbose-schedule", action="store_true", help="Verbose schedule")
- parser.add_argument(
- "--verbose-pareto-frontier-schedules",
- action="store_true",
- help="Show all schedules along the pareto frontier of optimisation criteria",
- )
- parser.add_argument("--verbose-allocation", action="store_true", help="Verbose tensor allocation")
- parser.add_argument(
- "--verbose-high-level-command-stream", action="store_true", help="Verbose high level command stream"
- )
- parser.add_argument(
- "--verbose-register-command-stream", action="store_true", help="Verbose register command stream"
- )
- parser.add_argument("--verbose-operators", action="store_true", help="Verbose operator list")
- parser.add_argument(
- "--show-cpu-operations", action="store_true", help="Show the operations that fall back to the CPU"
- )
- parser.add_argument(
- "--cache-bias-scale-tensor",
- type=ast.literal_eval,
- default=True,
- choices=[True, False],
- help="Controls the caching of the bias & scale tensors in SRAM (default: %(default)s)",
- )
- parser.add_argument(
- "--cascading",
- type=ast.literal_eval,
- default=True,
- choices=[True, False],
- help="Controls the packing of multiple passes into a cascade (default: %(default)s)",
- )
- parser.add_argument("--force-block-config", type=str, default="", help="Force a specific block configuration WxHxC")
- parser.add_argument("--timing", action="store_true", help="Time the compiler doing operations")
- parser.add_argument(
- "--accelerator-config",
- type=str,
- default="ethos-u55-256",
- choices=list(architecture_features.Accelerator.member_list()),
- help="Accelerator configuration to use (default: %(default)s)",
- )
- parser.add_argument(
- "--system-config",
- type=str,
- default=architecture_features.ArchitectureFeatures.DEFAULT_CONFIG,
- help="System configuration to select from the Vela configuration file (default: %(default)s)",
- )
- parser.add_argument(
- "--memory-mode",
- type=str,
- default=architecture_features.ArchitectureFeatures.DEFAULT_CONFIG,
- help="Memory mode to select from the Vela configuration file (default: %(default)s)",
- )
- parser.add_argument(
- "--tensor-allocator",
- default=TensorAllocator.HillClimb,
- type=lambda s: TensorAllocator[s],
- choices=list(TensorAllocator),
- help="Tensor Allocator algorithm (default: %(default)s)",
- )
- parser.add_argument(
- "--show-subgraph-io-summary",
- action="store_true",
- help="Shows a summary of all the subgraphs and their inputs and outputs",
- )
- parser.add_argument(
- "--ifm-streaming",
- type=ast.literal_eval,
- default=True,
- choices=[True, False],
- help="Controls scheduler IFM streaming search (default: %(default)s)",
- )
- parser.add_argument(
- "--block-config-limit",
- type=int,
- default=16,
- help="Limit block config search space, use zero for unlimited (default: %(default)s)",
- )
- parser.add_argument(
- "--pareto-metric",
- default=ParetoMetric.BwCycMem,
- type=lambda s: ParetoMetric[s],
- choices=list(ParetoMetric),
- help="Controls the calculation of the pareto metric (default: %(default)s)",
- )
- parser.add_argument(
- "--recursion-limit",
- type=int,
- default=10000,
- help="Set the recursion depth limit, may result in RecursionError if too low (default: %(default)s)",
- )
- parser.add_argument(
- "--max-block-dependency",
- type=int,
- default=architecture_features.ArchitectureFeatures.MAX_BLOCKDEP,
- choices=range(0, architecture_features.ArchitectureFeatures.MAX_BLOCKDEP + 1),
- help=(
- "Set the maximum value that can be used for the block dependency between npu kernel operations"
- " (default: %(default)s)"
- ),
- )
- parser.add_argument(
- "--nhcwb16-between-cascaded-passes",
- type=ast.literal_eval,
- default=True,
- choices=[True, False],
- help="Control if NHCWB16 or NHWC should be used in between cascaded passes (default: %(default)s)",
- )
- parser.add_argument(
- "--weight-estimation-scaling",
- type=float,
- default=1.0,
- help=("Performs an additional scaling of weight compression scale estimate (default: %(default)s)"),
- )
- parser.add_argument(
- "--cpu-tensor-alignment",
- type=int,
- default=Tensor.AllocationQuantum,
- help=(
- "Controls the allocation byte alignment of cpu tensors including Ethos-U Custom operator inputs and outputs"
- " (default: %(default)s)"
- ),
- )
- args = parser.parse_args(args=args)
-
- # Generate the supported ops report and exit
- if args.supported_ops_report:
- generate_supported_ops()
- return 0
+ try:
+ if args is None:
+ args = sys.argv[1:]
+
+ parser = argparse.ArgumentParser(prog="vela", description="Neural network model compiler for Arm Ethos-U NPUs")
+ parser.add_argument("--version", action="version", version=__version__)
+ parser.add_argument(
+ "--api-version", action="version", version=API_VERSION, help="Displays the version of the external API."
+ )
+ parser.add_argument(
+ "--supported-ops-report",
+ action="store_true",
+ help="Generate the SUPPORTED_OPS.md file in the current working directory and exit",
+ )
- if args.network is None:
- parser.error("the following argument is required: NETWORK")
+ # set network nargs to be optional to allow the support-ops-report CLI option to be used standalone
+ parser.add_argument(
+ "network",
+ metavar="NETWORK",
+ type=str,
+ default=None,
+ nargs="?",
+ help="Filename of the input TensorFlow Lite for Microcontrollers network",
+ )
+ parser.add_argument(
+ "--output-dir", type=str, default="output", help="Output directory to write files to (default: %(default)s)"
+ )
+ parser.add_argument(
+ "--enable-debug-db",
+ action="store_true",
+ default=None,
+ help="Enables the calculation and writing of a network debug database to output directory",
+ )
+ parser.add_argument(
+ "--config",
+ type=str,
+ action="append",
+ help="Vela configuration file(s) in Python ConfigParser .ini file format",
+ )
+ parser.add_argument("--verbose-all", action="store_true", help="Enable all verbose options")
+ parser.add_argument(
+ "--verbose-config", action="store_true", help="Verbose system configuration and memory mode"
+ )
+ parser.add_argument("--verbose-graph", action="store_true", help="Verbose graph rewriter")
+ parser.add_argument("--verbose-quantization", action="store_true", help="Verbose quantization")
+ parser.add_argument("--verbose-packing", action="store_true", help="Verbose pass packing")
+ parser.add_argument("--verbose-tensor-purpose", action="store_true", help="Verbose tensor purpose")
+ parser.add_argument("--verbose-tensor-format", action="store_true", help="Verbose tensor format")
+ parser.add_argument("--verbose-schedule", action="store_true", help="Verbose schedule")
+ parser.add_argument(
+ "--verbose-pareto-frontier-schedules",
+ action="store_true",
+ help="Show all schedules along the pareto frontier of optimisation criteria",
+ )
+ parser.add_argument("--verbose-allocation", action="store_true", help="Verbose tensor allocation")
+ parser.add_argument(
+ "--verbose-high-level-command-stream", action="store_true", help="Verbose high level command stream"
+ )
+ parser.add_argument(
+ "--verbose-register-command-stream", action="store_true", help="Verbose register command stream"
+ )
+ parser.add_argument("--verbose-operators", action="store_true", help="Verbose operator list")
+ parser.add_argument(
+ "--show-cpu-operations", action="store_true", help="Show the operations that fall back to the CPU"
+ )
+ parser.add_argument(
+ "--cache-bias-scale-tensor",
+ type=ast.literal_eval,
+ default=True,
+ choices=[True, False],
+ help="Controls the caching of the bias & scale tensors in SRAM (default: %(default)s)",
+ )
+ parser.add_argument(
+ "--cascading",
+ type=ast.literal_eval,
+ default=True,
+ choices=[True, False],
+ help="Controls the packing of multiple passes into a cascade (default: %(default)s)",
+ )
+ parser.add_argument(
+ "--force-block-config", type=str, default="", help="Force a specific block configuration WxHxC"
+ )
+ parser.add_argument("--timing", action="store_true", help="Time the compiler doing operations")
+ parser.add_argument(
+ "--accelerator-config",
+ type=str,
+ default="ethos-u55-256",
+ choices=list(architecture_features.Accelerator.member_list()),
+ help="Accelerator configuration to use (default: %(default)s)",
+ )
+ parser.add_argument(
+ "--system-config",
+ type=str,
+ default=architecture_features.ArchitectureFeatures.DEFAULT_CONFIG,
+ help="System configuration to select from the Vela configuration file (default: %(default)s)",
+ )
+ parser.add_argument(
+ "--memory-mode",
+ type=str,
+ default=architecture_features.ArchitectureFeatures.DEFAULT_CONFIG,
+ help="Memory mode to select from the Vela configuration file (default: %(default)s)",
+ )
+ parser.add_argument(
+ "--tensor-allocator",
+ default=TensorAllocator.HillClimb,
+ type=lambda s: TensorAllocator[s],
+ choices=list(TensorAllocator),
+ help="Tensor Allocator algorithm (default: %(default)s)",
+ )
+ parser.add_argument(
+ "--show-subgraph-io-summary",
+ action="store_true",
+ help="Shows a summary of all the subgraphs and their inputs and outputs",
+ )
+ parser.add_argument(
+ "--ifm-streaming",
+ type=ast.literal_eval,
+ default=True,
+ choices=[True, False],
+ help="Controls scheduler IFM streaming search (default: %(default)s)",
+ )
+ parser.add_argument(
+ "--block-config-limit",
+ type=int,
+ default=16,
+ help="Limit block config search space, use zero for unlimited (default: %(default)s)",
+ )
+ parser.add_argument(
+ "--pareto-metric",
+ default=ParetoMetric.BwCycMem,
+ type=lambda s: ParetoMetric[s],
+ choices=list(ParetoMetric),
+ help="Controls the calculation of the pareto metric (default: %(default)s)",
+ )
+ parser.add_argument(
+ "--recursion-limit",
+ type=int,
+ default=10000,
+ help="Set the recursion depth limit, may result in RecursionError if too low (default: %(default)s)",
+ )
+ parser.add_argument(
+ "--max-block-dependency",
+ type=int,
+ default=architecture_features.ArchitectureFeatures.MAX_BLOCKDEP,
+ choices=range(0, architecture_features.ArchitectureFeatures.MAX_BLOCKDEP + 1),
+ help=(
+ "Set the maximum value that can be used for the block dependency between npu kernel operations"
+ " (default: %(default)s)"
+ ),
+ )
+ parser.add_argument(
+ "--nhcwb16-between-cascaded-passes",
+ type=ast.literal_eval,
+ default=True,
+ choices=[True, False],
+ help="Control if NHCWB16 or NHWC should be used in between cascaded passes (default: %(default)s)",
+ )
+ parser.add_argument(
+ "--weight-estimation-scaling",
+ type=float,
+ default=1.0,
+ help=("Performs an additional scaling of weight compression scale estimate (default: %(default)s)"),
+ )
+ parser.add_argument(
+ "--cpu-tensor-alignment",
+ type=int,
+ default=Tensor.AllocationQuantum,
+ help=(
+ "Controls the allocation byte alignment of cpu tensors including Ethos-U Custom"
+ " operator inputs and outputs (default: %(default)s)"
+ ),
+ )
+ args = parser.parse_args(args=args)
+
+ # Generate the supported ops report and exit
+ if args.supported_ops_report:
+ generate_supported_ops()
+ return 0
+
+ if args.network is None:
+ parser.error("the following argument is required: NETWORK")
+
+ # check all config files exist because they will be read as a group
+ if args.config is not None:
+ for filename in args.config:
+ if not os.access(filename, os.R_OK):
+ raise InputFileError(filename, "File not found or is not readable")
+
+ sys.setrecursionlimit(args.recursion_limit)
+
+ if args.force_block_config:
+ force_block_config = architecture_features.Block.from_string(args.force_block_config)
+ else:
+ force_block_config = None
+
+ if args.cpu_tensor_alignment < 16 or args.cpu_tensor_alignment & (args.cpu_tensor_alignment - 1) != 0:
+ parser.error(
+ "Invalid argument to --cpu-tensor-alignment = {} (must be greater than or equal to 16 and a power of 2)"
+ "".format(args.cpu_tensor_alignment)
+ )
+
+ if args.system_config == ArchitectureFeatures.DEFAULT_CONFIG:
+ print(f"Warning: Using {ArchitectureFeatures.DEFAULT_CONFIG} values for system configuration")
+
+ if args.memory_mode == ArchitectureFeatures.DEFAULT_CONFIG:
+ print(f"Warning: Using {ArchitectureFeatures.DEFAULT_CONFIG} values for memory mode")
+
+ if args.verbose_all:
+ for v in vars(args):
+ if v.startswith("verbose") and v != "verbose_all":
+ setattr(args, v, True)
+
+ arch = architecture_features.ArchitectureFeatures(
+ vela_config_files=args.config,
+ system_config=args.system_config,
+ memory_mode=args.memory_mode,
+ accelerator_config=args.accelerator_config,
+ override_block_config=force_block_config,
+ block_config_limit=args.block_config_limit,
+ max_blockdep=args.max_block_dependency,
+ weight_estimation_scaling=args.weight_estimation_scaling,
+ verbose_config=args.verbose_config,
+ )
- # check all config files exist because they will be read as a group
- if args.config is not None:
- for filename in args.config:
- if not os.access(filename, os.R_OK):
- raise InputFileError(filename, "File not found or is not readable")
+ compiler_options = compiler_driver.CompilerOptions(
+ verbose_graph=args.verbose_graph,
+ verbose_quantization=args.verbose_quantization,
+ verbose_packing=args.verbose_packing,
+ verbose_tensor_purpose=args.verbose_tensor_purpose,
+ verbose_tensor_format=args.verbose_tensor_format,
+ verbose_allocation=args.verbose_allocation,
+ verbose_high_level_command_stream=args.verbose_high_level_command_stream,
+ verbose_register_command_stream=args.verbose_register_command_stream,
+ verbose_operators=args.verbose_operators,
+ show_cpu_operations=args.show_cpu_operations,
+ tensor_allocator=args.tensor_allocator,
+ timing=args.timing,
+ output_dir=args.output_dir,
+ cpu_tensor_alignment=args.cpu_tensor_alignment,
+ )
- sys.setrecursionlimit(args.recursion_limit)
+ scheduler_options = scheduler.SchedulerOptions(
+ use_cascading=args.cascading,
+ verbose_schedule=args.verbose_schedule,
+ verbose_pareto_frontier_schedules=args.verbose_pareto_frontier_schedules,
+ use_ifm_streaming=args.ifm_streaming,
+ pareto_metric=args.pareto_metric,
+ use_nhcwb16_between_cascaded_passes=args.nhcwb16_between_cascaded_passes,
+ cache_bias_scale_tensor=args.cache_bias_scale_tensor,
+ )
- if args.force_block_config:
- force_block_config = architecture_features.Block.from_string(args.force_block_config)
- else:
- force_block_config = None
+ model_reader_options = model_reader.ModelReaderOptions()
- if args.cpu_tensor_alignment < 16 or args.cpu_tensor_alignment & (args.cpu_tensor_alignment - 1) != 0:
- parser.error(
- "Invalid argument to --cpu-tensor-alignment = {} (must be greater than or equal to 16 and a power of 2)"
- "".format(args.cpu_tensor_alignment)
+ nng = process(
+ args.network, args.enable_debug_db, arch, model_reader_options, compiler_options, scheduler_options
)
- if args.system_config == ArchitectureFeatures.DEFAULT_CONFIG:
- print(f"Warning: Using {ArchitectureFeatures.DEFAULT_CONFIG} values for system configuration")
-
- if args.memory_mode == ArchitectureFeatures.DEFAULT_CONFIG:
- print(f"Warning: Using {ArchitectureFeatures.DEFAULT_CONFIG} values for memory mode")
-
- if args.verbose_all:
- for v in vars(args):
- if v.startswith("verbose") and v != "verbose_all":
- setattr(args, v, True)
-
- arch = architecture_features.ArchitectureFeatures(
- vela_config_files=args.config,
- system_config=args.system_config,
- memory_mode=args.memory_mode,
- accelerator_config=args.accelerator_config,
- override_block_config=force_block_config,
- block_config_limit=args.block_config_limit,
- max_blockdep=args.max_block_dependency,
- weight_estimation_scaling=args.weight_estimation_scaling,
- verbose_config=args.verbose_config,
- )
-
- compiler_options = compiler_driver.CompilerOptions(
- verbose_graph=args.verbose_graph,
- verbose_quantization=args.verbose_quantization,
- verbose_packing=args.verbose_packing,
- verbose_tensor_purpose=args.verbose_tensor_purpose,
- verbose_tensor_format=args.verbose_tensor_format,
- verbose_allocation=args.verbose_allocation,
- verbose_high_level_command_stream=args.verbose_high_level_command_stream,
- verbose_register_command_stream=args.verbose_register_command_stream,
- verbose_operators=args.verbose_operators,
- show_cpu_operations=args.show_cpu_operations,
- tensor_allocator=args.tensor_allocator,
- timing=args.timing,
- output_dir=args.output_dir,
- cpu_tensor_alignment=args.cpu_tensor_alignment,
- )
-
- scheduler_options = scheduler.SchedulerOptions(
- use_cascading=args.cascading,
- verbose_schedule=args.verbose_schedule,
- verbose_pareto_frontier_schedules=args.verbose_pareto_frontier_schedules,
- use_ifm_streaming=args.ifm_streaming,
- pareto_metric=args.pareto_metric,
- use_nhcwb16_between_cascaded_passes=args.nhcwb16_between_cascaded_passes,
- cache_bias_scale_tensor=args.cache_bias_scale_tensor,
- )
-
- model_reader_options = model_reader.ModelReaderOptions()
-
- nng = process(args.network, args.enable_debug_db, arch, model_reader_options, compiler_options, scheduler_options)
-
- if args.show_subgraph_io_summary:
- print_subgraph_io_summary(nng)
-
- return 0
+ if args.show_subgraph_io_summary:
+ print_subgraph_io_summary(nng)
+
+ return 0
+ except VelaError as e:
+ print(e.data)
+ return 1
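
Note: with this change main() runs its whole body inside a try block and converts any VelaError raised during argument handling or compilation into a printed message (e.data) and exit status 1 rather than a Python traceback. A minimal sketch of that control flow, assuming only that the exception class carries its user-facing text in a .data attribute, as the except clause above implies; DummyVelaError and run_compiler are hypothetical names used for illustration.

import sys

class DummyVelaError(Exception):
    # Stand-in for ethosu.vela.errors.VelaError; the hunk above prints e.data,
    # so this stand-in stores its message there as well.
    def __init__(self, data):
        super().__init__(data)
        self.data = data

def main(run_compiler):
    try:
        run_compiler()  # argument parsing and compilation both happen inside the try block
        return 0
    except DummyVelaError as e:
        print(e.data)   # user-facing error text only, no traceback
        return 1

def run_compiler():
    raise DummyVelaError("Error: Invalid tflite file.")  # hypothetical failure path

if __name__ == "__main__":
    sys.exit(main(run_compiler))
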