# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the License); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Description:
# Main entry point for the Vela compiler.
#
# Provides the command line interface, options parsing, and network loading, before calling the compiler driver.
import argparse
import ast
import configparser
import os.path
import sys
import time

from . import architecture_features
from . import compiler_driver
from . import model_reader
from . import scheduler
from . import stats_writer
from . import tflite_writer
from ._version import __version__
from .debug_database import DebugDatabase
from .errors import InputFileError
from .nn_graph import PassPlacement
from .nn_graph import TensorAllocator
from .scheduler import ParetoMetric
from .tensor import MemArea
from .tensor import Tensor


def _enum_member_by_name(enum_cls):
    """Build an argparse ``type`` callable that looks up a member of enum_cls by name.

    argparse only turns ArgumentTypeError, TypeError and ValueError into a usage error. A plain
    ``enum_cls[name]`` lookup raises KeyError for an unknown name, which would escape as a traceback,
    so the KeyError is re-raised here as ArgumentTypeError.
    """

    def convert(name):
        try:
            return enum_cls[name]
        except KeyError:
            raise argparse.ArgumentTypeError(
                "invalid {0} value: {1!r}".format(enum_cls.__name__, name)
            ) from None

    return convert


def process(input_name, enable_debug_db, arch, model_reader_options, compiler_options, scheduler_options):
    """Read a network, run the compiler driver on it and write the result files.

    Files are written into compiler_options.output_dir (created if missing) and are named after the
    basename of input_name without its extension: two CSV metric files, a "_vela.tflite" file when
    input_name ends with ".tflite", and a "_debug.xml" file when enable_debug_db is truthy.

    Returns the graph object produced by the model reader.
    Raises InputFileError if the model reader returns a falsy result.
    """
    if compiler_options.timing:
        start = time.time()

    os.makedirs(compiler_options.output_dir, exist_ok=True)
    output_basename = os.path.join(compiler_options.output_dir, os.path.splitext(os.path.basename(input_name))[0])
    DebugDatabase.show_warnings = enable_debug_db

    nng = model_reader.read_model(input_name, model_reader_options)

    if not nng:
        raise InputFileError(input_name, "input file could not be read")

    if compiler_options.verbose_operators:
        nng.print_operators()

    if compiler_options.timing:
        stop = time.time()
        print("Model reading took %f s" % (stop - start))
        # Restart the timer; the second measurement below also covers writing the output files
        start = time.time()

    compiler_driver.compiler_driver(nng, arch, compiler_options, scheduler_options)

    passes_csv_file = "{0}_pass-breakdown_{1}.csv".format(output_basename, arch.system_config)
    stats_writer.write_pass_metrics_csv(nng, passes_csv_file)

    summary_csv_file = "{0}_summary_{1}.csv".format(output_basename, arch.system_config)
    stats_writer.write_summary_metrics_csv(nng, summary_csv_file, arch)

    stats_writer.print_performance_metrics(nng, show_cpu_operations=compiler_options.show_cpu_operations, arch=arch)

    output_filename = output_basename + "_vela.tflite"
    if input_name.endswith(".tflite"):
        tflite_writer.write_tflite(nng, output_filename)

    if enable_debug_db:
        debug_filename = output_basename + "_debug.xml"
        DebugDatabase.write(debug_filename, input_name, output_filename)

    if compiler_options.timing:
        stop = time.time()
        print("Compiler driver took %f s" % (stop - start))

    return nng


def print_subgraph_io_summary(nng):
    """Print a summary of all the input and output tensor sizes for all subgraphs.

    Subgraphs are visited in reverse order. For subgraphs placed on the NPU, each input tensor, the
    scratch tensor and each output tensor is listed with its size in KiB, followed by the total size
    and the SRAM memory used. The largest total over all subgraphs is printed last.
    """
    # NOTE(review): the leading spaces of the labels below are reproduced as found; confirm the
    # intended indentation width of the printed report.
    print("Subgraph IO Summary")
    print("-------------------")
    print("NNG: {0}".format(nng.name))
    max_sg_size = 0
    for sg in reversed(nng.subgraphs):
        print(" Subgraph: {0} = {1}".format(sg.name, sg.placement))
        sg_size = 0

        if sg.placement == PassPlacement.Npu:
            for tens in sg.input_tensors + [sg.scratch_tensor] + sg.output_tensors:
                # Anything that is neither an input nor an output (i.e. the scratch tensor) is "In/Out"
                if tens in sg.input_tensors:
                    tens_dir = "In"
                elif tens in sg.output_tensors:
                    tens_dir = "Out"
                else:
                    tens_dir = "In/Out"

                size = tens.elements() * tens.element_size() / 1024.0
                sg_size = sg_size + size
                print(" Tensor [{0}]: {1} = {2} KiB".format(tens_dir, tens.name, size))

            print(" Total Size = {0} KiB".format(sg_size))
            print(" SRAM Memory Used = {0} KiB".format(sg.memory_used.get(MemArea.Sram, 0) / 1024.0))
            max_sg_size = max(sg_size, max_sg_size)

    print(" Maximum Subgraph Size = {0} KiB".format(max_sg_size))


def main(args=None):
    """Command line entry point: parse the options, build the option objects and compile the network.

    args is the list of command line arguments to parse; when None, sys.argv[1:] is used.
    Invalid arguments are reported through the argument parser's error handling.
    Returns 0 on completion.
    """
    if args is None:
        args = sys.argv[1:]

    parser = argparse.ArgumentParser(prog="vela", description="Neural network model compiler for Ethos-U55")

    parser.add_argument(
        "network", metavar="NETWORK", type=str, default=None, nargs=None, help="Filename of network to process"
    )

    parser.add_argument("--version", action="version", version=__version__)
    parser.add_argument(
        "--output-dir", type=str, default="output", help="Output directory to write files to (default: %(default)s)"
    )
    parser.add_argument(
        "--enable-debug-db",
        action="store_true",
        default=None,
        help="Enables the calculation and writing of a network debug database to output directory",
    )
    parser.add_argument("--config", type=str, help="Location of vela configuration file")

    parser.add_argument("--verbose-graph", action="store_true", help="Verbose graph rewriter")
    parser.add_argument("--verbose-quantization", action="store_true", help="Verbose quantization")
    parser.add_argument("--verbose-packing", action="store_true", help="Verbose pass packing")
    parser.add_argument("--verbose-tensor-purpose", action="store_true", help="Verbose tensor purpose")
    parser.add_argument("--verbose-tensor-format", action="store_true", help="Verbose tensor format")
    parser.add_argument("--verbose-schedule", action="store_true", help="Verbose schedule")
    parser.add_argument(
        "--verbose-pareto-frontier-schedules",
        action="store_true",
        help="Show all schedules along the pareto frontier of optimisation criteria",
    )
    parser.add_argument("--verbose-allocation", action="store_true", help="Verbose tensor allocation")
    parser.add_argument(
        "--verbose-high-level-command-stream", action="store_true", help="Verbose high level command stream"
    )
    parser.add_argument(
        "--verbose-register-command-stream", action="store_true", help="Verbose register command stream"
    )
    parser.add_argument("--verbose-operators", action="store_true", help="Verbose operator list")

    parser.add_argument(
        "--show-minimum-possible-allocation", action="store_true", help="Show the minimum possible allocation"
    )
    parser.add_argument(
        "--show-cpu-operations", action="store_true", help="Show the operations that fall back to the CPU"
    )
    parser.add_argument(
        "--keep-scale-placement", action="store_true", help="Keep scale tensors memory placement during scheduling"
    )
    parser.add_argument(
        "--cascading",
        type=ast.literal_eval,
        default=True,
        choices=[True, False],
        help="Controls the packing of multiple passes into a cascade (default: %(default)s)",
    )
    parser.add_argument("--force-block-config", type=str, default="", help="Force a specific block configuration HxWxC")
    parser.add_argument("--timing", action="store_true", help="Time the compiler doing operations")
    parser.add_argument(
        "--accelerator-config",
        type=str,
        default="ethos-u55-256",
        choices=list(architecture_features.Accelerator.member_list()),
        help="Accelerator configuration to use (default: %(default)s)",
    )
    parser.add_argument(
        "--system-config",
        type=str,
        default="internal-default",
        help="System configuration to use (default: %(default)s)",
    )
    parser.add_argument(
        "--tensor-allocator",
        default=TensorAllocator.Greedy,
        # Name lookup that reports unknown names as a usage error rather than a KeyError traceback
        type=_enum_member_by_name(TensorAllocator),
        choices=list(TensorAllocator),
        help="Tensor Allocator algorithm (default: %(default)s)",
    )
    parser.add_argument(
        "--show-subgraph-io-summary",
        action="store_true",
        help="Shows a summary of all the subgraphs and their inputs and outputs",
    )
    parser.add_argument(
        "--ifm-streaming",
        type=ast.literal_eval,
        default=True,
        choices=[True, False],
        help="Controls scheduler IFM streaming search (default: %(default)s)",
    )
    parser.add_argument(
        "--block-config-limit",
        type=int,
        default=16,
        help="Limit block config search space, use zero for unlimited (default: %(default)s)",
    )
    parser.add_argument(
        "--global-memory-clock-scale",
        type=float,
        default=1.0,
        help=(
            "Performs an additional scaling of the individual memory clock scales specified by the system config "
            "(default: %(default)s)"
        ),
    )
    parser.add_argument(
        "--pareto-metric",
        default=ParetoMetric.BwCycMem,
        # Name lookup that reports unknown names as a usage error rather than a KeyError traceback
        type=_enum_member_by_name(ParetoMetric),
        choices=list(ParetoMetric),
        help="Controls the calculation of the pareto metric (default: %(default)s)",
    )
    parser.add_argument(
        "--recursion-limit",
        type=int,
        default=10000,
        help="Set the recursion depth limit, may result in RecursionError if too low (default: %(default)s)",
    )
    parser.add_argument(
        "--max-block-dependency",
        type=int,
        default=architecture_features.ArchitectureFeatures.MAX_BLOCKDEP,
        choices=range(0, architecture_features.ArchitectureFeatures.MAX_BLOCKDEP + 1),
        help=(
            "Set the maximum value that can be used for the block dependency between npu kernel operations "
            "(default: %(default)s)"
        ),
    )
    parser.add_argument(
        "--nhcwb16-between-cascaded-passes",
        type=ast.literal_eval,
        default=True,
        choices=[True, False],
        help="Control if NHCWB16 or NHWC should be used in between cascaded passes (default: %(default)s)",
    )
    parser.add_argument(
        "--weight-estimation-scaling",
        type=float,
        default=1.0,
        help=("Performs an additional scaling of weight compression scale estimate (default: %(default)s)"),
    )
    parser.add_argument(
        "--allocation-alignment",
        type=int,
        default=Tensor.AllocationQuantum,
        help=("Controls the allocation byte alignment of cpu tensors (default: %(default)s)"),
    )
    args = parser.parse_args(args=args)

    # Read configuration file
    config_file = args.config
    config = None
    if config_file is not None:
        with open(config_file) as f:
            config = configparser.ConfigParser()
            config.read_file(f)

    # Defensive check: the NETWORK positional is already mandatory for the parser as declared above
    if args.network is None:
        parser.error("the following argument is required: NETWORK")

    sys.setrecursionlimit(args.recursion_limit)

    if args.force_block_config:
        force_block_config = architecture_features.Block.from_string(args.force_block_config)
    else:
        force_block_config = None

    alignment = args.allocation_alignment
    if alignment < 16:
        parser.error("the following argument needs to be greater or equal to 16: ALLOCATION_ALIGNMENT")
    # A power of two has a single bit set, so clearing its lowest set bit must leave zero
    if alignment & (alignment - 1) != 0:
        parser.error("the following argument needs to be a power of 2: ALLOCATION_ALIGNMENT")

    arch = architecture_features.ArchitectureFeatures(
        vela_config=config,
        system_config=args.system_config,
        accelerator_config=args.accelerator_config,
        override_block_config=force_block_config,
        block_config_limit=args.block_config_limit,
        global_memory_clock_scale=args.global_memory_clock_scale,
        max_blockdep=args.max_block_dependency,
        weight_estimation_scaling=args.weight_estimation_scaling,
    )

    compiler_options = compiler_driver.CompilerOptions(
        verbose_graph=args.verbose_graph,
        verbose_quantization=args.verbose_quantization,
        verbose_packing=args.verbose_packing,
        verbose_tensor_purpose=args.verbose_tensor_purpose,
        verbose_tensor_format=args.verbose_tensor_format,
        verbose_allocation=args.verbose_allocation,
        verbose_high_level_command_stream=args.verbose_high_level_command_stream,
        verbose_register_command_stream=args.verbose_register_command_stream,
        verbose_operators=args.verbose_operators,
        show_minimum_possible_allocation=args.show_minimum_possible_allocation,
        show_cpu_operations=args.show_cpu_operations,
        tensor_allocator=args.tensor_allocator,
        timing=args.timing,
        output_dir=args.output_dir,
        allocation_alignment=alignment,
    )

    scheduler_options = scheduler.SchedulerOptions(
        use_cascading=args.cascading,
        verbose_schedule=args.verbose_schedule,
        verbose_pareto_frontier_schedules=args.verbose_pareto_frontier_schedules,
        use_ifm_streaming=args.ifm_streaming,
        pareto_metric=args.pareto_metric,
        use_nhcwb16_between_cascaded_passes=args.nhcwb16_between_cascaded_passes,
        keep_scale_placement=args.keep_scale_placement,
    )

    model_reader_options = model_reader.ModelReaderOptions()

    nng = process(args.network, args.enable_debug_db, arch, model_reader_options, compiler_options, scheduler_options)

    if args.show_subgraph_io_summary:
        print_subgraph_io_summary(nng)

    return 0