diff options
author | Dwight Lidman <dwight.lidman@arm.com> | 2021-10-11 16:39:10 +0200 |
---|---|---|
committer | patrik.gustavsson <patrik.gustavsson@arm.com> | 2021-10-13 07:33:27 +0000 |
commit | 62cdfe52b20bcf6817bf1e1b543689cf5f90fdec (patch) | |
tree | 6e8cab02c1dc8c981c19087245c5a0a6ec842630 | |
parent | f53b3326d870e3af92978195271339318695713a (diff) | |
download | ethos-u-vela-62cdfe52b20bcf6817bf1e1b543689cf5f90fdec.tar.gz |
MLBEDSW-5369: Fix crash for multiple CPU subgraphs
This commit fixes an issue where networks with
multiple CPU-placed subgraphs would crash due
to the assumption that the second subgraph is
always placed on NPU.
Signed-off-by: Dwight Lidman <dwight.lidman@arm.com>
Change-Id: Ib7e7a9e89d3b0f3a597cf80446f5eb8b132883a4
-rw-r--r-- | ethosu/vela/compiler_driver.py | 40 |
1 files changed, 20 insertions, 20 deletions
```diff
diff --git a/ethosu/vela/compiler_driver.py b/ethosu/vela/compiler_driver.py
index cb475398..cf26eb3b 100644
--- a/ethosu/vela/compiler_driver.py
+++ b/ethosu/vela/compiler_driver.py
@@ -188,18 +188,19 @@ def compiler_driver(nng, arch, options, scheduler_options, network_type):
     scratch_fast_tens = None
     flash_tens = None
 
+    # Create list of NPU subgraphs with same order as the list of all subgraphs
+    npu_subgraphs = [sg for sg in nng.subgraphs if sg.placement == PassPlacement.Npu]
+
     # Calculate live ranges for all constant Npu tensors, in permanent storage
-    for sg in nng.subgraphs:
-        if sg.placement == PassPlacement.Npu:
-            lr_graph_flash = live_range.create_linear_live_range_graph(
-                sg, permanent_storage, MemType.Permanent_NPU, lr_graph=lr_graph_flash,
-            )
+    for sg in npu_subgraphs:
+        lr_graph_flash = live_range.create_linear_live_range_graph(
+            sg, permanent_storage, MemType.Permanent_NPU, lr_graph=lr_graph_flash,
+        )
 
-    if len(nng.subgraphs) > 1:
+    if npu_subgraphs:
         # Allocate all Npu constant tensors to the first Npu subgraph since it is
         # processed first during serialization into tensors
-        first_npu_sg = nng.subgraphs[1]
-        assert first_npu_sg.placement == PassPlacement.Npu
+        first_npu_sg = npu_subgraphs[0]
         tensor_allocation.allocate_tensors(
             nng,
             first_npu_sg,
@@ -214,18 +215,17 @@ def compiler_driver(nng, arch, options, scheduler_options, network_type):
     root_sg = nng.get_root_subgraph()
 
     # Generate command streams and serialise Npu-ops into tensors
-    for sg in nng.subgraphs:
-        if sg.placement == PassPlacement.Npu:
-            high_level_command_stream_generator.generate_high_level_command_stream_for_schedule(
-                nng, sg, arch, options.verbose_high_level_command_stream
-            )
-            lut.optimize_high_level_cmd_stream(sg, arch)
-            high_level_command_to_npu_op.generate_register_command_stream_for_sg(
-                nng, sg, arch, options.verbose_register_command_stream
-            )
-            scratch_tens, scratch_fast_tens, flash_tens = npu_serialisation.serialise_npu_subgraph_into_tensors(
-                sg, arch, scratch_tens, scratch_fast_tens, flash_tens
-            )
+    for sg in npu_subgraphs:
+        high_level_command_stream_generator.generate_high_level_command_stream_for_schedule(
+            nng, sg, arch, options.verbose_high_level_command_stream
+        )
+        lut.optimize_high_level_cmd_stream(sg, arch)
+        high_level_command_to_npu_op.generate_register_command_stream_for_sg(
+            nng, sg, arch, options.verbose_register_command_stream
+        )
+        scratch_tens, scratch_fast_tens, flash_tens = npu_serialisation.serialise_npu_subgraph_into_tensors(
+            sg, arch, scratch_tens, scratch_fast_tens, flash_tens
+        )
 
     npu_serialisation.rewrite_npu_call_ops(root_sg, arch)
 
```