about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Dwight Lidman <dwight.lidman@arm.com> 2021-10-11 16:39:10 +0200
committer: patrik.gustavsson <patrik.gustavsson@arm.com> 2021-10-13 07:33:27 +0000
commit: 62cdfe52b20bcf6817bf1e1b543689cf5f90fdec (patch)
tree: 6e8cab02c1dc8c981c19087245c5a0a6ec842630
parent: f53b3326d870e3af92978195271339318695713a (diff)
download: ethos-u-vela-62cdfe52b20bcf6817bf1e1b543689cf5f90fdec.tar.gz
MLBEDSW-5369: Fix crash for multiple CPU subgraphs
This commit fixes an issue where networks with multiple CPU-placed subgraphs would crash due to the assumption that the second subgraph is always placed on the NPU.

Signed-off-by: Dwight Lidman <dwight.lidman@arm.com>
Change-Id: Ib7e7a9e89d3b0f3a597cf80446f5eb8b132883a4
-rw-r--r-- ethosu/vela/compiler_driver.py | 40
1 files changed, 20 insertions, 20 deletions
diff --git a/ethosu/vela/compiler_driver.py b/ethosu/vela/compiler_driver.py
index cb47539..cf26eb3 100644
--- a/ethosu/vela/compiler_driver.py
+++ b/ethosu/vela/compiler_driver.py
@@ -188,18 +188,19 @@ def compiler_driver(nng, arch, options, scheduler_options, network_type):
scratch_fast_tens = None
flash_tens = None
+ # Create list of NPU subgraphs with same order as the list of all subgraphs
+ npu_subgraphs = [sg for sg in nng.subgraphs if sg.placement == PassPlacement.Npu]
+
# Calculate live ranges for all constant Npu tensors, in permanent storage
- for sg in nng.subgraphs:
- if sg.placement == PassPlacement.Npu:
- lr_graph_flash = live_range.create_linear_live_range_graph(
- sg, permanent_storage, MemType.Permanent_NPU, lr_graph=lr_graph_flash,
- )
+ for sg in npu_subgraphs:
+ lr_graph_flash = live_range.create_linear_live_range_graph(
+ sg, permanent_storage, MemType.Permanent_NPU, lr_graph=lr_graph_flash,
+ )
- if len(nng.subgraphs) > 1:
+ if npu_subgraphs:
# Allocate all Npu constant tensors to the first Npu subgraph since it is
# processed first during serialization into tensors
- first_npu_sg = nng.subgraphs[1]
- assert first_npu_sg.placement == PassPlacement.Npu
+ first_npu_sg = npu_subgraphs[0]
tensor_allocation.allocate_tensors(
nng,
first_npu_sg,
@@ -214,18 +215,17 @@ def compiler_driver(nng, arch, options, scheduler_options, network_type):
root_sg = nng.get_root_subgraph()
# Generate command streams and serialise Npu-ops into tensors
- for sg in nng.subgraphs:
- if sg.placement == PassPlacement.Npu:
- high_level_command_stream_generator.generate_high_level_command_stream_for_schedule(
- nng, sg, arch, options.verbose_high_level_command_stream
- )
- lut.optimize_high_level_cmd_stream(sg, arch)
- high_level_command_to_npu_op.generate_register_command_stream_for_sg(
- nng, sg, arch, options.verbose_register_command_stream
- )
- scratch_tens, scratch_fast_tens, flash_tens = npu_serialisation.serialise_npu_subgraph_into_tensors(
- sg, arch, scratch_tens, scratch_fast_tens, flash_tens
- )
+ for sg in npu_subgraphs:
+ high_level_command_stream_generator.generate_high_level_command_stream_for_schedule(
+ nng, sg, arch, options.verbose_high_level_command_stream
+ )
+ lut.optimize_high_level_cmd_stream(sg, arch)
+ high_level_command_to_npu_op.generate_register_command_stream_for_sg(
+ nng, sg, arch, options.verbose_register_command_stream
+ )
+ scratch_tens, scratch_fast_tens, flash_tens = npu_serialisation.serialise_npu_subgraph_into_tensors(
+ sg, arch, scratch_tens, scratch_fast_tens, flash_tens
+ )
npu_serialisation.rewrite_npu_call_ops(root_sg, arch)