aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Johan Alfven <johan.alfven@arm.com> 2024-05-20 11:31:41 +0200
committer: Johan Alfven <johan.alfven@arm.com> 2024-05-20 12:21:07 +0200
commit: 722f4bfe837aa33e69b60c1f12e14ab241d1cb0a (patch)
tree: d7d956cb35c0fb87c93324a97063a3077ed9bfe2
parent: be78a053a57da7bdae240690c933824c0861f55b (diff)
download: ethos-u-vela-722f4bfe837aa33e69b60c1f12e14ab241d1cb0a.tar.gz
MLBEDSW-9088: Update to concat grouping patch
- Fix performance regression caused by the concat grouping fix.
- If there is no CPU op interfering, there is no need to group the AvgPool ops. Keep the old compiler behavior for that use case.

Change-Id: I6476585d7dedff0b9edd8b9c300a71c181496cf1
Signed-off-by: Johan Alfven <johan.alfven@arm.com>
-rw-r--r-- ethosu/vela/pass_packing.py 10
1 file changed, 7 insertions, 3 deletions
diff --git a/ethosu/vela/pass_packing.py b/ethosu/vela/pass_packing.py
index 66f7ffb3..f157e67b 100644
--- a/ethosu/vela/pass_packing.py
+++ b/ethosu/vela/pass_packing.py
@@ -525,17 +525,21 @@ def pack_into_passes(nng, arch, verbose_packing=False):
pass_list_top = sorted(pass_list_top, key=lambda ps: -1 if ps.ops[0].op_index is None else ps.ops[0].op_index)
# A concat is implemented by several AvgPool ops writing to the same ofm but with slice offset
- # Group all AvgPool ops for a concat so that they run in one sequence (within the same cmd stream)
+ # If there is a cpu op in between, group all AvgPool ops for a concat so that they run
+ # within the same cmd stream
last_idx = len(pass_list) - 1
for npu_ps in reversed(pass_list):
if npu_ps.placement == PassPlacement.Cpu or not npu_ps.ops[0].original_type.is_concat_op():
continue
# Concat pass found, search forward for the next avgpool op writing to the same ofm
idx = pass_list.index(npu_ps)
+ concat_is_split_between_npu_ops = False
for next_ps in pass_list[idx + 1 :]:
+ if next_ps.placement == PassPlacement.Cpu:
+ concat_is_split_between_npu_ops = True
next_is_concat = next_ps.ops[0].original_type.is_concat_op()
- if next_is_concat and next_ps.ops[0].ofm == npu_ps.ops[0].ofm:
- # Avgpool writing to the same OFM, group them
+ if next_is_concat and next_ps.ops[0].ofm == npu_ps.ops[0].ofm and concat_is_split_between_npu_ops:
+ # Avgpool writing to the same OFM and there is a cpu op between them, group them
pass_list.remove(npu_ps)
insert_index = pass_list.index(next_ps)
pass_list.insert(insert_index, npu_ps)