diff options
author | Johan Alfvén <johan.alfven@arm.com> | 2022-08-17 14:59:58 +0200 |
---|---|---|
committer | Johan Alfvén <johan.alfven@arm.com> | 2022-10-21 11:07:46 +0200 |
commit | 2a285fca30d13f6577ef3e8154aea24713d728a5 (patch) | |
tree | 00a37a5eaba8c28ee944bf5772b3e96ae59887c0 /ethosu | |
parent | 56a71b0108f43a1cb118b1e2fae902c31b2a9969 (diff) | |
download | ethos-u-vela-2a285fca30d13f6577ef3e8154aea24713d728a5.tar.gz |
MLBEDSW-6840: New stripe algo for optimize sub schedule
- The algorithm for trying out different stripes in order
to optimize a sub schedule/cascade has a problem: it
can split the initial cascade into several smaller cascades.
The problem with this is that it will increase IFM/OFM DRAM
bandwidth and performance will drop.
- Changed the stripe algorithm to prefer long cascades.
Signed-off-by: Johan Alfven <johan.alfven@arm.com>
Change-Id: I4f38b381597b7094819e9dd463aa1876e4e6bc62
Diffstat (limited to 'ethosu')
-rw-r--r-- | ethosu/vela/scheduler.py | 33 |
1 files changed, 21 insertions, 12 deletions
diff --git a/ethosu/vela/scheduler.py b/ethosu/vela/scheduler.py index 79cd6421..ec7380a6 100644 --- a/ethosu/vela/scheduler.py +++ b/ethosu/vela/scheduler.py @@ -1029,14 +1029,19 @@ class Scheduler: # Generate the possible stripings for the final Op in the sub-schedule final_ofm_shape = sub_schedule_ops[-1].ofm.shape + + # Skip testing the min stripe used in the MIN schedule since that will be used + # anyway if no new cascades are created below + last_op = sub_schedule_ops[-1] + min_stripe_h = sub_schedule.cost_map[last_op].stripe.height + 1 + possible_stripes = [ - final_ofm_shape.with_height(stripe_h) for stripe_h in range(1, final_ofm_shape.height // 2 + 1) + final_ofm_shape.with_height(stripe_h) for stripe_h in range(min_stripe_h, final_ofm_shape.height // 2 + 1) ] - # Propose different striping - the possible stripes are proposed similarly to a binary search + # Propose different striping best_schedule = None - iteration = 0 - while len(possible_stripes) > 1: - proposed_stripe = possible_stripes[len(possible_stripes) // 2] + max_nbr_of_cascades = 0 + for iteration, proposed_stripe in enumerate(possible_stripes): proposed_schedule = self.propose_schedule_striping( proposed_stripe, f"OPTIMIZED_{iteration}", buffered_sub_schedule ) @@ -1045,18 +1050,22 @@ class Scheduler: # Check if proposal fits proposed_schedule_mem_usage = self.estimate_schedule_memory_usage(proposed_schedule, non_local_mem_usage) - if (proposed_schedule_mem_usage) <= memory_limit: - # Remove all possible stripes smaller than this - possible_stripes = possible_stripes[len(possible_stripes) // 2 :] + + nbr_of_cascades = len(proposed_schedule.cascades) + + if iteration == 0: + # First iteration - used as limit to prevent splitting up the cascades + # Long cascades are better in order to reduce IFM/IFM dram bandwidth + max_nbr_of_cascades = nbr_of_cascades + + if (proposed_schedule_mem_usage) <= memory_limit and nbr_of_cascades <= max_nbr_of_cascades: best_schedule = proposed_schedule + if not 
proposed_schedule.cascades: # No cascading required - early exit break else: - # Proposal doesn't fit within the limit - remove all possible stripes larger than this - possible_stripes = possible_stripes[: len(possible_stripes) // 2] - - iteration += 1 + break return best_schedule |