diff options
-rw-r--r-- | ethosu/vela/high_level_command_stream_generator.py | 15 | ||||
-rw-r--r-- | ethosu/vela/scheduler.py | 4 |
2 files changed, 11 insertions, 8 deletions
diff --git a/ethosu/vela/high_level_command_stream_generator.py b/ethosu/vela/high_level_command_stream_generator.py index 95068081..a52bdc37 100644 --- a/ethosu/vela/high_level_command_stream_generator.py +++ b/ethosu/vela/high_level_command_stream_generator.py @@ -185,13 +185,14 @@ def generate_high_level_commands_for_sched_op(sched_op, schedule): if producer_op: assert op_info.cascade != 0 assert op_info.cascade == schedule.cost_map[producer_op].cascade - for prev_cmd in prev_cmd_gen: - yield prev_cmd - if prev_cmd.is_npu_pass_command() and prev_cmd.ps == producer_op.parent_ps: - ifm_present.end_coord = prev_cmd.ofm_box.end_coord - if ifm_required.is_subbox_of(ifm_present): - # There is enough IFM data - exit loop - break + if not ifm_required.is_subbox_of(ifm_present): + for prev_cmd in prev_cmd_gen: + yield prev_cmd + if prev_cmd.is_npu_pass_command() and prev_cmd.ps == producer_op.parent_ps: + ifm_present.end_coord = prev_cmd.ofm_box.end_coord + if ifm_required.is_subbox_of(ifm_present): + # There is enough IFM data - exit loop + break # Information about the current stripe's location in the cascade is_first_h_stripe = ofm_box_start.height == ofm_start.height diff --git a/ethosu/vela/scheduler.py b/ethosu/vela/scheduler.py index 7e989a7d..d01942bb 100644 --- a/ethosu/vela/scheduler.py +++ b/ethosu/vela/scheduler.py @@ -44,6 +44,7 @@ from . import weight_compressor from .architecture_allocator import ArchitectureBlockConfig from .architecture_allocator import find_block_config from .architecture_allocator import get_ifm_area_required +from .architecture_allocator import to_upscale from .architecture_features import ArchitectureFeatures from .architecture_features import Block from .cascade_builder import CascadeBuilder @@ -906,7 +907,8 @@ class Scheduler: striped_schedule.cost_map[sched_op] = cost # Calculate the preceeding Op's stripe - stripe = sched_op.ifm.shape.with_height(stripe.height * sched_op.kernel.stride.y) + height = stripe.height + stripe.height % to_upscale(sched_op.resampling_mode) + stripe = sched_op.ifm.shape.with_height(height * sched_op.kernel.stride.y) return striped_schedule |