From d03dc504452dbb32db383121f3dad81f4280bb3c Mon Sep 17 00:00:00 2001 From: Fredrik Svedberg Date: Thu, 30 Jun 2022 10:44:12 +0200 Subject: MLBEDSW-6496 mlperf_deeplabv3_mnv2_ade20k_int8 fails at verify_output for u65 Added check to see if additional stripe data is needed from producer op when cascading to make sure the stripes are not overwriting data still being used. Also changed scheduler to make sure ResizeBilinear always runs with even stripe height. Signed-off-by: Fredrik Svedberg Change-Id: If7d723e6be29575c2b55c400eebbe8275a1aa328 --- ethosu/vela/high_level_command_stream_generator.py | 15 ++++++++------- ethosu/vela/scheduler.py | 4 +++- 2 files changed, 11 insertions(+), 8 deletions(-) (limited to 'ethosu') diff --git a/ethosu/vela/high_level_command_stream_generator.py b/ethosu/vela/high_level_command_stream_generator.py index 95068081..a52bdc37 100644 --- a/ethosu/vela/high_level_command_stream_generator.py +++ b/ethosu/vela/high_level_command_stream_generator.py @@ -185,13 +185,14 @@ def generate_high_level_commands_for_sched_op(sched_op, schedule): if producer_op: assert op_info.cascade != 0 assert op_info.cascade == schedule.cost_map[producer_op].cascade - for prev_cmd in prev_cmd_gen: - yield prev_cmd - if prev_cmd.is_npu_pass_command() and prev_cmd.ps == producer_op.parent_ps: - ifm_present.end_coord = prev_cmd.ofm_box.end_coord - if ifm_required.is_subbox_of(ifm_present): - # There is enough IFM data - exit loop - break + if not ifm_required.is_subbox_of(ifm_present): + for prev_cmd in prev_cmd_gen: + yield prev_cmd + if prev_cmd.is_npu_pass_command() and prev_cmd.ps == producer_op.parent_ps: + ifm_present.end_coord = prev_cmd.ofm_box.end_coord + if ifm_required.is_subbox_of(ifm_present): + # There is enough IFM data - exit loop + break # Information about the current stripe's location in the cascade is_first_h_stripe = ofm_box_start.height == ofm_start.height diff --git a/ethosu/vela/scheduler.py b/ethosu/vela/scheduler.py index 7e989a7d..d01942bb 100644 --- a/ethosu/vela/scheduler.py +++ b/ethosu/vela/scheduler.py @@ -44,6 +44,7 @@ from . import weight_compressor from .architecture_allocator import ArchitectureBlockConfig from .architecture_allocator import find_block_config from .architecture_allocator import get_ifm_area_required +from .architecture_allocator import to_upscale from .architecture_features import ArchitectureFeatures from .architecture_features import Block from .cascade_builder import CascadeBuilder @@ -906,7 +907,8 @@ class Scheduler: striped_schedule.cost_map[sched_op] = cost # Calculate the preceeding Op's stripe - stripe = sched_op.ifm.shape.with_height(stripe.height * sched_op.kernel.stride.y) + height = stripe.height + stripe.height % to_upscale(sched_op.resampling_mode) + stripe = sched_op.ifm.shape.with_height(height * sched_op.kernel.stride.y) return striped_schedule -- cgit v1.2.1