From 455e20e5ed0d5ce141a921e67f0219e55044e6e1 Mon Sep 17 00:00:00 2001 From: Diqing Zhong Date: Wed, 3 Feb 2021 16:37:31 +0100 Subject: MLBEDSW-3953: Output diff in mobilenet_v3 Fixed two issues: - Cmd stream can be out of order in Ifmstreaming - In H32, LUT could be corrupted if blockdep is not 0 Change-Id: I2edd84429b93d83b2794f14937ce3fd279fd4a24 Signed-off-by: Diqing Zhong --- ethosu/vela/high_level_command_stream_generator.py | 24 +++++++++++----------- ethosu/vela/register_command_stream_util.py | 6 ++++++ 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/ethosu/vela/high_level_command_stream_generator.py b/ethosu/vela/high_level_command_stream_generator.py index e514e76c..66613ba8 100644 --- a/ethosu/vela/high_level_command_stream_generator.py +++ b/ethosu/vela/high_level_command_stream_generator.py @@ -260,6 +260,18 @@ def generate_high_level_command_stream_for_pass(strat, passes, block_configs, id upscaling, ) + ifm_y_needed = 1 + if len(ifm_box.end_coord) >= 3: + ifm_y_needed = ifm_box.end_coord[-3] + if ifm_y_present < ifm_y_needed: + for prev_cmd in prev_pass_gen: + yield prev_cmd + rng = prev_cmd.get_ofm_y_range_for_pass(prev_pass) + if rng is not None: + ifm_y_present = max(ifm_y_present, rng[1]) + if ifm_y_present >= ifm_y_needed: + break + for intermediate in ps.intermediates: if ( intermediate is not None @@ -281,18 +293,6 @@ def generate_high_level_command_stream_for_pass(strat, passes, block_configs, id intermediate_box = Box([0] * len(intermediate.shape), list(intermediate.shape)) yield from dma_if_necessary(ps, intermediate_box, intermediate) - ifm_y_needed = 1 - if len(ifm_box.end_coord) >= 3: - ifm_y_needed = ifm_box.end_coord[-3] - if ifm_y_present < ifm_y_needed: - for prev_cmd in prev_pass_gen: - yield prev_cmd - rng = prev_cmd.get_ofm_y_range_for_pass(prev_pass) - if rng is not None: - ifm_y_present = max(ifm_y_present, rng[1]) - if ifm_y_present >= ifm_y_needed: - break - if scale_tensor is not None and scale_tensor.purpose == TensorPurpose.FSBias and scale_box is None: scale_box = Box([0] * len(scale_tensor.shape), list(scale_tensor.shape)) yield from dma_if_necessary(ps, scale_box, scale_tensor) diff --git a/ethosu/vela/register_command_stream_util.py b/ethosu/vela/register_command_stream_util.py index 55fa620c..4cf826d9 100644 --- a/ethosu/vela/register_command_stream_util.py +++ b/ethosu/vela/register_command_stream_util.py @@ -463,6 +463,12 @@ def calc_blockdep(arch: ArchitectureFeatures, prev_op: Optional[NpuBlockOperatio return 0 assert npu_op.ifm is not None assert prev_op.ofm is not None + # Check if the reserved shram will be used in current/prev op + prev_uses_lut = prev_op.activation is not None and prev_op.activation.op_type == NpuActivationOp.TABLE_LOOKUP + curr_uses_lut = npu_op.activation is not None and npu_op.activation.op_type == NpuActivationOp.TABLE_LOOKUP + if prev_uses_lut and arch.shram_reserved_unused_banks == 0 and not curr_uses_lut: + return 0 + # Check if IFM or IFM2 overlaps with prev op's OFM prev_ofm_ranges = get_address_ranges(prev_op.ofm) ifm_ranges = get_address_ranges(npu_op.ifm) -- cgit v1.2.1