diff options
author | Diqing Zhong <diqing.zhong@arm.com> | 2021-02-03 16:37:31 +0100 |
---|---|---|
committer | Diqing Zhong <diqing.zhong@arm.com> | 2021-02-08 09:47:39 +0100 |
commit | 455e20e5ed0d5ce141a921e67f0219e55044e6e1 (patch) | |
tree | ea7b48a4d19f21bdd12a86f36461cc4af12b55be | |
parent | 42abec185a95ac8d5f2b32d541f587b3cd306e75 (diff) | |
download | ethos-u-vela-455e20e5ed0d5ce141a921e67f0219e55044e6e1.tar.gz |
MLBEDSW-3953: Output diff in mobilenet_v3
Fixed two issues:
- The command stream could be generated out of order when IFM streaming is used
- On H32, the LUT could be corrupted if blockdep is not 0
Change-Id: I2edd84429b93d83b2794f14937ce3fd279fd4a24
Signed-off-by: Diqing Zhong <diqing.zhong@arm.com>
-rw-r--r-- | ethosu/vela/high_level_command_stream_generator.py | 24 | ||||
-rw-r--r-- | ethosu/vela/register_command_stream_util.py | 6 |
2 files changed, 18 insertions, 12 deletions
diff --git a/ethosu/vela/high_level_command_stream_generator.py b/ethosu/vela/high_level_command_stream_generator.py index e514e76c..66613ba8 100644 --- a/ethosu/vela/high_level_command_stream_generator.py +++ b/ethosu/vela/high_level_command_stream_generator.py @@ -260,6 +260,18 @@ def generate_high_level_command_stream_for_pass(strat, passes, block_configs, id upscaling, ) + ifm_y_needed = 1 + if len(ifm_box.end_coord) >= 3: + ifm_y_needed = ifm_box.end_coord[-3] + if ifm_y_present < ifm_y_needed: + for prev_cmd in prev_pass_gen: + yield prev_cmd + rng = prev_cmd.get_ofm_y_range_for_pass(prev_pass) + if rng is not None: + ifm_y_present = max(ifm_y_present, rng[1]) + if ifm_y_present >= ifm_y_needed: + break + for intermediate in ps.intermediates: if ( intermediate is not None @@ -281,18 +293,6 @@ def generate_high_level_command_stream_for_pass(strat, passes, block_configs, id intermediate_box = Box([0] * len(intermediate.shape), list(intermediate.shape)) yield from dma_if_necessary(ps, intermediate_box, intermediate) - ifm_y_needed = 1 - if len(ifm_box.end_coord) >= 3: - ifm_y_needed = ifm_box.end_coord[-3] - if ifm_y_present < ifm_y_needed: - for prev_cmd in prev_pass_gen: - yield prev_cmd - rng = prev_cmd.get_ofm_y_range_for_pass(prev_pass) - if rng is not None: - ifm_y_present = max(ifm_y_present, rng[1]) - if ifm_y_present >= ifm_y_needed: - break - if scale_tensor is not None and scale_tensor.purpose == TensorPurpose.FSBias and scale_box is None: scale_box = Box([0] * len(scale_tensor.shape), list(scale_tensor.shape)) yield from dma_if_necessary(ps, scale_box, scale_tensor) diff --git a/ethosu/vela/register_command_stream_util.py b/ethosu/vela/register_command_stream_util.py index 55fa620c..4cf826d9 100644 --- a/ethosu/vela/register_command_stream_util.py +++ b/ethosu/vela/register_command_stream_util.py @@ -463,6 +463,12 @@ def calc_blockdep(arch: ArchitectureFeatures, prev_op: Optional[NpuBlockOperatio return 0 assert npu_op.ifm is not None assert 
prev_op.ofm is not None + # Check if the reserved shram will be used in current/prev op + prev_uses_lut = prev_op.activation is not None and prev_op.activation.op_type == NpuActivationOp.TABLE_LOOKUP + curr_uses_lut = npu_op.activation is not None and npu_op.activation.op_type == NpuActivationOp.TABLE_LOOKUP + if prev_uses_lut and arch.shram_reserved_unused_banks == 0 and not curr_uses_lut: + return 0 + # Check if IFM or IFM2 overlaps with prev op's OFM prev_ofm_ranges = get_address_ranges(prev_op.ofm) ifm_ranges = get_address_ranges(npu_op.ifm) |