about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Diqing Zhong <diqing.zhong@arm.com> 2021-02-03 16:37:31 +0100
committer Diqing Zhong <diqing.zhong@arm.com> 2021-02-08 09:47:39 +0100
commit 455e20e5ed0d5ce141a921e67f0219e55044e6e1 (patch)
tree ea7b48a4d19f21bdd12a86f36461cc4af12b55be
parent 42abec185a95ac8d5f2b32d541f587b3cd306e75 (diff)
download ethos-u-vela-455e20e5ed0d5ce141a921e67f0219e55044e6e1.tar.gz
MLBEDSW-3953: Output diff in mobilenet_v3
Fixed two issues:
- Cmd stream can be out of order in Ifmstreaming
- In H32, LUT could be corrupted if blockdep is not 0

Change-Id: I2edd84429b93d83b2794f14937ce3fd279fd4a24
Signed-off-by: Diqing Zhong <diqing.zhong@arm.com>
-rw-r--r-- ethosu/vela/high_level_command_stream_generator.py 24
-rw-r--r-- ethosu/vela/register_command_stream_util.py 6
2 files changed, 18 insertions, 12 deletions
diff --git a/ethosu/vela/high_level_command_stream_generator.py b/ethosu/vela/high_level_command_stream_generator.py
index e514e76..66613ba 100644
--- a/ethosu/vela/high_level_command_stream_generator.py
+++ b/ethosu/vela/high_level_command_stream_generator.py
@@ -260,6 +260,18 @@ def generate_high_level_command_stream_for_pass(strat, passes, block_configs, id
upscaling,
)
+ ifm_y_needed = 1
+ if len(ifm_box.end_coord) >= 3:
+ ifm_y_needed = ifm_box.end_coord[-3]
+ if ifm_y_present < ifm_y_needed:
+ for prev_cmd in prev_pass_gen:
+ yield prev_cmd
+ rng = prev_cmd.get_ofm_y_range_for_pass(prev_pass)
+ if rng is not None:
+ ifm_y_present = max(ifm_y_present, rng[1])
+ if ifm_y_present >= ifm_y_needed:
+ break
+
for intermediate in ps.intermediates:
if (
intermediate is not None
@@ -281,18 +293,6 @@ def generate_high_level_command_stream_for_pass(strat, passes, block_configs, id
intermediate_box = Box([0] * len(intermediate.shape), list(intermediate.shape))
yield from dma_if_necessary(ps, intermediate_box, intermediate)
- ifm_y_needed = 1
- if len(ifm_box.end_coord) >= 3:
- ifm_y_needed = ifm_box.end_coord[-3]
- if ifm_y_present < ifm_y_needed:
- for prev_cmd in prev_pass_gen:
- yield prev_cmd
- rng = prev_cmd.get_ofm_y_range_for_pass(prev_pass)
- if rng is not None:
- ifm_y_present = max(ifm_y_present, rng[1])
- if ifm_y_present >= ifm_y_needed:
- break
-
if scale_tensor is not None and scale_tensor.purpose == TensorPurpose.FSBias and scale_box is None:
scale_box = Box([0] * len(scale_tensor.shape), list(scale_tensor.shape))
yield from dma_if_necessary(ps, scale_box, scale_tensor)
diff --git a/ethosu/vela/register_command_stream_util.py b/ethosu/vela/register_command_stream_util.py
index 55fa620..4cf826d 100644
--- a/ethosu/vela/register_command_stream_util.py
+++ b/ethosu/vela/register_command_stream_util.py
@@ -463,6 +463,12 @@ def calc_blockdep(arch: ArchitectureFeatures, prev_op: Optional[NpuBlockOperatio
return 0
assert npu_op.ifm is not None
assert prev_op.ofm is not None
+ # Check if the reserved shram will be used in current/prev op
+ prev_uses_lut = prev_op.activation is not None and prev_op.activation.op_type == NpuActivationOp.TABLE_LOOKUP
+ curr_uses_lut = npu_op.activation is not None and npu_op.activation.op_type == NpuActivationOp.TABLE_LOOKUP
+ if prev_uses_lut and arch.shram_reserved_unused_banks == 0 and not curr_uses_lut:
+ return 0
+
# Check if IFM or IFM2 overlaps with prev op's OFM
prev_ofm_ranges = get_address_ranges(prev_op.ofm)
ifm_ranges = get_address_ranges(npu_op.ifm)