author    Johan Alfvén <johan.alfven@arm.com>  2022-01-27 06:47:26 +0100
committer Johan Alfvén <johan.alfven@arm.com>  2022-01-27 07:35:23 +0100
commit    628928d32ceae5c95abd9b3a2cb7333b1e5de059 (patch)
tree      717f0081d3e268369731a398f8cf94726189e420
parent    1793e2f3ebc65b56f8f1cb1bbcb377eb4913c7f7 (diff)
MLBEDSW-6060: Revert patch for MLBEDSW-5582
- Issue was due to a previous patch to fix MLBEDSW-5582
- Revert fix for MLBEDSW-5582, commit 849ff81f82c10a68898e5101930b92372bec5565
- Made new fix for MLBEDSW-5582 that enforces output tensors from NPU graphs
  to be in NHWC format. This information is otherwise lost when parts of a
  concatenation are placed in different custom operators, resulting in a
  mismatch between NHWC and NHCWB16.

Signed-off-by: Johan Alfven <johan.alfven@arm.com>
Change-Id: Iab3ba29d348353c854f357836e6aa7c338ae1572
-rw-r--r--  ethosu/vela/extract_npu_subgraphs.py   3
-rw-r--r--  ethosu/vela/pass_packing.py           17
2 files changed, 9 insertions, 11 deletions
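Before the diff itself, a minimal sketch of the idea described in the commit message. The Tensor and Subgraph classes below are hypothetical stand-ins, not the real Vela types; only the needs_linear_format flag mirrors the patch. The point being illustrated: every output tensor of an extracted NPU subgraph is pinned to the linear NHWC layout, so parts of a concatenation that land in different custom operators cannot disagree between NHWC and the NPU brick format NHCWB16.

# Hypothetical sketch, not the Vela implementation.
class Tensor:
    def __init__(self, name):
        self.name = name
        # When True, the tensor must be stored as plain NHWC rather than
        # the NPU-internal brick format NHCWB16.
        self.needs_linear_format = False


class Subgraph:
    def __init__(self, output_tensors):
        self.output_tensors = output_tensors


def force_linear_outputs(npu_subgraph):
    # Mirrors the hunk in extract_npu_subgraphs.py below: pin every
    # subgraph output to the linear NHWC format.
    for tens in npu_subgraph.output_tensors:
        tens.needs_linear_format = True


if __name__ == "__main__":
    sg = Subgraph([Tensor("concat_part_0"), Tensor("concat_part_1")])
    force_linear_outputs(sg)
    assert all(t.needs_linear_format for t in sg.output_tensors)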
diff --git a/ethosu/vela/extract_npu_subgraphs.py b/ethosu/vela/extract_npu_subgraphs.py
index d802b51..f46f031 100644
--- a/ethosu/vela/extract_npu_subgraphs.py
+++ b/ethosu/vela/extract_npu_subgraphs.py
@@ -98,6 +98,9 @@ def rewrite_tensor_cpu_producer_npu_consumers(
     # Deal with output tensors for the NPU graph. These are special.
     npu_subgraph.output_tensors = [new_tens if tens == orig_tens else tens for tens in npu_subgraph.output_tensors]
+    for tens in npu_subgraph.output_tensors:
+        # Enforce output tensor from NPU graph to use normal NHWC output
+        tens.needs_linear_format = True

 def rewrite_tensor_npu_producer_cpu_consumers(
diff --git a/ethosu/vela/pass_packing.py b/ethosu/vela/pass_packing.py
index b84e455..1fefdf4 100644
--- a/ethosu/vela/pass_packing.py
+++ b/ethosu/vela/pass_packing.py
@@ -179,7 +179,7 @@ for (operation_set, incompatible_pack_flags, flags_to_set, flags_to_clear) in te
 def pack_into_passes(nng, arch, verbose_packing=False):
-    def visit_op(op, multiple_ops=None):
+    def visit_op(op, ignored):
         visit_op_refcount[op] += 1
         if visit_op_refcount[op] == 1:  # First-time visit, go and fix up unused output tensors
@@ -187,6 +187,7 @@ def pack_into_passes(nng, arch, verbose_packing=False):
                 if len(tens.consumers()) == 0:
                     visit_op_refcount[op] += 1
+        assert visit_op_refcount[op] <= len(op.outputs)
         if visit_op_refcount[op] == len(op.outputs):
             if op.type in startup_init_ops:
@@ -197,9 +198,9 @@ def pack_into_passes(nng, arch, verbose_packing=False):
                     ofm_tensor = op.outputs[0]
                 ofm_shape = op.ofm_shapes[0] if op.run_on_npu else None
-                build_pass((op,), ofm_tensor, ofm_shape, multiple_ops)
+                build_pass((op,), ofm_tensor, ofm_shape)

-    def build_pass(start_ops_to_process, ofm_tensor=None, ofm_shape=None, multiple_ops=None):
+    def build_pass(start_ops_to_process, ofm_tensor=None, ofm_shape=None):
         reverse_ops_list = []
         curr_flags = PassFlags.Empty
         npu_block_type = NpuBlockType.Default
@@ -372,10 +373,6 @@ def pack_into_passes(nng, arch, verbose_packing=False):
         reverse_pass_list.append(ps)
-        if multiple_ops:
-            multiple_op_next = multiple_ops.pop(0)
-            visit_op(multiple_op_next, multiple_ops)
-
         for inp, refcount in input_refcounts.items():
             for _ in range(refcount):
                 visit_tensor(inp)
@@ -386,10 +383,8 @@ def pack_into_passes(nng, arch, verbose_packing=False):
         visit_tensor_refcount[tens] += 1
         assert visit_tensor_refcount[tens] <= len(tens.consumers())
         if visit_tensor_refcount[tens] == len(tens.consumers()):
-            if tens.ops:
-                op = tens.ops[0]
-                multiple_ops = [o for o in tens.ops if o != op]
-                visit_op(op, multiple_ops)
+            for op in reversed(tens.ops):
+                visit_op(op, tens)

     def create_primary_op(op_list):
         if any(op.type in (npu_post_ops | npu_post_fuse_limited_ops) and op.run_on_npu for op in op_list):