From 211165a378ccb43696c562fe53e1e1937c75a144 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johan=20Alfv=C3=A9n?= <johan.alfven@arm.com>
Date: Sun, 6 Feb 2022 15:30:07 +0100
Subject: MLBEDSW-5582: MLCE: memory corruption with zero concat

Fixed a problem that occurs when an ofm is produced by different NPU
nodes, by making sure that the output is always in NHWC format.

Signed-off-by: Johan Alfven <johan.alfven@arm.com>
Change-Id: I00e55c989d5860499fbaf4f4318661b17b4bda7e
---
 ethosu/vela/extract_npu_subgraphs.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/ethosu/vela/extract_npu_subgraphs.py b/ethosu/vela/extract_npu_subgraphs.py
index ac24e430..015634c8 100644
--- a/ethosu/vela/extract_npu_subgraphs.py
+++ b/ethosu/vela/extract_npu_subgraphs.py
@@ -106,9 +106,6 @@ def rewrite_tensor_cpu_producer_npu_consumers(
 
     # Deal with output tensors for the NPU graph. These are special.
     npu_subgraph.output_tensors = [new_tens if tens == orig_tens else tens for tens in npu_subgraph.output_tensors]
-    for tens in npu_subgraph.output_tensors:
-        # Enforce output tensor from NPU graph to use normal NHWC output
-        tens.needs_linear_format = True
 
 
 def rewrite_tensor_npu_producer_cpu_consumers(
@@ -241,6 +238,11 @@ def extract_subgraph(nng, orig_sg, arch):
                         tens, call_pass[curr_sg], startup_init_passes[curr_sg], curr_sg, orig_sg, subgraph_for_pass
                     )
 
+        for tens in curr_sg.output_tensors:
+            # ofm can depend on multiple ops. These ops can be divided into different NPU
+            # nodes due to CPU nodes. If that is the case the ofm must be NHWC.
+            tens.needs_linear_format = True
+
     return new_subgraphs
 
 
-- 
cgit v1.2.1