author     Johan Alfvén <johan.alfven@arm.com>  2022-09-28 14:22:54 +0200
committer  Johan Alfvén <johan.alfven@arm.com>  2022-10-28 09:45:42 +0200
commit     8484d6e529bc7828d3e5034cd9dfcfb1ddb0559a (patch)
tree       b2d094e7a405c9b78636f2a072796a184f35aa7c
parent     53605be9fb83fb0a0fa873a0f4d7435654d3df6b (diff)
download   ethos-u-vela-8484d6e529bc7828d3e5034cd9dfcfb1ddb0559a.tar.gz
Revert "MLBEDSW-6961: Bypass functionality for memory ops"
This reverts commit 5060ff53f5ac2382e04a68d7772bd71a36f63845.

Signed-off-by: Johan Alfven <johan.alfven@arm.com>
Change-Id: I8dd7e9ed8325fd2e8c17509fd9757292706f5ee7
-rw-r--r--  ethosu/vela/graph_optimiser_util.py       | 79
-rw-r--r--  ethosu/vela/tflite_supported_operators.py |  9
2 files changed, 55 insertions(+), 33 deletions(-)
diff --git a/ethosu/vela/graph_optimiser_util.py b/ethosu/vela/graph_optimiser_util.py
index e6a79cef..b33851a8 100644
--- a/ethosu/vela/graph_optimiser_util.py
+++ b/ethosu/vela/graph_optimiser_util.py
@@ -200,25 +200,35 @@ def bypass_memory_only_ops(op):
     ofm = op.ofm
     ifm = op.ifm

-    # Check if ifm is subgraph ifm
+    # Check if ifm/ofm are network ifm/ofm
     ifm_is_sg_ifm = ifm.ops[0].type in (Op.Placeholder, Op.SubgraphInput, Op.Const)
-    # Check if ifm is produced by CPU
+    ifm_is_sg_ofm = any(ifm_cons is None for ifm_cons in ifm.consumer_list)
+    ofm_is_sg_ofm = any(ofm_cons is None for ofm_cons in ofm.consumer_list)
+    # Check if ifm is produced, or ofm consumed, by the CPU
     ifm_is_cpu_produced = any(ifm_prod is not None and not ifm_prod.run_on_npu for ifm_prod in op.ifm.ops)
+    ofm_is_cpu_consumed = any(ofm_cons is not None and not ofm_cons.run_on_npu for ofm_cons in op.ofm.consumer_list)

     # This case should be handled prior to this function
-    assert not (ifm_is_sg_ifm or ifm_is_cpu_produced)
-
-    # Bypassed by replacing ifm with ofm
-    ofm.ops = []
-    for prev_op in ifm.ops:
-        prev_op.outputs = [ofm]
-        ofm.ops.append(prev_op)
-
-    # All ifm consumers need to use ofm as input
-    for ifm_cons in ifm.consumer_list:
-        for ifm_idx, cons_ifm in enumerate(ifm_cons.inputs):
-            if cons_ifm == ifm:
-                ifm_cons.set_input_tensor(ofm, ifm_idx)
+    assert not ((ifm_is_sg_ifm or ifm_is_sg_ofm or ifm_is_cpu_produced) and (ofm_is_sg_ofm or ofm_is_cpu_consumed))
+
+    if ofm_is_sg_ofm or ofm_is_cpu_consumed:
+        # Bypassed by replacing ifm with ofm
+        ofm.ops = []
+        for prev_op in ifm.ops:
+            prev_op.outputs = [ofm]
+            ofm.ops.append(prev_op)
+
+        # All ifm consumers need to use ofm as input
+        for ifm_cons in ifm.consumer_list:
+            for ifm_idx, cons_ifm in enumerate(ifm_cons.inputs):
+                if cons_ifm == ifm:
+                    ifm_cons.set_input_tensor(ofm, ifm_idx)
+    else:
+        # Bypassed by replacing ofm with ifm
+        for cons in ofm.consumer_list:
+            for ifm_idx, cons_ifm in enumerate(cons.inputs):
+                if cons_ifm == ofm:
+                    cons.set_input_tensor(ifm, ifm_idx)

 def move_splitsliceread_to_consumer(op, cons_op):
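For illustration, a minimal runnable sketch of the two-way bypass restored above: if the ofm must survive (simplified here to "it is a subgraph output"; the real check also covers CPU consumers), the ifm's producers are rewired to write straight into the ofm, otherwise the ofm's consumers are rewired to read the ifm. ToyTensor, ToyOp and bypass_memory_only are hypothetical stand-ins, not Vela's real classes or API.

# Hypothetical stand-ins; Vela's real Tensor/Operation carry far more state.
class ToyTensor:
    def __init__(self, name):
        self.name = name
        self.ops = []            # producing ops
        self.consumer_list = []  # consuming ops; None marks a subgraph output

class ToyOp:
    def __init__(self, name):
        self.name = name
        self.inputs = []
        self.outputs = []

def bypass_memory_only(op):
    ifm, ofm = op.inputs[0], op.outputs[0]
    ofm_must_survive = any(cons is None for cons in ofm.consumer_list)
    if ofm_must_survive:
        # Replace ifm with ofm: ifm's producers now write into ofm ...
        ofm.ops = []
        for prev_op in ifm.ops:
            prev_op.outputs = [ofm]
            ofm.ops.append(prev_op)
        # ... and every other consumer of ifm now reads ofm
        for cons in ifm.consumer_list:
            if cons is not None and cons is not op:
                cons.inputs = [ofm if t is ifm else t for t in cons.inputs]
                ofm.consumer_list.append(cons)
    else:
        # Replace ofm with ifm: ofm's consumers now read ifm
        for cons in ofm.consumer_list:
            if cons is not None:
                cons.inputs = [ifm if t is ofm else t for t in cons.inputs]
                ifm.consumer_list.append(cons)
        ifm.consumer_list.remove(op)
    # (the real pass subsequently deletes op itself)

# conv -> reshape -> subgraph output: bypassing the reshape makes conv
# write directly into the subgraph output tensor.
a, b = ToyTensor("a"), ToyTensor("b")
conv, reshape = ToyOp("conv"), ToyOp("reshape")
conv.outputs, a.ops = [a], [conv]
reshape.inputs, reshape.outputs = [a], [b]
a.consumer_list, b.ops = [reshape], [reshape]
b.consumer_list = [None]  # b is a subgraph output
bypass_memory_only(reshape)
assert conv.outputs == [b] and b.ops == [conv]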
@@ -251,8 +261,8 @@ def record_optimised(op, arch):
     DebugDatabase.add_optimised(op, op)


-def insert_copy_op_after_ifm(op):
-    tens = op.ifm
+def insert_copy_op_after_tens(tens):
+    tens_cons_list_copy = tens.consumer_list.copy()

     # Create an avg_pool NOP op with ifm as input
     copy_tens = tens.clone()
@@ -262,7 +272,12 @@ def insert_copy_op_after_ifm(op):
     copy_op.set_ifm_ofm_shapes()
     copy_op.run_on_npu = True

-    op.set_input_tensor(copy_tens, 0)
+    # Set copy_ifm consumers
+    for tens_cons in tens_cons_list_copy:
+        if tens_cons is not None:
+            for ifm_idx, cons_inp in enumerate(tens_cons.inputs):
+                if cons_inp == tens:
+                    tens_cons.set_input_tensor(copy_tens, ifm_idx)

     DebugDatabase.add_optimised(tens.ops[0], copy_op)
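The rewiring that insert_copy_op_after_tens performs, sketched with the same hypothetical ToyTensor/ToyOp stand-ins from the note above; in the real code the copy op is an AvgPool NOP cloned from the tensor, here it is plain pointer surgery.

def insert_copy_after(tens):
    # Identity "copy" op standing in for Vela's avgpool NOP
    copy_tens = ToyTensor(tens.name + "_copy")
    copy_op = ToyOp("avgpool_nop")
    copy_op.inputs, copy_op.outputs = [tens], [copy_tens]
    copy_tens.ops = [copy_op]
    # Real consumers now read the copy; a None entry (subgraph output)
    # stays on tens, which is exactly why tens must persist
    for cons in list(tens.consumer_list):
        if cons is not None:
            cons.inputs = [copy_tens if t is tens else t for t in cons.inputs]
            copy_tens.consumer_list.append(cons)
            tens.consumer_list.remove(cons)
    tens.consumer_list.append(copy_op)
    return copy_op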
@@ -271,26 +286,24 @@ def fix_sg_input_output(op, arch, nng):
     if not op.run_on_npu or op.type not in memory_only_ops:
         return op

-    # For the memory only operators we want to remove, the ifm tensor
-    # is replaced by the ofm tensor.
-    # But in order to do this, the ifm cannot be an input of the sg,
-    # nor can the ifm have more than one consumer.
-    # This needs to be fixed prior to the removal.
+    # For the memory only operators we want to remove, tensors are removed.
+    # But in order to do this, they cannot be outputs of the sg;
+    # this needs to be fixed prior to the removal.
     # Solution is to add an avgpool NOP to maintain the original tensor.
-    # This is also valid when the reshape ifm is produced by the CPU
+    # This is also valid when the reshape ifm/ofm is produced or
+    # consumed by the CPU

-    # Check if operator ifm is subgraph ifm
+    # Check if operator ifm/ofm are sg ifm/ofm
     ifm_is_sg_ifm = op.ifm.ops[0].type in (Op.Placeholder, Op.SubgraphInput, Op.Const)
-
-    # Check if ifm is produced by CPU
+    ifm_is_sg_ofm = any(ifm_cons is None for ifm_cons in op.ifm.consumer_list)
+    ofm_is_sg_ofm = any(ofm_cons is None for ofm_cons in op.ofm.consumer_list)
+    # Check if ifm is produced, or ofm consumed, by the CPU
     ifm_is_cpu_produced = any(ifm_prod is not None and not ifm_prod.run_on_npu for ifm_prod in op.ifm.ops)
+    ofm_is_cpu_consumed = any(ofm_cons is not None and not ofm_cons.run_on_npu for ofm_cons in op.ofm.consumer_list)

-    # Check the number of ifm consumers - if more than one, insert an avgpool NOP
-    ifm_has_multiple_cons = len(op.ifm.consumer_list) > 1
-
-    if ifm_is_sg_ifm or ifm_is_cpu_produced or ifm_has_multiple_cons:
-        # Ifm needs to persist in order to remove the memory only operator.
-        insert_copy_op_after_ifm(op)
+    if (ifm_is_sg_ofm or ifm_is_sg_ifm or ifm_is_cpu_produced) and (ofm_is_sg_ofm or ofm_is_cpu_consumed):
+        # Both ifm and ofm need to persist, but only the ifm needs a copy, in order to remove the memory only operator.
+        insert_copy_op_after_tens(op.ifm)

     return op
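The net effect of the restored fix_sg_input_output: a memory-only op can be bypassed directly when at most one of its sides is pinned to a subgraph boundary or to the CPU; only when both sides are pinned does the ifm first need a preserving copy. A condensed sketch of that predicate on the same toy stand-ins (illustrative only; the real checks also consult Op.Placeholder/Op.SubgraphInput/Op.Const producers and run_on_npu flags).

def needs_ifm_copy(op):
    ifm, ofm = op.inputs[0], op.outputs[0]
    # "Pinned": the tensor is a subgraph output (None consumer) or, for
    # the ifm, a subgraph input (no producing op in this toy model)
    ifm_pinned = not ifm.ops or any(c is None for c in ifm.consumer_list)
    ofm_pinned = any(c is None for c in ofm.consumer_list)
    return ifm_pinned and ofm_pinned

# A reshape reading the subgraph input and writing the subgraph output
# is pinned on both sides, so its ifm needs the copy first:
x, y = ToyTensor("x"), ToyTensor("y")  # x has no producer: sg input
rs = ToyOp("reshape")
rs.inputs, rs.outputs = [x], [y]
x.consumer_list, y.ops = [rs], [rs]
y.consumer_list = [None]               # y is the sg output
assert needs_ifm_copy(rs)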
diff --git a/ethosu/vela/tflite_supported_operators.py b/ethosu/vela/tflite_supported_operators.py
index b8fe4b6a..c394778b 100644
--- a/ethosu/vela/tflite_supported_operators.py
+++ b/ethosu/vela/tflite_supported_operators.py
@@ -319,6 +319,7 @@ class TFLiteSupportedOperators:
         # Reshape specific checks:
         self.specific_constraints[Op.Reshape].append(TFLiteSupportedOperators.constraint_reshape_shape_constant)
+        self.specific_constraints[Op.Reshape].append(TFLiteSupportedOperators.constraint_reshape_before_mean)

     def is_operator_supported(self, op):
         ext_type = optype_to_builtintype(op.type)
@@ -879,3 +880,11 @@ class TFLiteSupportedOperators:
         extra = ", ".join(extra)
         return valid, f"Op has non-const input(s): {extra}"
+
+    @staticmethod
+    def constraint_reshape_before_mean(op):
+        "Reshape on NPU not supported before MEAN operator"
+        for next_op in op.outputs[0].consumers():
+            if next_op is not None and next_op.type == Op.Mean:
+                return False, ""
+        return True, ""
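As the surrounding hunk shows, a constraint returns a (valid, extra) pair, and an op failing any constraint is left to run on the CPU, with the constraint's docstring serving as the human-readable reason. A self-contained sketch of the new check; StubOp/StubTensor are hypothetical, and a plain string stands in for Op.Mean.

class StubTensor:
    def __init__(self, consumers):
        self._consumers = list(consumers)
    def consumers(self):
        return self._consumers

class StubOp:
    def __init__(self, op_type, outputs=()):
        self.type = op_type
        self.outputs = list(outputs)

def constraint_reshape_before_mean(op):
    "Reshape on NPU not supported before MEAN operator"
    for next_op in op.outputs[0].consumers():
        if next_op is not None and next_op.type == "Mean":
            return False, ""
    return True, ""

mean = StubOp("Mean")
reshape = StubOp("Reshape", [StubTensor([mean])])
valid, _ = constraint_reshape_before_mean(reshape)
assert not valid  # this Reshape would be pushed to the CPU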