aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Charles Xu <charles.xu@arm.com> 2020-05-18 08:54:47 +0200
committer Tim Hall <tim.hall@arm.com> 2020-06-18 17:53:52 +0100
commit 600351a0669605e740820df34780c3927a0d5559 (patch)
tree 050a8cdd7106e8d7391181a15e007035d5c28a05
parent e1a56101030e2a318ea90981f910c9f2631937fd (diff)
download ethos-u-vela-600351a0669605e740820df34780c3927a0d5559.tar.gz
MLBEDSW-1828: Ifm/ifm2 order is reversed in some cases of split
Signed-off-by: Charles Xu <charles.xu@arm.com>
Change-Id: Ib8d66f8b3c0467966165c1b53aeb7da7c8764c89
-rw-r--r-- ethosu/vela/high_level_command_stream_generator.py 36
-rw-r--r-- ethosu/vela/pass_packing.py 2
2 files changed, 25 insertions, 13 deletions
diff --git a/ethosu/vela/high_level_command_stream_generator.py b/ethosu/vela/high_level_command_stream_generator.py
index 3b968dc8..0cd3ad22 100644
--- a/ethosu/vela/high_level_command_stream_generator.py
+++ b/ethosu/vela/high_level_command_stream_generator.py
@@ -33,12 +33,36 @@ def dma_if_necessary(ps, box, tensor):
in_tensor = dma_op.inputs[0]
yield DMA(in_tensor, tensor, box)
+def match_tensor(source, derived):
+ if source == derived:
+ return True
+ ops = derived.ops
+ return (ops != [] and
+ len(ops) ==1 and
+ ops[0].type == "SplitSliceRead" and
+ source == ops[0].inputs[0])
def generate_high_level_command_stream_for_pass(strat, passes, block_configs, idx):
is_first = idx == 0
is_last = idx == len(passes) - 1
ps = passes[idx]
block_config = block_configs[idx]
+ npu_block_type = ps.npu_block_type
+ split_offsets = [None, None] # offset for [ifm, ifm2]
+
+ ifm_idx = 0
+ for op in ps.ops:
+ if op.type == "SplitSliceRead":
+ split_offsets[ifm_idx] = op.attrs["split_start"]
+ ps.primary_op.attrs["fused_memory_function"] = op.type
+ ifm_idx += 1
+
+ if len(ps.inputs) == 2 and npu_block_type == NpuBlockType.ElementWise:
+ # Ensure correct ifm and ifm2 order
+ if (match_tensor(ps.inputs[0], ps.primary_op.inputs[1]) and
+ match_tensor(ps.inputs[1], ps.primary_op.inputs[0])):
+ ps.ifm_tensor, ps.ifm2_tensor = ps.ifm2_tensor, ps.ifm_tensor
+ split_offsets[0], split_offsets[1] = split_offsets[1], split_offsets[0]
ifm_tensor = ps.ifm_tensor
ifm2_tensor = ps.ifm2_tensor
@@ -55,13 +79,9 @@ def generate_high_level_command_stream_for_pass(strat, passes, block_configs, id
strides = ps.primary_op.attrs.get("strides", None)
skirt = ps.primary_op.attrs.get("skirt", None)
- npu_block_type = ps.npu_block_type
-
concat_axis = 0
concat_offset = 0
- split_offsets = [None, None] # offset for [ifm, ifm2]
-
# Fusable activation functions
activation_ops = set(("Sigmoid", "Tanh", "Relu", "Relu6", "ReluN1To1"))
@@ -78,14 +98,6 @@ def generate_high_level_command_stream_for_pass(strat, passes, block_configs, id
elif op.type in activation_ops:
ps.primary_op.attrs["fused_activation_function"] = op.type
- # The ops list has to be reversed here since the Pass Packing is done in reverse
- ifm_idx = 0
- for op in reversed(ps.ops):
- if op.type == "SplitSliceRead":
- split_offsets[ifm_idx] = op.attrs["split_start"]
- ps.primary_op.attrs["fused_memory_function"] = op.type
- ifm_idx += 1
-
if strat == SchedulingStrategy.WeightStream:
ofm_step = block_config[-1]
ofm_stop = ofm_end[-1]
diff --git a/ethosu/vela/pass_packing.py b/ethosu/vela/pass_packing.py
index 5841ca23..4cfac33c 100644
--- a/ethosu/vela/pass_packing.py
+++ b/ethosu/vela/pass_packing.py
@@ -314,7 +314,7 @@ def pack_into_passes(nng, arch, verbose_packing=False):
if operation_set is None:
print("Warning:", curr_op.type, "operation is unknown or unsupported, placing on CPU")
- for inp in curr_op.inputs:
+ for inp in reversed(curr_op.inputs):
can_pack = True
if len(inp.ops) == 1:
next_op = inp.ops[0]