diff options
author | Charles Xu <charles.xu@arm.com> | 2020-05-18 08:54:47 +0200 |
---|---|---|
committer | Tim Hall <tim.hall@arm.com> | 2020-06-18 17:53:52 +0100 |
commit | 600351a0669605e740820df34780c3927a0d5559 (patch) | |
tree | 050a8cdd7106e8d7391181a15e007035d5c28a05 /ethosu/vela | |
parent | e1a56101030e2a318ea90981f910c9f2631937fd (diff) | |
download | ethos-u-vela-600351a0669605e740820df34780c3927a0d5559.tar.gz |
MLBEDSW-1828: Ifm/ifm2 order is reversed in some cases of split
Signed-off-by: Charles Xu <charles.xu@arm.com>
Change-Id: Ib8d66f8b3c0467966165c1b53aeb7da7c8764c89
Diffstat (limited to 'ethosu/vela')
-rw-r--r-- | ethosu/vela/high_level_command_stream_generator.py | 36 | ||||
-rw-r--r-- | ethosu/vela/pass_packing.py | 2 |
2 files changed, 25 insertions, 13 deletions
diff --git a/ethosu/vela/high_level_command_stream_generator.py b/ethosu/vela/high_level_command_stream_generator.py
index 3b968dc8..0cd3ad22 100644
--- a/ethosu/vela/high_level_command_stream_generator.py
+++ b/ethosu/vela/high_level_command_stream_generator.py
@@ -33,12 +33,36 @@ def dma_if_necessary(ps, box, tensor):
         in_tensor = dma_op.inputs[0]
         yield DMA(in_tensor, tensor, box)
 
+def match_tensor(source, derived):
+    if source == derived:
+        return True
+    ops = derived.ops
+    return (ops != [] and
+            len(ops) ==1 and
+            ops[0].type == "SplitSliceRead" and
+            source == ops[0].inputs[0])
 
 def generate_high_level_command_stream_for_pass(strat, passes, block_configs, idx):
     is_first = idx == 0
     is_last = idx == len(passes) - 1
     ps = passes[idx]
     block_config = block_configs[idx]
+    npu_block_type = ps.npu_block_type
+    split_offsets = [None, None]  # offset for [ifm, ifm2]
+
+    ifm_idx = 0
+    for op in ps.ops:
+        if op.type == "SplitSliceRead":
+            split_offsets[ifm_idx] = op.attrs["split_start"]
+            ps.primary_op.attrs["fused_memory_function"] = op.type
+            ifm_idx += 1
+
+    if len(ps.inputs) == 2 and npu_block_type == NpuBlockType.ElementWise:
+        # Ensure correct imf and ifm2 order
+        if (match_tensor(ps.inputs[0], ps.primary_op.inputs[1]) and
+            match_tensor(ps.inputs[1], ps.primary_op.inputs[0])):
+            ps.ifm_tensor, ps.ifm2_tensor = ps.ifm2_tensor, ps.ifm_tensor
+            split_offsets[0], split_offsets[1] = split_offsets[1], split_offsets[0]
 
     ifm_tensor = ps.ifm_tensor
     ifm2_tensor = ps.ifm2_tensor
@@ -55,13 +79,9 @@ def generate_high_level_command_stream_for_pass(strat, passes, block_configs, id
     strides = ps.primary_op.attrs.get("strides", None)
     skirt = ps.primary_op.attrs.get("skirt", None)
 
-    npu_block_type = ps.npu_block_type
-
     concat_axis = 0
     concat_offset = 0
 
-    split_offsets = [None, None]  # offset for [ifm, ifm2]
-
     # Fusable activation functions
     activation_ops = set(("Sigmoid", "Tanh", "Relu", "Relu6", "ReluN1To1"))
 
@@ -78,14 +98,6 @@ def generate_high_level_command_stream_for_pass(strat, passes, block_configs, id
         elif op.type in activation_ops:
             ps.primary_op.attrs["fused_activation_function"] = op.type
 
-    # The ops list has to be reversed here since the Pass Packing is done in reverse
-    ifm_idx = 0
-    for op in reversed(ps.ops):
-        if op.type == "SplitSliceRead":
-            split_offsets[ifm_idx] = op.attrs["split_start"]
-            ps.primary_op.attrs["fused_memory_function"] = op.type
-            ifm_idx += 1
-
     if strat == SchedulingStrategy.WeightStream:
         ofm_step = block_config[-1]
         ofm_stop = ofm_end[-1]
diff --git a/ethosu/vela/pass_packing.py b/ethosu/vela/pass_packing.py
index 5841ca23..4cfac33c 100644
--- a/ethosu/vela/pass_packing.py
+++ b/ethosu/vela/pass_packing.py
@@ -314,7 +314,7 @@ def pack_into_passes(nng, arch, verbose_packing=False):
                     if operation_set is None:
                         print("Warning:", curr_op.type, "operation is unknown or unsupported, placing on CPU")
 
-                    for inp in curr_op.inputs:
+                    for inp in reversed(curr_op.inputs):
                         can_pack = True
                         if len(inp.ops) == 1:
                             next_op = inp.ops[0]