about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Patrik Gustavsson <patrik.gustavsson@arm.com> 2021-02-09 15:38:46 +0100
committer: Patrik Gustavsson <patrik.gustavsson@arm.com> 2021-02-11 14:38:56 +0100
commit: e3b1b91c450129308a3a1d466a2f2876a5a872b1 (patch)
tree: 77c6fe5cc8836dc3aac505efdbf78cee433bd398
parent: 138d47f5a3e87d294b3714ae799ccad8ac9111bd (diff)
download: ethos-u-vela-e3b1b91c450129308a3a1d466a2f2876a5a872b1.tar.gz
MLBEDSW-3774 Remove SplitSliceRead
Removed SplitSliceRead from subgraph during graph optimisation.

Signed-off-by: Patrik Gustavsson <patrik.gustavsson@arm.com>
Change-Id: I9315d4c2a6767828dd2b4e66823d73b10ebee99c
-rw-r--r-- ethosu/vela/graph_optimiser.py 51
-rw-r--r-- ethosu/vela/high_level_command_stream.py 4
-rw-r--r-- ethosu/vela/high_level_command_stream_generator.py 32
-rw-r--r-- ethosu/vela/operation.py 3
-rw-r--r-- ethosu/vela/pass_packing.py 16
5 files changed, 57 insertions, 49 deletions
diff --git a/ethosu/vela/graph_optimiser.py b/ethosu/vela/graph_optimiser.py
index eb93106..50368b8 100644
--- a/ethosu/vela/graph_optimiser.py
+++ b/ethosu/vela/graph_optimiser.py
@@ -164,10 +164,8 @@ def rewrite_split_ops(tens, arch, nng):
# If start offset is not a multiple of 16 in the C-dimension, NHCWB16 need to be avoided in the input
if (offset_start[-1] % 16) != 0:
inp.avoid_NHCWB16 = True
- else:
- offset_start = full_shape(4, offset_start, 0)
- new_op.attrs["split_start"] = offset_start
+ new_op.read_offsets[0] = Shape4D.from_list(offset_start, 0)
new_op.run_on_npu = True
new_op.set_output_tensor(tens)
new_op.ifm_shapes.append(Shape4D(inp.shape))
@@ -177,6 +175,45 @@ def rewrite_split_ops(tens, arch, nng):
return tens
+def remove_SplitSliceRead(op, arch):  # arch is not used in this body; no explicit return value
+ # Detach a SplitSliceRead op: hand its read offset to its sole consumer if possible, else to a new avgpool NOP.
+ if op.type == Op.SplitSliceRead:
+ # Check if it is possible to put the SplitSliceRead on the tensor consumer, or if an avgpool needs to be inserted
+ if (
+ len(op.ofm.consumer_list) == 1  # the output tensor has exactly one consumer
+ and op.ofm.consumer_list[0] is not None
+ and op.ofm.consumer_list[0].run_on_npu  # that consumer runs on the NPU
+ and op.ofm.consumer_list[0].type != Op.Reshape  # and it is not a Reshape
+ and op.ofm_shapes[0] == Shape4D.from_list(op.ofm.shape)  # op's 4D ofm shape equals the ofm tensor's shape
+ ):
+ # SplitSliceRead can be performed by tensor consumer
+ cons_op = op.ofm.consumer_list[0]
+ if cons_op.ifm == op.ofm:  # consumer reads the split output as its ifm
+ cons_op.read_offsets[0] = op.read_offsets[0]  # consumer takes over the read offset
+ cons_op.set_input_tensor(op.ifm, cons_op.type.info.indices.ifms[0])  # consumer now reads op.ifm directly
+ cons_op.ifm_shapes[0] = op.ifm_shapes[0]  # consumer adopts op's ifm shape
+ elif cons_op.type.is_binary_elementwise_op() and cons_op.ifm2 == op.ofm:  # split output is the consumer's ifm2
+ cons_op.read_offsets[1] = op.read_offsets[0]  # same hand-over, stored in the ifm2 slot
+ cons_op.set_input_tensor(op.ifm, cons_op.type.info.indices.ifms[1])
+ cons_op.ifm_shapes[1] = op.ifm_shapes[0]
+ # Unlink op from the graph. NOTE(review): indentation is lost in this view; if these lines follow the if/elif rather than sit inside the elif, they also run when neither branch matched - confirm.
+ op.ofm.consumer_list.remove(cons_op)  # op.ofm is no longer consumed by cons_op
+ op.ofm.ops = []  # op.ofm no longer lists any producing op
+ op.ifm.consumer_list.remove(op)  # op no longer consumes its ifm
+ else:  # fallback: an avgpool NOP takes op's place and carries the read offset
+ avgpool_op = create_avgpool_nop(op.name + "_avgpool")
+ avgpool_op.add_input_tensor(op.ifm)
+ avgpool_op.outputs = [op.ofm]  # the avgpool writes op's original output tensor
+ op.ofm.ops.remove(op)  # swap the producer of op.ofm from op to the avgpool
+ op.ofm.ops.append(avgpool_op)
+ avgpool_op.ifm_shapes.append(op.ifm_shapes[0])
+ avgpool_op.ofm_shapes.append(op.ofm_shapes[0])
+ avgpool_op.read_offsets[0] = op.read_offsets[0]  # the avgpool reads its ifm at op's offset
+ # Detach op from its ifm; presumably add_optimised records that avgpool_op replaced op - confirm.
+ op.ifm.consumer_list.remove(op)
+ DebugDatabase.add_optimised(op, avgpool_op)
+
+
def insert_copy_op_after_tens(tens):
tens_cons_list_copy = tens.consumer_list.copy()
@@ -202,7 +239,7 @@ def fix_sg_input_output(op, arch, nng):
if not op.run_on_npu or op.type != Op.Reshape:
return op
- # For the memory operators we want to remove, tensors are removed.
+ # For the Reshape operators we want to remove, tensors are removed.
# But in order to to do this, they cannot be outputs of the sg,
# this need to be fixed prior to the removal.
# Solution is to add a avgpool NOP, to maintain the original tensor.
@@ -1295,6 +1332,12 @@ def optimise_graph_a(nng, arch, verbose_graph=False):
[fuse_activation_function_with_prev, optimise_pad, add_padding_fields],
)
+ # Removal of SplitSliceRead, need to be done after optimisation has been performed,
+ # since ifm/ofm_shapes are of importance to this function
+ for sg in nng.subgraphs:
+ rewrite_graph.visit_graph_post_order(sg.output_tensors, arch, [], [remove_SplitSliceRead])
+ sg.refresh_after_modification()
+
# Post-optimisation operator debug tracing, and checking that no undesired reshapes are left in the graph
for sg in nng.subgraphs:
rewrite_graph.visit_graph_post_order(sg.output_tensors, arch, [], [check_reshapes, _record_optimised])
diff --git a/ethosu/vela/high_level_command_stream.py b/ethosu/vela/high_level_command_stream.py
index c25c023..0ce8fac 100644
--- a/ethosu/vela/high_level_command_stream.py
+++ b/ethosu/vela/high_level_command_stream.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
+# Copyright (C) 2020-2021 Arm Limited or its affiliates. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0
#
@@ -41,7 +41,7 @@ class Box:
npu_block_type: NpuBlockType,
concat_axis: int = 0,
concat_offset: int = 0,
- split_offset: int = None,
+ split_offset: Shape4D = None,
k_height: int = 1,
upscaling_factor: int = 1,
):
diff --git a/ethosu/vela/high_level_command_stream_generator.py b/ethosu/vela/high_level_command_stream_generator.py
index 97b42ae..c2027e0 100644
--- a/ethosu/vela/high_level_command_stream_generator.py
+++ b/ethosu/vela/high_level_command_stream_generator.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
+# Copyright (C) 2020-2021 Arm Limited or its affiliates. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0
#
@@ -38,46 +38,20 @@ def dma_if_necessary(ps, box, tensor):
yield DMA(ps, in_tensor, tensor, box)
-def match_tensor(source, derived):
- if source == derived:
- return True
- ops = derived.ops
- return ops != [] and len(ops) == 1 and ops[0].type == Op.SplitSliceRead and source == ops[0].inputs[0]
-
-
def generate_high_level_command_stream_for_pass(strat, passes, block_configs, idx):
is_first = idx == 0
is_last = idx == len(passes) - 1
ps = passes[idx]
block_config = block_configs[idx]
npu_block_type = ps.npu_block_type
- split_offsets = [None, None] # offset for [ifm, ifm2]
+ split_offsets = list(ps.primary_op.read_offsets) # offset for [ifm, ifm2]
if ps.ifm_tensor is not None and ps.ifm2_tensor is not None and npu_block_type == NpuBlockType.ElementWise:
# Ensure correct ifm and ifm2 order
- if match_tensor(ps.inputs[0], ps.primary_op.inputs[1]) and match_tensor(ps.inputs[1], ps.primary_op.inputs[0]):
+ if ps.inputs[0] == ps.primary_op.inputs[1] and ps.inputs[1] == ps.primary_op.inputs[0]:
ps.ifm_tensor, ps.ifm2_tensor = ps.ifm2_tensor, ps.ifm_tensor
ps.ifm_shapes[0], ps.ifm_shapes[1] = ps.ifm_shapes[1], ps.ifm_shapes[0]
- for op in ps.ops:
- if op.type == Op.SplitSliceRead:
- ps.primary_op.memory_function = op.type
- assert len(op.inputs) == 1
- if match_tensor(ps.ifm_tensor, op.inputs[0]):
- split_offsets[0] = op.attrs["split_start"]
- elif match_tensor(ps.ifm2_tensor, op.inputs[0]):
- split_offsets[1] = op.attrs["split_start"]
- else:
- assert False
- else:
- ifm_idx = 0
- for op in ps.ops:
- if op.type == Op.SplitSliceRead:
- assert ifm_idx < 2
- split_offsets[ifm_idx] = op.attrs["split_start"]
- ps.primary_op.memory_function = op.type
- ifm_idx += 1
-
ifm_tensor = ps.ifm_tensor
ifm_shape = None
if ifm_tensor.shape != []:
diff --git a/ethosu/vela/operation.py b/ethosu/vela/operation.py
index b297bed..16431be 100644
--- a/ethosu/vela/operation.py
+++ b/ethosu/vela/operation.py
@@ -418,6 +418,7 @@ class Operation:
"ifm_shapes",
"ofm_shapes",
"rescale",
+ "read_offsets",
)
def __init__(self, op_type: Op, name: str):
@@ -444,6 +445,7 @@ class Operation:
# If not none: contains rescale to be used as output scaling
# (which overrides the ofm tensor's scale)
self.rescale = None
+ self.read_offsets: List[Shape4D] = [None, None] # offset for [ifm, ifm2]
def clone(self, suffix="_clone"):
res = Operation(self.type, self.name + suffix)
@@ -458,6 +460,7 @@ class Operation:
res.forced_output_quantization = self.forced_output_quantization
res.scheduled_pass = self.scheduled_pass
res.op_index = None # not relevant as not part of input network
+ res.read_offsets = list(self.read_offsets)
return res
diff --git a/ethosu/vela/pass_packing.py b/ethosu/vela/pass_packing.py
index b52b159..281c093 100644
--- a/ethosu/vela/pass_packing.py
+++ b/ethosu/vela/pass_packing.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
+# Copyright (C) 2020-2021 Arm Limited or its affiliates. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0
#
@@ -42,8 +42,6 @@ class PassFlags(enum.Flag):
PostFusingLimited = 8192
-npu_pre_ops = set((Op.SplitSliceRead,))
-
mac_main_ops = set(
(
# convolutions
@@ -148,16 +146,6 @@ test_sequence = [
),
(
# ops_set
- npu_pre_ops,
- # incompatible_pack_flags
- PassFlags.Cpu | PassFlags.MemoryOnly,
- # flags_to_set
- PassFlags.Npu | PassFlags.Mac | PassFlags.Pre | PassFlags.ElementWise,
- # flags_to_clear
- PassFlags.Empty,
- ),
- (
- # ops_set
npu_dma_ops,
# incompatible_pack_flags
PassFlags.Cpu | PassFlags.MemoryOnly,
@@ -437,7 +425,7 @@ def pack_into_passes(nng, arch, verbose_packing=False):
visit_op(op, tens)
def create_primary_op(op_list):
- if any(op.type in (npu_pre_ops | npu_post_ops | npu_post_fuse_limited_ops) and op.run_on_npu for op in op_list):
+ if any(op.type in (npu_post_ops | npu_post_fuse_limited_ops) and op.run_on_npu for op in op_list):
# Configure a 1x1 AvgPool and attach the op onto it
op = op_list[0]
inp = op.inputs[0]