 ethosu/vela/nn_graph.py               |  5
 ethosu/vela/tflite_graph_optimiser.py |  9
 ethosu/vela/tosa_graph_optimiser.py   | 61
 ethosu/vela/tosa_reader.py            | 12
 4 files changed, 73 insertions(+), 14 deletions(-)
diff --git a/ethosu/vela/nn_graph.py b/ethosu/vela/nn_graph.py
index 3c87f9b..b9eee28 100644
--- a/ethosu/vela/nn_graph.py
+++ b/ethosu/vela/nn_graph.py
@@ -253,7 +253,10 @@ class Subgraph:
for tens in ps.inputs:
for op in tens.ops:
pred_pass = op.scheduled_pass
- assert pred_pass.time < ps.time
+ # A pass with split concat ops may end up with a dependency on
+ # itself, since the output from the concat is produced by several
+ # avg pool ops. Hence pred_pass can be equal to ps.
+ assert pred_pass == ps or pred_pass.time < ps.time
if ps not in pred_pass.successors:
pred_pass.successors.append(ps)
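
For context, a minimal standalone sketch of why the assert is relaxed (the Pass class and link_predecessors helper below are stand-ins, not Vela's real classes): a concat split into several avg pool ops writes all of its parts into one shared output tensor, so the pass both produces and consumes that tensor and can appear among its own predecessors.

    class Pass:
        def __init__(self, name, time):
            self.name = name
            self.time = time
            self.successors = []

    def link_predecessors(ps, pred_passes):
        for pred_pass in pred_passes:
            # Tolerate a self-reference, as in the patched assert above.
            assert pred_pass == ps or pred_pass.time < ps.time
            if ps not in pred_pass.successors:
                pred_pass.successors.append(ps)

    concat_pass = Pass("split_concat", time=3)
    link_predecessors(concat_pass, [Pass("producer", time=1), concat_pass])
    print(concat_pass in concat_pass.successors)  # True: self-dependency allowed
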
diff --git a/ethosu/vela/tflite_graph_optimiser.py b/ethosu/vela/tflite_graph_optimiser.py
index 687e5d4..3af8588 100644
--- a/ethosu/vela/tflite_graph_optimiser.py
+++ b/ethosu/vela/tflite_graph_optimiser.py
@@ -827,7 +827,7 @@ def convert_batched_fc_shape(op: Operation, arch, nng) -> Operation:
if op.type == Op.FullyConnected:
# Check if the first dimension indicates batching
if op.ifm_shapes[0].batch > 1:
- batching_split = {4: (2, 2), 8: (2, 4), 16: (4, 4)}
+ batching_split = {4: (2, 2), 6: (2, 3), 8: (2, 4), 9: (3, 3), 12: (3, 4), 16: (4, 4)}
n = op.ifm_shapes[0].batch
h, w = batching_split.get(n, (1, n))
op.ifm_shapes[0] = Shape4D([1, h, w, op.ifm_shapes[0].depth])
@@ -840,6 +840,13 @@ def convert_batched_fc_shape(op: Operation, arch, nng) -> Operation:
n = op.ofm_shapes[0].batch
h, w = batching_split.get(n, (1, n))
op.ofm_shapes[0] = Shape4D([1, h, w, op.ofm_shapes[0].depth])
+ if h == 1 and w > 4:
+ # If the batch size cannot be found in the split set, the weights
+ # are going to be read from memory several times. Convert the op to
+ # conv2d, since this enables weight buffering.
+ op.type = Op.Conv2DBias
+ op.attrs["padding"] = Padding.SAME
+ DebugDatabase.add_optimised(op, op)
return op
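
To illustrate the rule, a standalone sketch (fold_batch is a hypothetical helper, not part of Vela): batch sizes present in the table fold into an h x w grid, anything else falls back to (1, n), and with n > 4 that fallback now triggers the Conv2DBias conversion so the weights can be buffered instead of being re-read per batch.

    batching_split = {4: (2, 2), 6: (2, 3), 8: (2, 4), 9: (3, 3), 12: (3, 4), 16: (4, 4)}

    def fold_batch(n):
        h, w = batching_split.get(n, (1, n))
        needs_conv2d = h == 1 and w > 4  # weights would be re-read per batch
        return h, w, needs_conv2d

    print(fold_batch(12))  # (3, 4, False): stays a FullyConnected
    print(fold_batch(5))   # (1, 5, True): rewritten to Conv2DBias
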
diff --git a/ethosu/vela/tosa_graph_optimiser.py b/ethosu/vela/tosa_graph_optimiser.py
index 09b2c52..26d3dca 100644
--- a/ethosu/vela/tosa_graph_optimiser.py
+++ b/ethosu/vela/tosa_graph_optimiser.py
@@ -247,7 +247,11 @@ def fix_sg_input_output_tosa(op, arch, nng):
# consumed by CPU
# Check if operator ifm/ofm are sg ifm/ofm
- ifm_is_sg_ifm = op.ifm.ops[0].type in (Op.Placeholder, Op.SubgraphInput, Op.Const)
+ ifm_is_sg_ifm = op.ifm.ops[0].type in (
+ Op.Placeholder,
+ Op.SubgraphInput,
+ Op.Const,
+ )
ifm_is_sg_ofm = any(ifm_cons is None for ifm_cons in op.ifm.consumer_list)
ofm_is_sg_ofm = any(ofm_cons is None for ofm_cons in op.ofm.consumer_list)
# Check if ifm is produced, respectively ofm is consumed, by the CPU
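
A small illustration of the convention these checks rely on (the objects below are stand-ins): in Vela's graphs a None entry in a tensor's consumer_list marks a consumer outside the subgraph, i.e. the CPU.

    npu_consumer = object()               # stand-in for an NPU Operation
    consumer_list = [npu_consumer, None]  # tensor is also read by the CPU
    ofm_is_sg_ofm = any(cons is None for cons in consumer_list)
    print(ofm_is_sg_ofm)  # True -> tensor must be treated as a subgraph output
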
@@ -302,7 +306,13 @@ def remove_splitsliceread(op, arch):
else:
name = op.name + "_add"
ofm = op.ofm
- ifm2 = create_const_tensor(name + "_zero_scalar", [1], ofm.dtype, [0], quantization=ofm.quantization)
+ ifm2 = create_const_tensor(
+ name + "_zero_scalar",
+ [1],
+ ofm.dtype,
+ [0],
+ quantization=ofm.quantization,
+ )
add_op = create_add_nop(name)
add_op.inputs = [op.ifm, ifm2]
add_op.outputs = [ofm]
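
A numpy analogue of this rewrite (illustration only, the optimiser does not use numpy here): adding a zero scalar is an identity on the values, so the generated Add op simply copies the sliced read into the OFM.

    import numpy as np

    ifm_slice = np.arange(8, dtype=np.int8)  # what SplitSliceRead produces
    ifm2 = np.zeros(1, dtype=np.int8)        # the "_zero_scalar" constant
    ofm = ifm_slice + ifm2                   # add-with-zero acts as a copy
    print(np.array_equal(ofm, ifm_slice))    # True
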
@@ -330,7 +340,13 @@ def rewrite_concat(op):
write_offset = [0, 0, 0, 0]
write_offset[axis_4D] = offset
concat_end = offset + op.ifm_shapes[idx][axis_4D]
- create_add_for_concat(op, op.name + str(idx) + "_add", inp, op.ifm_shapes[idx], Shape4D.from_list(write_offset))
+ create_add_for_concat(
+ op,
+ op.name + str(idx) + "_add",
+ inp,
+ op.ifm_shapes[idx],
+ Shape4D.from_list(write_offset),
+ )
offset = concat_end
assert op.ofm_shapes[0][axis_4D] == offset
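
A standalone sketch of the offset bookkeeping in rewrite_concat (concat_write_offsets is a hypothetical helper, and plain lists stand in for Shape4D): each input is written at an increasing offset along the concat axis, and the final offset must match the OFM extent, which is what the assert checks.

    def concat_write_offsets(ifm_shapes, axis_4D):
        offset, offsets = 0, []
        for shape in ifm_shapes:
            write_offset = [0, 0, 0, 0]
            write_offset[axis_4D] = offset
            offsets.append(write_offset)
            offset += shape[axis_4D]
        return offsets, offset

    # Two inputs concatenated along the width axis (axis 2):
    offsets, end = concat_write_offsets([[1, 8, 3, 16], [1, 8, 5, 16]], 2)
    print(offsets)  # [[0, 0, 0, 0], [0, 0, 3, 0]]
    print(end)      # 8, the expected OFM width
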
@@ -417,7 +433,10 @@ def rewrite_rescale(op, arch, nng):
DebugDatabase.add_optimised(op, prev_op)
return op
else:
- print("Warning, unsupported fusing of TOSA Rescale previous operator is of type:", prev_op.type)
+ print(
+ "Warning, unsupported fusing of TOSA Rescale previous operator is of type:",
+ prev_op.type,
+ )
assert False
elif (
(ifm.dtype == DataType.int8 and ofm.dtype == DataType.int8)
@@ -447,7 +466,7 @@ def rewrite_rescale(op, arch, nng):
for a in equal_attributes:
assert op.attrs[a] == rescale_1.attrs[a] == rescale_2.attrs[a], (
f"Only handling equal {a} for all operands "
- "({op.attrs[a]}, {rescale_1.attrs[a]}, {rescale_2.attrs[a]}) "
+ f"({op.attrs[a]}, {rescale_1.attrs[a]}, {rescale_2.attrs[a]}) "
"for all the rescale operations to be fused with Add!"
)
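
The one-character fix above matters because implicitly concatenated string literals are interpolated piecewise: only the pieces carrying an f prefix substitute their braces, so the unprefixed middle piece used to print the placeholder text literally. A quick demonstration:

    a = "stride"
    buggy = f"Only handling equal {a} " "({a}) " "for all operands"
    fixed = f"Only handling equal {a} " f"({a}) " "for all operands"
    print(buggy)  # Only handling equal stride ({a}) for all operands
    print(fixed)  # Only handling equal stride (stride) for all operands
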
@@ -486,7 +505,10 @@ def rewrite_rescale(op, arch, nng):
print("Warning, unsupported fusing of TOSA Rescale with Add.")
assert False
else:
- print("Warning, unsupported fusing of TOSA Rescale previous operator is of type:", prev_op.type)
+ print(
+ "Warning, unsupported fusing of TOSA Rescale previous operator is of type:",
+ prev_op.type,
+ )
assert False
return op
@@ -519,17 +541,31 @@ def convert_pad_in_width(op):
if left > 0:
shape = Shape4D(1, ifm_shape.height, left, ofm_shape.depth)
zero_tens = create_const_tensor(
- op.name + "_left", shape.as_list(), ofm.dtype, shape.elements() * [pad_value], quantization=quant
+ op.name + "_left",
+ shape.as_list(),
+ ofm.dtype,
+ shape.elements() * [pad_value],
+ quantization=quant,
)
zero_tens.equivalence_id = create_equivalence_id(tuple(zero_tens.values))
create_add_for_concat(op, op.name + "_left", zero_tens, shape, shp0)
if right > 0:
shape = Shape4D(1, ifm_shape.height, right, ofm_shape.depth)
zero_tens = create_const_tensor(
- op.name + "_right", shape.as_list(), ofm.dtype, shape.elements() * [pad_value], quantization=quant
+ op.name + "_right",
+ shape.as_list(),
+ ofm.dtype,
+ shape.elements() * [pad_value],
+ quantization=quant,
)
zero_tens.equivalence_id = create_equivalence_id(tuple(zero_tens.values))
- create_add_for_concat(op, op.name + "_right", zero_tens, shape, shp0.with_width(ofm_shape.width - right))
+ create_add_for_concat(
+ op,
+ op.name + "_right",
+ zero_tens,
+ shape,
+ shp0.with_width(ofm_shape.width - right),
+ )
op.type = Op.ConcatTFLite
return add_op
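
A numpy sketch of what convert_pad_in_width assembles out of hardware ops (pad_in_width is a hypothetical stand-in): the left and right padding become constant zero tensors concatenated with the IFM along the width axis (axis 2 of an NHWC array).

    import numpy as np

    def pad_in_width(ifm, left, right, pad_value=0):
        n, h, _, c = ifm.shape
        parts = []
        if left > 0:
            parts.append(np.full((n, h, left, c), pad_value, dtype=ifm.dtype))
        parts.append(ifm)
        if right > 0:
            parts.append(np.full((n, h, right, c), pad_value, dtype=ifm.dtype))
        return np.concatenate(parts, axis=2)

    ifm = np.ones((1, 4, 6, 8), dtype=np.int8)
    print(pad_in_width(ifm, left=1, right=2).shape)  # (1, 4, 9, 8)
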
@@ -992,7 +1028,12 @@ def tosa_optimise_graph(nng, arch):
)
# Rewrite Operators step
- op_rewrite_list = [set_tensor_equivalence, rewrite_rescale, convert_depthwise_to_conv, convert_table_to_lut]
+ op_rewrite_list = [
+ set_tensor_equivalence,
+ rewrite_rescale,
+ convert_depthwise_to_conv,
+ convert_table_to_lut,
+ ]
for idx, sg in enumerate(nng.subgraphs):
nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(
diff --git a/ethosu/vela/tosa_reader.py b/ethosu/vela/tosa_reader.py
index 6d80e10..670b264 100644
--- a/ethosu/vela/tosa_reader.py
+++ b/ethosu/vela/tosa_reader.py
@@ -189,7 +189,8 @@ class TosaSubgraph:
elif op.type.is_conv2d_op():
inputs[1] = clone_and_reshape_tensor(inputs[1], (1, 2, 3, 0), False)
elif op.type.is_depthwise_conv2d_op():
- inputs[1] = clone_and_reshape_tensor(inputs[1], (1, 2, 0, 3), False)
+ HWCM_to_HWOI = (0, 1, 3, 2)
+ inputs[1] = clone_and_reshape_tensor(inputs[1], HWCM_to_HWOI, False)
if op.type.needs_bias() and len(inputs) <= op_type.info.indices.biases[0]:
# No Bias tensor
inputs.append(None)
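
A numpy sketch of the named permutation (shapes are illustrative): TOSA stores depthwise weights as HWCM, i.e. kernel height, kernel width, input channels, depth multiplier, and the (0, 1, 3, 2) transpose swaps the last two axes so the multiplier takes the output-channel position.

    import numpy as np

    HWCM_to_HWOI = (0, 1, 3, 2)
    w_hwcm = np.zeros((3, 3, 8, 2))             # 3x3 kernel, 8 channels, multiplier 2
    w_hwoi = np.transpose(w_hwcm, HWCM_to_HWOI)
    print(w_hwoi.shape)                         # (3, 3, 2, 8)
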
@@ -241,7 +242,14 @@ class TosaSubgraph:
if shift != 0:
op.explicit_scaling = ExplicitScaling(False, [shift], [1])
if op.type.is_depthwise_conv2d_op():
- op.attrs["depth_multiplier"] = op.weights.shape[3]
+ assert op.weights.shape[-1] % op.ifm.shape[-1] == 0
+ depth_multiplier = op.weights.shape[-1] // op.ifm.shape[-1]
+ if depth_multiplier > 1:
+ assert op.ifm.shape[-1] == 1 and op.ofm.shape[-1] == depth_multiplier, (
+ "For depth multipliers > 1, IFM channels must be 1 and "
+ "OFM channels must be equal to the depth multiplier"
+ )
+ op.attrs["depth_multiplier"] = depth_multiplier
if op.type == Op.SplitSliceRead:
op.read_offsets[0] = Shape4D.from_list(list(op.attrs["start"]), 0)
op.read_shapes[0] = op.attrs["size"]
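
A standalone sketch of this derivation (derive_depth_multiplier and the shapes are illustrative placeholders, not from a real network): the depth multiplier is the ratio of the weights' channel extent to the IFM depth, and a multiplier above 1 is only legal when the IFM has a single channel.

    def derive_depth_multiplier(weights_shape, ifm_shape, ofm_shape):
        assert weights_shape[-1] % ifm_shape[-1] == 0
        depth_multiplier = weights_shape[-1] // ifm_shape[-1]
        if depth_multiplier > 1:
            assert ifm_shape[-1] == 1 and ofm_shape[-1] == depth_multiplier
        return depth_multiplier

    print(derive_depth_multiplier([1, 3, 3, 8], [1, 16, 16, 1], [1, 16, 16, 8]))  # 8
    print(derive_depth_multiplier([1, 3, 3, 8], [1, 16, 16, 8], [1, 16, 16, 8]))  # 1
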