diff options
-rw-r--r-- | ethosu/vela/nn_graph.py | 5 |
-rw-r--r-- | ethosu/vela/tflite_graph_optimiser.py | 9 |
-rw-r--r-- | ethosu/vela/tosa_graph_optimiser.py | 61 |
-rw-r--r-- | ethosu/vela/tosa_reader.py | 12 |
4 files changed, 73 insertions(+), 14 deletions(-)
diff --git a/ethosu/vela/nn_graph.py b/ethosu/vela/nn_graph.py index 3c87f9b..b9eee28 100644 --- a/ethosu/vela/nn_graph.py +++ b/ethosu/vela/nn_graph.py @@ -253,7 +253,10 @@ class Subgraph: for tens in ps.inputs: for op in tens.ops: pred_pass = op.scheduled_pass - assert pred_pass.time < ps.time + # Pass with split concat ops may end up with a dependency to + # itself since output from concat is produced by several avg pool ops. + # Hence pred_pass can be equal to ps. + assert pred_pass == ps or pred_pass.time < ps.time if ps not in pred_pass.successors: pred_pass.successors.append(ps) diff --git a/ethosu/vela/tflite_graph_optimiser.py b/ethosu/vela/tflite_graph_optimiser.py index 687e5d4..3af8588 100644 --- a/ethosu/vela/tflite_graph_optimiser.py +++ b/ethosu/vela/tflite_graph_optimiser.py @@ -827,7 +827,7 @@ def convert_batched_fc_shape(op: Operation, arch, nng) -> Operation: if op.type == Op.FullyConnected: # Check if the first dimension indicates batching if op.ifm_shapes[0].batch > 1: - batching_split = {4: (2, 2), 8: (2, 4), 16: (4, 4)} + batching_split = {4: (2, 2), 6: (2, 3), 8: (2, 4), 9: (3, 3), 12: (3, 4), 16: (4, 4)} n = op.ifm_shapes[0].batch h, w = batching_split.get(n, (1, n)) op.ifm_shapes[0] = Shape4D([1, h, w, op.ifm_shapes[0].depth]) @@ -840,6 +840,13 @@ def convert_batched_fc_shape(op: Operation, arch, nng) -> Operation: n = op.ofm_shapes[0].batch h, w = batching_split.get(n, (1, n)) op.ofm_shapes[0] = Shape4D([1, h, w, op.ofm_shapes[0].depth]) + if h == 1 and w > 4: + # If batch can not be found in the split set the weights are going to be + # read from memory several times. Convert op to conv2d since this + # enables weight buffering. 
+ op.type = Op.Conv2DBias + op.attrs["padding"] = Padding.SAME + DebugDatabase.add_optimised(op, op) return op diff --git a/ethosu/vela/tosa_graph_optimiser.py b/ethosu/vela/tosa_graph_optimiser.py index 09b2c52..26d3dca 100644 --- a/ethosu/vela/tosa_graph_optimiser.py +++ b/ethosu/vela/tosa_graph_optimiser.py @@ -247,7 +247,11 @@ def fix_sg_input_output_tosa(op, arch, nng): # consumed by CPU # Check if operator ifm/ofm are sg ifm/ofm - ifm_is_sg_ifm = op.ifm.ops[0].type in (Op.Placeholder, Op.SubgraphInput, Op.Const) + ifm_is_sg_ifm = op.ifm.ops[0].type in ( + Op.Placeholder, + Op.SubgraphInput, + Op.Const, + ) ifm_is_sg_ofm = any(ifm_cons is None for ifm_cons in op.ifm.consumer_list) ofm_is_sg_ofm = any(ofm_cons is None for ofm_cons in op.ofm.consumer_list) # Check if ifm/ofm is produced repectivly consumed by CPU @@ -302,7 +306,13 @@ def remove_splitsliceread(op, arch): else: name = op.name + "_add" ofm = op.ofm - ifm2 = create_const_tensor(name + "_zero_scalar", [1], ofm.dtype, [0], quantization=ofm.quantization) + ifm2 = create_const_tensor( + name + "_zero_scalar", + [1], + ofm.dtype, + [0], + quantization=ofm.quantization, + ) add_op = create_add_nop(name) add_op.inputs = [op.ifm, ifm2] add_op.outputs = [ofm] @@ -330,7 +340,13 @@ def rewrite_concat(op): write_offset = [0, 0, 0, 0] write_offset[axis_4D] = offset concat_end = offset + op.ifm_shapes[idx][axis_4D] - create_add_for_concat(op, op.name + str(idx) + "_add", inp, op.ifm_shapes[idx], Shape4D.from_list(write_offset)) + create_add_for_concat( + op, + op.name + str(idx) + "_add", + inp, + op.ifm_shapes[idx], + Shape4D.from_list(write_offset), + ) offset = concat_end assert op.ofm_shapes[0][axis_4D] == offset @@ -417,7 +433,10 @@ def rewrite_rescale(op, arch, nng): DebugDatabase.add_optimised(op, prev_op) return op else: - print("Warning, unsupported fusing of TOSA Rescale previous operator is of type:", prev_op.type) + print( + "Warning, unsupported fusing of TOSA Rescale previous operator is of type:", 
+ prev_op.type, + ) assert False elif ( (ifm.dtype == DataType.int8 and ofm.dtype == DataType.int8) @@ -447,7 +466,7 @@ def rewrite_rescale(op, arch, nng): for a in equal_attributes: assert op.attrs[a] == rescale_1.attrs[a] == rescale_2.attrs[a], ( f"Only handling equal {a} for all operands " - "({op.attrs[a]}, {rescale_1.attrs[a]}, {rescale_2.attrs[a]}) " + f"({op.attrs[a]}, {rescale_1.attrs[a]}, {rescale_2.attrs[a]}) " "for all the rescale operations to be fused with Add!" ) @@ -486,7 +505,10 @@ def rewrite_rescale(op, arch, nng): print("Warning, unsupported fusing of TOSA Rescale with Add.") assert False else: - print("Warning, unsupported fusing of TOSA Rescale previous operator is of type:", prev_op.type) + print( + "Warning, unsupported fusing of TOSA Rescale previous operator is of type:", + prev_op.type, + ) assert False return op @@ -519,17 +541,31 @@ def convert_pad_in_width(op): if left > 0: shape = Shape4D(1, ifm_shape.height, left, ofm_shape.depth) zero_tens = create_const_tensor( - op.name + "_left", shape.as_list(), ofm.dtype, shape.elements() * [pad_value], quantization=quant + op.name + "_left", + shape.as_list(), + ofm.dtype, + shape.elements() * [pad_value], + quantization=quant, ) zero_tens.equivalence_id = create_equivalence_id(tuple(zero_tens.values)) create_add_for_concat(op, op.name + "_left", zero_tens, shape, shp0) if right > 0: shape = Shape4D(1, ifm_shape.height, right, ofm_shape.depth) zero_tens = create_const_tensor( - op.name + "_right", shape.as_list(), ofm.dtype, shape.elements() * [pad_value], quantization=quant + op.name + "_right", + shape.as_list(), + ofm.dtype, + shape.elements() * [pad_value], + quantization=quant, ) zero_tens.equivalence_id = create_equivalence_id(tuple(zero_tens.values)) - create_add_for_concat(op, op.name + "_right", zero_tens, shape, shp0.with_width(ofm_shape.width - right)) + create_add_for_concat( + op, + op.name + "_right", + zero_tens, + shape, + shp0.with_width(ofm_shape.width - right), + ) op.type = 
Op.ConcatTFLite return add_op @@ -992,7 +1028,12 @@ def tosa_optimise_graph(nng, arch): ) # Rewite Operators step - op_rewrite_list = [set_tensor_equivalence, rewrite_rescale, convert_depthwise_to_conv, convert_table_to_lut] + op_rewrite_list = [ + set_tensor_equivalence, + rewrite_rescale, + convert_depthwise_to_conv, + convert_table_to_lut, + ] for idx, sg in enumerate(nng.subgraphs): nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order( diff --git a/ethosu/vela/tosa_reader.py b/ethosu/vela/tosa_reader.py index 6d80e10..670b264 100644 --- a/ethosu/vela/tosa_reader.py +++ b/ethosu/vela/tosa_reader.py @@ -189,7 +189,8 @@ class TosaSubgraph: elif op.type.is_conv2d_op(): inputs[1] = clone_and_reshape_tensor(inputs[1], (1, 2, 3, 0), False) elif op.type.is_depthwise_conv2d_op(): - inputs[1] = clone_and_reshape_tensor(inputs[1], (1, 2, 0, 3), False) + HWCM_to_HWOI = (0, 1, 3, 2) + inputs[1] = clone_and_reshape_tensor(inputs[1], HWCM_to_HWOI, False) if op.type.needs_bias() and len(inputs) <= op_type.info.indices.biases[0]: # No Bias tensor inputs.append(None) @@ -241,7 +242,14 @@ class TosaSubgraph: if shift != 0: op.explicit_scaling = ExplicitScaling(False, [shift], [1]) if op.type.is_depthwise_conv2d_op(): - op.attrs["depth_multiplier"] = op.weights.shape[3] + assert op.weights.shape[-1] % op.ifm.shape[-1] == 0 + depth_multiplier = op.weights.shape[-1] / op.ifm.shape[-1] + if depth_multiplier > 1: + assert op.ifm.shape[-1] == 1 and op.ofm.shape[-1] == depth_multiplier, ( + "For depth multipliers > 1, IFM channels must be 1 and " + "OFM channels must be equal to the depth multiplier" + ) + op.attrs["depth_multiplier"] = depth_multiplier if op.type == Op.SplitSliceRead: op.read_offsets[0] = Shape4D.from_list(list(op.attrs["start"]), 0) op.read_shapes[0] = op.attrs["size"] |