From 8d939c032703aa513705abc9c286d4410e28ced0 Mon Sep 17 00:00:00 2001
From: Michael McGeagh
Date: Wed, 29 Jul 2020 13:11:43 +0100
Subject: MLBEDSW-2660 Convert FC-like convs to FC

By converting certain Conv2D ops (where the kernel size is 1x1 and the IFM
H and W are both 1) to Fully Connected ops, Vela can better determine
whether or not the weights need to be cached/double buffered.
This change decreases the number of NPU_OP_DMA_START commands found in
the resulting command stream.

Signed-off-by: Michael McGeagh
Change-Id: I928150d9f360578dde75a83986bea1560d83cbdd
---
 ethosu/vela/graph_optimiser.py | 47 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git a/ethosu/vela/graph_optimiser.py b/ethosu/vela/graph_optimiser.py
index 23ddf833..c805be50 100644
--- a/ethosu/vela/graph_optimiser.py
+++ b/ethosu/vela/graph_optimiser.py
@@ -445,6 +445,52 @@ def reorder_depthwise_weights(op, arch):
     return op
 
 
+def convert_conv_to_fc(op, arch):
+    # A 1x1 Conv can be equivalent to a Fully Connected layer.
+    # By representing certain convs as fully connected layers, Vela can better determine whether or not to use
+    # caching/double buffering for the weights.
+    # (Weights don't need to be reloaded for convs when IFM H and W are 1)
+    if op.type == "Conv2DBiasAct":
+        _, h, w, _ = op.inputs[0].shape
+        kh, kw, _, _ = op.inputs[1].shape
+        if h == 1 and w == 1 and kh == 1 and kw == 1:
+            # Overwrite this op as a Fully Connected op
+            op.name += "_fc"
+            op.type = "FullyConnectedAct"
+            faf = op.attrs.get("fused_activation_function", None)
+            op.attrs = {
+                "fused_activation_function": faf,
+                "weights_format": 0,
+                "npu_block_type": NpuBlockType.VectorProduct,
+            }
+            # Reshape the weights to be 2D. HWIO becomes just IO (as H and W are 1, they can simply be dropped)
+            weight_tensor = op.inputs[1]
+            weight_tensor.quant_values = weight_tensor.quant_values.squeeze(axis=(0, 1))
+            weight_tensor.set_all_shapes(list(weight_tensor.quant_values.shape))
+            # The output of a fully connected op is expected to be 2D, so a reshape is needed to convert it
+            # back to 4D afterwards, as the next layer expects that shape
+            orig_ofm_tensor = op.outputs[0]
+            # Reshape this op's output to be 2D: {(N*H*W), C} (as N, H and W are all 1, this becomes {1, C})
+            fc_ofm_tensor = orig_ofm_tensor.clone("_fc")
+            fc_ofm_tensor.set_all_shapes([1, fc_ofm_tensor.shape[-1]])
+            fc_ofm_tensor.ops = [op]
+            # Add a reshape after the new OFM to convert it back to the original 4D shape
+            reshape_name = op.name + "_reshape_post"
+            new_shape_tens = Tensor([1], DataType.int32, reshape_name + "_shape")
+            new_shape_tens.values = np.array(orig_ofm_tensor.shape)
+            new_shape_tens_const = Operation("Const", new_shape_tens.name + "_const")
+            new_shape_tens.ops = [new_shape_tens_const]
+            new_shape_tens_const.outputs = [new_shape_tens]
+            reshape_op = Operation("Reshape", reshape_name)
+            reshape_op.inputs = [fc_ofm_tensor, new_shape_tens]
+            reshape_op.attrs["new_shape"] = orig_ofm_tensor.shape
+            orig_ofm_tensor.ops = [reshape_op]
+            reshape_op.outputs = [orig_ofm_tensor]
+            # Replace this op's OFM with the 2D tensor
+            op.outputs[0] = fc_ofm_tensor
+    return op
+
+
 # Reorder activation op if it's after the memory only operations
 def fixup_act_reorder(op, arch):
     if op.type in activation_ops:
@@ -591,6 +637,7 @@ def optimise_graph_a(nng, arch, verbose_graph=False):
         supported_operator_check,
         # then do any rewrites of supported operators
         convert_depthwise_to_conv,
+        convert_conv_to_fc,
         fixup_fully_connected_input,
         fixup_pack_input,
         fixup_conv2d_backprop,
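
For reference, the equivalence the rewrite relies on can be checked numerically outside of Vela. The sketch below is not part of the patch; it uses plain NumPy with arbitrarily chosen channel counts to show that a 1x1 convolution over a 1x1 IFM computes the same values as a fully connected layer whose weight matrix is the HWIO kernel with the unit H and W axes squeezed away, mirroring the weight reshape and the trailing Reshape op that convert_conv_to_fc inserts.

import numpy as np

np.random.seed(0)
c_in, c_out = 8, 16                         # example channel counts, chosen arbitrarily
ifm = np.random.rand(1, 1, 1, c_in)         # NHWC IFM with N = H = W = 1
kernel = np.random.rand(1, 1, c_in, c_out)  # HWIO kernel with kh = kw = 1

# 1x1 convolution over a 1x1 spatial extent: one dot product per output channel
conv_ofm = np.einsum("nhwi,hwio->nhwo", ifm, kernel)

# The same computation expressed as a fully connected layer:
# flatten the IFM to {(N*H*W), C_in} and squeeze the kernel to {C_in, C_out}
fc_ifm = ifm.reshape(1, c_in)
fc_weights = kernel.squeeze(axis=(0, 1))
fc_ofm = fc_ifm @ fc_weights                # 2D OFM of shape {1, C_out}

# Reshaping the 2D FC output back to 4D recovers the conv output exactly,
# which is what the appended Reshape op restores for the next layer
assert np.allclose(conv_ofm, fc_ofm.reshape(1, 1, 1, c_out))

Because the fully connected form makes it explicit that each weight is used only once per inference, the scheduler can avoid setting up weight double buffering, which is what removes the extra NPU_OP_DMA_START commands mentioned in the commit message.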