author     Michael McGeagh <michael.mcgeagh@arm.com>   2020-07-29 13:11:43 +0100
committer  Michael Mcgeagh <michael.mcgeagh@arm.com>   2020-07-30 16:01:25 +0100
commit     8d939c032703aa513705abc9c286d4410e28ced0 (patch)
tree       8c81c111f25cc256d48c7a3caa5aecfcf9496d12
parent     6a8d424b4d41fb5ea69996dd227ea74f794f7a64 (diff)
MLBEDSW-2660 Convert FC-like convs to FC
By converting certain Conv2Ds (where the kernel size is 1x1 and the IFM H and W
are both 1) to Fully Connecteds, vela can better know whether the weights need
to be cached/double buffered or not. This change decreases the number of
NPU_OP_DMA_START commands found in the resulting command stream.

Signed-off-by: Michael McGeagh <michael.mcgeagh@arm.com>
Change-Id: I928150d9f360578dde75a83986bea1560d83cbdd
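The equivalence being exploited can be checked outside Vela with a few lines of NumPy. This is an illustrative sketch only, not part of the patch, using the same NHWC activation and HWIO weight layouts that the conversion assumes:

import numpy as np

# Sketch: a 1x1 Conv2D over a 1x1 spatial NHWC input gives the same numbers
# as a fully connected layer operating on the channel dimension.
ifm = np.random.rand(1, 1, 1, 16).astype(np.float32)      # N=1, H=1, W=1, C=16
weights = np.random.rand(1, 1, 16, 8).astype(np.float32)  # kH=1, kW=1, I=16, O=8

# The "convolution": with a 1x1 kernel and a 1x1 spatial extent it reduces to
# a dot product over the input channels for every output channel.
conv_out = np.einsum("nhwc,hwco->nhwo", ifm, weights)      # shape (1, 1, 1, 8)

# Fully connected view: drop the unit H/W axes (HWIO -> IO, NHWC -> NC),
# which is what convert_conv_to_fc does with squeeze()/set_all_shapes().
fc_out = ifm.reshape(1, 16) @ weights.squeeze(axis=(0, 1))  # shape (1, 8)

assert np.allclose(conv_out.reshape(1, 8), fc_out)

Because the numbers are identical, the only bookkeeping the pass has to do is squeeze the weights to 2D and reshape the OFM back to its original 4D shape for the following operator.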
-rw-r--r--  ethosu/vela/graph_optimiser.py | 47
1 file changed, 47 insertions, 0 deletions
diff --git a/ethosu/vela/graph_optimiser.py b/ethosu/vela/graph_optimiser.py
index 23ddf833..c805be50 100644
--- a/ethosu/vela/graph_optimiser.py
+++ b/ethosu/vela/graph_optimiser.py
@@ -445,6 +445,52 @@ def reorder_depthwise_weights(op, arch):
    return op
+def convert_conv_to_fc(op, arch):
+    # Conv 1x1 can be equivalent to Fully Connected.
+    # By representing certain convs as fully connected layers, Vela can better determine whether or not to use
+    # caching/double buffering for the weights.
+    # (Weights don't need to be reloaded for convs when IFM H and W are 1)
+    if op.type == "Conv2DBiasAct":
+        _, h, w, _ = op.inputs[0].shape
+        kh, kw, _, _ = op.inputs[1].shape
+        if h == 1 and w == 1 and kh == 1 and kw == 1:
+            # Overwrite this op as a Fully Connected Op
+            op.name += "_fc"
+            op.type = "FullyConnectedAct"
+            faf = op.attrs.get("fused_activation_function", None)
+            op.attrs = {
+                "fused_activation_function": faf,
+                "weights_format": 0,
+                "npu_block_type": NpuBlockType.VectorProduct,
+            }
+            # Reshape Weights to be 2D. HWIO becomes just IO (as H and W are 1, they can just be dropped)
+            weight_tensor = op.inputs[1]
+            weight_tensor.quant_values = weight_tensor.quant_values.squeeze(axis=(0, 1))
+            weight_tensor.set_all_shapes(list(weight_tensor.quant_values.shape))
+            # The output from a fully connected is expected to be 2D, so we need to add a reshape layer to convert it
+            # back to 4D afterwards, as the next layer is expecting that shape
+            orig_ofm_tensor = op.outputs[0]
+            # Reshape this op's output to be 2D: {(N*H*W), C} (we know N, H and W are all 1, so this becomes {1, C})
+            fc_ofm_tensor = orig_ofm_tensor.clone("_fc")
+            fc_ofm_tensor.set_all_shapes([1, fc_ofm_tensor.shape[-1]])
+            fc_ofm_tensor.ops = [op]
+            # Add a reshape after the new OFM to convert it back to the original 4D shape
+            reshape_name = op.name + "_reshape_post"
+            new_shape_tens = Tensor([1], DataType.int32, reshape_name + "_shape")
+            new_shape_tens.values = np.array(orig_ofm_tensor.shape)
+            new_shape_tens_const = Operation("Const", new_shape_tens.name + "_const")
+            new_shape_tens.ops = [new_shape_tens_const]
+            new_shape_tens_const.outputs = [new_shape_tens]
+            reshape_op = Operation("Reshape", reshape_name)
+            reshape_op.inputs = [fc_ofm_tensor, new_shape_tens]
+            reshape_op.attrs["new_shape"] = orig_ofm_tensor.shape
+            orig_ofm_tensor.ops = [reshape_op]
+            reshape_op.outputs = [orig_ofm_tensor]
+            # Replace this op's OFM to point to the 2D tensor
+            op.outputs[0] = fc_ofm_tensor
+    return op
+
+
# Reorder activation op if it's after the memory only operations
def fixup_act_reorder(op, arch):
    if op.type in activation_ops:
@@ -591,6 +637,7 @@ def optimise_graph_a(nng, arch, verbose_graph=False):
        supported_operator_check,
        # then do any rewrites of supported operators
        convert_depthwise_to_conv,
+        convert_conv_to_fc,
        fixup_fully_connected_input,
        fixup_pack_input,
        fixup_conv2d_backprop,
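
For context, the entries in this list are op-rewrite passes that share one contract: take an op and the target arch, possibly rewrite the op (and its surrounding tensors) in place, and return the op to keep. The sketch below is a simplified illustration of that contract, not Vela's actual traversal code, and the apply_op_rewrites helper name is hypothetical:

# Simplified sketch (not the real graph_optimiser traversal): chain each
# rewrite pass over every op, in list order, so an op rewritten by an earlier
# pass such as convert_conv_to_fc is still seen by the later passes.
def apply_op_rewrites(ops, rewrite_passes, arch):
    rewritten = []
    for op in ops:
        for rewrite in rewrite_passes:
            op = rewrite(op, arch)  # each pass returns the (possibly rewritten) op
        rewritten.append(op)
    return rewritten

Placing convert_conv_to_fc just before fixup_fully_connected_input presumably lets the newly created FullyConnectedAct op be handled by the same FC-specific fixups as a network's original fully connected layers.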