author     Michael McGeagh <michael.mcgeagh@arm.com>   2020-07-29 13:11:43 +0100
committer  Michael Mcgeagh <michael.mcgeagh@arm.com>   2020-07-30 16:01:25 +0100
commit     8d939c032703aa513705abc9c286d4410e28ced0 (patch)
tree       8c81c111f25cc256d48c7a3caa5aecfcf9496d12
parent     6a8d424b4d41fb5ea69996dd227ea74f794f7a64 (diff)
MLBEDSW-2660 Convert FC-like convs to FC
By converting certain Conv2Ds (where the kernel size is 1x1 and the IFM H and W
are both 1) to Fully Connecteds, vela can better know whether the weights need
to be cached/double buffered or not. This change decreases the number of
NPU_OP_DMA_START commands found in the resulting command stream.

Signed-off-by: Michael McGeagh <michael.mcgeagh@arm.com>
Change-Id: I928150d9f360578dde75a83986bea1560d83cbdd
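The equivalence being exploited can be checked outside Vela with a few lines of NumPy. This is an illustrative sketch only, not part of the patch, using the same NHWC activation and HWIO weight layouts that the conversion assumes:

import numpy as np

# Sketch: a 1x1 Conv2D over a 1x1 spatial NHWC input gives the same numbers
# as a fully connected layer operating on the channel dimension.
ifm = np.random.rand(1, 1, 1, 16).astype(np.float32)      # N=1, H=1, W=1, C=16
weights = np.random.rand(1, 1, 16, 8).astype(np.float32)  # kH=1, kW=1, I=16, O=8

# The "convolution": with a 1x1 kernel and a 1x1 spatial extent it reduces to
# a dot product over the input channels for every output channel.
conv_out = np.einsum("nhwc,hwco->nhwo", ifm, weights)      # shape (1, 1, 1, 8)

# Fully connected view: drop the unit H/W axes (HWIO -> IO, NHWC -> NC),
# which is what convert_conv_to_fc does with squeeze()/set_all_shapes().
fc_out = ifm.reshape(1, 16) @ weights.squeeze(axis=(0, 1))  # shape (1, 8)

assert np.allclose(conv_out.reshape(1, 8), fc_out)

Because the numbers are identical, the only bookkeeping the pass has to do is squeeze the weights to 2D and reshape the OFM back to its original 4D shape for the following operator.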
-rw-r--r--  ethosu/vela/graph_optimiser.py | 47
1 file changed, 47 insertions, 0 deletions
diff --git a/ethosu/vela/graph_optimiser.py b/ethosu/vela/graph_optimiser.py
index 23ddf833..c805be50 100644
--- a/ethosu/vela/graph_optimiser.py
+++ b/ethosu/vela/graph_optimiser.py
@@ -445,6 +445,52 @@ def reorder_depthwise_weights(op, arch):
    return op
+def convert_conv_to_fc(op, arch):
+    # Conv 1x1 can be equivalent to Fully Connected.
+    # By representing certain convs as fully connected layers, Vela can better determine whether or not to use
+    # caching/double buffering for the weights.
+    # (Weights don't need to be reloaded for convs when IFM H and W are 1)
+    if op.type == "Conv2DBiasAct":
+        _, h, w, _ = op.inputs[0].shape
+        kh, kw, _, _ = op.inputs[1].shape
+        if h == 1 and w == 1 and kh == 1 and kw == 1:
+            # Overwrite this op as a Fully Connected Op
+            op.name += "_fc"
+            op.type = "FullyConnectedAct"
+            faf = op.attrs.get("fused_activation_function", None)
+            op.attrs = {
+                "fused_activation_function": faf,
+                "weights_format": 0,
+                "npu_block_type": NpuBlockType.VectorProduct,
+            }
+            # Reshape Weights to be 2D. HWIO becomes just IO (as H and W are 1, they can just be dropped)
+            weight_tensor = op.inputs[1]
+            weight_tensor.quant_values = weight_tensor.quant_values.squeeze(axis=(0, 1))
+            weight_tensor.set_all_shapes(list(weight_tensor.quant_values.shape))
+            # The output from a fully connected is expected to be 2D, so we need to add a reshape layer to convert it
+            # back to 4D afterwards, as the next layer is expecting that shape
+            orig_ofm_tensor = op.outputs[0]
+            # Reshape this op's output to be 2D: {(N*H*W), C} (we know N, H and W are all 1, so this becomes {1, C})
+            fc_ofm_tensor = orig_ofm_tensor.clone("_fc")
+            fc_ofm_tensor.set_all_shapes([1, fc_ofm_tensor.shape[-1]])
+            fc_ofm_tensor.ops = [op]
+            # Add a reshape after the new OFM to convert it back to the original 4D shape
+            reshape_name = op.name + "_reshape_post"
+            new_shape_tens = Tensor([1], DataType.int32, reshape_name + "_shape")
+            new_shape_tens.values = np.array(orig_ofm_tensor.shape)
+            new_shape_tens_const = Operation("Const", new_shape_tens.name + "_const")
+            new_shape_tens.ops = [new_shape_tens_const]
+            new_shape_tens_const.outputs = [new_shape_tens]
+            reshape_op = Operation("Reshape", reshape_name)
+            reshape_op.inputs = [fc_ofm_tensor, new_shape_tens]
+            reshape_op.attrs["new_shape"] = orig_ofm_tensor.shape
+            orig_ofm_tensor.ops = [reshape_op]
+            reshape_op.outputs = [orig_ofm_tensor]
+            # Replace this op's OFM to point to the 2D tensor
+            op.outputs[0] = fc_ofm_tensor
+    return op
+
+
# Reorder activation op if it's after the memory only operations
def fixup_act_reorder(op, arch):
    if op.type in activation_ops:
@@ -591,6 +637,7 @@ def optimise_graph_a(nng, arch, verbose_graph=False):
        supported_operator_check,
        # then do any rewrites of supported operators
        convert_depthwise_to_conv,
+        convert_conv_to_fc,
        fixup_fully_connected_input,
        fixup_pack_input,
        fixup_conv2d_backprop,
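
For context, the entries in this list are op-rewrite passes that share one contract: take an op and the target arch, possibly rewrite the op (and its surrounding tensors) in place, and return the op to keep. The sketch below is a simplified illustration of that contract, not Vela's actual traversal code, and the apply_op_rewrites helper name is hypothetical:

# Simplified sketch (not the real graph_optimiser traversal): chain each
# rewrite pass over every op, in list order, so an op rewritten by an earlier
# pass such as convert_conv_to_fc is still seen by the later passes.
def apply_op_rewrites(ops, rewrite_passes, arch):
    rewritten = []
    for op in ops:
        for rewrite in rewrite_passes:
            op = rewrite(op, arch)  # each pass returns the (possibly rewritten) op
        rewritten.append(op)
    return rewritten

Placing convert_conv_to_fc just before fixup_fully_connected_input presumably lets the newly created FullyConnectedAct op be handled by the same FC-specific fixups as a network's original fully connected layers.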