From cf7da10987cac3fc68cf180a9af665fe06d608fa Mon Sep 17 00:00:00 2001 From: Jacob Bohlin Date: Wed, 20 May 2020 09:03:40 +0200 Subject: MLBEDSW-1716: Transpose Convolution support Change-Id: Ie6d8d6de9f3447f19ba06aafa9fa480fc96a973b Signed-off-by: Jacob Bohlin --- ethosu/vela/graph_optimiser.py | 53 +++++++++++++++++++++--- ethosu/vela/mark_tensors.py | 4 +- ethosu/vela/operation.py | 3 ++ ethosu/vela/pass_packing.py | 2 +- ethosu/vela/register_command_stream_generator.py | 8 +++- ethosu/vela/supported_operators.py | 30 +++++++++++++- ethosu/vela/weight_compressor.py | 8 +++- 7 files changed, 96 insertions(+), 12 deletions(-) (limited to 'ethosu') diff --git a/ethosu/vela/graph_optimiser.py b/ethosu/vela/graph_optimiser.py index b004f4cc..ca8b89fc 100644 --- a/ethosu/vela/graph_optimiser.py +++ b/ethosu/vela/graph_optimiser.py @@ -131,12 +131,50 @@ def calc_padding_and_skirt(padding_type, kernel_size, stride, input_dims): skirt = (top_pad, left_pad, ypad - top_pad, xpad - left_pad) return padding, skirt +def calc_upscaled_padding_and_skirt(padding_type, kernel_size, stride, input_dims): + upscaled_shape = [input_dims[0], input_dims[1] * stride[1], input_dims[2] * stride[2], input_dims[3]] + ypad = needed_total_padding(int(upscaled_shape[1]), int(stride[1]), int(kernel_size[0])) + xpad = needed_total_padding(int(upscaled_shape[2]), int(stride[2]), int(kernel_size[1])) + + if padding_type == b"SAME": + right_pad = ((xpad + 1) // 2) - 1 + bottom_pad = ((ypad + 1) // 2) - 1 + left_pad = max(kernel_size[0] - 1 - right_pad, 0) + top_pad = max(kernel_size[1] - 1 - bottom_pad, 0) + elif padding_type == b"VALID": + right_pad = (xpad + 1) // 2 + bottom_pad = (ypad + 1) // 2 + left_pad = max(kernel_size[0] - right_pad, 0) + top_pad = max(kernel_size[1] - bottom_pad, 0) + else: + assert 0, "Unknown padding" + + padding = (top_pad, left_pad, bottom_pad, right_pad) + skirt = (top_pad, left_pad, ypad - top_pad, xpad - left_pad) + return padding, skirt + def 
fixup_conv2d_backprop(op, arch): if op.type == "Conv2DBackpropInput": # flip the inputs op.inputs[0], op.inputs[2] = op.inputs[2], op.inputs[0] - op.type = "Conv2DBackpropInputSwitched" + op.type = "Conv2DBackpropInputSwitchedBias" + weight_shape = op.inputs[1].shape + weight_sets = weight_shape[3] + + if len(op.inputs) < 4: + # Add bias/scale tensor filled with zeros + scale_op = Operation("Const", op.name + "_bias") + scale_tens = Tensor([weight_sets], DataType.int32, op.name + "_bias_tens") + scale_tens.values = [0] * weight_sets + scale_tens.quant_values = [0] * weight_sets + scale_tens.ops = [scale_op] + scale_op.outputs = [scale_tens] + scale_tens.consumer_list = [op] + op.inputs.append(scale_tens) + + # Update strides + op.attrs.update( {"stride_w": 1, "stride_h": 1, "strides": (1,1,1,1)} ) return op @@ -292,15 +330,20 @@ def add_padding_fields(op, arch): else: raise UnsupportedFeatureError("Unknown operation that uses padding: {}".format(op.type)) - dilation_h, dilation_w = op.get_dilation_h_w() - dilated_kernel_size = [dilation_h * (kernel_size[0] - 1) + 1, dilation_w * (kernel_size[1] - 1) + 1] - padding, skirt = calc_padding_and_skirt(op.attrs["padding"], dilated_kernel_size, op.attrs["strides"], input_shape) + if op.type == "Conv2DBackpropInputSwitchedBias": + padding, skirt = calc_upscaled_padding_and_skirt(op.attrs["padding"], kernel_size, op.attrs["strides"], input_shape) + else: + dilation_h, dilation_w = op.get_dilation_h_w() + dilated_kernel_size = [dilation_h * (kernel_size[0] - 1) + 1, dilation_w * (kernel_size[1] - 1) + 1] + padding, skirt = calc_padding_and_skirt(op.attrs["padding"], dilated_kernel_size, op.attrs["strides"], input_shape) + op.attrs["explicit_padding"] = padding op.attrs["skirt"] = skirt + return op -conv_op = set(("Conv2D", "QuantizedConv2D", "Conv2DBackpropInputSwitched", "Conv2DBiasAct")) +conv_op = set(("Conv2D", "QuantizedConv2D", "Conv2DBackpropInputSwitchedBias", "Conv2DBiasAct")) fc_op = set( ( "MatMul", diff --git 
a/ethosu/vela/mark_tensors.py b/ethosu/vela/mark_tensors.py index bf7bc45f..5231e860 100644 --- a/ethosu/vela/mark_tensors.py +++ b/ethosu/vela/mark_tensors.py @@ -111,8 +111,8 @@ tensor_purposes = [ # ops, input_purpose purpose_from_list([TensorPurpose.FeatureMap, TensorPurpose.Weights, TensorPurpose.FeatureMap]), ), ( - set(("Conv2DBackpropInputSwitched",)), - purpose_from_list([TensorPurpose.FeatureMap, TensorPurpose.Weights, TensorPurpose.FeatureMap]), + set(("Conv2DBackpropInputSwitchedBias",)), + purpose_from_list([TensorPurpose.FeatureMap, TensorPurpose.Weights, TensorPurpose.FeatureMap, TensorPurpose.FeatureMap]), ), ( set(("QuantizedConv2D", "QuantizedMatMul")), diff --git a/ethosu/vela/operation.py b/ethosu/vela/operation.py index e8a03b7d..51311ef7 100644 --- a/ethosu/vela/operation.py +++ b/ethosu/vela/operation.py @@ -75,6 +75,9 @@ input and output tensors, as well as an attribute dictionary.""" if len(self.inputs) >= 3: bias_idx = 2 + elif self.type == "Conv2DBackpropInputSwitchedBias": + bias_idx = 3 + elif npu_block_type == NpuBlockType.Pooling: ifm_idx = 0 ofm_idx = 0 diff --git a/ethosu/vela/pass_packing.py b/ethosu/vela/pass_packing.py index 9bf080e6..5841ca23 100644 --- a/ethosu/vela/pass_packing.py +++ b/ethosu/vela/pass_packing.py @@ -48,7 +48,7 @@ mac_main_ops = set( "Conv2DBiasAct", "Conv2D", "QuantizedConv2D", - "Conv2DBackpropInputSwitched", + "Conv2DBackpropInputSwitchedBias", # depth-wise convolutions "DepthwiseConv2dBiasAct", "DepthwiseConv2dNative", diff --git a/ethosu/vela/register_command_stream_generator.py b/ethosu/vela/register_command_stream_generator.py index 3da8bbcf..11c0c20d 100644 --- a/ethosu/vela/register_command_stream_generator.py +++ b/ethosu/vela/register_command_stream_generator.py @@ -38,6 +38,7 @@ from .ethos_u55_regs.ethos_u55_regs import cmd0 from .ethos_u55_regs.ethos_u55_regs import cmd1 from .ethos_u55_regs.ethos_u55_regs import elementwise_mode from .ethos_u55_regs.ethos_u55_regs import ifm_precision +from 
.ethos_u55_regs.ethos_u55_regs import resampling_mode from .ethos_u55_regs.ethos_u55_regs import rounding from .high_level_command_stream import CommandType from .numeric_util import clamp_sigmoid @@ -555,9 +556,12 @@ def generate_register_command_stream(nng, sg, arch, verbose=False): if primary_op.type == "ResizeBilinear": # perform nearest neighbor upscale - emit.cmd0_with_param(cmd0.NPU_SET_IFM_UPSCALE, 1) + emit.cmd0_with_param(cmd0.NPU_SET_IFM_UPSCALE, resampling_mode.NEAREST) + elif primary_op.type == "Conv2DBackpropInputSwitchedBias": + # perform insert zero upscale + emit.cmd0_with_param(cmd0.NPU_SET_IFM_UPSCALE, resampling_mode.TRANSPOSE) else: - emit.cmd0_with_param(cmd0.NPU_SET_IFM_UPSCALE, 0) + emit.cmd0_with_param(cmd0.NPU_SET_IFM_UPSCALE, resampling_mode.NONE) if npu_block_type in set( (NpuBlockType.ConvolutionMxN, NpuBlockType.ConvolutionDepthWise, NpuBlockType.Pooling) diff --git a/ethosu/vela/supported_operators.py b/ethosu/vela/supported_operators.py index 729d435a..cbd5d6cc 100644 --- a/ethosu/vela/supported_operators.py +++ b/ethosu/vela/supported_operators.py @@ -22,10 +22,11 @@ class SupportedOperators: def __init__(self): # Categorised lists of supported operators self.npu_pre_ops = set(("QuantizedResizeBilinear", "SplitSliceRead")) - self.convolution_ops = set(("Conv2DBiasAct", "Conv2D", "QuantizedConv2D", "Conv2DBackpropInputSwitched")) + self.convolution_ops = set(("Conv2DBiasAct", "Conv2D", "QuantizedConv2D")) self.depthwise_convolution_ops = set( ("DepthwiseConv2dBiasAct", "DepthwiseConv2dNative", "QuantizedDepthwiseConv2D") ) + self.transpose_convolution_ops = set(("Conv2DBackpropInput",)) self.max_pooling_ops = set(("QuantizedMaxPool", "MaxPool", "MaxPoolAct")) self.avg_pooling_ops = set(("QuantizedAvgPool", "AvgPool", "AvgPoolAct")) self.pooling_ops = self.max_pooling_ops | self.avg_pooling_ops @@ -36,6 +37,8 @@ class SupportedOperators: self.convolution_ops # depth-wise convolutions | self.depthwise_convolution_ops + # transpose 
convolutions + | self.transpose_convolution_ops # pooling | self.pooling_ops # resizing/upscaling @@ -90,6 +93,9 @@ class SupportedOperators: self.supported_operator_restrictions.update( {op: self.check_depthwise_convolution_restrictions for op in self.depthwise_convolution_ops} ) + self.supported_operator_restrictions.update( + {op: self.check_transpose_convolution_restrictions for op in self.transpose_convolution_ops} + ) self.supported_operator_restrictions.update({op: self.check_pooling_restrictions for op in self.pooling_ops}) self.supported_operator_restrictions.update({op: self.check_resize_restrictions for op in self.resizing_ops}) self.supported_operator_restrictions.update( @@ -180,6 +186,28 @@ class SupportedOperators: return False return self.check_convolution_restrictions(op) + def check_transpose_convolution_restrictions(self, op): + # check stride + stride_h, stride_w = op.attrs["stride_h"], op.attrs["stride_w"] + if stride_h != stride_w != 2: + return False + + # check output dimensions + ifm_tensor, weight_tensor, _, ofm_tensor = op.get_ifm_weights_biases_ofm() + ifm_h, ifm_w = ifm_tensor.shape[1], ifm_tensor.shape[2] + ofm_h, ofm_w = ofm_tensor.shape[1], ofm_tensor.shape[2] + if op.attrs["padding"] == b"SAME": + if (ofm_h != ifm_h * stride_h) or (ofm_w != ifm_w * stride_w): + return False + elif op.attrs["padding"] == b"VALID": + kernel_h, kernel_w = weight_tensor.shape[0], weight_tensor.shape[1] + if ((ofm_h != (ifm_h) * stride_h + max(kernel_h - stride_h, 0)) + or (ofm_w != (ifm_w) * stride_w + max(kernel_w - stride_w, 0))): + return False + + return self.check_convolution_restrictions(op) + + def check_pooling_restrictions(self, op): # check stride if op.attrs["stride_w"] > 3 or op.attrs["stride_h"] > 3: diff --git a/ethosu/vela/weight_compressor.py b/ethosu/vela/weight_compressor.py index 9edde601..df2b0573 100644 --- a/ethosu/vela/weight_compressor.py +++ b/ethosu/vela/weight_compressor.py @@ -234,6 +234,10 @@ def compress_weights(arch, nng, 
tens, npu_block_type, ofm_block_depth, ofm_depth else: tens.block_traversal = TensorBlockTraversal.DepthFirst + if tens.consumer_list[0].type == "Conv2DBackpropInputSwitchedBias": + # Transpose Convolution, reverse weights in H and W axes + weights = np.flip(weights, axis=(0,1)) + # Slice weight stream up depth-ways into bricks and compress full_ofm_depth = quant_buf.shape[-1] for idx in range(0, full_ofm_depth, ofm_depth_step): @@ -273,7 +277,9 @@ def calc_scales_and_pack_biases(tens, arch, oc_quantum, rescale_for_faf=False): # the connected operator should expect a bias input unless it is a FullyConnected assert "Bias" in tens.consumer_list[0].type or tens.consumer_list[0].type.startswith("FullyConnected") # the input bias tensor is the same as that connected to the operator - assert tens is tens.consumer_list[0].inputs[2] + _, _, bias_tens, _ = tens.consumer_list[0].get_ifm_weights_biases_ofm() + assert tens is bias_tens + # the operator should only have a single output assert len(tens.consumer_list[0].outputs) == 1 -- cgit v1.2.1