diff options
author | Raul Farkas <raul.farkas@arm.com> | 2023-01-30 12:58:46 +0000 |
---|---|---|
committer | Raul Farkas <raul.farkas@arm.com> | 2023-05-10 13:34:42 +0100 |
commit | 10d6b3b3fa594b9aca4a72f002acea9f927f9c60 (patch) | |
tree | 3b5f71ad590c81e53bca82ab2ffb20196d2408e2 | |
parent | 69782af3ff2cda96dff09ad66799b3ac8f16c19d (diff) | |
download | ethos-u-vela-10d6b3b3fa594b9aca4a72f002acea9f927f9c60.tar.gz |
MLBEDSW-7283: Add opt cases for strided CONV2D
* Implement a general optimization solution for strided CONV2D that
supports a stride_w with no upper bound.
* Implement filter zero padding to allow for optimization in those cases
in which the filter width is not divisible by the stride width.
E.g.: Filter width = 8, stride width = 3 ->
Filter width = 8 + 1 (zero padding) = 9, which is divisible by stride width = 3
* Implement partial optimization to reduce the stride to a HW-supported
stride (i.e. 2 or 3) when optimizing down to stride = 1 is not
possible because the IFM width is not divisible by the stride width.
* Implement optimization for when SAME padding is used. If the pre-opt
and post-opt padding do not match, add zero padding to the filter so
that the post-opt IFM padding matches.
Change-Id: Ia66b0d107281fa9993f6bf4d0c26627ee743253b
Signed-off-by: Raul Farkas <raul.farkas@arm.com>
-rw-r--r-- | SUPPORTED_OPS.md | 4 | ||||
-rw-r--r-- | ethosu/vela/test/test_tflite_supported_operators.py | 12 | ||||
-rw-r--r-- | ethosu/vela/tflite_graph_optimiser.py | 156 | ||||
-rw-r--r-- | ethosu/vela/tflite_supported_operators.py | 7 |
4 files changed, 130 insertions, 49 deletions
diff --git a/SUPPORTED_OPS.md b/SUPPORTED_OPS.md index 85ac0367..4c2a098a 100644 --- a/SUPPORTED_OPS.md +++ b/SUPPORTED_OPS.md @@ -18,7 +18,7 @@ limitations under the License. # Supported Ops This file was automatically generated by Vela using the `--supported-ops-report` parameter. -Vela version: `3.7.1.dev23+g3734897.d20230427` +Vela version: `3.7.1.dev17+geeff1bcf` This file complies with [**Gitiles Markdown syntax**](https://github.com/google/gitiles/blob/master/Documentation/markdown.md) @@ -153,7 +153,7 @@ This is a list of constraints that the CONV_2D operator must satisfy in order to - Stride values for both width and height must be integer types - Dilation factor values for both width and height must be integer types -- Stride values for height must be between 1 and 3 and for width between 1 and 4 +- Stride width must be greater than or equal to 1 and stride height must be between 1 and 3 - Dilated kernel height must be in the range [1, 64] - Product of dilated kernel width and height must be in the range [1, 4096] - Weight tensor must be 8-bit diff --git a/ethosu/vela/test/test_tflite_supported_operators.py b/ethosu/vela/test/test_tflite_supported_operators.py index 74dd3bf2..cedf87af 100644 --- a/ethosu/vela/test/test_tflite_supported_operators.py +++ b/ethosu/vela/test/test_tflite_supported_operators.py @@ -109,15 +109,15 @@ def test_constraint_conv_pass(): "stride_w, stride_h, supported", [ [0, 20, False], - [4, 1, True], - [4, 2, True], - [2, 2, True], - [4, 4, False], + [20, 0, False], + [4, 3, True], [4, 5, False], - [5, 4, False], + [4, 9, False], [3, 3, True], [1, 1, True], - [2, 4, False], + [20, 2, True], + [6, 3, True], + [8, 1, True], ], ) def test_constraint_stride_range(stride_w: int, stride_h: int, supported: bool): diff --git a/ethosu/vela/tflite_graph_optimiser.py b/ethosu/vela/tflite_graph_optimiser.py index 07f65a44..f68e0cf9 100644 --- a/ethosu/vela/tflite_graph_optimiser.py +++ b/ethosu/vela/tflite_graph_optimiser.py @@ -17,6 +17,8 @@ 
# Description: # Early optimisation of a TensorFlow Lite based network graph, using the rewrite_graph module # to do the traversal of the graph. +from __future__ import annotations + import math import uuid @@ -949,15 +951,16 @@ def reorder_depthwise_weights(op, arch, nng): return op -def fixup_strided_conv(op: Operation, arch, nng): +def fixup_strided_conv(op: Operation, arch, nng) -> Operation: """Optimize or fixup strided Conv2DBias Optimization: - Reduce, when possible, the Conv2DBias stride from 2 to 1 by re-shaping - both IFM and filter. + Reduce, when possible, the Conv2DBias stride from N with 1 > N > 4 to 1 + by re-shaping both IFM and filter. Fixup: - Introduce software support for Conv2DBias with stride_width = 4 by - reducing it to 1 when possible by re-shaping both IFM and filter. + Introduce software support for Conv2DBias with stride_width > 4 by + reducing it to 1, 2 or 3 (HW supported strides) when possible by + re-shaping both IFM and filter. """ if op.type != Op.Conv2DBias: return op @@ -970,44 +973,123 @@ def fixup_strided_conv(op: Operation, arch, nng): if op.op_index != 0 and stride_x < 4: return op - if ( - (stride_x == 2 or stride_x == 4) - and ifm_shape.depth <= 4 - and ifm_shape.width % 2 == 0 - and weight_tensor is not None - and weight_tensor.shape[1] >= 2 - ): + def calc_resize_factor(ifm_width: int, stride_x: int) -> tuple[int, int]: + """Compute resize factor for strided Conv2D optimization""" + # Define strides that are supported by HW + hw_supported_strides = (2, 3) + resize_factor = stride_x + + if ifm_width % resize_factor != 0: + # In case it is not divisible, check if the resize factor is + # divisible by any of the hw_supported_strides. If it is, re-compute + # the resize factor to be the value that leads us to + # reach a hw supported stride. + # E.g.: IFM width = 133, stride = 14, filter width = 7 can be + # optimised to IFM width = 19, stride = 2, filter width = 7 using + # a resize factor of 7. 
The final stride is 2 which is + # supported by the hardware. + supported_final_strides = (x for x in hw_supported_strides if resize_factor % x == 0) + new_resize_factor = resize_factor // next(supported_final_strides, 1) + resize_factor = new_resize_factor if resize_factor != new_resize_factor else 1 + + optimised_stride = stride_x // resize_factor + + return resize_factor, optimised_stride + + resize_factor, final_stride = calc_resize_factor(ifm_shape.width, stride_x) + + def calc_filter_padding( + ifm_padding_type: Padding | None, + ifm_current_padding_x: int, + post_op_stride: int, + opt_resize_factor: int, + filter_width: int, + ) -> tuple[int, int, int, int]: + """Calculate zero padding to be added to the filter. + + Parameters + ---------- + ifm_padding_type : Padding or None + The padding type that is applied to the IFM. + ifm_current_padding_x : int + Padding amount that is added to the IFM before optimization. + post_op_stride : int + The final stride once optimization is performed. + opt_resize_factor : int + The factor by which the stride will be reduced. + E.g. opt_resize_factor = 2 on a stride of 4 will produce + a stride of 2 after the optimization + filter_width : int + Width of the filter before optimization. + + Returns + ------- + padding : tuple[int, int, int, int] + A tuple with the ammount of padding on each side (top, left, bottom, right) + """ + padding_size = 0 + padding = (0, 0, 0, 0) + if ifm_padding_type and ifm_padding_type != Padding.VALID: + padding_size = (ifm_current_padding_x + post_op_stride) * opt_resize_factor - filter_width + # Distribute padding between left and right side of the filter + padding_left = padding_size // 2 + padding = (0, padding_left, 0, padding_size - padding_left) + + # Check if filter width is divisible by the stride width (required for optimization) + # If padding was already added above, the filter width is already divisible by + # resize factor, so this should be skipped. 
+ if padding_size == 0 and filter_width % opt_resize_factor != 0: + padding_size = opt_resize_factor - (filter_width % opt_resize_factor) + # Add padding zeros to the right + padding = (0, 0, 0, padding_size) + + return padding + + # Compute the depth of the IFM once the strided Conv2D is optimised + post_opt_ifm_depth = ifm_shape.depth * resize_factor + + if stride_x > 1 and (post_opt_ifm_depth <= 8 or stride_x > 3) and resize_factor != 1 and weight_tensor is not None: k_w, _ = op.get_kernel_size() - curr_padding_x = needed_total_padding(ifm_shape.width, stride_x, k_w) - optimised_padding_x = needed_total_padding(ifm_shape.width // stride_x, 1, (k_w + 1) // stride_x) - padding_type = op.attrs.get("padding", None) + weight_shape = weight_tensor.shape - # If padding is enabled, check if current padding matches optimised padding - if not padding_type or (padding_type != Padding.VALID and curr_padding_x != optimised_padding_x): - # Horizontal padding would become different after optimisation; this would not work + padding_type = op.attrs.get("padding", None) + if padding_type in (None, Padding.EXPLICIT, Padding.TILE): return op - # IFM - op.ifm_shapes[0] = Shape4D( - [ifm_shape.batch, ifm_shape.height, ifm_shape.width // stride_x, ifm_shape.depth * stride_x] + # Compute current padding as if IFM padding is SAME + curr_padding_x = needed_total_padding(ifm_shape.width, stride_x, k_w) + # Compute the padding needed on the filter for the optimisation + _, left_filter_padding, _, right_filter_padding = calc_filter_padding( + padding_type, curr_padding_x, final_stride, resize_factor, k_w ) + total_horizontal_padding = left_filter_padding + right_filter_padding + # If IFM padding is enabled, check if pre-opt and post-opt padding is + # the same while taking into consideration the extra filter padding. 
+ if padding_type == Padding.SAME: + optimised_padding_x = needed_total_padding( + ifm_shape.width // resize_factor, final_stride, (k_w + 1 + total_horizontal_padding) // resize_factor + ) + if curr_padding_x != optimised_padding_x: + # Horizontal padding would become different after optimisation; this would not work + return op - # Weights - weight_shape = weight_tensor.shape - if weight_shape[1] % 2 != 0: - weight_shape[1] = weight_shape[1] + 1 - padded_array = np.zeros(weight_shape) - for i in range(weight_shape[0]): - padded_array[i] = np.vstack( - [ - weight_tensor.values[i], - np.full((1, weight_shape[2], weight_shape[3]), weight_tensor.quantization.zero_point), - ] - ) - weight_tensor.values = padded_array + # Resize IFM + op.ifm_shapes[0] = Shape4D( + [ifm_shape.batch, ifm_shape.height, ifm_shape.width // resize_factor, ifm_shape.depth * resize_factor] + ) + # Compute list of 0 padding for each dimensions of the filter + filter_dimension_padding = [(0, 0) for _ in weight_tensor.shape] + # Update padding for filter width with computed padding + filter_dimension_padding[1] = (left_filter_padding, right_filter_padding) + # Add padding to the filter + zero_point = weight_tensor.quantization.zero_point + padding_constant = zero_point if np.isscalar(zero_point) else 0 + padded_filter_tensor = np.pad(weight_tensor.values, filter_dimension_padding, constant_values=padding_constant) + weight_shape[1] = padded_filter_tensor.shape[1] + weight_tensor.values = padded_filter_tensor # Change weight shape based on stride_x - weight_shape[1] //= stride_x - weight_shape[2] *= stride_x + weight_shape[1] //= resize_factor + weight_shape[2] *= resize_factor weight_tensor.values = np.reshape(weight_tensor.values, weight_shape) weight_tensor.set_all_shapes(weight_shape) @@ -1016,7 +1098,7 @@ def fixup_strided_conv(op: Operation, arch, nng): weight_tensor.value_id = uuid.uuid4() # Strides - stride_x = 1 + stride_x = final_stride op.attrs.update({"stride_w": stride_x, "stride_h": 
stride_y, "strides": (1, stride_y, stride_x, 1)}) return op diff --git a/ethosu/vela/tflite_supported_operators.py b/ethosu/vela/tflite_supported_operators.py index 95c7de33..8e9ab12f 100644 --- a/ethosu/vela/tflite_supported_operators.py +++ b/ethosu/vela/tflite_supported_operators.py @@ -542,12 +542,11 @@ class TFLiteSupportedOperators: @staticmethod def constraint_conv_stride(op): - "Stride values for height must be between 1 and 3 and for width between 1 and 4" + "Stride width must be greater than or equal to 1 and stride height must be between 1 and 3" w, h = op.get_kernel_stride() - stride_min_w_h = 1 - stride_max_w = 4 + stride_min = 1 stride_max_h = 3 - valid = (stride_min_w_h <= w <= stride_max_w) and (stride_min_w_h <= h <= stride_max_h) + valid = (stride_min <= w) and (stride_min <= h <= stride_max_h) return valid, f"Op has stride WxH as: {w}x{h}" @staticmethod |