diff options
Diffstat (limited to 'ethosu/vela')
-rw-r--r-- | ethosu/vela/graph_optimiser_util.py | 9 | ||||
-rw-r--r-- | ethosu/vela/test/test_tflite_supported_operators.py | 27 | ||||
-rw-r--r-- | ethosu/vela/tflite_graph_optimiser.py | 66 | ||||
-rw-r--r-- | ethosu/vela/tflite_supported_operators.py | 12 | ||||
-rw-r--r-- | ethosu/vela/utils.py | 29 |
5 files changed, 94 insertions, 49 deletions
diff --git a/ethosu/vela/graph_optimiser_util.py b/ethosu/vela/graph_optimiser_util.py index da3fe138..220ba1a9 100644 --- a/ethosu/vela/graph_optimiser_util.py +++ b/ethosu/vela/graph_optimiser_util.py @@ -185,10 +185,11 @@ def calc_explicit_padding(input_size, stride, filter_size, pad_before, pad_after def needed_total_padding(input_size, stride, filter_size): - out_size = (input_size + stride - 1) // stride - needed_input = (out_size - 1) * stride + filter_size - total_padding = max(0, needed_input - input_size) - return total_padding + """Compute hardware padding.""" + if input_size % stride == 0: + return max(filter_size - stride, 0) + + return max(filter_size - (input_size % stride), 0) # Set input/output tensor equivalence to the same id for memory operations diff --git a/ethosu/vela/test/test_tflite_supported_operators.py b/ethosu/vela/test/test_tflite_supported_operators.py index 4aca00da..cbad1713 100644 --- a/ethosu/vela/test/test_tflite_supported_operators.py +++ b/ethosu/vela/test/test_tflite_supported_operators.py @@ -106,23 +106,24 @@ def test_constraint_conv_pass(): @pytest.mark.parametrize( - "stride_w, stride_h, supported", + "ifm_shape, stride_w, stride_h, supported", [ - [0, 20, False], - [20, 0, False], - [4, 3, True], - [4, 5, False], - [4, 9, False], - [3, 3, True], - [1, 1, True], - [20, 2, True], - [6, 3, True], - [8, 1, True], + [[1, 8, 8, 8], 0, 20, False], + [[1, 8, 8, 8], 20, 0, False], + [[1, 8, 8, 8], 4, 3, True], + [[1, 8, 8, 8], 4, 5, False], + [[1, 8, 8, 8], 4, 9, False], + [[1, 8, 8, 8], 3, 3, True], + [[1, 8, 8, 8], 1, 1, True], + [[1, 8, 8, 8], 20, 2, False], + [[1, 8, 40, 8], 20, 2, True], + [[1, 8, 40, 8], 6, 3, True], + [[1, 8, 40, 8], 8, 1, True], ], ) -def test_constraint_stride_range(stride_w: int, stride_h: int, supported: bool): +def test_constraint_stride_range(ifm_shape: list[int], stride_w: int, stride_h: int, supported: bool): # Stride width and height must lie within a certain range - op = testutil.create_op_with_quant_tensors(Op.Conv2DBias, [1, 8, 8, 8], [1, 8, 8, 8], [1, 1, 1, 1]) + op = testutil.create_op_with_quant_tensors(Op.Conv2DBias, ifm_shape, [1, 8, 8, 8], [1, 1, 1, 1]) op.attrs = {"stride_w": stride_w, "stride_h": stride_h} assert support.is_operator_supported(op) == supported diff --git a/ethosu/vela/tflite_graph_optimiser.py b/ethosu/vela/tflite_graph_optimiser.py index 99ac24ee..76383a4b 100644 --- a/ethosu/vela/tflite_graph_optimiser.py +++ b/ethosu/vela/tflite_graph_optimiser.py @@ -73,6 +73,7 @@ from .tensor import QuantizationParameters from .tensor import Tensor from .tensor import TensorPurpose from .tflite_mapping import optype_to_builtintype +from .utils import calc_resize_factor passthrough_nodes = (Op.Identity,) @@ -970,29 +971,6 @@ def fixup_strided_conv(op: Operation, arch, nng) -> Operation: if op.op_index != 0 and stride_x < 4: return op - def calc_resize_factor(ifm_width: int, stride_x: int) -> tuple[int, int]: - """Compute resize factor for strided Conv2D optimization""" - # Define strides that are supported by HW - hw_supported_strides = (2, 3) - resize_factor = stride_x - - if ifm_width % resize_factor != 0: - # In case it is not divisible, check if the resize factor is - # divisible by any of the hw_supported_strides. If it is, re-compute - # the resize factor to be the value that leads us to - # reach a hw supported stride. - # E.g.: IFM width = 133, stride = 14, filter width = 7 can be - # optimised to IFM width = 19, stride = 2, filter width = 7 using - # a resize factor of 7. The final stride is 2 which is - # supported by the hardware. - supported_final_strides = (x for x in hw_supported_strides if resize_factor % x == 0) - new_resize_factor = resize_factor // next(supported_final_strides, 1) - resize_factor = new_resize_factor if resize_factor != new_resize_factor else 1 - - optimised_stride = stride_x // resize_factor - - return resize_factor, optimised_stride - resize_factor, final_stride = calc_resize_factor(ifm_shape.width, stride_x) def calc_filter_padding( @@ -1001,6 +979,7 @@ def fixup_strided_conv(op: Operation, arch, nng) -> Operation: post_op_stride: int, opt_resize_factor: int, filter_width: int, + ifm_width: int, ) -> tuple[int, int, int, int]: """Calculate zero padding to be added to the filter. @@ -1018,6 +997,8 @@ def fixup_strided_conv(op: Operation, arch, nng) -> Operation: a stride of 2 after the optimization filter_width : int Width of the filter before optimization. + ifm_width : int + Width of the IFM before optimization Returns ------- @@ -1027,15 +1008,40 @@ def fixup_strided_conv(op: Operation, arch, nng) -> Operation: padding_size = 0 padding = (0, 0, 0, 0) if ifm_padding_type and ifm_padding_type != Padding.VALID: - padding_size = (ifm_current_padding_x + post_op_stride) * opt_resize_factor - filter_width - # Distribute padding between left and right side of the filter - padding_left = padding_size // 2 + # Compute padding size for the filter that guarantees that HW padding added to IFM matches + # before and after the optimization is performed + expected_filter_size = 0 + pre_opt_stride = post_op_stride * opt_resize_factor + post_opt_ifm_width = ifm_width // opt_resize_factor + # Compute the total expected filter size post optimization that ensures that the same HW padding + # is added to IFM. + # There are two ways of calculating required filter size depending on whether IFM width is divisible + # by stride width or not. These approaches match the cases used to calculate HW padding in + # needed_total_padding method. + if ifm_width % pre_opt_stride == 0: + expected_filter_size = ifm_current_padding_x + post_op_stride + else: + expected_filter_size = ifm_current_padding_x + (post_opt_ifm_width % post_op_stride) + # Compute padding size from expected filter size + padding_size = expected_filter_size * opt_resize_factor - filter_width + + if ifm_current_padding_x == 0: + # If no HW padding is added to IFM, divide filter padding between left and right following + # the same strategy as the reference. + padding_left = padding_size // 2 + else: + # If HW padding is added to IFM, split padding for the filter so that left padding and right padding + # are proportional to left and right HW padding. + left_hw_padding = ifm_current_padding_x // 2 + # Compute filter padding + padding_left = padding_size // ifm_current_padding_x * left_hw_padding padding = (0, padding_left, 0, padding_size - padding_left) # Check if filter width is divisible by the stride width (required for optimization) - # If padding was already added above, the filter width is already divisible by - # resize factor, so this should be skipped. - if padding_size == 0 and filter_width % opt_resize_factor != 0: + # If filter width is not divisible by stride width and no HW padding is added to IFM, compute + # filter padding required for the filter width to be divisible by the stride width and apply it as right + # padding. + if filter_width % opt_resize_factor != 0 and (padding_size == 0 or ifm_current_padding_x == 0): padding_size = opt_resize_factor - (filter_width % opt_resize_factor) # Add padding zeros to the right padding = (0, 0, 0, padding_size) @@ -1056,7 +1062,7 @@ def fixup_strided_conv(op: Operation, arch, nng) -> Operation: curr_padding_x = needed_total_padding(ifm_shape.width, stride_x, k_w) # Compute the padding needed on the filter for the optimisation _, left_filter_padding, _, right_filter_padding = calc_filter_padding( - padding_type, curr_padding_x, final_stride, resize_factor, k_w + padding_type, curr_padding_x, final_stride, resize_factor, k_w, ifm_shape.width ) total_horizontal_padding = left_filter_padding + right_filter_padding # If IFM padding is enabled, check if pre-opt and post-opt padding is diff --git a/ethosu/vela/tflite_supported_operators.py b/ethosu/vela/tflite_supported_operators.py index 0dfdc666..25b68970 100644 --- a/ethosu/vela/tflite_supported_operators.py +++ b/ethosu/vela/tflite_supported_operators.py @@ -29,6 +29,7 @@ from .supported_operators_util import list_formatter from .tensor import check_quantized_tens_scaling_equal from .tflite_mapping import BUILTIN_OPERATOR_UNKNOWN from .tflite_mapping import optype_to_builtintype +from .utils import calc_resize_factor def _optype_formatter(op_list): @@ -545,11 +546,18 @@ class TFLiteSupportedOperators: @staticmethod def constraint_conv_stride(op): - "Stride width must be greater than or equal to 1 and stride height must be between 1 and 3" + """Stride width must be greater than or equal to 1. + For stride widths greater than 3, the post-optimization stride needs to be less than or equal to 3. + Stride height must be between 1 and 3.""" w, h = op.get_kernel_stride() stride_min = 1 stride_max_h = 3 - valid = (stride_min <= w) and (stride_min <= h <= stride_max_h) + ifm_width = op.ifm.shape[2] + _, optimized_stride = calc_resize_factor(ifm_width, w) if w > 1 else (1, w) + # Optimized stride indicates the final Conv2D stride width after all optimizations are performed + can_optimize_stride_width_gt_3 = optimized_stride <= 3 + valid = (stride_min <= w) and (stride_min <= h <= stride_max_h) and can_optimize_stride_width_gt_3 + return valid, f"Op has stride WxH as: {w}x{h}" @staticmethod diff --git a/ethosu/vela/utils.py b/ethosu/vela/utils.py index 6a368979..11c253c0 100644 --- a/ethosu/vela/utils.py +++ b/ethosu/vela/utils.py @@ -84,3 +84,32 @@ def progress_print( return print(f"{context_str}{message}") + + +def calc_resize_factor(ifm_width: int, stride_x: int) -> tuple[int, int]: + """Compute resize factor for strided Conv2D optimization.""" + # Define strides that are supported by HW + hw_supported_strides = (2, 3) + resize_factor = stride_x + + if ifm_width % resize_factor != 0: + # In case it is not divisible, check if the resize factor is + # divisible by any of the hw_supported_strides. If it is, re-compute + # the resize factor to be the value that leads us to + # reach a hw supported stride. The IFM width needs to be divisible by the new stride. + # E.g.: IFM width = 133, stride = 14, filter width = 7 can be + # optimised to IFM width = 19, stride = 2, filter width = 7 using + # a resize factor of 7. The final stride is 2 which is + # supported by the hardware. + + # Filter strides that can be obtained from current stride + divisible_strides = (x for x in hw_supported_strides if resize_factor % x == 0) + # Remove strides that are not IFM width divisors + divisor_strides = (x for x in divisible_strides if ifm_width % (stride_x // x) == 0) + # Compute new resize factor based on chosen stride + new_resize_factor = resize_factor // next(divisor_strides, 1) + resize_factor = new_resize_factor if resize_factor != new_resize_factor else 1 + + optimised_stride = stride_x // resize_factor + + return resize_factor, optimised_stride |