diff options
author | Raul Farkas <raul.farkas@arm.com> | 2023-01-24 16:29:06 +0000 |
---|---|---|
committer | Raul Farkas <raul.farkas@arm.com> | 2023-02-07 15:55:53 +0000 |
commit | 090f18a55fcd4f7ae8ca1ae633418d05c62cbb6e (patch) | |
tree | 0d88ac2cf3253af50f63c507d8b397831bd32b7a | |
parent | 12e481147de461e3ea63a8b1dcbc1b66b0fe8e6f (diff) | |
download | ethos-u-vela-090f18a55fcd4f7ae8ca1ae633418d05c62cbb6e.tar.gz |
MLBEDSW-7237: CONV_2D stride 4 optimisation
* Extend stride range from (1,3) to (1,4)
* Add stride 4 support when optimising CONV_2D
* Add some tests for various strides
Change-Id: Iddaeb42c4a6e02695ecdd3740bc8b9dd59a7eb3c
Signed-off-by: Raul Farkas <raul.farkas@arm.com>
-rw-r--r-- | SUPPORTED_OPS.md | 6
-rw-r--r-- | ethosu/vela/test/test_tflite_supported_operators.py | 13
-rw-r--r-- | ethosu/vela/tflite_graph_optimiser.py | 34
-rw-r--r-- | ethosu/vela/tflite_supported_operators.py | 28
4 files changed, 59 insertions, 22 deletions
diff --git a/SUPPORTED_OPS.md b/SUPPORTED_OPS.md index 43db4c5b..3d045923 100644 --- a/SUPPORTED_OPS.md +++ b/SUPPORTED_OPS.md @@ -1,7 +1,7 @@ # Supported Ops This file was automatically generated by Vela using the `--supported-ops-report` parameter. -Vela version: `3.6.1.dev17+g859efbe.d20230203` +Vela version: `3.6.1.dev18+g34cbb970` This file complies with [**Gitiles Markdown syntax**](https://github.com/google/gitiles/blob/master/Documentation/markdown.md) @@ -123,7 +123,7 @@ This is a list of constraints that the CONV_2D operator must satisfy in order to - Stride values for both width and height must be integer types - Dilation factor values for both width and height must be integer types -- Stride values for both width and height must be in the range [1, 3] +- Stride values for both width and height must be between 1 and 4 - Dilated kernel height must be in the range [1, 64] - Product of dilated kernel width and height must be in the range [1, 4096] - Weight tensor must be 8-bit @@ -139,7 +139,6 @@ This is a list of constraints that the DEPTHWISE_CONV_2D operator must satisfy i - Stride values for both width and height must be integer types - Dilation factor values for both width and height must be integer types -- Stride values for both width and height must be in the range [1, 3] - Dilated kernel height must be in the range [1, 64] - Product of dilated kernel width and height must be in the range [1, 4096] - Weight tensor must be 8-bit @@ -148,6 +147,7 @@ This is a list of constraints that the DEPTHWISE_CONV_2D operator must satisfy i - Optional Bias tensor must be of shape: 1D - Optional Bias tensor must be of type: int32, int64 - Optional Bias tensor values must fit within 40-bits +- Stride values for both width and height must be between 1 and 3 - For depth multipliers > 1, IFM channels must be 1 and OFM channels must be equal to the depth multiplier ### TFLite EXPAND_DIMS Constraints diff --git a/ethosu/vela/test/test_tflite_supported_operators.py 
b/ethosu/vela/test/test_tflite_supported_operators.py index 6a0b58e3..efe0d000 100644 --- a/ethosu/vela/test/test_tflite_supported_operators.py +++ b/ethosu/vela/test/test_tflite_supported_operators.py @@ -17,6 +17,7 @@ # Description: # Unit tests for tflite support_operators import numpy as np +import pytest from ethosu.vela.data_type import DataType from ethosu.vela.operation import ActivationFunction @@ -104,11 +105,15 @@ def test_constraint_conv_pass(): assert support.is_operator_supported(op) -def test_constraint_stride_range(): +@pytest.mark.parametrize( + "stride_w, stride_h, supported", + [[0, 20, False], [4, 4, True], [4, 5, False], [5, 4, False], [3, 3, True], [1, 1, True], [2, 4, True]], +) +def test_constraint_stride_range(stride_w: int, stride_h: int, supported: bool): # Stride width and height must lie within a certain range - op = testutil.create_op_with_quant_tensors(Op.Conv2DBias, [1, 8, 8, 8], [1, 8, 8, 8]) - op.attrs = {"stride_w": 0, "stride_h": 20} - assert not support.is_operator_supported(op) + op = testutil.create_op_with_quant_tensors(Op.Conv2DBias, [1, 8, 8, 8], [1, 8, 8, 8], [1, 1, 1, 1]) + op.attrs = {"stride_w": stride_w, "stride_h": stride_h} + assert support.is_operator_supported(op) == supported def test_constraint_dilated_height_range(): diff --git a/ethosu/vela/tflite_graph_optimiser.py b/ethosu/vela/tflite_graph_optimiser.py index ff7b4863..73137feb 100644 --- a/ethosu/vela/tflite_graph_optimiser.py +++ b/ethosu/vela/tflite_graph_optimiser.py @@ -790,28 +790,39 @@ def reorder_depthwise_weights(op, arch, nng): return op -def optimise_strided_conv(op, arch, nng): - if op.type != Op.Conv2DBias or op.op_index != 0: +def fixup_strided_conv(op, arch, nng): + if op.type != Op.Conv2DBias: return op stride_x, stride_y = op.get_kernel_stride() weight_tensor = op.weights ifm_shape = op.ifm_shapes[0] + # Do not optimize if op is not the first in the network and stride is + # supported by the hardware + if op.op_index != 0 and stride_x < 4: + 
return op + op.ifm.needs_linear_format = True + if ( - stride_x == 2 + (stride_x == 2 or stride_x == 4) and ifm_shape.depth <= 4 and ifm_shape.width % 2 == 0 and weight_tensor is not None and weight_tensor.shape[1] >= 2 ): k_w, _ = op.get_kernel_size() - curr_padding_x = needed_total_padding(ifm_shape.width, 2, k_w) - optimised_padding_x = needed_total_padding(ifm_shape.width // 2, 1, (k_w + 1) // 2) - if curr_padding_x != optimised_padding_x: + curr_padding_x = needed_total_padding(ifm_shape.width, stride_x, k_w) + optimised_padding_x = needed_total_padding(ifm_shape.width // stride_x, 1, (k_w + 1) // stride_x) + padding_type = op.attrs.get("padding", None) + + # If padding is enabled, check if current padding matches optimised padding + if not padding_type or (padding_type != Padding.VALID and curr_padding_x != optimised_padding_x): # Horizontal padding would become different after optimisation; this would not work return op # IFM - op.ifm_shapes[0] = Shape4D([ifm_shape.batch, ifm_shape.height, ifm_shape.width // 2, ifm_shape.depth * 2]) + op.ifm_shapes[0] = Shape4D( + [ifm_shape.batch, ifm_shape.height, ifm_shape.width // stride_x, ifm_shape.depth * stride_x] + ) # Weights weight_shape = weight_tensor.shape @@ -826,8 +837,11 @@ def optimise_strided_conv(op, arch, nng): ] ) weight_tensor.values = padded_array - weight_shape[1] //= 2 - weight_shape[2] *= 2 + + # Change weight shape based on stride_x + weight_shape[1] //= stride_x + weight_shape[2] *= stride_x + weight_tensor.values = np.reshape(weight_tensor.values, weight_shape) weight_tensor.set_all_shapes(weight_shape) # If multiple copies of the weights are used, we could avoid @@ -1942,7 +1956,7 @@ def tflite_optimise_graph(nng, arch): convert_prelu, convert_mul_max_to_abs_or_lrelu, convert_lrelu, - optimise_strided_conv, + fixup_strided_conv, convert_hardswish_to_lut, rewrite_fully_connected_input, convert_batched_fc_shape, diff --git a/ethosu/vela/tflite_supported_operators.py 
b/ethosu/vela/tflite_supported_operators.py index ea39b478..2a1eba7d 100644 --- a/ethosu/vela/tflite_supported_operators.py +++ b/ethosu/vela/tflite_supported_operators.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright 2020-2022 Arm Limited and/or its affiliates <open-source-office@arm.com> +# SPDX-FileCopyrightText: Copyright 2020-2023 Arm Limited and/or its affiliates <open-source-office@arm.com> # # SPDX-License-Identifier: Apache-2.0 # @@ -223,12 +223,12 @@ class TFLiteSupportedOperators: # Setup specific constraints. Note: the order matters self.specific_constraints = defaultdict(list) + # Conv specific ops: + for op_type in TFLiteSupportedOperators.convolution_ops: + self.specific_constraints[op_type].append(TFLiteSupportedOperators.constraint_conv_stride) + # Conv-like checks: for op_type in TFLiteSupportedOperators.convolution_like_ops: - if op_type not in TFLiteSupportedOperators.transpose_convolution_ops: - # Transpose Conv has a specific stride constraint (see constraint_tconv_stride below) - self.specific_constraints[op_type].append(TFLiteSupportedOperators.constraint_stride_range) - self.specific_constraints[op_type].append(TFLiteSupportedOperators.constraint_dilated_height_range) self.specific_constraints[op_type].append(TFLiteSupportedOperators.constraint_dilated_product_range) self.specific_constraints[op_type].append(TFLiteSupportedOperators.constraint_weights_type) @@ -237,6 +237,7 @@ class TFLiteSupportedOperators: self.specific_constraints[op_type].append(TFLiteSupportedOperators.constraint_bias_shape) self.specific_constraints[op_type].append(TFLiteSupportedOperators.constraint_bias_type) self.specific_constraints[op_type].append(TFLiteSupportedOperators.constraint_bias_40bit) + # Transpose Conv specific checks: for op_type in TFLiteSupportedOperators.transpose_convolution_ops: self.specific_constraints[op_type].append(TFLiteSupportedOperators.constraint_tconv_stride) @@ -244,6 +245,7 @@ class TFLiteSupportedOperators: 
self.specific_constraints[op_type].append(TFLiteSupportedOperators.constraint_tconv_valid) # Depthwise Conv specific checks: for op_type in TFLiteSupportedOperators.depthwise_convolution_ops: + self.specific_constraints[op_type].append(TFLiteSupportedOperators.constraint_depthwise_conv_stride) self.specific_constraints[op_type].append(TFLiteSupportedOperators.constraint_depth_multiplier) # Pooling checks: @@ -534,6 +536,22 @@ class TFLiteSupportedOperators: return True, "Op has depth_multiplier=1" @staticmethod + def constraint_conv_stride(op): + "Stride values for both width and height must be between 1 and 4" + w, h = op.get_kernel_stride() + stride_min, stride_max = 1, 4 + valid = (stride_min <= w <= stride_max) and (stride_min <= h <= stride_max) + return valid, f"Op has stride WxH as: {w}x{h}" + + @staticmethod + def constraint_depthwise_conv_stride(op): + "Stride values for both width and height must be between 1 and 3" + w, h = op.get_kernel_stride() + stride_min, stride_max = 1, 3 + valid = (stride_min <= w <= stride_max) and (stride_min <= h <= stride_max) + return valid, f"Op has stride WxH as: {w}x{h}" + + @staticmethod def constraint_tconv_stride(op): "Stride values for both width and height must be 2" w = op.kernel.stride.x |