aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRaul Farkas <raul.farkas@arm.com>2023-01-30 12:58:46 +0000
committerRaul Farkas <raul.farkas@arm.com>2023-05-10 13:34:42 +0100
commit10d6b3b3fa594b9aca4a72f002acea9f927f9c60 (patch)
tree3b5f71ad590c81e53bca82ab2ffb20196d2408e2
parent69782af3ff2cda96dff09ad66799b3ac8f16c19d (diff)
downloadethos-u-vela-10d6b3b3fa594b9aca4a72f002acea9f927f9c60.tar.gz
MLBEDSW-7283: Add opt cases for strided CONV2D
* Implement a general optimization solution for strided CONV2D that supports a stride_w with no upper bound. * Implement filter zero padding to allow for optimization in those cases in which the filter width is not divisible by the stride width. E.g.: Filter width = 8, stride width = 3 -> Filter width = 8 + 1 (0 padding) = 9, stride width = 3 * Implement partial optimization to reduce the stride to hw supported strides (i.e. 2 and 3) when optimizing to reach a stride = 1 is not possible due to the IFM width not being divisible by the stride width. * Implement optimization for when SAME padding is used. If the pre-opt and post-opt padding do not match, add zero padding to the filter so that the post-opt IFM padding matches. Change-Id: Ia66b0d107281fa9993f6bf4d0c26627ee743253b Signed-off-by: Raul Farkas <raul.farkas@arm.com>
-rw-r--r--SUPPORTED_OPS.md4
-rw-r--r--ethosu/vela/test/test_tflite_supported_operators.py12
-rw-r--r--ethosu/vela/tflite_graph_optimiser.py156
-rw-r--r--ethosu/vela/tflite_supported_operators.py7
4 files changed, 130 insertions, 49 deletions
diff --git a/SUPPORTED_OPS.md b/SUPPORTED_OPS.md
index 85ac0367..4c2a098a 100644
--- a/SUPPORTED_OPS.md
+++ b/SUPPORTED_OPS.md
@@ -18,7 +18,7 @@ limitations under the License.
# Supported Ops
This file was automatically generated by Vela using the `--supported-ops-report` parameter.
-Vela version: `3.7.1.dev23+g3734897.d20230427`
+Vela version: `3.7.1.dev17+geeff1bcf`
This file complies with
[**Gitiles Markdown syntax**](https://github.com/google/gitiles/blob/master/Documentation/markdown.md)
@@ -153,7 +153,7 @@ This is a list of constraints that the CONV_2D operator must satisfy in order to
- Stride values for both width and height must be integer types
- Dilation factor values for both width and height must be integer types
-- Stride values for height must be between 1 and 3 and for width between 1 and 4
+- Stride width must be greater than or equal to 1 and stride height must be between 1 and 3
- Dilated kernel height must be in the range [1, 64]
- Product of dilated kernel width and height must be in the range [1, 4096]
- Weight tensor must be 8-bit
diff --git a/ethosu/vela/test/test_tflite_supported_operators.py b/ethosu/vela/test/test_tflite_supported_operators.py
index 74dd3bf2..cedf87af 100644
--- a/ethosu/vela/test/test_tflite_supported_operators.py
+++ b/ethosu/vela/test/test_tflite_supported_operators.py
@@ -109,15 +109,15 @@ def test_constraint_conv_pass():
"stride_w, stride_h, supported",
[
[0, 20, False],
- [4, 1, True],
- [4, 2, True],
- [2, 2, True],
- [4, 4, False],
+ [20, 0, False],
+ [4, 3, True],
[4, 5, False],
- [5, 4, False],
+ [4, 9, False],
[3, 3, True],
[1, 1, True],
- [2, 4, False],
+ [20, 2, True],
+ [6, 3, True],
+ [8, 1, True],
],
)
def test_constraint_stride_range(stride_w: int, stride_h: int, supported: bool):
diff --git a/ethosu/vela/tflite_graph_optimiser.py b/ethosu/vela/tflite_graph_optimiser.py
index 07f65a44..f68e0cf9 100644
--- a/ethosu/vela/tflite_graph_optimiser.py
+++ b/ethosu/vela/tflite_graph_optimiser.py
@@ -17,6 +17,8 @@
# Description:
# Early optimisation of a TensorFlow Lite based network graph, using the rewrite_graph module
# to do the traversal of the graph.
+from __future__ import annotations
+
import math
import uuid
@@ -949,15 +951,16 @@ def reorder_depthwise_weights(op, arch, nng):
return op
-def fixup_strided_conv(op: Operation, arch, nng):
+def fixup_strided_conv(op: Operation, arch, nng) -> Operation:
"""Optimize or fixup strided Conv2DBias
Optimization:
- Reduce, when possible, the Conv2DBias stride from 2 to 1 by re-shaping
- both IFM and filter.
+    Reduce, when possible, the Conv2DBias stride from N with 1 < N <= 4 to 1
+ by re-shaping both IFM and filter.
Fixup:
- Introduce software support for Conv2DBias with stride_width = 4 by
- reducing it to 1 when possible by re-shaping both IFM and filter.
+ Introduce software support for Conv2DBias with stride_width > 4 by
+ reducing it to 1, 2 or 3 (HW supported strides) when possible by
+ re-shaping both IFM and filter.
"""
if op.type != Op.Conv2DBias:
return op
@@ -970,44 +973,123 @@ def fixup_strided_conv(op: Operation, arch, nng):
if op.op_index != 0 and stride_x < 4:
return op
- if (
- (stride_x == 2 or stride_x == 4)
- and ifm_shape.depth <= 4
- and ifm_shape.width % 2 == 0
- and weight_tensor is not None
- and weight_tensor.shape[1] >= 2
- ):
+ def calc_resize_factor(ifm_width: int, stride_x: int) -> tuple[int, int]:
+ """Compute resize factor for strided Conv2D optimization"""
+ # Define strides that are supported by HW
+ hw_supported_strides = (2, 3)
+ resize_factor = stride_x
+
+ if ifm_width % resize_factor != 0:
+ # In case it is not divisible, check if the resize factor is
+ # divisible by any of the hw_supported_strides. If it is, re-compute
+ # the resize factor to be the value that leads us to
+ # reach a hw supported stride.
+ # E.g.: IFM width = 133, stride = 14, filter width = 7 can be
+ # optimised to IFM width = 19, stride = 2, filter width = 7 using
+ # a resize factor of 7. The final stride is 2 which is
+ # supported by the hardware.
+ supported_final_strides = (x for x in hw_supported_strides if resize_factor % x == 0)
+ new_resize_factor = resize_factor // next(supported_final_strides, 1)
+ resize_factor = new_resize_factor if resize_factor != new_resize_factor else 1
+
+ optimised_stride = stride_x // resize_factor
+
+ return resize_factor, optimised_stride
+
+ resize_factor, final_stride = calc_resize_factor(ifm_shape.width, stride_x)
+
+ def calc_filter_padding(
+ ifm_padding_type: Padding | None,
+ ifm_current_padding_x: int,
+ post_op_stride: int,
+ opt_resize_factor: int,
+ filter_width: int,
+ ) -> tuple[int, int, int, int]:
+ """Calculate zero padding to be added to the filter.
+
+ Parameters
+ ----------
+ ifm_padding_type : Padding or None
+ The padding type that is applied to the IFM.
+ ifm_current_padding_x : int
+ Padding amount that is added to the IFM before optimization.
+ post_op_stride : int
+ The final stride once optimization is performed.
+ opt_resize_factor : int
+ The factor by which the stride will be reduced.
+ E.g. opt_resize_factor = 2 on a stride of 4 will produce
+ a stride of 2 after the optimization
+ filter_width : int
+ Width of the filter before optimization.
+
+ Returns
+ -------
+ padding : tuple[int, int, int, int]
+        A tuple with the amount of padding on each side (top, left, bottom, right)
+ """
+ padding_size = 0
+ padding = (0, 0, 0, 0)
+ if ifm_padding_type and ifm_padding_type != Padding.VALID:
+ padding_size = (ifm_current_padding_x + post_op_stride) * opt_resize_factor - filter_width
+ # Distribute padding between left and right side of the filter
+ padding_left = padding_size // 2
+ padding = (0, padding_left, 0, padding_size - padding_left)
+
+ # Check if filter width is divisible by the stride width (required for optimization)
+ # If padding was already added above, the filter width is already divisible by
+ # resize factor, so this should be skipped.
+ if padding_size == 0 and filter_width % opt_resize_factor != 0:
+ padding_size = opt_resize_factor - (filter_width % opt_resize_factor)
+ # Add padding zeros to the right
+ padding = (0, 0, 0, padding_size)
+
+ return padding
+
+ # Compute the depth of the IFM once the strided Conv2D is optimised
+ post_opt_ifm_depth = ifm_shape.depth * resize_factor
+
+ if stride_x > 1 and (post_opt_ifm_depth <= 8 or stride_x > 3) and resize_factor != 1 and weight_tensor is not None:
k_w, _ = op.get_kernel_size()
- curr_padding_x = needed_total_padding(ifm_shape.width, stride_x, k_w)
- optimised_padding_x = needed_total_padding(ifm_shape.width // stride_x, 1, (k_w + 1) // stride_x)
- padding_type = op.attrs.get("padding", None)
+ weight_shape = weight_tensor.shape
- # If padding is enabled, check if current padding matches optimised padding
- if not padding_type or (padding_type != Padding.VALID and curr_padding_x != optimised_padding_x):
- # Horizontal padding would become different after optimisation; this would not work
+ padding_type = op.attrs.get("padding", None)
+ if padding_type in (None, Padding.EXPLICIT, Padding.TILE):
return op
- # IFM
- op.ifm_shapes[0] = Shape4D(
- [ifm_shape.batch, ifm_shape.height, ifm_shape.width // stride_x, ifm_shape.depth * stride_x]
+ # Compute current padding as if IFM padding is SAME
+ curr_padding_x = needed_total_padding(ifm_shape.width, stride_x, k_w)
+ # Compute the padding needed on the filter for the optimisation
+ _, left_filter_padding, _, right_filter_padding = calc_filter_padding(
+ padding_type, curr_padding_x, final_stride, resize_factor, k_w
)
+ total_horizontal_padding = left_filter_padding + right_filter_padding
+ # If IFM padding is enabled, check if pre-opt and post-opt padding is
+ # the same while taking into consideration the extra filter padding.
+ if padding_type == Padding.SAME:
+ optimised_padding_x = needed_total_padding(
+ ifm_shape.width // resize_factor, final_stride, (k_w + 1 + total_horizontal_padding) // resize_factor
+ )
+ if curr_padding_x != optimised_padding_x:
+ # Horizontal padding would become different after optimisation; this would not work
+ return op
- # Weights
- weight_shape = weight_tensor.shape
- if weight_shape[1] % 2 != 0:
- weight_shape[1] = weight_shape[1] + 1
- padded_array = np.zeros(weight_shape)
- for i in range(weight_shape[0]):
- padded_array[i] = np.vstack(
- [
- weight_tensor.values[i],
- np.full((1, weight_shape[2], weight_shape[3]), weight_tensor.quantization.zero_point),
- ]
- )
- weight_tensor.values = padded_array
+ # Resize IFM
+ op.ifm_shapes[0] = Shape4D(
+ [ifm_shape.batch, ifm_shape.height, ifm_shape.width // resize_factor, ifm_shape.depth * resize_factor]
+ )
+        # Compute list of 0 padding for each dimension of the filter
+ filter_dimension_padding = [(0, 0) for _ in weight_tensor.shape]
+ # Update padding for filter width with computed padding
+ filter_dimension_padding[1] = (left_filter_padding, right_filter_padding)
+ # Add padding to the filter
+ zero_point = weight_tensor.quantization.zero_point
+ padding_constant = zero_point if np.isscalar(zero_point) else 0
+ padded_filter_tensor = np.pad(weight_tensor.values, filter_dimension_padding, constant_values=padding_constant)
+ weight_shape[1] = padded_filter_tensor.shape[1]
+ weight_tensor.values = padded_filter_tensor
# Change weight shape based on stride_x
- weight_shape[1] //= stride_x
- weight_shape[2] *= stride_x
+ weight_shape[1] //= resize_factor
+ weight_shape[2] *= resize_factor
weight_tensor.values = np.reshape(weight_tensor.values, weight_shape)
weight_tensor.set_all_shapes(weight_shape)
@@ -1016,7 +1098,7 @@ def fixup_strided_conv(op: Operation, arch, nng):
weight_tensor.value_id = uuid.uuid4()
# Strides
- stride_x = 1
+ stride_x = final_stride
op.attrs.update({"stride_w": stride_x, "stride_h": stride_y, "strides": (1, stride_y, stride_x, 1)})
return op
diff --git a/ethosu/vela/tflite_supported_operators.py b/ethosu/vela/tflite_supported_operators.py
index 95c7de33..8e9ab12f 100644
--- a/ethosu/vela/tflite_supported_operators.py
+++ b/ethosu/vela/tflite_supported_operators.py
@@ -542,12 +542,11 @@ class TFLiteSupportedOperators:
@staticmethod
def constraint_conv_stride(op):
- "Stride values for height must be between 1 and 3 and for width between 1 and 4"
+ "Stride width must be greater than or equal to 1 and stride height must be between 1 and 3"
w, h = op.get_kernel_stride()
- stride_min_w_h = 1
- stride_max_w = 4
+ stride_min = 1
stride_max_h = 3
- valid = (stride_min_w_h <= w <= stride_max_w) and (stride_min_w_h <= h <= stride_max_h)
+ valid = (stride_min <= w) and (stride_min <= h <= stride_max_h)
return valid, f"Op has stride WxH as: {w}x{h}"
@staticmethod