From ea4ba666c035827aabe9a807503c185a6a9d3f0f Mon Sep 17 00:00:00 2001
From: Tim Hall
Date: Fri, 11 Nov 2022 18:19:53 +0000
Subject: MLBEDSW-6905: Add dilation greater than 2 support

 - Added a graph optimisation pass to support dilations greater than 2
   in either dimension
 - Removed the dilation range restriction from the supported operators
   checks
 - Removed the erroneous dilation constraint on TRANSPOSE_CONV
 - Updated unit tests and documentation

Signed-off-by: Tim Hall
Change-Id: Ide302374b0d5eff25c20501383a63f6aa7625c52
---
 .../vela/test/test_tflite_supported_operators.py |  7 ----
 ethosu/vela/tflite_graph_optimiser.py            | 50 ++++++++++++++++++++++++
 ethosu/vela/tflite_model_semantic.py             |  4 +-
 ethosu/vela/tflite_supported_operators.py        | 19 ++-------
 4 files changed, 57 insertions(+), 23 deletions(-)

diff --git a/ethosu/vela/test/test_tflite_supported_operators.py b/ethosu/vela/test/test_tflite_supported_operators.py
index 4410938e..6468d3de 100644
--- a/ethosu/vela/test/test_tflite_supported_operators.py
+++ b/ethosu/vela/test/test_tflite_supported_operators.py
@@ -111,13 +111,6 @@ def test_constraint_stride_range():
     assert not support.is_operator_supported(op)
 
 
-def test_constraint_dilation_range():
-    # Dilation width and height must lie within a certain range
-    op = testutil.create_op_with_quant_tensors(Op.Conv2DBias, [1, 8, 8, 8], [1, 8, 8, 8])
-    op.attrs = {"stride_w": 1, "stride_h": 1, "dilation_w_factor": 0, "dilation_h_factor": 20}
-    assert not support.is_operator_supported(op)
-
-
 def test_constraint_dilated_height_range():
     # Dilated kernel height must lie within a certain range
     op = testutil.create_op_with_quant_tensors(Op.Conv2DBias, [1, 8, 8, 8], [1, 8, 8, 8], weights_shape=[65, 64, 1, 1])
diff --git a/ethosu/vela/tflite_graph_optimiser.py b/ethosu/vela/tflite_graph_optimiser.py
index 90b29327..fcaac151 100644
--- a/ethosu/vela/tflite_graph_optimiser.py
+++ b/ethosu/vela/tflite_graph_optimiser.py
@@ -1810,6 +1810,55 @@ def convert_shape_op_to_constant_tensor(op: Operation, arch, nng):
     return op
 
 
+def fixup_dilation_gt2(op, arch, nng):
+    assert op.run_on_npu
+    if op.type == Op.Conv2DBias or op.type == Op.DepthwiseConv2DBias:
+        dilation_w, dilation_h = op.get_kernel_dilation()
+
+        # if dilation in either axis is greater than that supported by the hardware then we must manually dilate the
+        # kernel
+        if dilation_w > 2 or dilation_h > 2:
+            kernel_w, kernel_h = op.get_kernel_size()
+            kernel_ic = op.weights.shape[-2]
+            kernel_oc = op.weights.shape[-1]
+
+            # if the dilation is a multiple of 2 then the hardware dilation can be enabled to provide that factor
+            # of 2. this allows the kernel size to be halved (via the scaled dilation) in that dimension.
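+            # e.g. dilation 6 = hardware dilation 2 * kernel scaling 3, whereas an odd dilation such as 5 must be
+            # built entirely into the kernel (hardware dilation 1 * kernel scaling 5)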
+            # odd = 1, even = 2
+            hw_dilation_h = 1 if (dilation_h & 1) else 2
+            hw_dilation_w = 1 if (dilation_w & 1) else 2
+
+            scale_dilation_h = dilation_h // hw_dilation_h
+            scale_dilation_w = dilation_w // hw_dilation_w
+
+            # create new empty kernel (HWIO format)
+            new_kernel_h = (kernel_h - 1) * scale_dilation_h + 1
+            new_kernel_w = (kernel_w - 1) * scale_dilation_w + 1
+
+            new_kernel_shape = [new_kernel_h, new_kernel_w, kernel_ic, kernel_oc]
+            new_kernel_values = np.zeros(new_kernel_shape, dtype=op.weights.values.dtype)
+
+            # copy the original kernel values into the new sparse kernel
+            for h in range(0, kernel_h):
+                for w in range(0, kernel_w):
+                    new_h = h * scale_dilation_h
+                    new_w = w * scale_dilation_w
+                    new_kernel_values[new_h, new_w, :, :] = op.weights.values[h, w, :, :]
+
+            # update the weight tensor with the new dilated kernel
+            op.weights.shape = new_kernel_shape
+            op.weights.values = new_kernel_values
+
+            # enable(=2) / disable(=1) hardware dilation
+            op.attrs["dilation"] = (1, hw_dilation_h, hw_dilation_w, 1)  # nhwc format
+            op.attrs["dilation_h_factor"] = hw_dilation_h
+            op.attrs["dilation_w_factor"] = hw_dilation_w
+
+    return op
+
+
 def supported_operator_check(op, arch, nng):
     op.run_on_npu = arch.tflite_supported_operators.is_operator_supported(op)
     return op
@@ -1909,6 +1958,7 @@ def tflite_optimise_graph(nng, arch):
         fixup_asymmetric_weights,
         convert_tanh_sigmoid_to_lut,
         replace_pad_by_hw_pad,
+        fixup_dilation_gt2,
     ]
 
     for idx, sg in enumerate(nng.subgraphs):
diff --git a/ethosu/vela/tflite_model_semantic.py b/ethosu/vela/tflite_model_semantic.py
index 7a0e234d..189e8370 100644
--- a/ethosu/vela/tflite_model_semantic.py
+++ b/ethosu/vela/tflite_model_semantic.py
@@ -106,7 +106,9 @@ class TFLiteSemantic:
         # Conv-like checks:
         for op_type in TFLiteSemantic.convolution_like_ops:
             self.specific_constraints[op_type].append(TFLiteSemantic.constraint_stride_type)
-            self.specific_constraints[op_type].append(TFLiteSemantic.constraint_dilation_type)
+            if op_type not in TFLiteSemantic.transpose_convolution_ops:
+                # Transpose Conv does not have a dilation attribute
+                self.specific_constraints[op_type].append(TFLiteSemantic.constraint_dilation_type)
 
         # Pooling checks:
         for op_type in TFLiteSemantic.pooling_ops:
diff --git a/ethosu/vela/tflite_supported_operators.py b/ethosu/vela/tflite_supported_operators.py
index fd8bbeef..abbfb171 100644
--- a/ethosu/vela/tflite_supported_operators.py
+++ b/ethosu/vela/tflite_supported_operators.py
@@ -189,7 +189,6 @@ class TFLiteSupportedOperators:
     # Defined ranges for allowed values:
     tens_dim_range = (1, 65535)
     stride_range = (1, 3)
-    dilation_range = (1, 2)
     dilated_height_range = (1, 64)
     dilated_product_range = (1, 64 * 64)
     weights_limit = 127 * 65536
@@ -225,8 +224,10 @@ class TFLiteSupportedOperators:
 
         # Conv-like checks:
         for op_type in TFLiteSupportedOperators.convolution_like_ops:
-            self.specific_constraints[op_type].append(TFLiteSupportedOperators.constraint_stride_range)
-            self.specific_constraints[op_type].append(TFLiteSupportedOperators.constraint_dilation_range)
+            if op_type not in TFLiteSupportedOperators.transpose_convolution_ops:
+                # Transpose Conv has a specific stride constraint (see constraint_tconv_stride below)
+                self.specific_constraints[op_type].append(TFLiteSupportedOperators.constraint_stride_range)
+
             self.specific_constraints[op_type].append(TFLiteSupportedOperators.constraint_dilated_height_range)
             self.specific_constraints[op_type].append(TFLiteSupportedOperators.constraint_dilated_product_range)
         self.specific_constraints[op_type].append(TFLiteSupportedOperators.constraint_weights_type)
@@ -234,9 +235,6 @@ class TFLiteSupportedOperators:
             self.specific_constraints[op_type].append(TFLiteSupportedOperators.constraint_weights_limit)
             self.specific_constraints[op_type].append(TFLiteSupportedOperators.constraint_bias_type)
             self.specific_constraints[op_type].append(TFLiteSupportedOperators.constraint_bias_40bit)
-        # Remove stride contraint from Transpose Conv because it has a specific one (see below)
-        for op_type in TFLiteSupportedOperators.transpose_convolution_ops:
-            self.specific_constraints[op_type].remove(TFLiteSupportedOperators.constraint_stride_range)
         # Transpose Conv specific checks:
         for op_type in TFLiteSupportedOperators.transpose_convolution_ops:
             self.specific_constraints[op_type].append(TFLiteSupportedOperators.constraint_tconv_stride)
@@ -433,15 +431,6 @@ class TFLiteSupportedOperators:
         valid = (stride_min <= w <= stride_max) and (stride_min <= h <= stride_max)
         return valid, f"Op has stride WxH as: {w}x{h}"
 
-    @classmethod
-    @docstring_format_args(dilation_range)
-    def constraint_dilation_range(cls, op):
-        "Dilation factor values for both width and height must be in the range [{}, {}]"
-        w, h = op.get_kernel_dilation()
-        dilation_min, dilation_max = cls.dilation_range
-        valid = (dilation_min <= w <= dilation_max) and (dilation_min <= h <= dilation_max)
-        return valid, f"Op has dilation factor WxH as: {w}x{h}"
-
     @classmethod
     @docstring_format_args(dilated_height_range)
     def constraint_dilated_height_range(cls, op):
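Note on the kernel rewrite above: the identity that fixup_dilation_gt2 relies on can be checked numerically outside of Vela. The standalone sketch below is not part of the patch and makes simplifying assumptions (a single channel, unit stride, VALID padding, plain NumPy in place of the Ethos-U hardware); it verifies that convolving with the zero-stuffed ("scale dilated") kernel at the reduced hardware dilation gives the same result as convolving with the original kernel at the full dilation.

import numpy as np


def conv2d_valid(x, k, dilation):
    # single-channel 2D convolution (as cross-correlation), stride 1, VALID padding
    kh, kw = k.shape
    eff_h = (kh - 1) * dilation + 1  # effective kernel extent under dilation
    eff_w = (kw - 1) * dilation + 1
    out = np.zeros((x.shape[0] - eff_h + 1, x.shape[1] - eff_w + 1))
    for i in range(out.shape[0]):
        for j in range(out.shape[1]):
            for u in range(kh):
                for v in range(kw):
                    out[i, j] += x[i + u * dilation, j + v * dilation] * k[u, v]
    return out


def scale_dilate_kernel(k, scale):
    # zero-stuff the kernel, mirroring the new_kernel_values construction in the patch
    kh, kw = k.shape
    new = np.zeros(((kh - 1) * scale + 1, (kw - 1) * scale + 1), dtype=k.dtype)
    new[::scale, ::scale] = k  # the original taps land at multiples of the scale factor
    return new


rng = np.random.default_rng(0)
x = rng.standard_normal((32, 32))
k = rng.standard_normal((3, 3))

dilation = 6  # greater than 2, so not directly supported by the hardware
hw_dilation = 2 if dilation % 2 == 0 else 1  # same odd/even rule as fixup_dilation_gt2
scale = dilation // hw_dilation  # 6 = hardware dilation 2 * kernel scaling 3

reference = conv2d_valid(x, k, dilation)
rewritten = conv2d_valid(x, scale_dilate_kernel(k, scale), hw_dilation)
assert np.allclose(reference, rewritten)

Because the zero taps contribute nothing and the surviving taps land on the same input samples, the rewritten convolution reproduces the original exactly, which is why the pass can fold any dilation factor into the kernel while keeping at most a factor of 2 in hardware.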