From 79a89048a7c500384b66535c5ba77d31ee6cbdde Mon Sep 17 00:00:00 2001
From: wilisa01
Date: Wed, 2 Nov 2022 17:18:43 +0000
Subject: MLBEDSW-6915: MLCE - Missing operators in Debug DB

- Adds missing operators and type conversion recording to DebugDB

Change-Id: If76b0b430bbe73ae1469024c3160ecf0eea26abe
Signed-off-by: wilisa01
---
 ethosu/vela/compiler_driver.py        |  2 +-
 ethosu/vela/debug_database.py         | 79 ++++++++++++++++++++++-------------
 ethosu/vela/graph_optimiser_util.py   |  6 +--
 ethosu/vela/tflite_graph_optimiser.py | 19 +++++++--
 4 files changed, 69 insertions(+), 37 deletions(-)

diff --git a/ethosu/vela/compiler_driver.py b/ethosu/vela/compiler_driver.py
index 014d12ae..439b954a 100644
--- a/ethosu/vela/compiler_driver.py
+++ b/ethosu/vela/compiler_driver.py
@@ -134,7 +134,7 @@ def next_sram_factor(alloc_results):
 
 
 def _record_operator(op, arch):
-    if op.type != Op.Const:
+    if op.type not in (Op.Const, Op.Placeholder):
         DebugDatabase.add_source(op)
 
 
diff --git a/ethosu/vela/debug_database.py b/ethosu/vela/debug_database.py
index bd45b468..c1389e51 100644
--- a/ethosu/vela/debug_database.py
+++ b/ethosu/vela/debug_database.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright 2020-2021 Arm Limited and/or its affiliates
+# SPDX-FileCopyrightText: Copyright 2020-2022 Arm Limited and/or its affiliates
 #
 # SPDX-License-Identifier: Apache-2.0
 #
@@ -61,39 +61,58 @@ class DebugDatabase:
             [uid, str(op.type), op.kernel.width, op.kernel.height, ofm_shape[-2], ofm_shape[-3], ofm_shape[-1]]
         )
 
+    # Ops are added when their type changes, and after optimisation. If an op was
+    # already added before optimisation finished, it will only be added again if
+    # its entry has changed in any way from its previous entry.
     @classmethod
     def add_optimised(cls, parent: Operation, op: Operation):
         assert isinstance(parent, Operation) and isinstance(op, Operation)
-        if op not in cls._optimisedUID:
-            if parent not in cls._sourceUID:
-                # The the parent wasn't in the source network try to look it
-                # up in the optimised network and use that op's source parent.
-                if parent in cls._optimisedUID:
-                    src_uid = cls._optimisedUID[parent][1]
-                else:
-                    if DebugDatabase.show_warnings:
-                        print("Debug Database: Associated parent '{0}' not in network".format(parent.type))
-                    src_uid = DebugDatabase.NULLREF
-            else:
-                src_uid = cls._sourceUID[parent]
-            uid = len(cls._optimisedUID)
-            cls._optimisedUID[op] = (uid, src_uid)
-            if len(op.ofm_shapes) == 0:
-                ofm_shape = Shape4D(op.outputs[0].shape)
+        if parent not in cls._sourceUID:
+            # If the parent wasn't in the source network try to look it
+            # up in the optimised network and use that op's source parent.
+            if parent in cls._optimisedUID:
+                src_uid = cls._optimisedUID[parent][1]
             else:
-                ofm_shape = op.ofm_shapes[0]
-            cls._optimisedTable.append(
-                [
-                    uid,
-                    src_uid,
-                    str(op.type),
-                    op.kernel.width,
-                    op.kernel.height,
-                    ofm_shape.width,
-                    ofm_shape.height,
-                    ofm_shape.depth,
-                ]
-            )
+                if DebugDatabase.show_warnings:
+                    print("Debug Database: Associated parent '{0}' not in network".format(parent.type))
+                src_uid = DebugDatabase.NULLREF
+        else:
+            src_uid = cls._sourceUID[parent]
+
+        # correction for missing shapes
+        if len(op.ofm_shapes) == 0:
+            ofm_shape = Shape4D(op.outputs[0].shape)
+        else:
+            ofm_shape = op.ofm_shapes[0]
+
+        next_uid = len(cls._optimisedTable)  # required because no longer 1:1 UID->table correspondence
+        opt_uid = cls._optimisedUID.get(op, (next_uid, 0))[0]  # already seen or next uid (if not seen)
+
+        opt_table_entry = [
+            opt_uid,
+            src_uid,
+            str(op.type),
+            op.kernel.width,
+            op.kernel.height,
+            ofm_shape.width,
+            ofm_shape.height,
+            ofm_shape.depth,
+        ]
+
+        if op not in cls._optimisedUID:
+            # optimised op does not exist
+            cls._optimisedUID[op] = (next_uid, src_uid)
+            cls._optimisedTable.append(opt_table_entry)
+        else:
+            # optimised op already exists
+            existing_entry = cls._optimisedTable[
+                cls._optimisedUID[op][0]
+            ]  # Existing entry is where the 'op' object was last inserted
+            if opt_table_entry != existing_entry:
+                # only add again if it's changed in any way
+                opt_table_entry[0] = next_uid  # give it a new unique id (required)
+                cls._optimisedUID[op] = (next_uid, src_uid)
+                cls._optimisedTable.append(opt_table_entry)
 
     @classmethod
     def add_stream(cls, key):
diff --git a/ethosu/vela/graph_optimiser_util.py b/ethosu/vela/graph_optimiser_util.py
index fde01cfe..4e233c44 100644
--- a/ethosu/vela/graph_optimiser_util.py
+++ b/ethosu/vela/graph_optimiser_util.py
@@ -257,7 +257,6 @@ def move_splitsliceread_to_consumer(op, cons_op):
         cons_op.read_shapes[1] = op.read_shapes[0]
         cons_op.set_input_tensor(op.ifm, cons_op.type.info.indices.ifms[1])
         cons_op.ifm_shapes[1] = op.ifm_shapes[0]
-        op.ofm.consumer_list.remove(cons_op)
     op.ofm.ops = []
     op.ifm.consumer_list.remove(op)
 
@@ -270,7 +269,7 @@ def check_memory_only_removed(op, arch):
 
 
 def record_optimised(op, arch):
-    if op.type != Op.Const:
+    if op.type not in (Op.Const, Op.Placeholder):
         DebugDatabase.add_optimised(op, op)
 
 
@@ -392,12 +391,12 @@ def convert_depthwise_to_conv(op, arch, nng):
             weight_tensor.values = np.transpose(weight_tensor.values, (0, 1, 3, 2))
             weight_tensor.set_all_shapes(list(weight_tensor.values.shape))
+        DebugDatabase.add_optimised(op, op)
     else:
         raise UnsupportedFeatureError(
             f"Unsupported 'DEPTHWISE_CONV_2D' with depth_multiplier = {op.attrs['depth_multiplier']},",
             f" ifm channels = {ifm_shape.depth}, ofm channels = {ofm_shape.depth}",
         )
-    DebugDatabase.add_optimised(op, op)
 
     return op
 
@@ -426,4 +425,5 @@ def convert_to_lut(op, lut_values, lut_name):
     lut_tensor = lut.create_lut_tensor(op.name + "_values", lut_values, DataType.int8)
     op.set_activation_lut(lut_tensor)
     op.set_ifm_ofm_shapes()
+    DebugDatabase.add_optimised(op, op)
     return op
diff --git a/ethosu/vela/tflite_graph_optimiser.py b/ethosu/vela/tflite_graph_optimiser.py
index b858f648..36c1de5a 100644
--- a/ethosu/vela/tflite_graph_optimiser.py
+++ b/ethosu/vela/tflite_graph_optimiser.py
@@ -76,10 +76,10 @@ def create_avg_pool_for_concat(concat_op, name, ifm, ifm_shape: Shape4D, write_o
     avgpool_op.write_offset = write_offset
     avgpool_op.write_shape = ifm_shape
     ofm.ops.append(avgpool_op)
-    DebugDatabase.add_optimised(concat_op, avgpool_op)
     avgpool_op.ifm_shapes.append(ifm_shape)
     avgpool_op.ofm_shapes.append(concat_op.ofm_shapes[0])
     avgpool_op.memory_function = Op.ConcatSliceWrite
+    DebugDatabase.add_optimised(concat_op, avgpool_op)
     return avgpool_op
 
 
@@ -279,6 +279,7 @@ def fixup_conv2d_backprop(op, arch, nng):
 
         # Update strides
         op.attrs.update({"stride_w": 1, "stride_h": 1, "strides": (1, 1, 1, 1)})
+        DebugDatabase.add_optimised(op, op)
 
     return op
 
@@ -301,6 +302,7 @@ def convert_resize_1x1_to_add(op):
     op.inputs[1] = op.inputs[0]
     op.inputs[0] = tens
     op.set_ifm_ofm_shapes()
+    DebugDatabase.add_optimised(op, op)
 
     return op
 
@@ -390,6 +392,7 @@ def convert_resizenn_ac_to_depthwise_conv(op, upscale_factor):
 
     # finally update the shape incase we've change the tensor shapes or connections
     op.set_ifm_ofm_shapes()
+    DebugDatabase.add_optimised(op, op)
 
     return op
 
@@ -433,6 +436,7 @@ def convert_resize_to_upscale_and_average_pool(op):
 
         pre_op = scaled_op
         scaled_op.set_ifm_ofm_shapes()
+        DebugDatabase.add_optimised(op, scaled_op)
 
     # Last x2 upscaling
     if n > 1:
@@ -463,6 +467,7 @@ def convert_resize_to_upscale_and_average_pool(op):
         scaled_op.outputs = outputs
         scaled_op.outputs[0].ops = [scaled_op]
         scaled_op.set_ifm_ofm_shapes()
+        DebugDatabase.add_optimised(op, scaled_op)
 
     return op
 
@@ -531,6 +536,7 @@ def convert_resizebilinear_to_depthwise_convolutions(op, half_pixel_centers=True
     avgpool_op.add_input_tensor(ifm)
     avgpool_op.set_output_tensor(intermediate_tens)
     avgpool_op.set_ifm_ofm_shapes()
+    DebugDatabase.add_optimised(op, op)
 
     dw_conv = Operation(Op.DepthwiseConv2DBias, "depthwise_conv")
     dw_conv._original_type = Op.ResizeBilinear
@@ -592,6 +598,8 @@ def convert_resizebilinear_to_depthwise_convolutions(op, half_pixel_centers=True
         fixup_bias_tensors(dw_conv, None, None, dtype=DataType.int32)
         dw_conv.set_ifm_ofm_shapes()
+        DebugDatabase.add_optimised(op, dw_conv)
+
         dw_conv = dw_conv.clone(f"_{index}")
     return op
 
@@ -674,6 +682,7 @@ def unfuse_activation_function(op):
         act_op.add_input_tensor(intermediate_tens)
         op.set_output_tensor(intermediate_tens)
         act_op.set_ifm_ofm_shapes()
+        DebugDatabase.add_optimised(op, act_op)
 
 
 def rewrite_stridedslice_output(op, arch, nng):
@@ -955,6 +964,7 @@ def convert_prelu(op, arch, nng):
         fm_id = ofm.clone(op.name + "_id", set_unique=True)
         mul_identity.set_output_tensor(fm_id)
         mul_identity.set_ifm_ofm_shapes()
+        DebugDatabase.add_optimised(op, mul_identity)
 
         # Combine scaled and alpha multiplied values
         max_op = Operation(Op.Maximum, op.name + "_max")
@@ -1389,7 +1399,7 @@ def fuse_activation_function_with_prev(op, arch, nng):
         prev_op.set_activation_lut(op.activation_lut)
     # Bypass op
     prev_op.set_output_tensor(ofm)
-    DebugDatabase.add_optimised(op, prev_op)
+    DebugDatabase.add_optimised(prev_op, prev_op)
 
     return op
 
@@ -1482,6 +1492,8 @@ def replace_pad_by_hw_pad(op: Operation, arch, nng):
         op.attrs["padding"] = Padding.EXPLICIT
         op.attrs["explicit_padding"] = (top, left, bottom, right)
         op.set_ifm_ofm_shapes()
+        DebugDatabase.add_optimised(op, op)
+
     return op
 
@@ -1682,6 +1694,7 @@ def convert_mean_to_depthwise_conv_or_avgpool(op, arch, nng):
 
     # If the AvgPool version is used, we don't need to do anything else
     if op.type == Op.AvgPool:
+        DebugDatabase.add_optimised(op, op)
         return op
 
     # Make unit weight tensor quantization
@@ -1712,6 +1725,7 @@ def convert_mean_to_depthwise_conv_or_avgpool(op, arch, nng):
     # Add bias tensor
     bias_shape = [shape[-1]]
     op.inputs.append(create_const_tensor("bias", bias_shape, DataType.int32, np.ones(bias_shape) * bias))
+    DebugDatabase.add_optimised(op, op)
 
     return op
 
@@ -1803,7 +1817,6 @@ def convert_shape_op_to_constant_tensor(op: Operation, arch, nng):
 
     # Convert this SHAPE op to const
     op.type = Op.Const
-    DebugDatabase.add_optimised(op, op)
 
     # Add size calculation to shape output tensors
     ofm.values = np.array(ifm.shape)
--
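
The core of this patch is the reworked DebugDatabase.add_optimised: an op can now be recorded more than once (for example when its type changes during optimisation), so row uids are derived from the table length rather than the op count, and a previously recorded op only gains a new row when its entry actually differs. Below is a minimal standalone Python sketch of that deduplication behaviour; the class, its fields, and the entry_fields argument are simplified stand-ins for the real implementation in ethosu/vela/debug_database.py, not part of the patch itself.

class DebugDatabaseSketch:
    # Simplified stand-in for vela's DebugDatabase (sketch only).
    NULLREF = -1
    _sourceUID = {}       # source op -> uid
    _optimisedUID = {}    # optimised op -> (uid of its latest row, source uid)
    _optimisedTable = []  # one row per recorded entry; an op may own several rows

    @classmethod
    def add_optimised(cls, parent, op, entry_fields):
        # Resolve the source uid: prefer the source network, then fall back
        # to the parent's own recorded source uid, else NULLREF.
        if parent in cls._sourceUID:
            src_uid = cls._sourceUID[parent]
        elif parent in cls._optimisedUID:
            src_uid = cls._optimisedUID[parent][1]
        else:
            src_uid = cls.NULLREF

        # uids come from the table length: re-recording an op appends a new
        # row, so uids and ops are no longer 1:1.
        next_uid = len(cls._optimisedTable)
        opt_uid = cls._optimisedUID.get(op, (next_uid, 0))[0]
        entry = [opt_uid, src_uid] + list(entry_fields)

        if op not in cls._optimisedUID:
            cls._optimisedUID[op] = (next_uid, src_uid)
            cls._optimisedTable.append(entry)
        elif entry != cls._optimisedTable[cls._optimisedUID[op][0]]:
            # Already recorded and something changed: append a fresh row
            # under a new uid instead of mutating the old one.
            entry[0] = next_uid
            cls._optimisedUID[op] = (next_uid, src_uid)
            cls._optimisedTable.append(entry)


# Re-recording with identical fields keeps a single row; a changed entry
# appends a second row with a fresh uid.
op = object()
DebugDatabaseSketch.add_optimised(op, op, ["AvgPool", 1, 1])
DebugDatabaseSketch.add_optimised(op, op, ["AvgPool", 1, 1])  # unchanged: no new row
DebugDatabaseSketch.add_optimised(op, op, ["DepthwiseConv2DBias", 3, 3])  # changed: new row
assert len(DebugDatabaseSketch._optimisedTable) == 2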