From 9a03fdff316662be69a1adc4e391e43bc6519b08 Mon Sep 17 00:00:00 2001 From: Charles Xu Date: Thu, 2 Jul 2020 15:12:40 +0200 Subject: MLBEDSW-2569:Support 1x1 IFM ResizeBilinear Signed-off-by: Charles Xu Change-Id: I44428d77b2e8e44a477e5c4dfe28ab8dd1792838 --- ethosu/vela/graph_optimiser.py | 38 ++++++++++++++++++++++-- ethosu/vela/npu_serialisation.py | 5 ++-- ethosu/vela/register_command_stream_generator.py | 17 ++++++++++- ethosu/vela/supported_operators.py | 2 ++ 4 files changed, 57 insertions(+), 5 deletions(-) diff --git a/ethosu/vela/graph_optimiser.py b/ethosu/vela/graph_optimiser.py index c805be50..355b16ff 100644 --- a/ethosu/vela/graph_optimiser.py +++ b/ethosu/vela/graph_optimiser.py @@ -27,6 +27,7 @@ from .ethos_u55_regs.ethos_u55_regs import resampling_mode from .numeric_util import full_shape from .operation import NpuBlockType from .operation import Operation +from .tensor import QuantizationParameters from .tensor import Tensor passthrough_nodes = set(("Identity",)) @@ -181,6 +182,39 @@ def fixup_conv2d_backprop(op, arch): return op +# Convert the op to an elementwise add +def convert_resizebilinear_1x1_to_add(op): + op.type = "AddAct" + op.name = op.name + "_add" + op.attrs.update({"npu_block_type": NpuBlockType.ElementWise}) + op.attrs["resizebilinear"] = True + # Create an input tensor filled with zeros + shape = op.outputs[0].shape + tens = Tensor(shape, op.inputs[0].dtype, op.inputs[1].name + "_add") + tens.values = np.zeros(shape) + tens.quant_values = np.zeros(shape, np.uint8) + tens.quantization = QuantizationParameters(0.0, 255.0) + tens.quantization.scale_f32 = 1.0 + tens.quantization.zero_point = 0 + tens.consumer_list = [op] + tens_op = op.inputs[1].ops[0] + tens_op.outputs = [tens] + tens.ops = [tens_op] + # Set the add inputs + op.inputs[1] = op.inputs[0] + op.inputs[0] = tens + + return op + + +def fixup_resizebilinear(op, arch): + if op.type == "ResizeBilinear": + if op.inputs[0].shape[1] == 1 and op.inputs[0].shape[2] == 1: + 
convert_resizebilinear_1x1_to_add(op) + + return op + + def fixup_fully_connected_input(op, arch): if op.type == "FullyConnectedAct": inp = op.inputs[0] @@ -614,8 +648,7 @@ def add_attrs_to_resizebilinear(op, arch): # produce a (M * 2 - 1, N * 2 - 1) sized output op.attrs["padding"] = b"VALID" else: - # If this exception is raised, something is wrong with the supported op check - raise UnsupportedFeatureError("Unsupported upscaling factor") + return op input_tensor.resampling_mode = resampling_mode.NEAREST op.attrs.update({"strides": (1, 1, 1, 1), "ksize": (1, 2, 2, 1)}) return op @@ -647,6 +680,7 @@ def optimise_graph_a(nng, arch, verbose_graph=False): mark_npu_block_type, fixup_elementwise_with_scalars, reorder_depthwise_weights, + fixup_resizebilinear, # convert_mul_max_to_abs_or_lrelu # TODO: enable optimisation once quantisation issues are resolved ] diff --git a/ethosu/vela/npu_serialisation.py b/ethosu/vela/npu_serialisation.py index 2d1c6b10..4b5a888f 100644 --- a/ethosu/vela/npu_serialisation.py +++ b/ethosu/vela/npu_serialisation.py @@ -51,8 +51,9 @@ def copy_compressed_values_to_memory_tensor(memory_tensor, src_tensor): def copy_ifm_values_to_memory_tensor(memory_tensor, src_tensor): start_addr = src_tensor.address - end_addr = start_addr + src_tensor.quant_values.size - memory_tensor.values[start_addr:end_addr] = src_tensor.quant_values + values = src_tensor.quant_values.flatten() + end_addr = start_addr + values.size + memory_tensor.values[start_addr:end_addr] = values def serialise_npu_subgraph_into_tensors(nng, sg, arch, scratch_tens, scratch_fast_tens, flash_tens): diff --git a/ethosu/vela/register_command_stream_generator.py b/ethosu/vela/register_command_stream_generator.py index d32766b0..38b40ba5 100644 --- a/ethosu/vela/register_command_stream_generator.py +++ b/ethosu/vela/register_command_stream_generator.py @@ -478,6 +478,11 @@ def generate_register_command_stream(nng, sg, arch, verbose=False): if (faf == "Sigmoid") or (faf == "Tanh"): 
output_scale = 1 / 0x3000 + # Force output scale same as the input scale for + # resizebilinear 1x1 that is converted to add + if "resizebilinear" in primary_op.attrs: + output_scale = input2_scale + if input_scale == input2_scale: opa_scale, opb_scale, ofm_scale, shift = scaling.simplified_elementwise_add_sub_scale( input_scale, input2_scale, output_scale @@ -860,7 +865,17 @@ def generate_register_command_stream(nng, sg, arch, verbose=False): emit.cmd0_with_param(zero_point_op, 0) else: assert tens.quantization.zero_point is not None, "need an actual zero point set" - emit.cmd0_with_param(zero_point_op, int(tens.quantization.zero_point)) + if ( + "resizebilinear" in primary_op.attrs + and primary_op.type == "AddAct" + and cmd0.NPU_SET_OFM_ZERO_POINT == zero_point_op + ): + # Force output zero point same as the input zero point + # for resizebilinear 1x1 that is converted to add + zero_point = cmd.ifm2_tensor.quantization.zero_point + else: + zero_point = tens.quantization.zero_point + emit.cmd0_with_param(zero_point_op, int(zero_point)) if tens.shape == []: # Empty shape, elementwise constant diff --git a/ethosu/vela/supported_operators.py b/ethosu/vela/supported_operators.py index e8e8d852..3ec3429a 100644 --- a/ethosu/vela/supported_operators.py +++ b/ethosu/vela/supported_operators.py @@ -236,6 +236,8 @@ class SupportedOperators: def check_resize_restrictions(self, op): # check unsupported upscaling factor if op.type == "ResizeBilinear": + if op.inputs[0].shape[1] == 1 and op.inputs[0].shape[2] == 1: + return True upscaled_shape = [op.inputs[0].shape[1] * 2, op.inputs[0].shape[2] * 2] out_shape = op.outputs[0].shape[1:3] if not op.attrs["align_corners"] and out_shape != upscaled_shape: -- cgit v1.2.1