From 885033b5bf2f6513b438f273b2bc71964f0c6c59 Mon Sep 17 00:00:00 2001
From: Tim Hall <tim.hall@arm.com>
Date: Thu, 21 Jul 2022 11:46:03 +0100
Subject: MLBEDSW-4157: Add RESIZE_NEAREST_NEIGHBOR support

 - Changed ResizeBilinear to support ResizeNearestNeighbor as well for
1x1 IFM, IFM equal OFM, and non-align corners
 - Added support for ResizeNearestNeighbor with align corners by
converting to a DepthwiseConv
 - Updated supported operator unit tests
 - Added is_resize() helper function and some associated refactoring

Signed-off-by: Tim Hall <tim.hall@arm.com>
Change-Id: Id5bdf2a25e8aa6a4f28b7236250abf768141ce37
---
 ethosu/vela/api.py                                 |   2 +-
 ethosu/vela/high_level_command_stream_generator.py |   2 +-
 ethosu/vela/high_level_command_to_npu_op.py        |  30 ++--
 ethosu/vela/operation.py                           |   8 +-
 ethosu/vela/pass_packing.py                        |   5 +-
 ethosu/vela/register_command_stream_generator.py   |   2 +-
 .../vela/test/test_tflite_supported_operators.py   | 160 +++++++++----------
 ethosu/vela/tflite_graph_optimiser.py              | 172 +++++++++++++++------
 ethosu/vela/tflite_mapping.py                      |   2 +-
 ethosu/vela/tflite_supported_operators.py          |  18 +--
 10 files changed, 238 insertions(+), 163 deletions(-)

diff --git a/ethosu/vela/api.py b/ethosu/vela/api.py
index 399fd46d..26ca291d 100644
--- a/ethosu/vela/api.py
+++ b/ethosu/vela/api.py
@@ -374,7 +374,7 @@ class NpuPoolingOperation(NpuBlockOperation):
     def __init__(self, pooling_op_type: NpuPoolingOp):
         super().__init__(NpuOperationType.Pooling)
         self.sub_op_type: NpuPoolingOp = pooling_op_type
-        # Set to a float value for ResizeBilinear operations (affects scaling), else to None
+        # Set to a float value for ResizeBilinear/NearestNeighbor operations (affects scaling), else to None
         self.rescale: Optional[float] = None
 
 
diff --git a/ethosu/vela/high_level_command_stream_generator.py b/ethosu/vela/high_level_command_stream_generator.py
index a52bdc37..7e13b62f 100644
--- a/ethosu/vela/high_level_command_stream_generator.py
+++ b/ethosu/vela/high_level_command_stream_generator.py
@@ -85,7 +85,7 @@ def generate_high_level_commands_for_sched_op(sched_op, schedule):
     upscaling = 1
     if sched_op.op_type == Op.Conv2DBackpropInputSwitchedBias:
         upscaling = ofm_shape.height // ifm.shape.height
-    elif sched_op.op_type == Op.ResizeBilinear:
+    elif sched_op.op_type.is_resize_op():
         upscaling = round_up_divide(ofm_shape.height, ifm.shape.height)
 
     # Get kernel height and height dilation
diff --git a/ethosu/vela/high_level_command_to_npu_op.py b/ethosu/vela/high_level_command_to_npu_op.py
index e6bfc1c4..2ce150fc 100644
--- a/ethosu/vela/high_level_command_to_npu_op.py
+++ b/ethosu/vela/high_level_command_to_npu_op.py
@@ -129,7 +129,7 @@ def ifm_ifm2_correct_order(ifm_shape: List[int], ifm2_shape: List[int]) -> bool:
 def get_rounding_mode(op: Operation, fused_quantize: bool) -> NpuRoundingMode:
     """Specifies type of rounding to be used"""
     rounding_mode = NpuRoundingMode.TFL
-    if op.type == Op.ResizeBilinear:
+    if op.type.is_resize_op():
         rounding_mode = NpuRoundingMode.NATURAL
     elif (
         op.type.npu_block_type in (NpuBlockType.ConvolutionMxN, NpuBlockType.ConvolutionDepthWise)
@@ -201,17 +201,6 @@ def get_mem_limits_for_regions(arch: ArchitectureFeatures) -> Dict[int, int]:
     return mem_limits
 
 
-def get_upscale(op: Operation) -> NpuResamplingMode:
-    upscale = NpuResamplingMode.NONE
-    if op.type == Op.ResizeBilinear:
-        # perform nearest neighbor upscale
-        upscale = NpuResamplingMode.NEAREST
-    elif op.type == Op.Conv2DBackpropInputSwitchedBias:
-        # perform insert zero upscale
-        upscale = NpuResamplingMode.TRANSPOSE
-    return upscale
-
-
 def get_ifm_depth(npu_block_type: NpuBlockType, ifm_box: Box, ofm_box: Box) -> int:
     if npu_block_type in (NpuBlockType.ConvolutionMxN, NpuBlockType.VectorProduct, NpuBlockType.ReduceSum):
         block = ifm_box.get_block()
@@ -224,7 +213,7 @@ def use_zero_point_0(ps, tens: Tensor, is_ifm_tensor: bool) -> bool:
     """Checks if quantization should use 0 as zero point"""
     if tens.dtype == DataType.int32 and is_ifm_tensor:
         return True
-    if ps.primary_op.type not in (Op.AvgPool, Op.ResizeBilinear, Op.CLZ, Op.SHL):
+    if ps.primary_op.type not in (Op.AvgPool, Op.CLZ, Op.SHL) and not ps.primary_op.type.is_resize_op():
         return False
     if ps.primary_op.type == Op.AvgPool and ps.primary_op.explicit_scaling:
         return False
@@ -435,10 +424,9 @@ def create_npu_pool_op(cmd: NpuStripe, arch: ArchitectureFeatures) -> NpuPooling
     """Converts the command to NpuPoolingOperation"""
     ps = cmd.ps
     op = ps.primary_op
-    pool_op = NpuPoolingOp.AVERAGE
     if op.type.is_maxpool_op():
         pool_op = NpuPoolingOp.MAX
-    elif op.type.is_avgpool_op() or op.type == Op.ResizeBilinear:
+    elif op.type.is_avgpool_op() or op.type.is_resize_op():
         pool_op = NpuPoolingOp.AVERAGE
     elif op.type == Op.ReduceSum:
         pool_op = NpuPoolingOp.REDUCE_SUM
@@ -485,18 +473,18 @@ def create_npu_elementwise_op(cmd: NpuStripe, arch: ArchitectureFeatures) -> Npu
     set_common_op_fields(npu_op, cmd, arch)
     # Check if output scale needs to be overridden
     output_scale = None
-    if op.type == Op.Add and "resizebilinear" in op.attrs:
+    if op.type == Op.Add and op.original_type.is_resize_op():
         # Force output scale same as the input scale for
-        # resizebilinear 1x1 that is converted to add
+        # resizebilinear/nearestneighbor 1x1 that is converted to add
         output_scale = npu_op.ifm2.quantization.scale_f32
-    if op.type == Op.Abs:
+    elif op.type == Op.Abs:
         output_scale = npu_op.ifm.quantization.scale_f32 / npu_op.ofm.quantization.scale_f32
-    if op.type == Op.LeakyRelu:
+    elif op.type == Op.LeakyRelu:
         output_scale = op.attrs["alpha"]
-    if op.type in (Op.RescaleAdd, Op.RescaleMul):
+    elif op.type in (Op.RescaleAdd, Op.RescaleMul):
         assert op.rescale is not None, f"{op.type} must have rescale"
         npu_op.rescale = op.rescale
-    if op.type in (Op.Add, Op.Mul, Op.Sub):
+    elif op.type in (Op.Add, Op.Mul, Op.Sub):
         if op.activation is not None and op.activation.op_type in (Op.Sigmoid, Op.Tanh):
             output_scale = 1 / 0x3000
     if output_scale is not None:
diff --git a/ethosu/vela/operation.py b/ethosu/vela/operation.py
index f3eace7e..1a34d0e1 100644
--- a/ethosu/vela/operation.py
+++ b/ethosu/vela/operation.py
@@ -248,8 +248,9 @@ class Op(Enum):
     RescaleAdd = OperatorInfo(block_type=NpuBlockType.ElementWise, indices=NNG_IFM_IFM2_INDICES)
     RescaleMul = OperatorInfo(block_type=NpuBlockType.ElementWise, indices=NNG_IFM_IFM2_INDICES)
     Reshape = OperatorInfo(indices=NNG_IFM_INDICES)
+    # resize ops map to pooling operations unless explicitly converted to other operations in the graph optimiser
     ResizeBilinear = OperatorInfo(block_type=NpuBlockType.Pooling, indices=NNG_IFM_INDICES)
-    ResizeNearestNeighbor = OperatorInfo()
+    ResizeNearestNeighbor = OperatorInfo(block_type=NpuBlockType.Pooling, indices=NNG_IFM_INDICES)
     ReverseSequence = OperatorInfo()
     ReverseV2 = OperatorInfo()
     Rnn = OperatorInfo(block_type=NpuBlockType.VectorProduct, indices=NNG_IFM_WEIGHTS_INDICES)
@@ -364,6 +365,9 @@ class Op(Enum):
     def is_concat_op(self):
         return self in (Op.Concat, Op.ConcatTFLite, Op.PackReshaped, Op.Pack)
 
+    def is_resize_op(self):
+        return self in (Op.ResizeBilinear, Op.ResizeNearestNeighbor)
+
     def needs_bias(self):
         return bool(self.info.indices.biases)
 
@@ -467,6 +471,7 @@ class Operation:
 
     __slots__ = (
         "type",
+        "original_type",
         "name",
         "op_index",
         "attrs",
@@ -497,6 +502,7 @@ class Operation:
 
     def __init__(self, op_type: Op, name: str):
         self.type = op_type
+        self.original_type = op_type
         self.name = name
         self.attrs: Dict[str, Any] = {}
         self.inputs: List[Optional[Tensor]] = []
diff --git a/ethosu/vela/pass_packing.py b/ethosu/vela/pass_packing.py
index 050b0965..988e52e6 100644
--- a/ethosu/vela/pass_packing.py
+++ b/ethosu/vela/pass_packing.py
@@ -61,10 +61,9 @@ mac_main_ops = set(
         Op.AvgPool,
         Op.MaxPool,
         Op.ReduceSum,
-        # deconvolution
-        Op.ResizeBilinear,
     )
-)
+    # resize ops use pooling operations unless explicitly converted to other operations prior to pass packing
+) | Op.op_set(Op.is_resize_op)
 
 binary_elem_wise_main_ops = Op.op_set(Op.is_binary_elementwise_op)
 
diff --git a/ethosu/vela/register_command_stream_generator.py b/ethosu/vela/register_command_stream_generator.py
index 12a36caf..a8d1ddff 100644
--- a/ethosu/vela/register_command_stream_generator.py
+++ b/ethosu/vela/register_command_stream_generator.py
@@ -706,7 +706,7 @@ def generate_ofm_scaling_for_pooling(emit: CommandStreamEmitter, pool_op: NpuPoo
             scale = explicit_scaling.multiplier[0]
             shift = explicit_scaling.shift[0]
         else:
-            # for ResizeBilinear operations with rescale
+            # for ResizeBilinear/NearestNeighbor operations with rescale
             rescale = pool_op.rescale
             rescale_bits = len(bin(round_up_to_int(rescale))) - 2 + 1
             scale, shift = scaling.quantise_pooling_scale(kernel.height * kernel.width, rescale_bits)
diff --git a/ethosu/vela/test/test_tflite_supported_operators.py b/ethosu/vela/test/test_tflite_supported_operators.py
index ab12e417..89c27997 100644
--- a/ethosu/vela/test/test_tflite_supported_operators.py
+++ b/ethosu/vela/test/test_tflite_supported_operators.py
@@ -306,84 +306,88 @@ def test_constraint_filter_product_height_range():
     assert not support.is_operator_supported(op)
 
 
-def test_constraint_bilinear_resize():
-    # IFM W and H == 1
-    op = testutil.create_op_with_quant_tensors(Op.ResizeBilinear, [1, 1, 1, 8], [1, 8, 8, 8])
-    op.add_input_tensor(create_const_tensor("size", [2], DataType.int32, [8, 8], np.int32))
-    assert support.is_operator_supported(op)
-
-    # IFM == OFM
-    op = testutil.create_op_with_quant_tensors(Op.ResizeBilinear, [1, 8, 8, 8], [1, 8, 8, 8])
-    op.add_input_tensor(create_const_tensor("size", [2], DataType.int32, [8, 8], np.int32))
-    assert support.is_operator_supported(op)
-
-    # IFM x2 == OFM ; align_corners = False
-    op = testutil.create_op_with_quant_tensors(Op.ResizeBilinear, [1, 4, 4, 8], [1, 8, 8, 8])
-    op.add_input_tensor(create_const_tensor("size", [2], DataType.int32, [8, 8], np.int32))
-    assert support.is_operator_supported(op)
-
-    # IFM x4 == OFM ; align_corners = False
-    op = testutil.create_op_with_quant_tensors(Op.ResizeBilinear, [1, 4, 4, 8], [1, 16, 16, 8])
-    op.add_input_tensor(create_const_tensor("size", [2], DataType.int32, [16, 16], np.int32))
-    assert support.is_operator_supported(op)
-
-    # IFM x8 == OFM ; align_corners = False
-    op = testutil.create_op_with_quant_tensors(Op.ResizeBilinear, [1, 4, 4, 8], [1, 32, 32, 8])
-    op.add_input_tensor(create_const_tensor("size", [2], DataType.int32, [32, 32], np.int32))
-    assert support.is_operator_supported(op)
-
-    # IFM -1 x2 == OFM -1 ; align_corners = True
-    op = testutil.create_op_with_quant_tensors(Op.ResizeBilinear, [1, 4, 4, 8], [1, 7, 7, 8])
-    op.add_input_tensor(create_const_tensor("size", [2], DataType.int32, [7, 7], np.int32))
-    op.attrs["align_corners"] = True
-    assert support.is_operator_supported(op)
-
-    # IFM -1 x4 == OFM -1 ; align_corners = True
-    op = testutil.create_op_with_quant_tensors(Op.ResizeBilinear, [1, 4, 4, 8], [1, 13, 13, 8])
-    op.add_input_tensor(create_const_tensor("size", [2], DataType.int32, [13, 13], np.int32))
-    op.attrs["align_corners"] = True
-    assert support.is_operator_supported(op)
-
-    # IFM -1 x8 == OFM -1 ; align_corners = True
-    op = testutil.create_op_with_quant_tensors(Op.ResizeBilinear, [1, 4, 4, 8], [1, 25, 25, 8])
-    op.add_input_tensor(create_const_tensor("size", [2], DataType.int32, [25, 25], np.int32))
-    op.attrs["align_corners"] = True
-    assert support.is_operator_supported(op)
-
-    # Invalid case - upscale size
-    op = testutil.create_op_with_quant_tensors(Op.ResizeBilinear, [1, 4, 4, 8], [1, 17, 17, 8])
-    op.add_input_tensor(create_const_tensor("size", [2], DataType.int32, [17, 17], np.int32))
-    assert not support.is_operator_supported(op)
-
-    # Invalid case - upscale size with align corners
-    op = testutil.create_op_with_quant_tensors(Op.ResizeBilinear, [1, 4, 4, 8], [1, 15, 15, 8])
-    op.add_input_tensor(create_const_tensor("size", [2], DataType.int32, [15, 15], np.int32))
-    op.attrs["align_corners"] = True
-    assert not support.is_operator_supported(op)
-
-
-def test_constraint_bilinear_resize_size():
-    # Invalid case - size != ofm size
-    op = testutil.create_op_with_quant_tensors(Op.ResizeBilinear, [1, 4, 4, 8], [1, 8, 8, 8])
-    op.add_input_tensor(create_const_tensor("size", [2], DataType.int32, [7, 7], np.int32))
-    assert not support.is_operator_supported(op)
-
-
-def test_constraint_bilinear_resize_attrs():
-    # Invalid case - both align corners and half-pixel centers
-    op = testutil.create_op_with_quant_tensors(Op.ResizeBilinear, [1, 4, 4, 8], [1, 8, 8, 8])
-    op.add_input_tensor(create_const_tensor("size", [2], DataType.int32, [8, 8], np.int32))
-    op.attrs["align_corners"] = True
-    op.attrs["half_pixel_centers"] = True
-    assert not support.is_operator_supported(op)
-
-
-def test_constraint_bilinear_resize_hpc():
-    # Invalid case - half-pixel centers (not supported)
-    op = testutil.create_op_with_quant_tensors(Op.ResizeBilinear, [1, 4, 4, 8], [1, 8, 8, 8])
-    op.add_input_tensor(create_const_tensor("size", [2], DataType.int32, [8, 8], np.int32))
-    op.attrs["half_pixel_centers"] = True
-    assert not support.is_operator_supported(op)
+def test_constraint_resize():
+    for resize_op in Op.op_set(Op.is_resize_op):
+        # IFM W and H == 1
+        op = testutil.create_op_with_quant_tensors(resize_op, [1, 1, 1, 8], [1, 8, 8, 8])
+        op.add_input_tensor(create_const_tensor("size", [2], DataType.int32, [8, 8], np.int32))
+        assert support.is_operator_supported(op)
+
+        # IFM == OFM
+        op = testutil.create_op_with_quant_tensors(resize_op, [1, 8, 8, 8], [1, 8, 8, 8])
+        op.add_input_tensor(create_const_tensor("size", [2], DataType.int32, [8, 8], np.int32))
+        assert support.is_operator_supported(op)
+
+        # IFM x2 == OFM ; align_corners = False
+        op = testutil.create_op_with_quant_tensors(resize_op, [1, 4, 4, 8], [1, 8, 8, 8])
+        op.add_input_tensor(create_const_tensor("size", [2], DataType.int32, [8, 8], np.int32))
+        assert support.is_operator_supported(op)
+
+        # IFM x4 == OFM ; align_corners = False
+        op = testutil.create_op_with_quant_tensors(resize_op, [1, 4, 4, 8], [1, 16, 16, 8])
+        op.add_input_tensor(create_const_tensor("size", [2], DataType.int32, [16, 16], np.int32))
+        assert support.is_operator_supported(op)
+
+        # IFM x8 == OFM ; align_corners = False
+        op = testutil.create_op_with_quant_tensors(resize_op, [1, 4, 4, 8], [1, 32, 32, 8])
+        op.add_input_tensor(create_const_tensor("size", [2], DataType.int32, [32, 32], np.int32))
+        assert support.is_operator_supported(op)
+
+        # IFM -1 x2 == OFM -1 ; align_corners = True
+        op = testutil.create_op_with_quant_tensors(resize_op, [1, 4, 4, 8], [1, 7, 7, 8])
+        op.add_input_tensor(create_const_tensor("size", [2], DataType.int32, [7, 7], np.int32))
+        op.attrs["align_corners"] = True
+        assert support.is_operator_supported(op)
+
+        # IFM -1 x4 == OFM -1 ; align_corners = True
+        op = testutil.create_op_with_quant_tensors(resize_op, [1, 4, 4, 8], [1, 13, 13, 8])
+        op.add_input_tensor(create_const_tensor("size", [2], DataType.int32, [13, 13], np.int32))
+        op.attrs["align_corners"] = True
+        assert support.is_operator_supported(op)
+
+        # IFM -1 x8 == OFM -1 ; align_corners = True
+        op = testutil.create_op_with_quant_tensors(resize_op, [1, 4, 4, 8], [1, 25, 25, 8])
+        op.add_input_tensor(create_const_tensor("size", [2], DataType.int32, [25, 25], np.int32))
+        op.attrs["align_corners"] = True
+        assert support.is_operator_supported(op)
+
+        # Invalid case - upscale size
+        op = testutil.create_op_with_quant_tensors(resize_op, [1, 4, 4, 8], [1, 17, 17, 8])
+        op.add_input_tensor(create_const_tensor("size", [2], DataType.int32, [17, 17], np.int32))
+        assert not support.is_operator_supported(op)
+
+        # Invalid case - upscale size with align corners
+        op = testutil.create_op_with_quant_tensors(resize_op, [1, 4, 4, 8], [1, 15, 15, 8])
+        op.add_input_tensor(create_const_tensor("size", [2], DataType.int32, [15, 15], np.int32))
+        op.attrs["align_corners"] = True
+        assert not support.is_operator_supported(op)
+
+
+def test_constraint_resize_size():
+    for resize_op in Op.op_set(Op.is_resize_op):
+        # Invalid case - size != ofm size
+        op = testutil.create_op_with_quant_tensors(resize_op, [1, 4, 4, 8], [1, 8, 8, 8])
+        op.add_input_tensor(create_const_tensor("size", [2], DataType.int32, [7, 7], np.int32))
+        assert not support.is_operator_supported(op)
+
+
+def test_constraint_resize_attrs():
+    for resize_op in Op.op_set(Op.is_resize_op):
+        # Invalid case - both align corners and half-pixel centers
+        op = testutil.create_op_with_quant_tensors(resize_op, [1, 4, 4, 8], [1, 8, 8, 8])
+        op.add_input_tensor(create_const_tensor("size", [2], DataType.int32, [8, 8], np.int32))
+        op.attrs["align_corners"] = True
+        op.attrs["half_pixel_centers"] = True
+        assert not support.is_operator_supported(op)
+
+
+def test_constraint_resize_half_pixel_centers():
+    for resize_op in Op.op_set(Op.is_resize_op):
+        # Invalid case - half-pixel centers (not supported)
+        op = testutil.create_op_with_quant_tensors(resize_op, [1, 4, 4, 8], [1, 8, 8, 8])
+        op.add_input_tensor(create_const_tensor("size", [2], DataType.int32, [8, 8], np.int32))
+        op.attrs["half_pixel_centers"] = True
+        assert not support.is_operator_supported(op)
 
 
 def test_constraint_concat_pass():
diff --git a/ethosu/vela/tflite_graph_optimiser.py b/ethosu/vela/tflite_graph_optimiser.py
index d2899c4c..ed8fa1e3 100644
--- a/ethosu/vela/tflite_graph_optimiser.py
+++ b/ethosu/vela/tflite_graph_optimiser.py
@@ -279,10 +279,9 @@ def fixup_conv2d_backprop(op, arch, nng):
 
 
 # Convert the op to an elementwise add
-def convert_resizebilinear_1x1_to_add(op):
-    op.type = Op.Add
+def convert_resize_1x1_to_add(op):
+    op.type = Op.Add  # original_type will stay as Op.ResizeBilinear or Op.ResizeNearestNeighbor
     op.name = op.name + "_add"
-    op.attrs["resizebilinear"] = True
     # Create an input tensor filled with zeros
     shape = op.ofm_shapes[0].as_list()
     tens = Tensor(shape, op.inputs[0].dtype, op.inputs[1].name + "_add")
@@ -301,12 +300,103 @@ def convert_resizebilinear_1x1_to_add(op):
     return op
 
 
-# Convert ResizeBilinear to a number of 2x2 nearest neighbor upscaling and one avgpool op with kernel size dependent
-# on the upscaling factor. Avgpool kernel limit of 8x8 when padding is applied limits upscaling to 8x8.
-def convert_resizebilinear_to_upscale_and_average_pool(op):
+# Convert ResizeNearestNeightbor with align corners to a depthwise convolution. The IFM will already have been upscaled
+# apart from the final x2 scaling which will be done as part of this operation. The kernel contains a single coefficient
+# to select the appropriate nearest neighbor value
+def convert_resizenn_ac_to_depthwise_conv(op, upscale_factor):
+    ifm = op.ifm
+    ofm = op.ofm
+    output_depth = ofm.shape[-1]
+    dw_op_attrs = {
+        "padding": Padding.VALID,
+        "stride_h": 1,
+        "stride_w": 1,
+        "strides": (1, 1, 1, 1),
+        "depth_multiplier": 1,
+        "channel_multiplier": 1,
+        "dilation_h_factor": 1,
+        "dilation_w_factor": 1,
+        "dilation": (1, 1, 1, 1),
+    }
+
+    # change resizebilinear to depthwise
+    op.type = Op.DepthwiseConv2DBias
+    op.attrs.update(dw_op_attrs)
+    op.set_input_tensor(ifm, 0)  # ifm tensor index
+    op.activation = None
+
+    # add input resample to resize by x2
+    op.ifm_resampling_mode = resampling_mode.NEAREST
+
+    # don't care about the rounding mode as it is nearest neighbor
+
+    # setup weight tensor
+    weight_quant = QuantizationParameters()
+    weight_quant.scale_f32 = 1.0  # no scaling as only a single non-zero coeff to select the desired value
+    weight_quant.zero_point = 0
+    weight_quant.quant_dim = 0
+    ofm_dtype = ofm.dtype
+    if ofm_dtype == DataType.uint8:
+        weight_value_dtype = np.uint8
+        weight_quant.quant_min = 0
+        weight_quant.quant_max = (1 << ofm_dtype.bits) - 1
+    else:
+        if ofm_dtype == DataType.int8:
+            weight_value_dtype = np.int8
+        else:
+            assert ofm_dtype == DataType.int16
+            weight_value_dtype = np.int16
+
+        weight_quant.quant_min = -(1 << (ofm_dtype.bits - 1))
+        weight_quant.quant_max = (1 << (ofm_dtype.bits - 1)) - 1
+
+    weight_shape = [upscale_factor, upscale_factor, output_depth, output_depth]  # HWIO
+
+    # the single non-zero coefficient used to select the desired value needs to be placed in the 'centre value', which
+    # is calculated by finding the 'centre position' ('*' in the diagram below) and then choosing the 'value' that is
+    # below-and-right (i.e. next) to it (D).
+    # 0---1---2
+    # | A | B |
+    # 1---*---+
+    # | C | D |
+    # 2---+---+
+    weight_values = [0] * (upscale_factor * upscale_factor)
+    centre_coeff = (upscale_factor // 2) * upscale_factor + (upscale_factor // 2)
+    weight_values[centre_coeff] = 1
+
+    # add weight tensor, this will discard the size tensor of the resize op
+    op.set_input_tensor(
+        create_const_tensor(
+            "weights",
+            weight_shape,
+            ofm.dtype,
+            np.array(weight_values).reshape(weight_shape),
+            value_dtype=weight_value_dtype,
+            quantization=weight_quant,
+        ),
+        1,  # inputs tensor weight index
+    )
+
+    # setup bias tensor by assign None and then call the fix-up function to create a suitable tensor.
+    # need to append the bias tensor as resize ops only have 2 inputs
+    assert len(op.inputs) == 2
+    op.inputs.append(None)
+    fixup_bias_tensors(op, None, None)
+
+    # finally update the shape incase we've change the tensor shapes or connections
+    op.set_ifm_ofm_shapes()
+
+    return op
+
+
+# Convert ResizeBilinear/NearestNeighbor to a number of 1x1 average pools with nearest neighbor x2 upscaling and one
+# final average pool with a kernel size that depends upon the resize ops upscaling factor (x2, x4 or x8). The maximum
+# upscale factor is limited to x8 because of the limit 8x8 kernel size limit for average pool with padding.
+def convert_resize_to_upscale_and_average_pool(op):
     pre_op = op
     outputs = op.outputs
     dtype = op.ifm.dtype
+
     op.attrs.update({"strides": (1, 1, 1, 1), "ksize": (1, 1, 1, 1)})
     op.attrs["padding"] = Padding.SAME  # doesn't really matter as the kernel is 1x1
     op.ifm_resampling_mode = resampling_mode.NEAREST
@@ -321,14 +411,14 @@ def convert_resizebilinear_to_upscale_and_average_pool(op):
     # between different versions of numpy. This consistency ensures that the kernel dimensions are kept integral
     n = int(np.log2(upscale_factor))
 
-    # Perform 2x2 upscaling n-1 times
+    # Perform x2 upscaling n-1 times
     scaled_op = pre_op
     for count in range(n - 1):
         if count > 0:
             scaled_op = op.clone(f"_{count}")
             scaled_op.inputs[0] = pre_op.outputs[0]
 
-        # Nearest neighbor 2x2 upscaling
+        # Nearest neighbor x2 upscaling
         upscaled_shape = upscaled_shape * 2
         shape = op.ofm_shapes[0].as_list()
         shape[1:3] = upscaled_shape
@@ -339,17 +429,30 @@ def convert_resizebilinear_to_upscale_and_average_pool(op):
 
         scaled_op.set_ifm_ofm_shapes()
 
-    # Last 2x2 upscaling also applies avgpool with kernel size dependent on the upscaling factor and adds
-    # padding to the right and bottom.
+    # Last x2 upscaling
     if n > 1:
         scaled_op = op.clone(f"_{n-1}")
         scaled_op.inputs[0] = pre_op.outputs[0]
-    if op.attrs["align_corners"]:
-        scaled_op.attrs["padding"] = Padding.VALID
-    else:
-        scaled_op.attrs["padding"] = Padding.EXPLICIT
-        scaled_op.attrs["explicit_padding"] = [0, 0, upscale_factor - 1, upscale_factor - 1]
-    scaled_op.attrs.update({"ksize": (1, upscale_factor, upscale_factor, 1)})
+
+    if scaled_op.original_type == Op.ResizeBilinear:
+        if scaled_op.attrs["align_corners"]:
+            # no padding
+            scaled_op.attrs["padding"] = Padding.VALID
+        else:
+            # padding to the right and bottom (limits average pool to 8x8 kernel)
+            scaled_op.attrs["padding"] = Padding.EXPLICIT
+            scaled_op.attrs["explicit_padding"] = [0, 0, upscale_factor - 1, upscale_factor - 1]
+
+        # kernal size dependent on the upscaling factor
+        scaled_op.attrs.update({"ksize": (1, upscale_factor, upscale_factor, 1)})
+    else:  # Op.ResizeNearestNeighbor
+        if scaled_op.attrs["align_corners"]:
+            # use depthwise conv to select the correct value
+            scaled_op = convert_resizenn_ac_to_depthwise_conv(scaled_op, upscale_factor)
+        else:
+            # keep 1x1 kernel and average pool
+            pass
+
     scaled_op.outputs = outputs
     scaled_op.outputs[0].ops = [scaled_op]
     scaled_op.set_ifm_ofm_shapes()
@@ -357,16 +460,16 @@ def convert_resizebilinear_to_upscale_and_average_pool(op):
     return op
 
 
-def fixup_resizebilinear(op, arch, nng):
-    if op.type == Op.ResizeBilinear and op.run_on_npu:
+def fixup_resize(op, arch, nng):
+    if op.type.is_resize_op() and op.run_on_npu:
         if op.ifm_shapes[0] == op.ofm_shapes[0]:
-            # Bypass nop resizebilinear
+            # Bypass the resize op which is essentially a NOP
             op.inputs = op.inputs[:1]
             op.type = Op.Identity
         elif op.ifm_shapes[0].height == 1 and op.ifm_shapes[0].width == 1:
-            convert_resizebilinear_1x1_to_add(op)
+            convert_resize_1x1_to_add(op)
         else:
-            convert_resizebilinear_to_upscale_and_average_pool(op)
+            convert_resize_to_upscale_and_average_pool(op)
 
     return op
 
@@ -1130,31 +1233,6 @@ def convert_pad(op: Operation, arch, nng):
     return avgpool_op
 
 
-def add_attrs_to_resizebilinear(op, arch, nng):
-    if op.type == Op.ResizeBilinear and op.run_on_npu:
-        input_shape = op.ifm_shapes[0]
-        upscaled_height = input_shape.height * 2
-        upscaled_width = input_shape.width * 2
-        out_shape = op.ofm_shapes[0]
-        if not op.attrs["align_corners"] and out_shape.height == upscaled_height and out_shape.width == upscaled_width:
-            # this means the output is supposed to be a x2 upscale,
-            # so we need to do SAME padding
-            op.attrs["padding"] = Padding.SAME
-        elif (
-            op.attrs["align_corners"]
-            and out_shape.height == (upscaled_height - 1)
-            and out_shape.width == (upscaled_width - 1)
-        ):
-            # here we can just run the avg pool without padding and
-            # produce a (M * 2 - 1, N * 2 - 1) sized output
-            op.attrs["padding"] = Padding.VALID
-        else:
-            return op
-        op.ifm_resampling_mode = resampling_mode.NEAREST
-        op.attrs.update({"strides": (1, 1, 1, 1), "ksize": (1, 2, 2, 1)})
-    return op
-
-
 def fixup_bias_tensors(op, arch, nng):
     if op.type.needs_bias() and op.bias is None:
         # Op has no bias, add bias tensor filled with zeros
@@ -1577,7 +1655,7 @@ def tflite_optimise_graph(nng, arch):
         fixup_conv2d_backprop,
         fixup_relus_with_differing_ifm_ofm_scaling,
         reorder_depthwise_weights,
-        fixup_resizebilinear,
+        fixup_resize,
         fixup_bias_tensors,
         fixup_asymmetric_weights,
         convert_mul_max_to_abs_or_lrelu,
diff --git a/ethosu/vela/tflite_mapping.py b/ethosu/vela/tflite_mapping.py
index bf155b9c..39b08b9e 100644
--- a/ethosu/vela/tflite_mapping.py
+++ b/ethosu/vela/tflite_mapping.py
@@ -799,7 +799,7 @@ builtin_operator_map = {
     BuiltinOperator.RESIZE_NEAREST_NEIGHBOR: (
         Op.ResizeNearestNeighbor,
         OptionsSerializer("ResizeNearestNeighborOptions", ("align_corners", "half_pixel_centers")),
-        TFLITE_NO_INDICES,
+        TFLITE_IFM_INDICES,
     ),
     BuiltinOperator.LEAKY_RELU: (Op.LeakyRelu, OptionsSerializer("LeakyReluOptions", ("alpha",)), TFLITE_IFM_INDICES),
     BuiltinOperator.SQUARED_DIFFERENCE: (
diff --git a/ethosu/vela/tflite_supported_operators.py b/ethosu/vela/tflite_supported_operators.py
index 01d2e61f..90d93d0f 100644
--- a/ethosu/vela/tflite_supported_operators.py
+++ b/ethosu/vela/tflite_supported_operators.py
@@ -58,7 +58,7 @@ class TFLiteSupportedOperators:
     max_pooling_ops = Op.op_set(Op.is_maxpool_op)
     avg_pooling_ops = Op.op_set(Op.is_avgpool_op)
     pooling_ops = set((Op.ReduceSum,)) | max_pooling_ops | avg_pooling_ops
-    resizing_ops = set((Op.ResizeBilinear,))
+    resizing_ops = Op.op_set(Op.is_resize_op)
     fc_vector_products = set(
         (
             Op.QuantizedMatMul,
@@ -242,10 +242,10 @@ class TFLiteSupportedOperators:
 
         # Resizing specific checks:
         for op_type in TFLiteSupportedOperators.resizing_ops:
-            self.specific_constraints[op_type].append(TFLiteSupportedOperators.constraint_bilinear_resize)
-            self.specific_constraints[op_type].append(TFLiteSupportedOperators.constraint_bilinear_resize_size)
-            self.specific_constraints[op_type].append(TFLiteSupportedOperators.constraint_bilinear_resize_attrs)
-            self.specific_constraints[op_type].append(TFLiteSupportedOperators.constraint_bilinear_resize_hpc)
+            self.specific_constraints[op_type].append(TFLiteSupportedOperators.constraint_resize)
+            self.specific_constraints[op_type].append(TFLiteSupportedOperators.constraint_resize_size)
+            self.specific_constraints[op_type].append(TFLiteSupportedOperators.constraint_resize_attrs)
+            self.specific_constraints[op_type].append(TFLiteSupportedOperators.constraint_resize_half_pixel_centers)
 
         # Vector Product specific checks:
         for op_type in TFLiteSupportedOperators.fc_vector_products:
@@ -589,7 +589,7 @@ class TFLiteSupportedOperators:
         return True, "Op has padding=SAME"
 
     @staticmethod
-    def constraint_bilinear_resize(op):
+    def constraint_resize(op):
         """The width and height of the IFM and OFM must match one of the following criteria:
         IFM W and H must both be 1
         IFM must match OFM
@@ -625,7 +625,7 @@ class TFLiteSupportedOperators:
         return valid, f"Op has ifm_shape={ifm_shape}, ofm_shape={ofm_shape} and align_corners={align_corners}"
 
     @staticmethod
-    def constraint_bilinear_resize_size(op):
+    def constraint_resize_size(op):
         "The size tensor must match the output tensor shape"
         valid = False
         ofm_shape = op.ofm.shape
@@ -640,7 +640,7 @@ class TFLiteSupportedOperators:
         return valid, f"Op has size={size_h}x{size_w} and ofm_shape={ofm_shape}."
 
     @staticmethod
-    def constraint_bilinear_resize_attrs(op):
+    def constraint_resize_attrs(op):
         "Both align_corners and half_pixel_centers can't be True"
         valid = True
         align_corners = op.attrs.get("align_corners", False)
@@ -651,7 +651,7 @@ class TFLiteSupportedOperators:
         return valid, "Op has both align_corners and half_pixel_centers set to True."
 
     @staticmethod
-    def constraint_bilinear_resize_hpc(op):
+    def constraint_resize_half_pixel_centers(op):
         "half_pixel_centers are not supported"
         valid = True
         if op.attrs.get("half_pixel_centers", False):
-- 
cgit v1.2.1