author    Johan Gunnarsson <johan.gunnarsson@arm.com>    2023-08-10 13:10:44 +0200
committer Johan Gunnarsson <johan.gunnarsson@arm.com>    2023-08-29 16:54:41 +0200
commit    985563791a811e1ea3b5137f97e5a5fc4dafd4b1 (patch)
tree      cd1d9a41c9e194e9fcd1fe9ee090d8ef07a640a9
parent    c02eaa3e25840aee4ff909df263d4d0673227c5d (diff)
download  ethos-u-vela-985563791a811e1ea3b5137f97e5a5fc4dafd4b1.tar.gz
MLBEDSW-7881: Convert Quantize op to Avgpool op in graph optimiser
This conversion is already done in the pass packing stage, but doing it in
the graph optimiser stage is better.

Change-Id: Ib9baa98d115cf88491ce39936972a93467a378ce
Signed-off-by: Johan Gunnarsson <johan.gunnarsson@arm.com>
-rw-r--r--  ethosu/vela/high_level_command_to_npu_op.py   6
-rw-r--r--  ethosu/vela/tflite_graph_optimiser.py        16
2 files changed, 19 insertions, 3 deletions
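
For readers less familiar with Vela's graph optimiser, the change is easiest to read as a per-op rewrite pass: each function in the optimiser's rewrite list gets one operation at a time and either returns it untouched or mutates it in place. A minimal, self-contained sketch of that pattern (simplified stand-in classes, not Vela's actual Operation/Op API):

# Minimal sketch of a per-op graph-optimiser rewrite; Op and Operation here are
# simplified stand-ins, not the classes used in ethos-u-vela.
from enum import Enum, auto


class Op(Enum):
    Quantize = auto()
    AvgPool = auto()
    Conv2D = auto()


class Operation:
    def __init__(self, op_type: Op, name: str):
        self.type = op_type
        self.original_type = op_type  # remembered so later stages can see what the op started as
        self.name = name
        self.attrs: dict = {}


def convert_quantize_sketch(op: Operation) -> Operation:
    """Rewrite an int-to-int Quantize into an average-pool no-op that only re-quantizes."""
    if op.type == Op.Quantize:
        op.type = Op.AvgPool
        # 1x1 window, stride 1: data is copied through and only the output
        # quantization is applied (attribute names are illustrative)
        op.attrs = {"ksize": (1, 1, 1, 1), "strides": (1, 1, 1, 1), "padding": "VALID"}
    return op


# The optimiser applies each rewrite function in its list to every op in the graph.
ops = [Operation(Op.Conv2D, "conv"), Operation(Op.Quantize, "requant")]
for rewrite in [convert_quantize_sketch]:
    ops = [rewrite(op) for op in ops]

assert ops[1].type == Op.AvgPool and ops[1].original_type == Op.Quantize
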
diff --git a/ethosu/vela/high_level_command_to_npu_op.py b/ethosu/vela/high_level_command_to_npu_op.py
index 79ac392..4384f2c 100644
--- a/ethosu/vela/high_level_command_to_npu_op.py
+++ b/ethosu/vela/high_level_command_to_npu_op.py
@@ -143,7 +143,7 @@ def get_rounding_mode(op: Operation, fused_quantize: bool) -> NpuRoundingMode:
     if op.type.is_resize_op():
         rounding_mode = NpuRoundingMode.NATURAL
     elif (
-        op._original_type.npu_block_type in (NpuBlockType.ConvolutionMxN, NpuBlockType.ConvolutionDepthWise)
+        op.original_type.npu_block_type in (NpuBlockType.ConvolutionMxN, NpuBlockType.ConvolutionDepthWise)
         and op.ifm.dtype == DataType.int16
     ):
         rounding_mode = NpuRoundingMode.NATURAL
@@ -334,7 +334,7 @@ def use_zero_point_0(ps, tens: Tensor, is_ifm_tensor: bool) -> bool:
         return False
     if ps.primary_op.type == Op.AvgPool and ps.primary_op.explicit_scaling:
         return False
-    fused_quantize = any(op.type == Op.Quantize for op in ps.ops)
+    fused_quantize = any(op.type == Op.Quantize or op.original_type == Op.Quantize for op in ps.ops)
     forced_ofm_quantization = ps.primary_op.forced_output_quantization
     use_0 = (
         (
@@ -521,7 +521,7 @@ def set_common_op_fields(npu_op: NpuBlockOperation, cmd: NpuStripe, arch: Archit
     if cmd.weight_tensor is not None:
         npu_op.weights, npu_op.biases = create_weights(cmd.weight_tensor, cmd.weight_box, cmd.scale_tensor, arch)
     npu_op.activation = create_npu_activation(op)
-    npu_op.fused_quantize = any(op.type == Op.Quantize for op in ps.ops)
+    npu_op.fused_quantize = any(op.type == Op.Quantize or op.original_type == Op.Quantize for op in ps.ops)
     npu_op.rounding_mode = get_rounding_mode(op, npu_op.fused_quantize)
     npu_op.block_config = NpuShape3D(height=ps.block_config[0], width=ps.block_config[1], depth=ps.block_config[3])
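
The two fused_quantize checks above are widened because, once the graph optimiser has already rewritten the Quantize op into an AvgPool, op.type == Op.Quantize no longer matches; only the remembered original type does. A small self-contained sketch (simplified stand-in types, not Vela's) of why both sides of the or are needed:

from dataclasses import dataclass
from enum import Enum, auto


class Op(Enum):
    Quantize = auto()
    AvgPool = auto()


@dataclass
class FakeOp:
    type: Op            # current type, AvgPool after the graph-optimiser rewrite
    original_type: Op   # what the op was before any rewriting


# A pass whose only op used to be a Quantize but is now an AvgPool no-op.
ps_ops = [FakeOp(type=Op.AvgPool, original_type=Op.Quantize)]

old_check = any(op.type == Op.Quantize for op in ps_ops)
new_check = any(op.type == Op.Quantize or op.original_type == Op.Quantize for op in ps_ops)
assert not old_check and new_check  # the old predicate misses the rewritten op, the new one keeps it
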
diff --git a/ethosu/vela/tflite_graph_optimiser.py b/ethosu/vela/tflite_graph_optimiser.py
index ef6b90b..218f499 100644
--- a/ethosu/vela/tflite_graph_optimiser.py
+++ b/ethosu/vela/tflite_graph_optimiser.py
@@ -1685,6 +1685,21 @@ def convert_tanh_sigmoid_to_lut(op: Operation, arch, nng) -> Operation:
     return op
 
 
+def convert_quantize(op: Operation, arch, nng) -> Operation:
+    """Convert Quantize to Avgpool. This conversion only works for int-to-int re-quantization and
+    not to/from floats. Therefore, this rewrite should only run after the supported ops check to
+    avoid rewriting ops that will run on CPU."""
+    if op.type == Op.Quantize:
+        # Create a new AvgPool op and steal its attrs, then reuse the original op with different type
+        avgpool_op = create_avgpool_nop(op.name + "_avgpool")
+        op.type = Op.AvgPool
+        op.attrs = avgpool_op.attrs.copy()
+
+        DebugDatabase.add_optimised(op, op)
+
+    return op
+
+
 def fuse_activation_function_with_prev(op, arch, nng):
     # if op is a no-op: attempts to move the activation function to the preceding op
     if not op.attrs.get("is_nop", False) or op.activation is None:
@@ -2645,6 +2660,7 @@ def tflite_optimise_graph(nng, arch, force_symmetric_int_weights):
         fixup_bias_tensors,
         fixup_asymmetric_weights,
         convert_tanh_sigmoid_to_lut,
+        convert_quantize,
         replace_pad_by_hw_pad,
         fixup_dilation_gt2,
     ]
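
The new pass leans on the existing create_avgpool_nop helper to obtain the attrs of a pass-through average pool. As a rough, assumption-laden sketch of the idea (the attribute names below are illustrative guesses, not necessarily what Vela's helper sets): a 1x1 window with stride 1 and VALID padding copies every element, so the only remaining effect is applying the output quantization.

# Hedged sketch of what a pass-through ("no-op") average pool might configure;
# attribute names are illustrative, not necessarily those used by Vela's
# create_avgpool_nop.
def create_avgpool_nop_sketch(name: str) -> dict:
    return {
        "name": name,
        "type": "AvgPool",
        "attrs": {
            "padding": "VALID",       # no padding, so output shape equals input shape
            "ksize": (1, 1, 1, 1),    # 1x1 window: each output element is one input element
            "strides": (1, 1, 1, 1),  # stride 1 everywhere
            "is_nop": True,           # data passes through; only output quantization is applied
        },
    }


print(create_avgpool_nop_sketch("requant_avgpool"))
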