-rw-r--r--  ethosu/vela/tflite_graph_optimiser.py  | 18 ++++++++++++++----
-rw-r--r--  ethosu/vela/weight_compressor.py       |  3 ++-
2 files changed, 16 insertions(+), 5 deletions(-)
diff --git a/ethosu/vela/tflite_graph_optimiser.py b/ethosu/vela/tflite_graph_optimiser.py
index 0630ef41..6b454e3d 100644
--- a/ethosu/vela/tflite_graph_optimiser.py
+++ b/ethosu/vela/tflite_graph_optimiser.py
@@ -385,7 +385,7 @@ def convert_resizenn_ac_to_depthwise_conv(op, upscale_factor):
# need to append the bias tensor as resize ops only have 2 inputs
assert len(op.inputs) == 2
op.inputs.append(None)
- fixup_bias_tensors(op, None, None)
+ fixup_bias_tensors(op, None, None, DataType.int32)
# finally update the shape in case we've changed the tensor shapes or connections
op.set_ifm_ofm_shapes()
@@ -1324,7 +1324,7 @@ def replace_pad_by_hw_pad(op: Operation, arch, nng):
op.add_input_tensor(weight_tens)
# Add bias tensor, all biases set to 0
op.inputs.append(None)
- fixup_bias_tensors(op, arch, nng)
+ fixup_bias_tensors(op, arch, nng, DataType.int32)
# Add other inputs
op.inputs.extend(other_inputs)
op.rounding_mode = NpuRoundingMode.NATURAL
@@ -1407,12 +1407,22 @@ def convert_pad(op: Operation, arch, nng):
return avgpool_op
-def fixup_bias_tensors(op, arch, nng):
+def fixup_bias_tensors(op, arch, nng, dtype=None):
if op.type.needs_bias() and op.bias is None:
# Op has no bias, add bias tensor filled with zeros
nr_biases = op.inputs[1].shape[-1]
bias_values = [0] * nr_biases
- bias_tensor = create_const_tensor(op.name + "_bias", [nr_biases], DataType.int32, bias_values)
+ # The DataType of the bias tensor can be explicitly provided or deduced from the ifm
+ # DataType. The default is an int32 bias for 8-bit ifms and an int64 bias for int16 ifms.
+ # For int16, the selected bias DataType affects the scaling used when encoding the
+ # scales and biases later. The default matches the reference, which uses reduced
+ # scaling for an int64 bias.
+ # This means that in cases (in the graph optimiser) where DepthwiseConv2DBias is used
+ # to emulate an average pool, an int32 bias should be selected to get full-precision
+ # int16 scaling.
+ if dtype is None:
+ dtype = DataType.int64 if op.ifm.dtype == DataType.int16 else DataType.int32
+ bias_tensor = create_const_tensor(op.name + "_bias", [nr_biases], dtype, bias_values)
op.set_input_tensor(bias_tensor, op.type.info.indices.biases[0])
return op
diff --git a/ethosu/vela/weight_compressor.py b/ethosu/vela/weight_compressor.py
index 78c43511..db225fb6 100644
--- a/ethosu/vela/weight_compressor.py
+++ b/ethosu/vela/weight_compressor.py
@@ -275,7 +275,8 @@ def _prepare_scale_and_bias(arch, tens, rescale_for_faf, explicit_scaling):
quantised_scales = [(int(m), int(s)) for s, m in zip(explicit_scaling.shift, explicit_scaling.multiplier)]
else:
# quantise all of the weight scales into (scale_factor, shift)
- if ifm_dtype == DataType.int16:
+ if ifm_dtype == DataType.int16 and bias_tens.dtype == DataType.int64:
+ # Reference uses reduced scaling for int16 with int64 bias
quantised_scales = [reduced_quantise_scale(scale) for scale in scales]
else:
quantised_scales = [quantise_scale(scale) for scale in scales]
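For illustration, a minimal sketch of the updated branch in _prepare_scale_and_bias follows. The frexp-based quantiser is a hypothetical stand-in for Vela's quantise_scale/reduced_quantise_scale helpers; only the branch condition mirrors the change above.

# Illustrative sketch only: a toy (multiplier, shift) quantiser standing in for
# Vela's quantise_scale/reduced_quantise_scale. The point is the branch that now
# reserves reduced scaling for int16 ifms with an int64 bias tensor.
import math
from typing import List, Tuple

def _quantise(scale: float, mult_bits: int) -> Tuple[int, int]:
    # Hypothetical helper: express scale as multiplier * 2**-shift with a
    # multiplier of roughly mult_bits bits (frexp keeps 0.5 <= m < 1).
    if scale == 0.0:
        return 0, 0
    m, e = math.frexp(scale)
    return int(round(m * (1 << mult_bits))), mult_bits - e

def quantise_all(scales: List[float], ifm_is_int16: bool, bias_is_int64: bool) -> List[Tuple[int, int]]:
    # Reduced (lower-precision) scaling only when both conditions hold; an
    # int16 ifm with an int32 bias keeps full-precision scaling.
    mult_bits = 15 if (ifm_is_int16 and bias_is_int64) else 31
    return [_quantise(s, mult_bits) for s in scales]

print(quantise_all([0.0123], ifm_is_int16=True, bias_is_int64=True))   # reduced precision
print(quantise_all([0.0123], ifm_is_int16=True, bias_is_int64=False))  # full precision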