-rw-r--r--  ethosu/vela/tflite_graph_optimiser.py  | 18 ++++++++++++++----
-rw-r--r--  ethosu/vela/weight_compressor.py       |  3 ++-
2 files changed, 16 insertions(+), 5 deletions(-)
diff --git a/ethosu/vela/tflite_graph_optimiser.py b/ethosu/vela/tflite_graph_optimiser.py
index 0630ef41..6b454e3d 100644
--- a/ethosu/vela/tflite_graph_optimiser.py
+++ b/ethosu/vela/tflite_graph_optimiser.py
@@ -385,7 +385,7 @@ def convert_resizenn_ac_to_depthwise_conv(op, upscale_factor):
# need to append the bias tensor as resize ops only have 2 inputs
assert len(op.inputs) == 2
op.inputs.append(None)
- fixup_bias_tensors(op, None, None)
+ fixup_bias_tensors(op, None, None, DataType.int32)
# finally update the shape in case we've changed the tensor shapes or connections
op.set_ifm_ofm_shapes()
@@ -1324,7 +1324,7 @@ def replace_pad_by_hw_pad(op: Operation, arch, nng):
op.add_input_tensor(weight_tens)
# Add bias tensor, all biases set to 0
op.inputs.append(None)
- fixup_bias_tensors(op, arch, nng)
+ fixup_bias_tensors(op, arch, nng, DataType.int32)
# Add other inputs
op.inputs.extend(other_inputs)
op.rounding_mode = NpuRoundingMode.NATURAL
@@ -1407,12 +1407,22 @@ def convert_pad(op: Operation, arch, nng):
return avgpool_op
-def fixup_bias_tensors(op, arch, nng):
+def fixup_bias_tensors(op, arch, nng, dtype=None):
if op.type.needs_bias() and op.bias is None:
# Op has no bias, add bias tensor filled with zeros
nr_biases = op.inputs[1].shape[-1]
bias_values = [0] * nr_biases
- bias_tensor = create_const_tensor(op.name + "_bias", [nr_biases], DataType.int32, bias_values)
+ # The DataType of the bias tensor can be explicitly provided or deduced from the ifm
+ # DataType. The default is an int32 bias for 8-bit ifms and an int64 bias for int16 ifms.
+ # For int16, the selected bias DataType affects the scaling used when encoding the
+ # scales and biases later. The default matches the reference, which uses reduced
+ # scaling for an int64 bias.
+ # This means that in cases (in the graph optimiser) where DepthwiseConv2DBias is used
+ # to emulate an average pool, an int32 bias should be selected to get full-precision
+ # int16 scaling.
+ if dtype is None:
+ dtype = DataType.int64 if op.ifm.dtype == DataType.int16 else DataType.int32
+ bias_tensor = create_const_tensor(op.name + "_bias", [nr_biases], dtype, bias_values)
op.set_input_tensor(bias_tensor, op.type.info.indices.biases[0])
return op
diff --git a/ethosu/vela/weight_compressor.py b/ethosu/vela/weight_compressor.py
index 78c43511..db225fb6 100644
--- a/ethosu/vela/weight_compressor.py
+++ b/ethosu/vela/weight_compressor.py
@@ -275,7 +275,8 @@ def _prepare_scale_and_bias(arch, tens, rescale_for_faf, explicit_scaling):
quantised_scales = [(int(m), int(s)) for s, m in zip(explicit_scaling.shift, explicit_scaling.multiplier)]
else:
# quantise all of the weight scales into (scale_factor, shift)
- if ifm_dtype == DataType.int16:
+ if ifm_dtype == DataType.int16 and bias_tens.dtype == DataType.int64:
+ # Reference uses reduced scaling for int16 with int64 bias
quantised_scales = [reduced_quantise_scale(scale) for scale in scales]
else:
quantised_scales = [quantise_scale(scale) for scale in scales]
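For illustration, a minimal sketch of the updated branch in _prepare_scale_and_bias follows. The frexp-based quantiser is a hypothetical stand-in for Vela's quantise_scale/reduced_quantise_scale helpers; only the branch condition mirrors the change above.

# Illustrative sketch only: a toy (multiplier, shift) quantiser standing in for
# Vela's quantise_scale/reduced_quantise_scale. The point is the branch that now
# reserves reduced scaling for int16 ifms with an int64 bias tensor.
import math
from typing import List, Tuple

def _quantise(scale: float, mult_bits: int) -> Tuple[int, int]:
    # Hypothetical helper: express scale as multiplier * 2**-shift with a
    # multiplier of roughly mult_bits bits (frexp keeps 0.5 <= m < 1).
    if scale == 0.0:
        return 0, 0
    m, e = math.frexp(scale)
    return int(round(m * (1 << mult_bits))), mult_bits - e

def quantise_all(scales: List[float], ifm_is_int16: bool, bias_is_int64: bool) -> List[Tuple[int, int]]:
    # Reduced (lower-precision) scaling only when both conditions hold; an
    # int16 ifm with an int32 bias keeps full-precision scaling.
    mult_bits = 15 if (ifm_is_int16 and bias_is_int64) else 31
    return [_quantise(s, mult_bits) for s in scales]

print(quantise_all([0.0123], ifm_is_int16=True, bias_is_int64=True))   # reduced precision
print(quantise_all([0.0123], ifm_is_int16=True, bias_is_int64=False))  # full precision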