aboutsummaryrefslogtreecommitdiff
path: root/ethosu/vela/tflite_graph_optimiser.py
diff options
context:
space:
mode:
Diffstat (limited to 'ethosu/vela/tflite_graph_optimiser.py')
-rw-r--r-- ethosu/vela/tflite_graph_optimiser.py 44
1 files changed, 29 insertions, 15 deletions
diff --git a/ethosu/vela/tflite_graph_optimiser.py b/ethosu/vela/tflite_graph_optimiser.py
index f68e0cf9..daaca8dd 100644
--- a/ethosu/vela/tflite_graph_optimiser.py
+++ b/ethosu/vela/tflite_graph_optimiser.py
@@ -27,7 +27,6 @@ import numpy as np
from . import fp_math
from . import rewrite_graph
from . import scaling
-from .api import NpuRoundingMode
from .data_type import BaseType
from .data_type import DataType
from .debug_database import DebugDatabase
@@ -56,6 +55,7 @@ from .operation import NpuBlockType
from .operation import Op
from .operation import Operation
from .operation import Padding
+from .operation import RoundingMode
from .operation_util import create_add_nop
from .operation_util import create_avgpool_nop
from .operation_util import create_cast_op
@@ -295,7 +295,7 @@ def convert_resize_1x1_to_add(op):
return op
-# Convert ResizeNearestNeightbor with align corners to a depthwise convolution. The IFM will already have been upscaled
+# Convert ResizeNearestNeighbor with align corners to a depthwise convolution. The IFM will already have been upscaled
# apart from the final x2 scaling which will be done as part of this operation. The kernel contains a single coefficient
# to select the appropriate nearest neighbor value
def convert_resizenn_ac_to_depthwise_conv(op, upscale_factor):
@@ -314,7 +314,7 @@ def convert_resizenn_ac_to_depthwise_conv(op, upscale_factor):
"dilation": (1, 1, 1, 1),
}
- # change resizebilinear to depthwise
+ # change ResizeNearestNeighbor to Depthwise
op.type = Op.DepthwiseConv2DBias
op.attrs.update(dw_op_attrs)
op.set_input_tensor(ifm, 0) # ifm tensor index
@@ -695,12 +695,8 @@ def convert_resizebilinear_to_depthwise_convolutions(op, half_pixel_centers=True
dw_conv.write_shape = Shape4D(n, h, w, c)
dw_conv.write_offset = Shape4D(0, 0, 0, 0)
- # Set the output rounding mode. Resize bilinear requires rounding away from zero. Therefore, we need to
- # adjust the accumulated value by a "small" amount before applying natural rounding. The "small" amount
- # should be big enough to cause a x.5 to be rounded correctly but small enough not to cause smaller
- # values to be incorrectly rounded
- ofm.quantization.next_after = True
- dw_conv.rounding_mode = NpuRoundingMode.NATURAL
+ # Resize bilinear requires rounding away from zero
+ dw_conv.rounding_mode = RoundingMode.AwayZero
# Double height and width stride to write the output of each of the four depthwise convolutions below
# interleaved with each other when combined with OFM tile base offsets.
@@ -1730,12 +1726,30 @@ def replace_pad_by_hw_pad(op: Operation, arch, nng):
op.inputs = []
op.add_input_tensor(ifm)
op.add_input_tensor(weight_tens)
- # Add bias tensor, all biases set to 0
- op.inputs.append(None)
- fixup_bias_tensors(op, arch, nng, DataType.int32)
+
+ if op.ifm.dtype == DataType.uint8:
+ op.rounding_mode = RoundingMode.HalfUp
+
+ # Add bias tensor, all biases set to 0
+ op.inputs.append(None)
+ fixup_bias_tensors(op, arch, nng, DataType.int32)
+
+ else:
+ op.rounding_mode = RoundingMode.AwayZero
+
+ # The DepthwiseConv needs to be performed with the IFM zero point set appropriately so that the correct
+ # pad values are used. However, in order to use the rounding away from zero mode the zero point needs to
+ # have been removed so that the zero point is at zero. This is done by adding a kernel sized amount of
+ # the zero point as a bias. The datatype of the bias needs to be set to int32, even for an int16 IFM,
+ # because this will cause full precision scaling to be used (see weight compression). Finally, the OFM
+ # zero point will need forcing to zero (as it has already been removed)
+ nr_biases = op.inputs[1].shape[-1]
+ bias_values = [op.ifm.quantization.zero_point * k_h * k_w] * nr_biases
+ bias_tensor = create_const_tensor(op.name + "_bias", [nr_biases], DataType.int32, bias_values)
+ op.add_input_tensor(bias_tensor)
+
# Add other inputs
op.inputs.extend(other_inputs)
- op.rounding_mode = NpuRoundingMode.NATURAL
# Bypass the PAD operator
op.set_input_tensor(pad_op.ifm, 0)
@@ -1946,7 +1960,7 @@ def convert_mean_to_depthwise_conv(op, arch, nng):
# Set weight shape to [H,W,C,B]
weight_shape = [h, w, shape[3], shape[0]]
- op.rounding_mode = NpuRoundingMode.NATURAL
+ op.rounding_mode = RoundingMode.HalfUp
identity_quant = QuantizationParameters(scale_f32=1.0, zero_point=0)
op.forced_input_quantization = identity_quant
op.forced_output_quantization = identity_quant
@@ -2016,7 +2030,7 @@ def convert_mean_to_depthwise_conv(op, arch, nng):
mul_op.set_ifm_ofm_shapes()
# Reference using TFL rounding for the multiply
- mul_op.rounding_mode = NpuRoundingMode.TFL
+ mul_op.rounding_mode = RoundingMode.TFLite
# Need to use explicit scaling to get the wanted shift
mul_op.explicit_scaling = ExplicitScaling(False, [output_shift_vela], [1])