From cc8569fcd243d7c96a20e0f531f6f97a90df83f7 Mon Sep 17 00:00:00 2001 From: Fredrik Svedberg Date: Mon, 1 Nov 2021 14:25:29 +0100 Subject: MLBEDSW-5209 Vela: output diff depthwise with non-zero zero points Fixed by adjusting zero points for ops with int8 IFM and asymmetric weights since the reference does not support asymmetric weights for int8 IFM and ignores the zero points. Signed-off-by: Fredrik Svedberg Change-Id: I2a206a01a471a53aa864a6a3616aa23d2a5a23c8 --- SUPPORTED_OPS.md | 4 +++- ethosu/vela/tensor.py | 14 +++++++++++++- ethosu/vela/tflite_graph_optimiser.py | 11 +++++++++++ ethosu/vela/tflite_reader.py | 1 + ethosu/vela/tflite_writer.py | 2 ++ 5 files changed, 30 insertions(+), 2 deletions(-) diff --git a/SUPPORTED_OPS.md b/SUPPORTED_OPS.md index f96bd4af..cfb3a5da 100644 --- a/SUPPORTED_OPS.md +++ b/SUPPORTED_OPS.md @@ -1,7 +1,7 @@ # Supported Ops This file was automatically generated by Vela using the `--supported-ops-report` parameter. -Vela version: `3.1.1.dev13+gf54e94a.d20210914` +Vela version: `3.1.1.dev32+gdc3b1f9` This file complies with [**Gitiles Markdown syntax**](https://github.com/google/gitiles/blob/master/Documentation/markdown.md) @@ -210,6 +210,7 @@ This is a list of constraints that the MEAN operator must satisfy in order to be - IFM must be int8 or uint8 - Input tensor must be at least 2D - Axis indices must correspond to height and width axes +- IFM Tensor batch size must be 1 - Product of height and width can be at most 65536 - Product of height and width can be at most 4096 when IFM and OFM have different scale or zero point, or keep_dims is True @@ -265,6 +266,7 @@ This is a list of constraints that the RESIZE_BILINEAR operator must satisfy in IFM must match OFM OFM W and H must be 2x IFM -1, if align_corners is True OFM W and H must be 2x IFM, if align_corners is False +- half_pixel_centers are not supported ### TFLite SOFTMAX Constraints diff --git a/ethosu/vela/tensor.py b/ethosu/vela/tensor.py index d62ebc8e..8c5e277a 100644 --- a/ethosu/vela/tensor.py +++ b/ethosu/vela/tensor.py @@ -209,7 +209,17 @@ def create_equivalence_id(key) -> UUID: class QuantizationParameters: - __slots__ = "min", "max", "num_bits", "narrow_range", "scale_f32", "zero_point", "quant_min", "quant_max" + __slots__ = ( + "min", + "max", + "num_bits", + "narrow_range", + "scale_f32", + "zero_point", + "quant_min", + "quant_max", + "quant_dim", + ) def __init__( self, @@ -228,6 +238,7 @@ class QuantizationParameters: self.zero_point: Union[int, np.ndarray, None] = None self.quant_min: Optional[float] = None self.quant_max: Optional[float] = None + self.quant_dim: Optional[int] = None def __str__(self): return "" % ( @@ -252,6 +263,7 @@ class QuantizationParameters: res.zero_point = self.zero_point res.quant_min = self.quant_min res.quant_max = self.quant_max + res.quant_dim = self.quant_dim return res def dequantize(self, values) -> np.ndarray: diff --git a/ethosu/vela/tflite_graph_optimiser.py b/ethosu/vela/tflite_graph_optimiser.py index 2469a700..e01433d0 100644 --- a/ethosu/vela/tflite_graph_optimiser.py +++ b/ethosu/vela/tflite_graph_optimiser.py @@ -1152,6 +1152,16 @@ def fixup_bias_tensors(op, arch, nng): return op +def fixup_asymmetric_weights(op, arch, nng): + if op.run_on_npu and (op.type.is_conv2d_op() or op.type.is_depthwise_conv2d_op()): + if op.ifm.dtype == DataType.int8: + if not np.all(op.weights.quantization.zero_point == 0): + print(f"Warning: {op.type} '{op.name}' has asymmetric weights, zero points have been adjusted.") + op.weights.quantization.zero_point *= 0 + + return op + + def convert_mean_to_depthwise_conv_or_avgpool(op, arch, nng): if op.type == Op.Mean and op.run_on_npu: keep_dims = op.attrs.get("keep_dims", False) @@ -1405,6 +1415,7 @@ def tflite_optimise_graph(nng, arch): reorder_depthwise_weights, fixup_resizebilinear, fixup_bias_tensors, + fixup_asymmetric_weights, convert_mul_max_to_abs_or_lrelu, convert_lrelu, convert_tanh_sigmoid_to_lut, diff --git a/ethosu/vela/tflite_reader.py b/ethosu/vela/tflite_reader.py index fbee7930..8dc5efe1 100644 --- a/ethosu/vela/tflite_reader.py +++ b/ethosu/vela/tflite_reader.py @@ -88,6 +88,7 @@ class TFLiteSubgraph: tens.quantization.max = self.len1_array_to_scalar(quant.MaxAsNumpy()) tens.quantization.scale_f32 = self.len1_array_to_scalar(quant.ScaleAsNumpy()) tens.quantization.zero_point = self.len1_array_to_scalar(quant.ZeroPointAsNumpy()) + tens.quantization.quant_dim = quant.QuantizedDimension() if dtype == DataType.uint8: tens.quantization.quant_min = 0 diff --git a/ethosu/vela/tflite_writer.py b/ethosu/vela/tflite_writer.py index e6dd85b5..d134c07c 100644 --- a/ethosu/vela/tflite_writer.py +++ b/ethosu/vela/tflite_writer.py @@ -236,6 +236,8 @@ class TFLiteSerialiser: QuantizationParameters.QuantizationParametersAddScale(builder, scale) if zero_point is not None: QuantizationParameters.QuantizationParametersAddZeroPoint(builder, zero_point) + if quant.quant_dim is not None: + QuantizationParameters.QuantizationParametersAddQuantizedDimension(builder, quant.quant_dim) qp = QuantizationParameters.QuantizationParametersEnd(builder) return qp -- cgit v1.2.1