diff options
-rw-r--r-- | SUPPORTED_OPS.md | 4 | ||||
-rw-r--r-- | ethosu/vela/tensor.py | 14 | ||||
-rw-r--r-- | ethosu/vela/tflite_graph_optimiser.py | 11 | ||||
-rw-r--r-- | ethosu/vela/tflite_reader.py | 1 | ||||
-rw-r--r-- | ethosu/vela/tflite_writer.py | 2 |
5 files changed, 30 insertions, 2 deletions
diff --git a/SUPPORTED_OPS.md b/SUPPORTED_OPS.md index f96bd4af..cfb3a5da 100644 --- a/SUPPORTED_OPS.md +++ b/SUPPORTED_OPS.md @@ -1,7 +1,7 @@ # Supported Ops This file was automatically generated by Vela using the `--supported-ops-report` parameter. -Vela version: `3.1.1.dev13+gf54e94a.d20210914` +Vela version: `3.1.1.dev32+gdc3b1f9` This file complies with [**Gitiles Markdown syntax**](https://github.com/google/gitiles/blob/master/Documentation/markdown.md) @@ -210,6 +210,7 @@ This is a list of constraints that the MEAN operator must satisfy in order to be - IFM must be int8 or uint8 - Input tensor must be at least 2D - Axis indices must correspond to height and width axes +- IFM Tensor batch size must be 1 - Product of height and width can be at most 65536 - Product of height and width can be at most 4096 when IFM and OFM have different scale or zero point, or keep_dims is True @@ -265,6 +266,7 @@ This is a list of constraints that the RESIZE_BILINEAR operator must satisfy in IFM must match OFM OFM W and H must be 2x IFM -1, if align_corners is True OFM W and H must be 2x IFM, if align_corners is False +- half_pixel_centers are not supported ### TFLite SOFTMAX Constraints diff --git a/ethosu/vela/tensor.py b/ethosu/vela/tensor.py index d62ebc8e..8c5e277a 100644 --- a/ethosu/vela/tensor.py +++ b/ethosu/vela/tensor.py @@ -209,7 +209,17 @@ def create_equivalence_id(key) -> UUID: class QuantizationParameters: - __slots__ = "min", "max", "num_bits", "narrow_range", "scale_f32", "zero_point", "quant_min", "quant_max" + __slots__ = ( + "min", + "max", + "num_bits", + "narrow_range", + "scale_f32", + "zero_point", + "quant_min", + "quant_max", + "quant_dim", + ) def __init__( self, @@ -228,6 +238,7 @@ class QuantizationParameters: self.zero_point: Union[int, np.ndarray, None] = None self.quant_min: Optional[float] = None self.quant_max: Optional[float] = None + self.quant_dim: Optional[int] = None def __str__(self): return "<nng.QuantizationParameters min=%s max=%s, num_bits=%s, scale=%s, zero_point=%s>" % ( @@ -252,6 +263,7 @@ class QuantizationParameters: res.zero_point = self.zero_point res.quant_min = self.quant_min res.quant_max = self.quant_max + res.quant_dim = self.quant_dim return res def dequantize(self, values) -> np.ndarray: diff --git a/ethosu/vela/tflite_graph_optimiser.py b/ethosu/vela/tflite_graph_optimiser.py index 2469a700..e01433d0 100644 --- a/ethosu/vela/tflite_graph_optimiser.py +++ b/ethosu/vela/tflite_graph_optimiser.py @@ -1152,6 +1152,16 @@ def fixup_bias_tensors(op, arch, nng): return op +def fixup_asymmetric_weights(op, arch, nng): + if op.run_on_npu and (op.type.is_conv2d_op() or op.type.is_depthwise_conv2d_op()): + if op.ifm.dtype == DataType.int8: + if not np.all(op.weights.quantization.zero_point == 0): + print(f"Warning: {op.type} '{op.name}' has asymmetric weights, zero points have been adjusted.") + op.weights.quantization.zero_point *= 0 + + return op + + def convert_mean_to_depthwise_conv_or_avgpool(op, arch, nng): if op.type == Op.Mean and op.run_on_npu: keep_dims = op.attrs.get("keep_dims", False) @@ -1405,6 +1415,7 @@ def tflite_optimise_graph(nng, arch): reorder_depthwise_weights, fixup_resizebilinear, fixup_bias_tensors, + fixup_asymmetric_weights, convert_mul_max_to_abs_or_lrelu, convert_lrelu, convert_tanh_sigmoid_to_lut, diff --git a/ethosu/vela/tflite_reader.py b/ethosu/vela/tflite_reader.py index fbee7930..8dc5efe1 100644 --- a/ethosu/vela/tflite_reader.py +++ b/ethosu/vela/tflite_reader.py @@ -88,6 +88,7 @@ class TFLiteSubgraph: tens.quantization.max = self.len1_array_to_scalar(quant.MaxAsNumpy()) tens.quantization.scale_f32 = self.len1_array_to_scalar(quant.ScaleAsNumpy()) tens.quantization.zero_point = self.len1_array_to_scalar(quant.ZeroPointAsNumpy()) + tens.quantization.quant_dim = quant.QuantizedDimension() if dtype == DataType.uint8: tens.quantization.quant_min = 0 diff --git a/ethosu/vela/tflite_writer.py b/ethosu/vela/tflite_writer.py index e6dd85b5..d134c07c 100644 --- a/ethosu/vela/tflite_writer.py +++ b/ethosu/vela/tflite_writer.py @@ -236,6 +236,8 @@ class TFLiteSerialiser: QuantizationParameters.QuantizationParametersAddScale(builder, scale) if zero_point is not None: QuantizationParameters.QuantizationParametersAddZeroPoint(builder, zero_point) + if quant.quant_dim is not None: + QuantizationParameters.QuantizationParametersAddQuantizedDimension(builder, quant.quant_dim) qp = QuantizationParameters.QuantizationParametersEnd(builder) return qp |