about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- SUPPORTED_OPS.md 4
-rw-r--r-- ethosu/vela/tensor.py 14
-rw-r--r-- ethosu/vela/tflite_graph_optimiser.py 11
-rw-r--r-- ethosu/vela/tflite_reader.py 1
-rw-r--r-- ethosu/vela/tflite_writer.py 2
5 files changed, 30 insertions, 2 deletions
diff --git a/SUPPORTED_OPS.md b/SUPPORTED_OPS.md
index f96bd4af..cfb3a5da 100644
--- a/SUPPORTED_OPS.md
+++ b/SUPPORTED_OPS.md
@@ -1,7 +1,7 @@
# Supported Ops
This file was automatically generated by Vela using the `--supported-ops-report` parameter.
-Vela version: `3.1.1.dev13+gf54e94a.d20210914`
+Vela version: `3.1.1.dev32+gdc3b1f9`
This file complies with
[**Gitiles Markdown syntax**](https://github.com/google/gitiles/blob/master/Documentation/markdown.md)
@@ -210,6 +210,7 @@ This is a list of constraints that the MEAN operator must satisfy in order to be
- IFM must be int8 or uint8
- Input tensor must be at least 2D
- Axis indices must correspond to height and width axes
+- IFM Tensor batch size must be 1
- Product of height and width can be at most 65536
- Product of height and width can be at most 4096 when IFM and OFM have different scale or zero point,
or keep_dims is True
@@ -265,6 +266,7 @@ This is a list of constraints that the RESIZE_BILINEAR operator must satisfy in
IFM must match OFM
OFM W and H must be 2x IFM -1, if align_corners is True
OFM W and H must be 2x IFM, if align_corners is False
+- half_pixel_centers are not supported
### TFLite SOFTMAX Constraints
diff --git a/ethosu/vela/tensor.py b/ethosu/vela/tensor.py
index d62ebc8e..8c5e277a 100644
--- a/ethosu/vela/tensor.py
+++ b/ethosu/vela/tensor.py
@@ -209,7 +209,17 @@ def create_equivalence_id(key) -> UUID:
class QuantizationParameters:
- __slots__ = "min", "max", "num_bits", "narrow_range", "scale_f32", "zero_point", "quant_min", "quant_max"
+ __slots__ = (
+ "min",
+ "max",
+ "num_bits",
+ "narrow_range",
+ "scale_f32",
+ "zero_point",
+ "quant_min",
+ "quant_max",
+ "quant_dim",
+ )
def __init__(
self,
@@ -228,6 +238,7 @@ class QuantizationParameters:
self.zero_point: Union[int, np.ndarray, None] = None
self.quant_min: Optional[float] = None
self.quant_max: Optional[float] = None
+ self.quant_dim: Optional[int] = None
def __str__(self):
return "<nng.QuantizationParameters min=%s max=%s, num_bits=%s, scale=%s, zero_point=%s>" % (
@@ -252,6 +263,7 @@ class QuantizationParameters:
res.zero_point = self.zero_point
res.quant_min = self.quant_min
res.quant_max = self.quant_max
+ res.quant_dim = self.quant_dim
return res
def dequantize(self, values) -> np.ndarray:
diff --git a/ethosu/vela/tflite_graph_optimiser.py b/ethosu/vela/tflite_graph_optimiser.py
index 2469a700..e01433d0 100644
--- a/ethosu/vela/tflite_graph_optimiser.py
+++ b/ethosu/vela/tflite_graph_optimiser.py
@@ -1152,6 +1152,16 @@ def fixup_bias_tensors(op, arch, nng):
return op
+def fixup_asymmetric_weights(op, arch, nng):  # Force weight zero points to 0 for int8 conv ops on the NPU; arch and nng are not used
+ if op.run_on_npu and (op.type.is_conv2d_op() or op.type.is_depthwise_conv2d_op()):  # only NPU-placed conv2d / depthwise conv2d ops
+ if op.ifm.dtype == DataType.int8:  # only when the IFM is int8
+ if not np.all(op.weights.quantization.zero_point == 0):  # true when any weight zero point is non-zero
+ print(f"Warning: {op.type} '{op.name}' has asymmetric weights, zero points have been adjusted.")  # warn that the weights' quantization is being changed
+ op.weights.quantization.zero_point *= 0  # zero every zero point; NOTE(review): raises TypeError if zero_point is None - confirm it is always set here
+
+ return op  # op is returned in every case, adjusted or not
+
+
def convert_mean_to_depthwise_conv_or_avgpool(op, arch, nng):
if op.type == Op.Mean and op.run_on_npu:
keep_dims = op.attrs.get("keep_dims", False)
@@ -1405,6 +1415,7 @@ def tflite_optimise_graph(nng, arch):
reorder_depthwise_weights,
fixup_resizebilinear,
fixup_bias_tensors,
+ fixup_asymmetric_weights,
convert_mul_max_to_abs_or_lrelu,
convert_lrelu,
convert_tanh_sigmoid_to_lut,
diff --git a/ethosu/vela/tflite_reader.py b/ethosu/vela/tflite_reader.py
index fbee7930..8dc5efe1 100644
--- a/ethosu/vela/tflite_reader.py
+++ b/ethosu/vela/tflite_reader.py
@@ -88,6 +88,7 @@ class TFLiteSubgraph:
tens.quantization.max = self.len1_array_to_scalar(quant.MaxAsNumpy())
tens.quantization.scale_f32 = self.len1_array_to_scalar(quant.ScaleAsNumpy())
tens.quantization.zero_point = self.len1_array_to_scalar(quant.ZeroPointAsNumpy())
+ tens.quantization.quant_dim = quant.QuantizedDimension()
if dtype == DataType.uint8:
tens.quantization.quant_min = 0
diff --git a/ethosu/vela/tflite_writer.py b/ethosu/vela/tflite_writer.py
index e6dd85b5..d134c07c 100644
--- a/ethosu/vela/tflite_writer.py
+++ b/ethosu/vela/tflite_writer.py
@@ -236,6 +236,8 @@ class TFLiteSerialiser:
QuantizationParameters.QuantizationParametersAddScale(builder, scale)
if zero_point is not None:
QuantizationParameters.QuantizationParametersAddZeroPoint(builder, zero_point)
+ if quant.quant_dim is not None:
+ QuantizationParameters.QuantizationParametersAddQuantizedDimension(builder, quant.quant_dim)
qp = QuantizationParameters.QuantizationParametersEnd(builder)
return qp