5 files changed, 30 insertions, 2 deletions
diff --git a/SUPPORTED_OPS.md b/SUPPORTED_OPS.md
index f96bd4af..cfb3a5da 100644
--- a/SUPPORTED_OPS.md
+++ b/SUPPORTED_OPS.md
@@ -1,7 +1,7 @@
 # Supported Ops
 
 This file was automatically generated by Vela using the `--supported-ops-report` parameter.  
-Vela version: `3.1.1.dev13+gf54e94a.d20210914`
+Vela version: `3.1.1.dev32+gdc3b1f9`
 
 This file complies with
 [**Gitiles Markdown syntax**](https://github.com/google/gitiles/blob/master/Documentation/markdown.md)
@@ -210,6 +210,7 @@ This is a list of constraints that the MEAN operator must satisfy in order to be
 - IFM must be int8 or uint8
 - Input tensor must be at least 2D
 - Axis indices must correspond to height and width axes
+- IFM Tensor batch size must be 1
 - Product of height and width can be at most 65536
 - Product of height and width can be at most 4096 when IFM and OFM have different scale or zero point,  
         or keep_dims is True
@@ -265,6 +266,7 @@ This is a list of constraints that the RESIZE_BILINEAR operator must satisfy in
         IFM must match OFM  
         OFM W and H must be 2x IFM -1, if align_corners is True  
         OFM W and H must be 2x IFM, if align_corners is False
+- half_pixel_centers are not supported
 
 ### TFLite SOFTMAX Constraints
 
diff --git a/ethosu/vela/tensor.py b/ethosu/vela/tensor.py
index d62ebc8e..8c5e277a 100644
--- a/ethosu/vela/tensor.py
+++ b/ethosu/vela/tensor.py
@@ -209,7 +209,17 @@ def create_equivalence_id(key) -> UUID:
 
 
 class QuantizationParameters:
-    __slots__ = "min", "max", "num_bits", "narrow_range", "scale_f32", "zero_point", "quant_min", "quant_max"
+    __slots__ = (
+        "min",
+        "max",
+        "num_bits",
+        "narrow_range",
+        "scale_f32",
+        "zero_point",
+        "quant_min",
+        "quant_max",
+        "quant_dim",
+    )
 
     def __init__(
         self,
@@ -228,6 +238,7 @@ class QuantizationParameters:
         self.zero_point: Union[int, np.ndarray, None] = None
         self.quant_min: Optional[float] = None
         self.quant_max: Optional[float] = None
+        self.quant_dim: Optional[int] = None
 
     def __str__(self):
         return "<nng.QuantizationParameters min=%s max=%s, num_bits=%s, scale=%s, zero_point=%s>" % (
@@ -252,6 +263,7 @@ class QuantizationParameters:
         res.zero_point = self.zero_point
         res.quant_min = self.quant_min
         res.quant_max = self.quant_max
+        res.quant_dim = self.quant_dim
         return res
 
     def dequantize(self, values) -> np.ndarray:
diff --git a/ethosu/vela/tflite_graph_optimiser.py b/ethosu/vela/tflite_graph_optimiser.py
index 2469a700..e01433d0 100644
--- a/ethosu/vela/tflite_graph_optimiser.py
+++ b/ethosu/vela/tflite_graph_optimiser.py
@@ -1152,6 +1152,16 @@ def fixup_bias_tensors(op, arch, nng):
     return op
 
 
+def fixup_asymmetric_weights(op, arch, nng):
+    # For NPU conv2d/depthwise conv2d ops with an int8 IFM: warn about non-zero weight zero points and zero them.
+    is_npu_conv = op.run_on_npu and (op.type.is_conv2d_op() or op.type.is_depthwise_conv2d_op())
+    if is_npu_conv and op.ifm.dtype == DataType.int8 and not np.all(op.weights.quantization.zero_point == 0):
+        print(f"Warning: {op.type} '{op.name}' has asymmetric weights, zero points have been adjusted.")
+        op.weights.quantization.zero_point *= 0
+
+    return op
+
+
 def convert_mean_to_depthwise_conv_or_avgpool(op, arch, nng):
     if op.type == Op.Mean and op.run_on_npu:
         keep_dims = op.attrs.get("keep_dims", False)
@@ -1405,6 +1415,7 @@ def tflite_optimise_graph(nng, arch):
         reorder_depthwise_weights,
         fixup_resizebilinear,
         fixup_bias_tensors,
+        fixup_asymmetric_weights,
         convert_mul_max_to_abs_or_lrelu,
         convert_lrelu,
         convert_tanh_sigmoid_to_lut,
diff --git a/ethosu/vela/tflite_reader.py b/ethosu/vela/tflite_reader.py
index fbee7930..8dc5efe1 100644
--- a/ethosu/vela/tflite_reader.py
+++ b/ethosu/vela/tflite_reader.py
@@ -88,6 +88,7 @@ class TFLiteSubgraph:
             tens.quantization.max = self.len1_array_to_scalar(quant.MaxAsNumpy())
             tens.quantization.scale_f32 = self.len1_array_to_scalar(quant.ScaleAsNumpy())
             tens.quantization.zero_point = self.len1_array_to_scalar(quant.ZeroPointAsNumpy())
+            tens.quantization.quant_dim = quant.QuantizedDimension()
 
         if dtype == DataType.uint8:
             tens.quantization.quant_min = 0
diff --git a/ethosu/vela/tflite_writer.py b/ethosu/vela/tflite_writer.py
index e6dd85b5..d134c07c 100644
--- a/ethosu/vela/tflite_writer.py
+++ b/ethosu/vela/tflite_writer.py
@@ -236,6 +236,8 @@ class TFLiteSerialiser:
                 QuantizationParameters.QuantizationParametersAddScale(builder, scale)
             if zero_point is not None:
                 QuantizationParameters.QuantizationParametersAddZeroPoint(builder, zero_point)
+            if quant.quant_dim is not None:
+                QuantizationParameters.QuantizationParametersAddQuantizedDimension(builder, quant.quant_dim)
             qp = QuantizationParameters.QuantizationParametersEnd(builder)
 
         return qp