5 files changed, 34 insertions, 19 deletions
diff --git a/SUPPORTED_OPS.md b/SUPPORTED_OPS.md
index 83429b7a..d16d5f8e 100644
--- a/SUPPORTED_OPS.md
+++ b/SUPPORTED_OPS.md
@@ -1,7 +1,7 @@
 # Supported Ops
 
 This file was automatically generated by Vela using the `--supported-ops-report` parameter.  
-Vela version: `3.4.0rc3.dev1+g5e0ae55`
+Vela version: `3.4.1.dev3+g5c30971e`
 
 This file complies with
 [**Gitiles Markdown syntax**](https://github.com/google/gitiles/blob/master/Documentation/markdown.md)
@@ -42,6 +42,7 @@ Please check the supported operator list for your chosen runtime for further inf
 | RELU_N1_TO_1 | [Generic](#tflite-generic-constraints) |
 | RESHAPE | [Generic](#tflite-generic-constraints), [Specific](#tflite-reshape-constraints) |
 | RESIZE_BILINEAR | [Generic](#tflite-generic-constraints), [Specific](#tflite-resize_bilinear-constraints) |
+| SHAPE | [Generic](#tflite-generic-constraints) |
 | SLICE | [Generic](#tflite-generic-constraints) |
 | SOFTMAX | [Generic](#tflite-generic-constraints), [Specific](#tflite-softmax-constraints) |
 | SPLIT | [Generic](#tflite-generic-constraints) |
@@ -61,14 +62,14 @@ This is a list of constraints most NPU operators must satisfy in order to be sch
 - Input(s) and Output tensors must not be dynamic - [Quantize]
 - Input(s) and Output tensors must have a defined shape 
 - Output tensors cannot be scalar - [Quantize]
-- Scalar Input tensors are only valid for op type: ADD, EXPAND_DIMS, MAXIMUM, MEAN, MINIMUM, MUL, SPLIT, SPLIT_V, SUB - [Quantize]
+- Scalar Input tensors are only valid for op type: ADD, EXPAND_DIMS, MAXIMUM, MEAN, MINIMUM, MUL, QUANTIZE, SPLIT, SPLIT_V, SUB 
 - Input(s) and Output tensors must not be greater than 4D 
 - Input(s), Output and Weight tensors must have quantization parameters - [Shape]
-- Input(s), Output and Weight tensors with quantization scales must be finite - [Shape]
-- Input and Output tensors must have quantization scales that fit within float32 precision - [Shape]
+- Input(s), Output and Weight tensors with quantization scales must be finite 
+- Input and Output tensors must have quantization scales that fit within float32 precision 
 - Constant tensors should not have NoneType-values 
 - Tensors must be of type: int16, int32, int8, uint8
-- Tensors which are int32 are only valid when op type is: ADD, MUL, SUB
+- Tensors which are int32 are only valid when op type is: ADD, MUL, SHAPE, SUB
 - Tensor dimensions must be in the range [1, 65535]
 - Per-axis quantization is only supported for the following op types: CONV_2D, DEPTHWISE_CONV_2D, TRANSPOSE_CONV
 - The fused activation function (if present) must be one of type: LOGISTIC, RELU, RELU6, RELU_N1_TO_1, TANH
diff --git a/ethosu/vela/operation.py b/ethosu/vela/operation.py
index 5ed18621..db1c6f18 100644
--- a/ethosu/vela/operation.py
+++ b/ethosu/vela/operation.py
@@ -834,7 +834,16 @@ class Operation:
         self.ifm_shapes = []
         self.ofm_shapes = []
 
-        ifm_tensor, ifm2_tensor, weight_tensor, ofm_tensor = self.get_ifm_ifm2_weights_ofm()
+        ifm_tensor, ifm2_tensor, ofm_tensor = self.get_ifm_ifm2_ofm()
+
+        if self.type == Op.Reshape:
+            # Set ofm shape from the constant new-shape tensor (second input), when present
+            if len(self.inputs) > 1 and self.inputs[1].values is not None:
+                ofm_tensor.shape = self.inputs[1].values.flatten().tolist()
+                ofm_elements = ofm_tensor.elements()
+                # Resolve the stretch dimension (-1) from the ifm element count
+                if ofm_elements < 0:
+                    ofm_tensor.shape[ofm_tensor.shape.index(-1)] = int(ifm_tensor.elements() / abs(ofm_elements))
 
         # set all shapes to op, as 4D
         if self.type == Op.FullyConnected:
@@ -847,7 +856,7 @@ class Operation:
                 self.ofm_shapes.append(Shape4D([self.ofm.shape[0], 1, 1, self.ofm.shape[1]]))
             else:
                 self.ofm_shapes.append(Shape4D(ofm_tensor.get_full_shape()))
-        if self.type == Op.Softmax:
+        elif self.type == Op.Softmax:
             self.ifm_shapes.append(Shape4D(ifm_tensor.get_full_shape()))
             self.ofm_shapes.append(Shape4D(ofm_tensor.get_full_shape()))
         elif self.type.is_split_op() or self.type.is_concat_op():
diff --git a/ethosu/vela/tflite_graph_optimiser.py b/ethosu/vela/tflite_graph_optimiser.py
index f2a8c803..b1a56605 100644
--- a/ethosu/vela/tflite_graph_optimiser.py
+++ b/ethosu/vela/tflite_graph_optimiser.py
@@ -1408,10 +1408,8 @@ def optimise_quantize(op: Operation, arch, nng):
         if input_values.ndim == 0:
             input_values = np.array([input_values])
 
-        # requantized int8 to int8
-        if (ifm.dtype == DataType.int8 and ofm.dtype == DataType.int8) or (
-            ifm.dtype == DataType.int16 and ofm.dtype == DataType.int16
-        ):
+        # requantize int8 to int8 or int16 to int16
+        if ifm.dtype == ofm.dtype == DataType.int8 or ifm.dtype == ofm.dtype == DataType.int16:
 
             # scale needs to use double precision to match TFLite reference kernel
             effective_scale = np.float64(ifm.quantization.scale_f32) / np.float64(ofm.quantization.scale_f32)
@@ -1495,7 +1493,7 @@ def supported_operator_check(op, arch, nng):
 
 
 def tflite_optimise_graph(nng, arch):
-    # Compile time optimisations
+    # Compile time static optimisations
     optimisation_list = [optimise_quantize, convert_shape_op_to_constant_tensor]
 
     for idx, sg in enumerate(nng.subgraphs):
diff --git a/ethosu/vela/tflite_model_semantic.py b/ethosu/vela/tflite_model_semantic.py
index 9408e0ce..16ca2797 100644
--- a/ethosu/vela/tflite_model_semantic.py
+++ b/ethosu/vela/tflite_model_semantic.py
@@ -76,7 +76,7 @@ class TFLiteSemantic:
     )
     binary_elem_wise_main_ops = binary_elem_wise_min_max_ops | binary_elem_wise_add_mul_sub | binary_elem_wise_shift_ops
     elem_wise_main_ops = binary_elem_wise_main_ops | unary_elem_wise_main_ops
-    shapeless_input_ops = binary_elem_wise_main_ops | set((Op.Split, Op.SplitV, Op.Mean, Op.ExpandDims))
+    shapeless_input_ops = binary_elem_wise_main_ops | set((Op.Split, Op.SplitV, Op.Mean, Op.ExpandDims, Op.Quantize))
     reshape_ops = set(
         (
             Op.Reshape,
@@ -214,13 +214,10 @@ class TFLiteSemantic:
         generic_constraints_exclude_list = {
             Op.Shape: [
                 TFLiteSemantic.constraint_tens_quant_none_check,
-                TFLiteSemantic.constraint_tens_quant_scale,
-                TFLiteSemantic.constraint_quant_scale_inf,
             ],
             Op.Quantize: [
                 TFLiteSemantic.constraint_tens_no_dynamic,
                 TFLiteSemantic.constraint_tens_output_scalar,
-                TFLiteSemantic.constraint_tens_input_scalar,
             ],
         }
         return generic_constraints_exclude_list
@@ -314,7 +311,11 @@ class TFLiteSemantic:
         extra = []
         tensors = [tens for tens in op.get_ifm_ifm2_weights_ofm() if tens]
         for tens in tensors:
-            if (tens.quantization.scale_f32 is not None) and np.isinf(tens.quantization.scale_f32).any():
+            if (
+                tens.quantization
+                and tens.quantization.scale_f32 is not None
+                and np.isinf(tens.quantization.scale_f32).any()
+            ):
                 valid = False
                 extra.append(f"Tensor '{tens.name}' has quantization scale: {tens.quantization.scale_f32}")
         return valid, ", ".join(extra)
diff --git a/ethosu/vela/tflite_supported_operators.py b/ethosu/vela/tflite_supported_operators.py
index 6328a4e5..25a34e82 100644
--- a/ethosu/vela/tflite_supported_operators.py
+++ b/ethosu/vela/tflite_supported_operators.py
@@ -39,7 +39,12 @@ def _optype_formatter(op_list):
 
 class TFLiteSupportedOperators:
     # Categorised lists of supported operators
-    npu_pre_ops = set((Op.SplitSliceRead,))
+    npu_pre_ops = set(
+        (
+            Op.SplitSliceRead,
+            Op.Shape,
+        )
+    )
     convolution_ops = set(
         (
             Op.Conv2DBias,
@@ -103,6 +108,7 @@ class TFLiteSupportedOperators:
             (
                 Op.ReduceSum,
                 Op.CLZ,
+                Op.Shape,
             )
         )
         | binary_elem_wise_add_mul_sub
@@ -363,7 +369,7 @@ class TFLiteSupportedOperators:
         if op.type not in cls.per_axis_quant_ops:
             tensors = [tens for tens in op.get_ifm_ifm2_weights_ofm() if tens]
             for tens in tensors:
-                if tens.quantization.is_per_axis():
+                if tens.quantization and tens.quantization.is_per_axis():
                     valid = False
                     extra.append(tens.name)
         return valid, "The following tensor(s) have per-axis quantization parameters: " + ", ".join(extra)