diff options
Diffstat (limited to 'ethosu/vela')
-rw-r--r-- | ethosu/vela/operation.py | 13 |
-rw-r--r-- | ethosu/vela/tflite_graph_optimiser.py | 8 |
-rw-r--r-- | ethosu/vela/tflite_model_semantic.py | 11 |
-rw-r--r-- | ethosu/vela/tflite_supported_operators.py | 10 |
4 files changed, 28 insertions, 14 deletions
diff --git a/ethosu/vela/operation.py b/ethosu/vela/operation.py index 5ed18621..db1c6f18 100644 --- a/ethosu/vela/operation.py +++ b/ethosu/vela/operation.py @@ -834,7 +834,16 @@ class Operation: self.ifm_shapes = [] self.ofm_shapes = [] - ifm_tensor, ifm2_tensor, weight_tensor, ofm_tensor = self.get_ifm_ifm2_weights_ofm() + ifm_tensor, ifm2_tensor, ofm_tensor = self.get_ifm_ifm2_ofm() + + if self.type == Op.Reshape: + # Set ofm shape + if len(self.inputs) > 1 and self.inputs[1].values is not None: + ofm_tensor.shape = self.inputs[1].values.flatten().tolist() + ofm_elements = ofm_tensor.elements() + # Stretch dimension + if ofm_elements < 0: + ofm_tensor.shape[ofm_tensor.shape.index(-1)] = int(ifm_tensor.elements() / abs(ofm_elements)) # set all shapes to op, as 4D if self.type == Op.FullyConnected: @@ -847,7 +856,7 @@ class Operation: self.ofm_shapes.append(Shape4D([self.ofm.shape[0], 1, 1, self.ofm.shape[1]])) else: self.ofm_shapes.append(Shape4D(ofm_tensor.get_full_shape())) - if self.type == Op.Softmax: + elif self.type == Op.Softmax: self.ifm_shapes.append(Shape4D(ifm_tensor.get_full_shape())) self.ofm_shapes.append(Shape4D(ofm_tensor.get_full_shape())) elif self.type.is_split_op() or self.type.is_concat_op(): diff --git a/ethosu/vela/tflite_graph_optimiser.py b/ethosu/vela/tflite_graph_optimiser.py index f2a8c803..b1a56605 100644 --- a/ethosu/vela/tflite_graph_optimiser.py +++ b/ethosu/vela/tflite_graph_optimiser.py @@ -1408,10 +1408,8 @@ def optimise_quantize(op: Operation, arch, nng): if input_values.ndim == 0: input_values = np.array([input_values]) - # requantized int8 to int8 - if (ifm.dtype == DataType.int8 and ofm.dtype == DataType.int8) or ( - ifm.dtype == DataType.int16 and ofm.dtype == DataType.int16 - ): + # requantized int8 to int8 or int16 to int16 + if ifm.dtype == ofm.dtype == DataType.int8 or ifm.dtype == ofm.dtype == DataType.int16: # scale needs to use double precision to match TFLite reference kernel effective_scale = 
np.float64(ifm.quantization.scale_f32) / np.float64(ofm.quantization.scale_f32) @@ -1495,7 +1493,7 @@ def supported_operator_check(op, arch, nng): def tflite_optimise_graph(nng, arch): - # Compile time optimisations + # Compile time static optimisations optimisation_list = [optimise_quantize, convert_shape_op_to_constant_tensor] for idx, sg in enumerate(nng.subgraphs): diff --git a/ethosu/vela/tflite_model_semantic.py b/ethosu/vela/tflite_model_semantic.py index 9408e0ce..16ca2797 100644 --- a/ethosu/vela/tflite_model_semantic.py +++ b/ethosu/vela/tflite_model_semantic.py @@ -76,7 +76,7 @@ class TFLiteSemantic: ) binary_elem_wise_main_ops = binary_elem_wise_min_max_ops | binary_elem_wise_add_mul_sub | binary_elem_wise_shift_ops elem_wise_main_ops = binary_elem_wise_main_ops | unary_elem_wise_main_ops - shapeless_input_ops = binary_elem_wise_main_ops | set((Op.Split, Op.SplitV, Op.Mean, Op.ExpandDims)) + shapeless_input_ops = binary_elem_wise_main_ops | set((Op.Split, Op.SplitV, Op.Mean, Op.ExpandDims, Op.Quantize)) reshape_ops = set( ( Op.Reshape, @@ -214,13 +214,10 @@ class TFLiteSemantic: generic_constraints_exclude_list = { Op.Shape: [ TFLiteSemantic.constraint_tens_quant_none_check, - TFLiteSemantic.constraint_tens_quant_scale, - TFLiteSemantic.constraint_quant_scale_inf, ], Op.Quantize: [ TFLiteSemantic.constraint_tens_no_dynamic, TFLiteSemantic.constraint_tens_output_scalar, - TFLiteSemantic.constraint_tens_input_scalar, ], } return generic_constraints_exclude_list @@ -314,7 +311,11 @@ class TFLiteSemantic: extra = [] tensors = [tens for tens in op.get_ifm_ifm2_weights_ofm() if tens] for tens in tensors: - if (tens.quantization.scale_f32 is not None) and np.isinf(tens.quantization.scale_f32).any(): + if ( + tens.quantization + and tens.quantization.scale_f32 is not None + and np.isinf(tens.quantization.scale_f32).any() + ): valid = False extra.append(f"Tensor '{tens.name}' has quantization scale: {tens.quantization.scale_f32}") return valid, ", ".join(extra) 
diff --git a/ethosu/vela/tflite_supported_operators.py b/ethosu/vela/tflite_supported_operators.py index 6328a4e5..25a34e82 100644 --- a/ethosu/vela/tflite_supported_operators.py +++ b/ethosu/vela/tflite_supported_operators.py @@ -39,7 +39,12 @@ def _optype_formatter(op_list): class TFLiteSupportedOperators: # Categorised lists of supported operators - npu_pre_ops = set((Op.SplitSliceRead,)) + npu_pre_ops = set( + ( + Op.SplitSliceRead, + Op.Shape, + ) + ) convolution_ops = set( ( Op.Conv2DBias, @@ -103,6 +108,7 @@ class TFLiteSupportedOperators: ( Op.ReduceSum, Op.CLZ, + Op.Shape, ) ) | binary_elem_wise_add_mul_sub @@ -363,7 +369,7 @@ class TFLiteSupportedOperators: if op.type not in cls.per_axis_quant_ops: tensors = [tens for tens in op.get_ifm_ifm2_weights_ofm() if tens] for tens in tensors: - if tens.quantization.is_per_axis(): + if tens.quantization and tens.quantization.is_per_axis(): valid = False extra.append(tens.name) return valid, "The following tensor(s) have per-axis quantization parameters: " + ", ".join(extra) |