From 701ba91870fa09e63d1ee3af6654689f78c138e8 Mon Sep 17 00:00:00 2001
From: Fredrik Svedberg
Date: Wed, 7 Sep 2022 16:01:15 +0200
Subject: MLBEDSW-6869 Improve LeakyRelu support

Added support for int16 LeakyRelu for negative alpha and alpha
greater than one.

Signed-off-by: Fredrik Svedberg
Change-Id: I7f522ebfe014786d0a1d96172e75c7d9bdd76921
---
 ethosu/vela/test/test_tflite_model_semantic.py | 14 -------
 ethosu/vela/tflite_graph_optimiser.py          | 54 +++++++++++++++++++++-----
 ethosu/vela/tflite_model_semantic.py           | 12 +-----
 3 files changed, 46 insertions(+), 34 deletions(-)

diff --git a/ethosu/vela/test/test_tflite_model_semantic.py b/ethosu/vela/test/test_tflite_model_semantic.py
index e290dd2c..115d1cf5 100644
--- a/ethosu/vela/test/test_tflite_model_semantic.py
+++ b/ethosu/vela/test/test_tflite_model_semantic.py
@@ -412,20 +412,6 @@ def test_constraint_matching_either_shapes():
     assert not semantic_checker.is_operator_semantic_valid(op)
 
 
-def test_constraint_alpha_valid():
-    # Alpha can only be negative for int8 and uint8
-    op = testutil.create_elemwise_op(Op.LeakyRelu, "op", [2, 2], None, [2, 2], DataType.int16)
-    op.attrs["alpha"] = 0
-    assert semantic_checker.is_operator_semantic_valid(op)
-    op.attrs["alpha"] = -1
-    assert not semantic_checker.is_operator_semantic_valid(op)
-    op = testutil.create_elemwise_op(Op.LeakyRelu, "op", [2, 2], None, [2, 2], DataType.int8)
-    op.attrs["alpha"] = 0
-    assert semantic_checker.is_operator_semantic_valid(op)
-    op.attrs["alpha"] = -1
-    assert semantic_checker.is_operator_semantic_valid(op)
-
-
 def test_constraint_hardswish_dtype():
     # HardSwish operator dtype should be int8 or uint8, and input dtype must match output
     # UINT8
diff --git a/ethosu/vela/tflite_graph_optimiser.py b/ethosu/vela/tflite_graph_optimiser.py
index 611046ba..052f824c 100644
--- a/ethosu/vela/tflite_graph_optimiser.py
+++ b/ethosu/vela/tflite_graph_optimiser.py
@@ -1017,11 +1017,34 @@ def convert_lrelu_to_mul_max(op, arch):
     if ifm is None or ofm is None:
         return op
 
+    alpha = np.float32(op.attrs["alpha"])
+    use_mul_max = 0 < alpha < 1
+    if use_mul_max:
+        mul_ifm = ifm
+        new_op = Op.Maximum
+    else:
+        # Need to use a different approach for alpha outside the range (0, 1)
+        no_scale_quant = ifm.quantization.clone()
+        no_scale_quant.scale_f32 = None
+        no_scale_quant.zero_point = 0
+        zero = create_const_tensor("zero_const", [], ifm.dtype, [0], quantization=no_scale_quant)
+
+        # Select values < 0
+        min_op = Operation(Op.Minimum, op.name + "_min")
+        min_op.add_input_tensor(ifm)
+        min_op.add_input_tensor(zero)
+        mul_ifm = ifm.clone(op.name + "_negative", set_unique=True)
+        mul_ifm.dtype = DataType.int32
+        min_op.set_output_tensor(mul_ifm)
+        min_op.set_ifm_ofm_shapes()
+        new_op = Op.RescaleAdd
+        op.rescale = (1, 0)  # No scale or shift
+        DebugDatabase.add_optimised(op, min_op)
+
     # Add multiplication with alpha
     mul_alpha = Operation(Op.Mul, op.name + "_mul_alpha")
-    mul_alpha.add_input_tensor(ifm)
+    mul_alpha.add_input_tensor(mul_ifm)
     # Create const tensor containing alpha as scalar
-    alpha = np.float32(op.attrs["alpha"])
     quantization = ifm.quantization.clone()
     quantization.min = 0
     quantization.max = alpha * (quantization.quant_max - quantization.quant_min)
@@ -1037,15 +1060,24 @@ def convert_lrelu_to_mul_max(op, arch):
         scalar = 0
     else:
         quantization.scale_f32 = alpha
-        scalar = 1
-    alpha_tens = create_const_tensor(op.name + "_alpha_scalar", [], ifm.dtype, [scalar], quantization=quantization)
+        scalar, _ = scaling.elementwise_mul_scale(ifm.quantization.scale_f32, alpha, ofm.quantization.scale_f32)
+    alpha_tens = create_const_tensor(
+        op.name + "_alpha_scalar", [1, 1, 1, 1], DataType.int32, [scalar], np.int32, quantization=quantization
+    )
     mul_alpha.add_input_tensor(alpha_tens)
     fm_alpha = ofm.clone(op.name + "_alpha", set_unique=True)
     mul_alpha.set_output_tensor(fm_alpha)
     mul_alpha.set_ifm_ofm_shapes()
     DebugDatabase.add_optimised(op, mul_alpha)
 
-    if check_quantized_tens_scaling_equal(ifm, ofm):
+    if not use_mul_max:
+        relu_op = Operation(Op.Relu, op.name + "_relu")
+        relu_op.add_input_tensor(ifm)
+        fm_id = ofm.clone(op.name + "_positive_scaled", set_unique=True)
+        relu_op.set_output_tensor(fm_id)
+        relu_op.set_ifm_ofm_shapes()
+        DebugDatabase.add_optimised(op, relu_op)
+    elif check_quantized_tens_scaling_equal(ifm, ofm):
         # No identity multiplication is needed
         fm_id = ifm
     else:
@@ -1069,8 +1101,8 @@ def convert_lrelu_to_mul_max(op, arch):
         DebugDatabase.add_optimised(op, mul_identity)
 
     # Convert LeakyRelu to Max, add the results of the multiplication(s) as inputs
-    op.type = Op.Maximum
-    op.name = op.name.replace("LeakyRelu", "Maximum")
+    op.type = new_op
+    op.name = op.name.replace("LeakyRelu", new_op.name)
     op.inputs = []
     ifm.consumer_list.remove(op)
     op.add_input_tensor(fm_alpha)
@@ -1145,8 +1177,12 @@ def convert_lrelu(op, arch, nng):
     if ifm.dtype in (DataType.uint8, DataType.int8) and ifm.dtype == ofm.dtype:
         # use LUT for int8/uint8
         return convert_lrelu_to_lut(op, arch)
-    if check_quantized_tens_scaling_equal(ifm, ofm) and ifm.dtype == ofm.dtype == DataType.int16:
-        # use LeakyRelu unmodified for int16 with equal input/output scaling
+    if (
+        check_quantized_tens_scaling_equal(ifm, ofm)
+        and ifm.dtype == ofm.dtype == DataType.int16
+        and op.attrs["alpha"] >= 0
+    ):
+        # use LeakyRelu unmodified for int16 with equal input/output scaling and non-negative alpha
         return op
     return convert_lrelu_to_mul_max(op, arch)
 
diff --git a/ethosu/vela/tflite_model_semantic.py b/ethosu/vela/tflite_model_semantic.py
index 16ca2797..abda886c 100644
--- a/ethosu/vela/tflite_model_semantic.py
+++ b/ethosu/vela/tflite_model_semantic.py
@@ -161,9 +161,6 @@ class TFLiteSemantic:
         self.specific_constraints[Op.StridedSlice].append(TFLiteSemantic.constraint_axis_masks)
         self.specific_constraints[Op.StridedSlice].append(TFLiteSemantic.constraint_slice_ranges)
 
-        # LeakyRelu specific checks:
-        self.specific_constraints[Op.LeakyRelu].append(TFLiteSemantic.constraint_alpha_valid)
-
         # FullyConnected specific checks:
         self.specific_constraints[Op.FullyConnected].append(TFLiteSemantic.constraint_fc_output_2d)
         self.specific_constraints[Op.FullyConnected].append(TFLiteSemantic.constraint_keep_dim_ifm_ofm)
@@ -175,6 +172,7 @@ class TFLiteSemantic:
         # HardSwish specific checks:
         self.specific_constraints[Op.HardSwish].append(TFLiteSemantic.constraint_input_8bit)
         self.specific_constraints[Op.HardSwish].append(TFLiteSemantic.constraint_matching_in_out_types)
+
         # Mean specific checks:
         self.specific_constraints[Op.Mean].append(TFLiteSemantic.constraint_input_8bit)
         self.specific_constraints[Op.Mean].append(TFLiteSemantic.constraint_mean_input_dims)
@@ -557,14 +555,6 @@ class TFLiteSemantic:
         valid = (ifm_shape == ofm_shape) or (ifm2_shape == ofm_shape)
         return valid, f"Op has ifm_shape={ifm_shape}, ifm2_shape={ifm2_shape} and ofm_shape={ofm_shape}"
 
-    @staticmethod
-    def constraint_alpha_valid(op):
-        "Alpha only allowed to be negative if IFM is int8 or uint8"
-        alpha = op.attrs["alpha"]
-        ifm_dtype = op.ifm.dtype
-        valid = ifm_dtype == DataType.int8 or ifm_dtype == DataType.uint8 or alpha >= 0
-        return valid, f"Op has alpha={alpha} and ifm_dtype={ifm_dtype} "
-
     @staticmethod
     def constraint_keep_dim_ifm_ofm(op):
         "The IFM and OFM must have the same number of dimensions if keep_num_dims is set to true"
-- 
cgit v1.2.1