From ece4e65a786d61934d0c356169a3c5f11f188538 Mon Sep 17 00:00:00 2001
From: Louis Verhaard <louis.verhaard@arm.com>
Date: Thu, 7 Jan 2021 13:35:47 +0100
Subject: MLBEDSW-3418: Bug fixes 16-bit leaky relu

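Bug fixes for 16-bit leaky relu when IFM and OFM have different
quantizations:
- An overflow error occurred when alpha was zero, or so close to zero
  that 1/alpha is infinite in float32. In that case the alpha scalar
  is now set to 0 with a scale of 1.
- The identity multiplication overwrote the result of the alpha
  multiplication, because both outputs could be allocated to the same
  address. The identity output is now cloned with set_unique=True.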

Signed-off-by: Louis Verhaard <louis.verhaard@arm.com>
Change-Id: I18f8d121f6e7c598b721c472b476b9285eeff543
---
 ethosu/vela/graph_optimiser.py | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/ethosu/vela/graph_optimiser.py b/ethosu/vela/graph_optimiser.py
index 3c80658e..5f111786 100644
--- a/ethosu/vela/graph_optimiser.py
+++ b/ethosu/vela/graph_optimiser.py
@@ -918,9 +918,17 @@ def convert_lrelu_to_mul_max(op, arch):
     quantization = ifm.quantization.clone()
     quantization.min = 0
     quantization.max = alpha * (quantization.quant_max - quantization.quant_min)
-    quantization.scale_f32 = alpha
     quantization.zero_point = 0
-    alpha_tens = create_const_tensor(op.name + "_alpha_scalar", [], ifm.dtype, [1], np.int8, quantization=quantization)
+    if np.isinf(1 / np.float32(alpha)):
+        # Handling of alpha near zero
+        quantization.scale_f32 = 1
+        scalar = 0
+    else:
+        quantization.scale_f32 = alpha
+        scalar = 1
+    alpha_tens = create_const_tensor(
+        op.name + "_alpha_scalar", [], ifm.dtype, [scalar], np.int8, quantization=quantization
+    )
     mul_alpha.add_input_tensor(alpha_tens)
     fm_alpha = ofm.clone(op.name + "_alpha")
     mul_alpha.set_output_tensor(fm_alpha)
@@ -944,7 +952,8 @@ def convert_lrelu_to_mul_max(op, arch):
             op.name + "_id_scalar", [], ifm.dtype, [1], np.uint8, quantization=quantization
         )
         mul_identity.add_input_tensor(identity_tens)
-        fm_id = ofm.clone(op.name + "_id")
+        # Make sure that fm_id is allocated to a different address than fm_alpha
+        fm_id = ofm.clone(op.name + "_id", set_unique=True)
         mul_identity.set_output_tensor(fm_id)
         mul_identity.set_ifm_ofm_shapes()
         DebugDatabase.add_optimised(op, mul_identity)
-- 
cgit v1.2.1