about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Louis Verhaard <louis.verhaard@arm.com> 2021-01-07 13:35:47 +0100
committer: patrik.gustavsson <patrik.gustavsson@arm.com> 2021-01-22 13:30:25 +0000
commit: ece4e65a786d61934d0c356169a3c5f11f188538 (patch)
tree: d131e131aa915ad200185e7705861422878a730f
parent: d4738e5d68260ed56c0b878cd5dc11c67fdbbfaa (diff)
download: ethos-u-vela-ece4e65a786d61934d0c356169a3c5f11f188538.tar.gz
MLBEDSW-3418: Bug fixes 16-bit leaky relu
Bug fixes for 16-bit leaky relu with different quantizations for IFM/OFM:
- Overflow error occurred for alpha == 0
- The identity multiplication overwrote the result of the alpha multiplication

Signed-off-by: Louis Verhaard <louis.verhaard@arm.com>
Change-Id: I18f8d121f6e7c598b721c472b476b9285eeff543
-rw-r--r-- ethosu/vela/graph_optimiser.py | 15
1 file changed, 12 insertions, 3 deletions
diff --git a/ethosu/vela/graph_optimiser.py b/ethosu/vela/graph_optimiser.py
index 3c80658..5f11178 100644
--- a/ethosu/vela/graph_optimiser.py
+++ b/ethosu/vela/graph_optimiser.py
@@ -918,9 +918,17 @@ def convert_lrelu_to_mul_max(op, arch):
quantization = ifm.quantization.clone()
quantization.min = 0
quantization.max = alpha * (quantization.quant_max - quantization.quant_min)
- quantization.scale_f32 = alpha
quantization.zero_point = 0
- alpha_tens = create_const_tensor(op.name + "_alpha_scalar", [], ifm.dtype, [1], np.int8, quantization=quantization)
+ if np.isinf(1 / np.float32(alpha)):
+ # Handling of alpha near zero
+ quantization.scale_f32 = 1
+ scalar = 0
+ else:
+ quantization.scale_f32 = alpha
+ scalar = 1
+ alpha_tens = create_const_tensor(
+ op.name + "_alpha_scalar", [], ifm.dtype, [scalar], np.int8, quantization=quantization
+ )
mul_alpha.add_input_tensor(alpha_tens)
fm_alpha = ofm.clone(op.name + "_alpha")
mul_alpha.set_output_tensor(fm_alpha)
@@ -944,7 +952,8 @@ def convert_lrelu_to_mul_max(op, arch):
op.name + "_id_scalar", [], ifm.dtype, [1], np.uint8, quantization=quantization
)
mul_identity.add_input_tensor(identity_tens)
- fm_id = ofm.clone(op.name + "_id")
+ # Make sure that fm_id is allocated to a different address than fm_alpha
+ fm_id = ofm.clone(op.name + "_id", set_unique=True)
mul_identity.set_output_tensor(fm_id)
mul_identity.set_ifm_ofm_shapes()
DebugDatabase.add_optimised(op, mul_identity)