From 2e186c794b4bdc68d89428a58848e376591f76a0 Mon Sep 17 00:00:00 2001
From: Louis Verhaard <louis.verhaard@arm.com>
Date: Fri, 9 Oct 2020 10:47:04 +0200
Subject: MLBEDSW-3154 Bug fix for LUT ops with IFM from SplitSliceRead

- Fixed incorrect length check in high level command stream generator
- Improved tensor names related to LUT based operations

Change-Id: Ib8844a35a986e2dbef095df23f143f4633b255f9
Signed-off-by: Louis Verhaard <louis.verhaard@arm.com>
---
 ethosu/vela/graph_optimiser.py                     | 18 +++++++++---------
 ethosu/vela/high_level_command_stream_generator.py |  2 +-
 2 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'ethosu/vela')

diff --git a/ethosu/vela/graph_optimiser.py b/ethosu/vela/graph_optimiser.py
index 1966a82d..d4423524 100644
--- a/ethosu/vela/graph_optimiser.py
+++ b/ethosu/vela/graph_optimiser.py
@@ -826,30 +826,30 @@ def convert_lrelu_to_mul_max(op, arch):
     return op
 
 
-def convert_to_lut(op, lut_values):
+def convert_to_lut(op, lut_values, lut_name):
     # Rewrite the operation by Add with scalar 0 + LUT activation
     ifm = op.inputs[0]
     assert ifm.dtype.size_in_bytes() == 1
     op.type = Op.Add
-    op.name = op.name + "_add"
+    op.name = op.name + "_lut_" + lut_name
     # Mark as no-op to enable potential fusing optimizations
     op.attrs["is_nop"] = True
     # Create an input tensor containing scalar zero
     quantization = QuantizationParameters(0.0, 255.0)
     quantization.scale_f32 = ifm.quantization.scale_f32
     quantization.zero_point = 0
-    tens = create_const_tensor(op.inputs[0].name + "_add", [], ifm.dtype, [0], np.uint8, quantization=quantization)
+    tens = create_const_tensor(op.inputs[0].name + "_scalar0", [], ifm.dtype, [0], np.uint8, quantization=quantization)
     op.add_input_tensor(tens)
     # The LUT must be applied without any preceding rescaling (the LUT itself performs the rescale),
     # so even if the OFM has a different scale than the IFM, the generated OFM scale instructions
     # should be the same as the IFM
     op.forced_output_quantization = ifm.quantization
-    lut_tensor = lut.create_lut_tensor(op.name + "_lut", lut_values, DataType.int8)
+    lut_tensor = lut.create_lut_tensor(op.name + "_values", lut_values, DataType.int8)
     op.set_activation_lut(lut_tensor)
     return op
 
 
-def convert_to_lut8(op, fn):
+def convert_to_lut8(op, fn, fn_name):
     # Converts op to a no-op + int8/uint8 LUT which is generated with the given function.
     # fn is a function(real) -> real
     ifm, ofm = op.get_ifm_ofm()
@@ -870,7 +870,7 @@ def convert_to_lut8(op, fn):
         lut_result = round_away_zero(zp_out + y_real / ofm_scale)
         lut_result = min(quantized_max, max(quantized_min, lut_result))
         values.append(lut_result)
-    return convert_to_lut(op, values)
+    return convert_to_lut(op, values, fn_name)
 
 
 def convert_lrelu_to_lut(op, arch):
@@ -900,7 +900,7 @@ def convert_lrelu_to_lut(op, arch):
             lut_result = zp_out + fp_math.multiply_by_quantized_multiplier(x - zp_in, identity_scale, identity_shift)
         lut_result = min(quantized_max, max(quantized_min, lut_result))
         values.append(lut_result)
-    return convert_to_lut(op, values)
+    return convert_to_lut(op, values, "lrelu")
 
 
 def convert_lrelu(op, arch, nng):
@@ -920,9 +920,9 @@ def convert_lrelu(op, arch, nng):
 def convert_tanh_sigmoid_to_lut(op, arch, nng):
     # Converts int8/uint8 Sigmoid and Tanh to a LUT based solution
     if op.type == Op.Sigmoid:
-        return convert_to_lut8(op, clamp_sigmoid)
+        return convert_to_lut8(op, clamp_sigmoid, "sigmoid")
     elif op.type == Op.Tanh:
-        return convert_to_lut8(op, math.tanh)
+        return convert_to_lut8(op, math.tanh, "tanh")
     return op
 
 
diff --git a/ethosu/vela/high_level_command_stream_generator.py b/ethosu/vela/high_level_command_stream_generator.py
index dc52ae52..3e3cda19 100644
--- a/ethosu/vela/high_level_command_stream_generator.py
+++ b/ethosu/vela/high_level_command_stream_generator.py
@@ -51,7 +51,7 @@ def generate_high_level_command_stream_for_pass(strat, passes, block_configs, id
     npu_block_type = ps.npu_block_type
     split_offsets = [None, None]  # offset for [ifm, ifm2]
 
-    if len(ps.inputs) == 2 and npu_block_type == NpuBlockType.ElementWise:
+    if ps.ifm_tensor is not None and ps.ifm2_tensor is not None and npu_block_type == NpuBlockType.ElementWise:
         # Ensure correct ifm and ifm2 order
         if match_tensor(ps.inputs[0], ps.primary_op.inputs[1]) and match_tensor(ps.inputs[1], ps.primary_op.inputs[0]):
             ps.ifm_tensor, ps.ifm2_tensor = ps.ifm2_tensor, ps.ifm_tensor
-- 
cgit v1.2.1