From 2e186c794b4bdc68d89428a58848e376591f76a0 Mon Sep 17 00:00:00 2001 From: Louis Verhaard Date: Fri, 9 Oct 2020 10:47:04 +0200 Subject: MLBEDSW-3154 Bug fix for LUT ops with IFM from SplitSliceRead - Incorrect length check in high level command stream generator - Improved tensor names related to LUT based operations Change-Id: Ib8844a35a986e2dbef095df23f143f4633b255f9 Signed-off-by: Louis Verhaard --- ethosu/vela/graph_optimiser.py | 18 +++++++++--------- ethosu/vela/high_level_command_stream_generator.py | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'ethosu/vela') diff --git a/ethosu/vela/graph_optimiser.py b/ethosu/vela/graph_optimiser.py index 1966a82d..d4423524 100644 --- a/ethosu/vela/graph_optimiser.py +++ b/ethosu/vela/graph_optimiser.py @@ -826,30 +826,30 @@ def convert_lrelu_to_mul_max(op, arch): return op -def convert_to_lut(op, lut_values): +def convert_to_lut(op, lut_values, lut_name): # Rewrite the operation by Add with scalar 0 + LUT activation ifm = op.inputs[0] assert ifm.dtype.size_in_bytes() == 1 op.type = Op.Add - op.name = op.name + "_add" + op.name = op.name + "_lut_" + lut_name # Mark as no-op to enable potential fusing optimizations op.attrs["is_nop"] = True # Create an input tensor containing scalar zero quantization = QuantizationParameters(0.0, 255.0) quantization.scale_f32 = ifm.quantization.scale_f32 quantization.zero_point = 0 - tens = create_const_tensor(op.inputs[0].name + "_add", [], ifm.dtype, [0], np.uint8, quantization=quantization) + tens = create_const_tensor(op.inputs[0].name + "_scalar0", [], ifm.dtype, [0], np.uint8, quantization=quantization) op.add_input_tensor(tens) # The LUT must be applied without any preceding rescaling (the LUT itself performs the rescale), # so even if the OFM has a different scale than the IFM, the generated OFM scale instructions # should be the same as the IFM op.forced_output_quantization = ifm.quantization - lut_tensor = lut.create_lut_tensor(op.name + "_lut", lut_values, DataType.int8) + lut_tensor = lut.create_lut_tensor(op.name + "_values", lut_values, DataType.int8) op.set_activation_lut(lut_tensor) return op -def convert_to_lut8(op, fn): +def convert_to_lut8(op, fn, fn_name): # Converts op to a no-op + int8/uint8 LUT which is generated with the given function. # fn is a function(real) -> real ifm, ofm = op.get_ifm_ofm() @@ -870,7 +870,7 @@ def convert_to_lut8(op, fn): lut_result = round_away_zero(zp_out + y_real / ofm_scale) lut_result = min(quantized_max, max(quantized_min, lut_result)) values.append(lut_result) - return convert_to_lut(op, values) + return convert_to_lut(op, values, fn_name) def convert_lrelu_to_lut(op, arch): @@ -900,7 +900,7 @@ def convert_lrelu_to_lut(op, arch): lut_result = zp_out + fp_math.multiply_by_quantized_multiplier(x - zp_in, identity_scale, identity_shift) lut_result = min(quantized_max, max(quantized_min, lut_result)) values.append(lut_result) - return convert_to_lut(op, values) + return convert_to_lut(op, values, "lrelu") def convert_lrelu(op, arch, nng): @@ -920,9 +920,9 @@ def convert_lrelu(op, arch, nng): def convert_tanh_sigmoid_to_lut(op, arch, nng): # Converts int8/uint8 Sigmoid and Tanh to a LUT based solution if op.type == Op.Sigmoid: - return convert_to_lut8(op, clamp_sigmoid) + return convert_to_lut8(op, clamp_sigmoid, "sigmoid") elif op.type == Op.Tanh: - return convert_to_lut8(op, math.tanh) + return convert_to_lut8(op, math.tanh, "tanh") return op diff --git a/ethosu/vela/high_level_command_stream_generator.py b/ethosu/vela/high_level_command_stream_generator.py index dc52ae52..3e3cda19 100644 --- a/ethosu/vela/high_level_command_stream_generator.py +++ b/ethosu/vela/high_level_command_stream_generator.py @@ -51,7 +51,7 @@ def generate_high_level_command_stream_for_pass(strat, passes, block_configs, id npu_block_type = ps.npu_block_type split_offsets = [None, None] # offset for [ifm, ifm2] - if len(ps.inputs) == 2 and npu_block_type == NpuBlockType.ElementWise: + if ps.ifm_tensor is not None and ps.ifm2_tensor is not None and npu_block_type == NpuBlockType.ElementWise: # Ensure correct ifm and ifm2 order if match_tensor(ps.inputs[0], ps.primary_op.inputs[1]) and match_tensor(ps.inputs[1], ps.primary_op.inputs[0]): ps.ifm_tensor, ps.ifm2_tensor = ps.ifm2_tensor, ps.ifm_tensor -- cgit v1.2.1