From 58520b981013214e458b5a7ff1983d919d6d2363 Mon Sep 17 00:00:00 2001
From: Louis Verhaard
Date: Mon, 24 Aug 2020 16:45:38 +0200
Subject: MLBEDSW-2688: use LeakyRelu for int16

For int16, using LeakyRelu (with bug fix) gives exactly the same results
as Mul+Max if input/output scales are the same.

Signed-off-by: Louis Verhaard
Change-Id: I4f4db464d77b0aaf0d25ddfca534f91d08db548d
---
 ethosu/vela/graph_optimiser.py                    | 36 ++++++++----------------
 ethosu/vela/register_command_stream_generator.py  |  2 +-
 2 files changed, 13 insertions(+), 25 deletions(-)

diff --git a/ethosu/vela/graph_optimiser.py b/ethosu/vela/graph_optimiser.py
index b9aafcac..46d26c80 100644
--- a/ethosu/vela/graph_optimiser.py
+++ b/ethosu/vela/graph_optimiser.py
@@ -734,8 +734,9 @@ def convert_lrelu_to_mul_max(op, arch):
 
 
 def convert_lrelu_to_lut(op, arch):
-    ifm, _, _, ofm = op.get_ifm_weights_biases_ofm()
     # Rewrite LeakyRelu by Add with scalar 0 + LUT activation
+    ifm, _, _, ofm = op.get_ifm_weights_biases_ofm()
+    assert ifm.dtype.size_in_bytes() == 1
     op.type = "AddAct"
     op.name = op.name + "_add"
     op.attrs.update({"npu_block_type": NpuBlockType.ElementWise})
@@ -750,26 +751,9 @@ def convert_lrelu_to_lut(op, arch):
     alpha = op.attrs["alpha"]
     zp = ofm.quantization.zero_point
     # Generate the LUT
-    if ifm.dtype.size_in_bytes() == 1:
-        dtype = DataType.int8
-        ix = range(256) if ifm.dtype == DataType.uint8 else range(-128, 128)
-        values = [int(x) if x >= zp else int(round(zp - alpha * (zp - x))) for x in ix]
-    else:
-        # int16
-        dtype = DataType.int32
-        values = []
-        for ix in range(512):
-            x = (ix - 256) * 128
-            if x >= zp:
-                base = x
-                slope = 128
-            else:
-                base = int(round(zp - alpha * (zp - x)))
-                next_base = int(round(zp - alpha * (zp - (x + 127))))
-                slope = int(round(128 * (next_base - base) / 127))
-            value = ((slope << 16) & 0xFFFF0000) + (base & 0xFFFF)
-            values.append(value)
-    lut_tensor = lut.create_lut_tensor(op.name + "_lut", values, dtype)
+    ix = range(256) if ifm.dtype == DataType.uint8 else range(-128, 128)
+    values = [int(x) if x >= zp else int(round(zp - alpha * (zp - x))) for x in ix]
+    lut_tensor = lut.create_lut_tensor(op.name + "_lut", values, DataType.int8)
     op.set_activation_lut(lut_tensor)
     return op
 
@@ -779,9 +763,13 @@ def convert_lrelu(op, arch):
     if op.type != "LeakyRelu":
         return op
     ifm, _, _, ofm = op.get_ifm_weights_biases_ofm()
-    use_lut = (ifm.is_scaling_equal(ofm)) and (ifm.dtype == ofm.dtype) and ifm.dtype in (DataType.uint8, DataType.int8)
-    if use_lut:
-        return convert_lrelu_to_lut(op, arch)
+    if ifm.is_scaling_equal(ofm) and ifm.dtype == ofm.dtype:
+        if ifm.dtype in (DataType.uint8, DataType.int8):
+            # use LUT
+            return convert_lrelu_to_lut(op, arch)
+        elif ifm.dtype == DataType.int16:
+            # use LeakyRelu unmodified
+            return op
     return convert_lrelu_to_mul_max(op, arch)
 
 
diff --git a/ethosu/vela/register_command_stream_generator.py b/ethosu/vela/register_command_stream_generator.py
index 5a14cb4f..8d9f9185 100644
--- a/ethosu/vela/register_command_stream_generator.py
+++ b/ethosu/vela/register_command_stream_generator.py
@@ -533,7 +533,7 @@ def generate_register_command_stream(nng, sg, arch, verbose=False):
                     use_global_scale = True
 
                     if primary_op.type == "LeakyRelu":
-                        output_scale *= primary_op.attrs["alpha"]
+                        output_scale = primary_op.attrs["alpha"]
                     ofm_scale, shift = scaling.quantise_scale(output_scale)
                     emit.cmd1_with_offset(cmd1.NPU_SET_OFM_SCALE, ofm_scale, shift)
 
-- 
cgit v1.2.1
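
As a sanity check on the commit message's claim that LeakyRelu and Mul+Max give the same
results when input and output share the same quantisation, here is a minimal standalone
Python sketch (not part of the patch). It reuses the per-entry formula from
convert_lrelu_to_lut and compares it against an idealised Mul+Max computation; the zero
point and alpha values are made up, and the Mul path uses the same Python rounding as the
LUT formula, so this illustrates the underlying algebra rather than the exact hardware
requantisation.

# Minimal sketch, not part of the patch: checks that the LUT formula from
# convert_lrelu_to_lut and an idealised Mul+Max lowering produce identical
# quantised outputs when input and output share scale and zero point.
# The zero point and alpha below are hypothetical example values.


def lrelu_lut_value(x, zp, alpha):
    # Per-entry formula used for the int8/uint8 LUT in the patch.
    return int(x) if x >= zp else int(round(zp - alpha * (zp - x)))


def lrelu_mul_max_value(x, zp, alpha):
    # Idealised Mul+Max: scale x by alpha around the zero point (the Mul),
    # then take the elementwise maximum with the unscaled value (the Max).
    scaled = int(round(zp - alpha * (zp - x)))
    return max(int(x), scaled)


if __name__ == "__main__":
    alpha, zp = 0.1, 3  # hypothetical quantisation parameters
    for x in range(-128, 128):  # full int8 input range
        assert lrelu_lut_value(x, zp, alpha) == lrelu_mul_max_value(x, zp, alpha)
    print("LUT and Mul+Max agree for all int8 values")

For 0 <= alpha <= 1 the scaled value never exceeds x above the zero point, so the Max
leaves positive values untouched and the two lowerings coincide. For int16 the patch
instead keeps the native LeakyRelu and relies on the register_command_stream_generator.py
fix, which sets output_scale to alpha rather than multiplying alpha into it.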