author    Louis Verhaard <louis.verhaard@arm.com>    2020-08-24 16:45:38 +0200
committer Louis Verhaard <louis.verhaard@arm.com>    2020-08-26 16:52:37 +0200
commit    58520b981013214e458b5a7ff1983d919d6d2363 (patch)
tree      a7be754975e158ba1f3dd6fc3658e4daa300a025
parent    90831bc18d45008b703e59aad0594026beb7da82 (diff)
MLBEDSW-2688: use LeakyRelu for int16
For int16, using LeakyRelu (with bug fix) gives exactly the same results
as Mul+Max if input/output scales are the same.

Signed-off-by: Louis Verhaard <louis.verhaard@arm.com>
Change-Id: I4f4db464d77b0aaf0d25ddfca534f91d08db548d
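A minimal sketch of the equivalence the message claims, in plain Python with
no vela APIs (the function names and numbers here are illustrative only):
with a shared scale and zero point zp, the direct LeakyRelu and the Mul+Max
lowering pick the same quantised output for every input when 0 <= alpha <= 1.

    def leaky_relu_q(x, alpha, zp):
        # Direct LeakyRelu on the quantised value, relative to the zero point.
        return x if x >= zp else int(round(zp - alpha * (zp - x)))

    def mul_max_q(x, alpha, zp):
        # Mul+Max lowering: max(x - zp, alpha * (x - zp)) + zp.
        scaled = int(round(zp - alpha * (zp - x)))
        return max(x, scaled)

    # With equal input/output scales the two agree on the whole int16 domain.
    assert all(leaky_relu_q(x, 0.1, 5) == mul_max_q(x, 0.1, 5)
               for x in range(-32768, 32768))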
-rw-r--r--  ethosu/vela/graph_optimiser.py                    | 36
-rw-r--r--  ethosu/vela/register_command_stream_generator.py  |  2
2 files changed, 13 insertions(+), 25 deletions(-)
diff --git a/ethosu/vela/graph_optimiser.py b/ethosu/vela/graph_optimiser.py
index b9aafcac..46d26c80 100644
--- a/ethosu/vela/graph_optimiser.py
+++ b/ethosu/vela/graph_optimiser.py
@@ -734,8 +734,9 @@ def convert_lrelu_to_mul_max(op, arch):
 
 
 def convert_lrelu_to_lut(op, arch):
-    ifm, _, _, ofm = op.get_ifm_weights_biases_ofm()
     # Rewrite LeakyRelu by Add with scalar 0 + LUT activation
+    ifm, _, _, ofm = op.get_ifm_weights_biases_ofm()
+    assert ifm.dtype.size_in_bytes() == 1
     op.type = "AddAct"
     op.name = op.name + "_add"
     op.attrs.update({"npu_block_type": NpuBlockType.ElementWise})
@@ -750,26 +751,9 @@ def convert_lrelu_to_lut(op, arch):
     alpha = op.attrs["alpha"]
     zp = ofm.quantization.zero_point
     # Generate the LUT
-    if ifm.dtype.size_in_bytes() == 1:
-        dtype = DataType.int8
-        ix = range(256) if ifm.dtype == DataType.uint8 else range(-128, 128)
-        values = [int(x) if x >= zp else int(round(zp - alpha * (zp - x))) for x in ix]
-    else:
-        # int16
-        dtype = DataType.int32
-        values = []
-        for ix in range(512):
-            x = (ix - 256) * 128
-            if x >= zp:
-                base = x
-                slope = 128
-            else:
-                base = int(round(zp - alpha * (zp - x)))
-                next_base = int(round(zp - alpha * (zp - (x + 127))))
-                slope = int(round(128 * (next_base - base) / 127))
-            value = ((slope << 16) & 0xFFFF0000) + (base & 0xFFFF)
-            values.append(value)
-    lut_tensor = lut.create_lut_tensor(op.name + "_lut", values, dtype)
+    ix = range(256) if ifm.dtype == DataType.uint8 else range(-128, 128)
+    values = [int(x) if x >= zp else int(round(zp - alpha * (zp - x))) for x in ix]
+    lut_tensor = lut.create_lut_tensor(op.name + "_lut", values, DataType.int8)
     op.set_activation_lut(lut_tensor)
     return op
 
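To make the retained 8-bit path concrete, here is a standalone rendering of
the table build and of how the activation is then evaluated. Only the list
comprehension is taken from the code above; alpha, zp and apply_lut are
illustrative, not vela APIs.

    alpha, zp = 0.1, -3      # example quantisation parameters
    ix = range(-128, 128)    # int8 domain; uint8 would use range(256)
    values = [int(x) if x >= zp else int(round(zp - alpha * (zp - x))) for x in ix]

    def apply_lut(x):
        # The activation becomes a plain 256-entry table lookup per element.
        return values[x + 128]   # shift the signed domain onto table indices

    assert apply_lut(64) == 64   # identity at and above the zero point
    assert apply_lut(-128) == int(round(zp - alpha * (zp + 128)))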
@@ -779,9 +763,13 @@ def convert_lrelu(op, arch):
     if op.type != "LeakyRelu":
         return op
     ifm, _, _, ofm = op.get_ifm_weights_biases_ofm()
-    use_lut = (ifm.is_scaling_equal(ofm)) and (ifm.dtype == ofm.dtype) and ifm.dtype in (DataType.uint8, DataType.int8)
-    if use_lut:
-        return convert_lrelu_to_lut(op, arch)
+    if ifm.is_scaling_equal(ofm) and ifm.dtype == ofm.dtype:
+        if ifm.dtype in (DataType.uint8, DataType.int8):
+            # use LUT
+            return convert_lrelu_to_lut(op, arch)
+        elif ifm.dtype == DataType.int16:
+            # use LeakyRelu unmodified
+            return op
     return convert_lrelu_to_mul_max(op, arch)
 
 
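The is_scaling_equal guard above matters because with differing input and
output scales the op must also rescale, so neither a pure table lookup nor
running LeakyRelu with the OFM scale set to alpha (second file below) would
produce correct values. A sketch with hypothetical quantisation parameters,
not vela code:

    s_in, s_out, alpha, zp = 0.05, 0.02, 0.1, 0

    def dequant(q, s):
        return s * (q - zp)

    def quant(r, s):
        return int(round(r / s)) + zp

    x = 40                            # quantised input value
    y = max(dequant(x, s_in), alpha * dequant(x, s_in))
    assert quant(y, s_out) == 100     # correct output needs rescaling
    assert quant(y, s_out) != x       # an identity lookup would be wrong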
diff --git a/ethosu/vela/register_command_stream_generator.py b/ethosu/vela/register_command_stream_generator.py
index 5a14cb4f..8d9f9185 100644
--- a/ethosu/vela/register_command_stream_generator.py
+++ b/ethosu/vela/register_command_stream_generator.py
@@ -533,7 +533,7 @@ def generate_register_command_stream(nng, sg, arch, verbose=False):
             use_global_scale = True
             if primary_op.type == "LeakyRelu":
-                output_scale *= primary_op.attrs["alpha"]
+                output_scale = primary_op.attrs["alpha"]
             ofm_scale, shift = scaling.quantise_scale(output_scale)
             emit.cmd1_with_offset(cmd1.NPU_SET_OFM_SCALE, ofm_scale, shift)
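The one-line change above is the referenced bug fix: with equal input and
output scales, the global OFM scale programmed via NPU_SET_OFM_SCALE must be
exactly alpha, so assigning it is correct where multiplying would fold alpha
into whatever factor output_scale already held. As a rough stand-in for what
scaling.quantise_scale then does with that value (the real helper may differ;
this only approximates the fixed-point split):

    import math

    def quantise_scale_sketch(scale):
        # Express 'scale' as roughly significand * 2**(-shift) with the
        # significand in [2**30, 2**31), the usual form for hardware
        # rescale registers.
        shift = 30 - int(math.floor(math.log2(scale)))
        return int(round(scale * (1 << shift))), shift

    alpha = 0.1
    ofm_scale, shift = quantise_scale_sketch(alpha)  # programme alpha directly
    assert abs(ofm_scale / (1 << shift) - alpha) < 1e-9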