author	Louis Verhaard <louis.verhaard@arm.com>	2020-09-25 08:30:44 +0200
committer	patrik.gustavsson <patrik.gustavsson@arm.com>	2020-09-29 07:38:13 +0000
commit	f03bad31c194d1a24ce808223f1b00310a7325e2 (patch)
tree	3e24836d8319522d994da51faad7ce92273f1ed0
parent	eadb166072958febd7e222297d2a65bcf6063f01 (diff)
download	ethos-u-vela-f03bad31c194d1a24ce808223f1b00310a7325e2.tar.gz
MLBEDSW-2031: LUT support tanh/sigmoid
Uses LUT for int8/uint8 based tanh/sigmoid.

Change-Id: Ib6ac5a5c958ab9a17e47f620b22c3e22d8d60321
Signed-off-by: Louis Verhaard <louis.verhaard@arm.com>
-rw-r--r--	ethosu/vela/fp_math.py	1
-rw-r--r--	ethosu/vela/graph_optimiser.py	62
-rw-r--r--	ethosu/vela/numeric_util.py	6
-rw-r--r--	ethosu/vela/operation.py	2
4 files changed, 59 insertions(+), 12 deletions(-)
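
An int8/uint8 activation can only take 256 distinct values, so Tanh and Sigmoid can be evaluated once per possible input value and stored in a lookup table that is attached as the activation of a no-op elementwise Add (see convert_to_lut/convert_to_lut8 in the diff below). The following is a minimal, self-contained sketch of that table generation; the scale and zero-point values in the example are illustrative only, not taken from the commit.

import math

def round_away_zero(f):
    # Illustrative reimplementation of numeric_util.round_away_zero:
    # round halves away from zero.
    return math.floor(f + 0.5) if f >= 0 else math.ceil(f - 0.5)

def build_lut8(fn, ifm_scale, zp_in, ofm_scale, zp_out, signed):
    # One table entry per possible quantized input value (256 entries).
    ix = range(-128, 128) if signed else range(256)
    qmin, qmax = min(ix), max(ix)
    values = []
    for x in ix:
        x_real = ifm_scale * (x - zp_in)                    # dequantize the input code
        y_real = fn(x_real)                                  # evaluate tanh/sigmoid in real arithmetic
        y_q = round_away_zero(zp_out + y_real / ofm_scale)   # requantize to the output domain
        values.append(int(min(qmax, max(qmin, y_q))))        # clamp to the representable range
    return values

# Example: int8 Tanh with made-up input quantization (scale 0.05, zero point 0)
# and output quantization 1/128 with zero point 0.
lut = build_lut8(math.tanh, ifm_scale=0.05, zp_in=0, ofm_scale=1.0 / 128, zp_out=0, signed=True)
assert len(lut) == 256

Because each entry is already requantized to the OFM's scale and zero point, the rewritten op forces its output quantization to the IFM's, so no additional rescale is emitted before the table is applied.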
diff --git a/ethosu/vela/fp_math.py b/ethosu/vela/fp_math.py
index 2515b771..66375611 100644
--- a/ethosu/vela/fp_math.py
+++ b/ethosu/vela/fp_math.py
@@ -21,6 +21,7 @@
# point implementation.
import numpy as np
+
# Convert floating point to fixed point, default Q5.26
def from_float(x, integer_bits=5):
i32info = np.iinfo(np.int32)
diff --git a/ethosu/vela/graph_optimiser.py b/ethosu/vela/graph_optimiser.py
index 68473307..a8f68ae1 100644
--- a/ethosu/vela/graph_optimiser.py
+++ b/ethosu/vela/graph_optimiser.py
@@ -28,6 +28,8 @@ from .data_type import DataType
from .errors import UnsupportedFeatureError
from .ethos_u55_regs.ethos_u55_regs import resampling_mode
from .numeric_util import full_shape
+from .numeric_util import round_away_zero
+from .numeric_util import sigmoid
from .operation import create_avgpool_nop
from .operation import NpuBlockType
from .operation import Operation
@@ -863,9 +865,9 @@ def convert_lrelu_to_mul_max(op, arch):
return op
-def convert_lrelu_to_lut(op, arch):
- # Rewrite LeakyRelu by Add with scalar 0 + LUT activation
- ifm, _, _, ofm = op.get_ifm_weights_biases_ofm()
+def convert_to_lut(op, lut_values):
+ # Rewrite the operation by Add with scalar 0 + LUT activation
+ ifm = op.inputs[0]
assert ifm.dtype.size_in_bytes() == 1
op.type = "AddAct"
op.name = op.name + "_add"
@@ -878,6 +880,41 @@ def convert_lrelu_to_lut(op, arch):
quantization.zero_point = 0
tens = create_const_tensor(op.inputs[0].name + "_add", [], ifm.dtype, [0], np.uint8, quantization=quantization)
op.add_input_tensor(tens)
+ # The LUT must be applied without any preceding rescaling (the LUT itself performs the rescale),
+ # so even if the OFM has a different scale than the IFM, the generated OFM scale instructions
+ # should be the same as the IFM
+ op.attrs["forced_output_quantization"] = ifm.quantization
+ lut_tensor = lut.create_lut_tensor(op.name + "_lut", lut_values, DataType.int8)
+ op.set_activation_lut(lut_tensor)
+ return op
+
+
+def convert_to_lut8(op, fn):
+ # Converts op to a no-op + int8/uint8 LUT which is generated with the given function.
+ # fn is a function(real) -> real
+ ifm, _, _, ofm = op.get_ifm_weights_biases_ofm()
+ if ifm.dtype not in (DataType.uint8, DataType.int8) or ifm.dtype != ofm.dtype:
+ return op
+ # Generate the LUT
+ ifm_scale = np.double(ifm.quantization.scale_f32)
+ ofm_scale = np.double(ofm.quantization.scale_f32)
+ zp_in = ifm.quantization.zero_point
+ zp_out = ofm.quantization.zero_point
+ values = []
+ ix = range(256) if ifm.dtype == DataType.uint8 else range(-128, 128)
+ quantized_min = min(ix)
+ quantized_max = max(ix)
+ for x in ix:
+ x_real = ifm_scale * (x - zp_in)
+ y_real = fn(x_real)
+ lut_result = round_away_zero(zp_out + y_real / ofm_scale)
+ lut_result = min(quantized_max, max(quantized_min, lut_result))
+ values.append(lut_result)
+ return convert_to_lut(op, values)
+
+
+def convert_lrelu_to_lut(op, arch):
+ ifm, _, _, ofm = op.get_ifm_weights_biases_ofm()
# Generate the LUT
alpha = op.attrs["alpha"]
ifm_scale = np.double(ifm.quantization.scale_f32)
@@ -903,13 +940,7 @@ def convert_lrelu_to_lut(op, arch):
lut_result = zp_out + fp_math.multiply_by_quantized_multiplier(x - zp_in, identity_scale, identity_shift)
lut_result = min(quantized_max, max(quantized_min, lut_result))
values.append(lut_result)
- # The LUT must be applied without any preceding rescaling (the LUT itself performs the rescale),
- # so even if the OFM has a different scale than the IFM, the generated OFM scale instructions
- # should be the same as the IFM
- op.attrs["forced_output_quantization"] = ifm.quantization
- lut_tensor = lut.create_lut_tensor(op.name + "_lut", values, DataType.int8)
- op.set_activation_lut(lut_tensor)
- return op
+ return convert_to_lut(op, values)
def convert_lrelu(op, arch):
@@ -926,6 +957,15 @@ def convert_lrelu(op, arch):
return convert_lrelu_to_mul_max(op, arch)
+def convert_tanh_sigmoid_to_lut(op, arch):
+ # Converts int8/uint8 Sigmoid and Tanh to a LUT based solution
+ if op.type == "Sigmoid":
+ return convert_to_lut8(op, sigmoid)
+ elif op.type == "Tanh":
+ return convert_to_lut8(op, math.tanh)
+ return op
+
+
def remove_unwanted_reshapes(op, arch):
# Try to remove reshapes enclosing ElementWise operator with only one non-constant input
if not op.run_on_npu or op.attrs["npu_block_type"] != NpuBlockType.ElementWise:
@@ -971,6 +1011,7 @@ def fuse_activation_function_with_prev(op, arch):
# Note: the below checks on prev_op require that a first optimize pass on the full graph has been performed
fuse = (
prev_op.run_on_npu
+ and "npu_block_type" in prev_op.attrs
and prev_op.attrs["npu_block_type"] != NpuBlockType.Default
and len(ifm.ops) == 1
and len(prev_op.outputs[0].consumers()) == 1
@@ -1058,6 +1099,7 @@ def optimise_graph_a(nng, arch, verbose_graph=False):
convert_mul_max_to_abs_or_lrelu,
remove_unwanted_reshapes,
convert_lrelu,
+ convert_tanh_sigmoid_to_lut,
]
for idx, sg in enumerate(nng.subgraphs):
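
The graph_optimiser passes above all share one calling convention: each takes (op, arch) and either hands the op back untouched when it does not apply (as convert_tanh_sigmoid_to_lut does for anything other than Sigmoid/Tanh) or returns the rewritten replacement. A sketch of that contract only; Vela's real traversal lives in rewrite_graph and is not reproduced here.

def apply_op_rewrites(ops, arch, rewrite_fns):
    # Illustration of the pass contract, not Vela's actual graph driver.
    rewritten = []
    for op in ops:
        for fn in rewrite_fns:
            # Each pass returns either the same op (no match) or its replacement.
            op = fn(op, arch)
        rewritten.append(op)
    return rewritten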
diff --git a/ethosu/vela/numeric_util.py b/ethosu/vela/numeric_util.py
index 4ebef8e5..3d26444a 100644
--- a/ethosu/vela/numeric_util.py
+++ b/ethosu/vela/numeric_util.py
@@ -77,13 +77,17 @@ def clamp_tanh(x):
return y
+def sigmoid(x):
+ return 1 / (1 + math.exp(-x))
+
+
def clamp_sigmoid(x):
if x <= -8:
y = 0.0
elif x >= 8:
y = 1.0
else:
- y = 1 / (1 + math.exp(-x))
+ y = sigmoid(x)
return y
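
The numeric_util change only factors the unclamped sigmoid out of clamp_sigmoid so the LUT pass can evaluate it at arbitrary real inputs; clamp_sigmoid keeps its saturation at +/-8. A small illustrative check of how little that clamp changes the result (well under one uint8 quantization step of 1/256):

import math

def sigmoid(x):
    return 1 / (1 + math.exp(-x))

def clamp_sigmoid(x):
    # Same behaviour as numeric_util.clamp_sigmoid: saturate outside [-8, 8].
    if x <= -8:
        return 0.0
    if x >= 8:
        return 1.0
    return sigmoid(x)

print(sigmoid(8.0))                          # ~0.999665
print(clamp_sigmoid(8.0))                    # 1.0 -> clamping error ~3.4e-4
print(sigmoid(0.5) == clamp_sigmoid(0.5))    # True inside the clamp window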
diff --git a/ethosu/vela/operation.py b/ethosu/vela/operation.py
index 252f03b7..14818870 100644
--- a/ethosu/vela/operation.py
+++ b/ethosu/vela/operation.py
@@ -152,7 +152,7 @@ input and output tensors, as well as an attribute dictionary."""
weight_idx = 1
ofm_idx = 0
- elif self.type in ("Squeeze", "Reshape", "QuantizedReshape", "ExpandDims"):
+ elif self.type in ("Squeeze", "Reshape", "QuantizedReshape", "ExpandDims", "Sigmoid", "Tanh"):
ifm_idx = 0
ofm_idx = 0
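
The operation.py change extends the input/output index table so that get_ifm_weights_biases_ofm (used by convert_to_lut8 above) treats Sigmoid and Tanh like the other single-tensor ops: input 0 is the IFM, output 0 is the OFM, and there are no weight or bias tensors. A trimmed, hypothetical condensation of that mapping; the helper name and return shape are invented for the example, and the real method returns tensors rather than indices.

SINGLE_TENSOR_OPS = ("Squeeze", "Reshape", "QuantizedReshape", "ExpandDims", "Sigmoid", "Tanh")

def io_indices(op_type):
    # Hypothetical helper: which input/output positions hold the IFM and OFM.
    if op_type in SINGLE_TENSOR_OPS:
        return {"ifm": 0, "weights": None, "bias": None, "ofm": 0}
    raise NotImplementedError("not covered by this sketch: " + op_type)

print(io_indices("Tanh"))     # {'ifm': 0, 'weights': None, 'bias': None, 'ofm': 0}
print(io_indices("Sigmoid"))  # same mapping, so the LUT pass can fetch ifm/ofm uniformly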