path: root/ethosu/vela/graph_optimiser.py
author     Diqing Zhong <diqing.zhong@arm.com>    2021-01-26 12:12:51 +0100
committer  Diqing Zhong <diqing.zhong@arm.com>    2021-01-29 16:17:40 +0100
commit     189f748e1a79ed88044efbe7137963bca830cbb5 (patch)
tree       4d3db8614574b5aedcf952941c2194e2bf7f8285 /ethosu/vela/graph_optimiser.py
parent     2c2522dd44229a03d3d778cd239478fedc19ee57 (diff)
download   ethos-u-vela-189f748e1a79ed88044efbe7137963bca830cbb5.tar.gz
MLBEDSW-3224: Support HardSwish
Change-Id: If49abc31f093f1bd3393bee86f821fd35972086f
Signed-off-by: Diqing Zhong <diqing.zhong@arm.com>
Diffstat (limited to 'ethosu/vela/graph_optimiser.py')
-rw-r--r--  ethosu/vela/graph_optimiser.py  53
1 file changed, 53 insertions, 0 deletions
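For reference, HardSwish is defined as hardswish(x) = x * relu6(x + 3) / 6. The patch enumerates every quantised input value, evaluates that function in 16-bit fixed-point arithmetic, and replaces the op with a 256-entry lookup table. A minimal float sketch of the curve being approximated (plain NumPy, not part of the patch):

    import numpy as np

    def hardswish_ref(x):
        # Float reference of the curve the LUT approximates:
        # hardswish(x) = x * relu6(x + 3) / 6
        return x * np.clip(x + 3.0, 0.0, 6.0) / 6.0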
diff --git a/ethosu/vela/graph_optimiser.py b/ethosu/vela/graph_optimiser.py
index ab4d916e..7755cc3b 100644
--- a/ethosu/vela/graph_optimiser.py
+++ b/ethosu/vela/graph_optimiser.py
@@ -823,6 +823,58 @@ def convert_mul_max_to_abs_or_lrelu(op, arch, nng):
return op
+def convert_hardswish_to_lut(op, arch, nng):
+ if op.type == Op.HardSwish:
+ ifm, ofm = op.get_ifm_ofm()
+ # Generate the LUT
+ ifm_scale = np.double(ifm.quantization.scale_f32)
+ ofm_scale = np.double(ofm.quantization.scale_f32)
+ zp_in = ifm.quantization.zero_point
+ zp_out = ofm.quantization.zero_point
+ ifm_scale_hires = (1 / 128) * ifm_scale
+ relu_multiplier = np.double(3 / 32768)
+ out_scale, out_shift = scaling.quantise_scale(ifm_scale_hires / ofm_scale)
+ relu_scale, relu_shift = scaling.quantise_scale(ifm_scale_hires / relu_multiplier)
+ # Use 16-bit scales
+ out_scale_16 = fp_math.downscale_multiplier_int32_to_int16(out_scale)
+ relu_scale_16 = fp_math.downscale_multiplier_int32_to_int16(relu_scale)
+
+ values = []
+ ix = range(256) if ifm.dtype == DataType.uint8 else range(-128, 128)
+ quantized_min = min(ix)
+ quantized_max = max(ix)
+ for x in ix:
+ input_value = x - zp_in
+ input_value_hires = input_value * 128
+ # Compute the input value on essentially the output scale, not shifted yet
+ input_value_preshift = fp_math.saturating_rounding_mul16(input_value_hires, out_scale_16)
+ # Compute the "relu-ish multiplier". This matches the code in the TensorFlow Lite Micro kernel.
+ relu_value = np.int16(input_value_hires)
+ if relu_shift < 31:
+ relu_value = fp_math.shift_left16(relu_value, 30 - relu_shift)
+
+ relu_value = fp_math.saturating_rounding_mul16(relu_value, relu_scale_16)
+
+ if relu_shift < 31:
+ relu_value = fp_math.shift_left16(relu_value, 1)
+
+ if relu_shift > 31:
+ relu_value = fp_math.rounding_divide_by_pot(relu_value, relu_shift - 31)
+
+ # relu_value is now a 16-bit fixed-point value in [-1, 1];
+ # convert it to a 16-bit fixed-point value in [0, 1]
+ relu_value = (relu_value + (1 << 15)) >> 1
+ lut_result = fp_math.saturating_mul16(relu_value, input_value_preshift)
+ shift = 31 - out_shift
+ shift = -shift if shift < 0 else 0
+ # Finally apply the output shift
+ lut_result = fp_math.rounding_divide_by_pot(lut_result, shift) + zp_out
+ lut_result = min(quantized_max, max(quantized_min, lut_result))
+ values.append(lut_result)
+ return convert_to_lut(op, values, "hardswish")
+ return op
+
+
def convert_lrelu_to_mul_max(op, arch):
# Converts LeakyRelu to Max(alpha * IFM, identity * IFM)
# (the opposite of convert_mul_max_to_abs_or_lrelu)
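The fp_math helpers used above follow gemmlowp's 16-bit fixed-point conventions. A sketch of saturating_rounding_mul16, assuming it matches gemmlowp's SaturatingRoundingDoublingHighMul for int16 (the actual implementation lives in ethosu/vela/fp_math.py):

    def saturating_rounding_mul16_sketch(a, b):
        # Q15 multiply with rounding: approximately round(a * b / 2**15),
        # saturated to the int16 range. The only overflow case is
        # -32768 * -32768, which saturates to 32767.
        if a == -32768 and b == -32768:
            return 32767
        ab = a * b
        nudge = (1 << 14) if ab >= 0 else (1 - (1 << 14))
        q = ab + nudge
        # C-style truncating division by 2**15 (round toward zero)
        return q // (1 << 15) if q >= 0 else -((-q) // (1 << 15))

scaling.quantise_scale similarly decomposes a real multiplier into a 32-bit fixed-point mantissa plus a right-shift, in the same spirit as TFLite's QuantizeMultiplier.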
@@ -1245,6 +1297,7 @@ def optimise_graph_a(nng, arch, verbose_graph=False):
convert_conv_to_fc,
convert_softmax,
optimise_strided_conv,
+ convert_hardswish_to_lut,
rewrite_fully_connected_input,
convert_batched_fc_shape,
fixup_conv2d_backprop,
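A quick way to sanity-check a generated table is to dequantise each entry and compare it against the float reference above; the worst-case error should stay within a couple of output quantisation steps. A hypothetical checker (check_hardswish_lut and its parameters are illustrative, not part of vela):

    import numpy as np

    def check_hardswish_lut(values, ifm_scale, zp_in, ofm_scale, zp_out, signed=True):
        # Dequantise every LUT entry and measure the worst-case
        # absolute error against the float hardswish reference.
        xs = np.arange(-128, 128) if signed else np.arange(256)
        x_real = (xs - zp_in) * ifm_scale
        ref = x_real * np.clip(x_real + 3.0, 0.0, 6.0) / 6.0
        got = (np.asarray(values) - zp_out) * ofm_scale
        return float(np.max(np.abs(got - ref)))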