diff options
author | Louis Verhaard <louis.verhaard@arm.com> | 2021-03-19 09:35:48 +0100 |
---|---|---|
committer | patrik.gustavsson <patrik.gustavsson@arm.com> | 2021-03-25 09:50:27 +0000 |
commit | c629129f79666eeea1d86a779c8cb245e052672f (patch) | |
tree | 89af751d45f53674f64e51c1f6e97d1ab87c1da5 /ethosu | |
parent | 024c355e51666868616b7ec560c7f87e03fcd398 (diff) | |
download | ethos-u-vela-c629129f79666eeea1d86a779c8cb245e052672f.tar.gz |
MLBEDSW-4071: Power of two handling 16-bit tanh/sigmoid
Added special handling of power-of-two input scales for
16-bit tanh/sigmoid to align with the reference.
Change-Id: I87831bcd587623d7db7100e768905355c2c98e9d
Signed-off-by: Louis Verhaard <louis.verhaard@arm.com>
Diffstat (limited to 'ethosu')
-rw-r--r-- | ethosu/vela/register_command_stream_generator.py | 22 |
1 file changed, 16 insertions, 6 deletions
```diff
diff --git a/ethosu/vela/register_command_stream_generator.py b/ethosu/vela/register_command_stream_generator.py
index a4466c92..65801b82 100644
--- a/ethosu/vela/register_command_stream_generator.py
+++ b/ethosu/vela/register_command_stream_generator.py
@@ -17,6 +17,7 @@
 # Register level (low-level) command stream generation for Ethos-U. Takes a list of NPU operations and generates
 # all the register settings. Calculates dependencies between commands and inserts wait operations. And generates a bit
 # stream suitable for interpretation by the Ethos-U processor.
+import math
 from collections import defaultdict
 from enum import Enum
 from enum import IntEnum
@@ -640,12 +641,21 @@ def generate_ofm_scaling_for_pooling(emit: CommandStreamEmitter, pool_op: NpuPoo
         rescale = 0x3000 * ifm_quant.scale_f32
         if pool_op.ifm.data_type == NpuDataType.INT16:
             # Calculate scale and shift for the output scale of 1/(3*4096)
-            shift = 0
-            max_rescale = np.iinfo(np.int16).max / 2
-            while rescale <= max_rescale and shift <= 30:
-                shift += 1
-                rescale *= 2
-            scale = int(rescale)
+            x_log2 = math.log2(ifm_quant.scale_f32)
+            rounded_log2 = int(round(x_log2))
+            is_power_of_two = abs(x_log2 - rounded_log2) < 0.001
+            shift = rounded_log2 + 12
+            if is_power_of_two and shift in (0, 1):
+                # Special handling if input scale is 1/2048 or 1/4096
+                scale = 3 << shift
+                shift = 0
+            else:
+                shift = 0
+                max_rescale = np.iinfo(np.int16).max / 2
+                while rescale <= max_rescale and shift <= 30:
+                    shift += 1
+                    rescale *= 2
+                scale = int(rescale)
         else:
             rescale_bits = len(bin(round_up_to_int(rescale))) - 2 + 1
             scale, shift = scaling.quantise_pooling_scale(kernel.height * kernel.width, rescale_bits)
```