diff options
Diffstat (limited to 'ethosu/vela/lut.py')
-rw-r--r-- | ethosu/vela/lut.py | 79 |
1 files changed, 79 insertions, 0 deletions
diff --git a/ethosu/vela/lut.py b/ethosu/vela/lut.py index c8fb7bc0..e8759d9b 100644 --- a/ethosu/vela/lut.py +++ b/ethosu/vela/lut.py @@ -20,6 +20,7 @@ import uuid import numpy as np +from . import fp_math from . import numeric_util from .data_type import DataType from .debug_database import DebugDatabase @@ -27,6 +28,7 @@ from .high_level_command_stream import DMA from .high_level_command_stream import NpuStripe from .numeric_util import round_away_zero from .operation import Op +from .scaling import quantise_scale from .tensor import create_const_tensor from .tensor import create_equivalence_id from .tensor import QuantizationParameters @@ -242,3 +244,80 @@ def create_lut_int16_op(op, lut_fn, fn_name): lut[i] = slope + base return convert_to_lut(op, lut, fn_name) + + +def create_lut_rsqrt_int8_op(op): + # Turn off black formatting for the LUT tables to keep them compact + # fmt: off + + # RSQRT_LUT has been generated by printing the output from the reference. + # These values are always the same but for some unknown reason it is not being + # implemented as a LUT in the reference. + # So based on the input range (-128, 127) the reference produces the following output: + RSQRT_LUT = [ + 0x00000000, 0x00100000, 0x000b504e, 0x00093cd4, 0x00080000, 0x000727c9, 0x0006882f, 0x00060c24, + 0x0005a827, 0x00055555, 0x00050f45, 0x0004d2fe, 0x00049e6a, 0x00047007, 0x000446b4, 0x00042195, + 0x00040000, 0x0003e16d, 0x0003c570, 0x0003abb0, 0x000393e5, 0x00037dd2, 0x00036945, 0x00035613, + 0x00034418, 0x00033333, 0x0003234b, 0x00031447, 0x00030612, 0x0002f89c, 0x0002ebd3, 0x0002dfaa, + 0x0002d414, 0x0002c906, 0x0002be75, 0x0002b45a, 0x0002aaab, 0x0002a161, 0x00029875, 0x00028fe3, + 0x000287a2, 0x00027fb0, 0x00027807, 0x000270a2, 0x0002697f, 0x00026298, 0x00025bec, 0x00025577, + 0x00024f35, 0x00024925, 0x00024343, 0x00023d8e, 0x00023803, 0x000232a1, 0x00022d65, 0x0002284e, + 0x0002235a, 0x00021e87, 0x000219d5, 0x00021541, 0x000210cb, 0x00020c70, 0x00020831, 0x0002040c, + 0x00020000, 0x0001fc0c, 0x0001f82f, 0x0001f468, 0x0001f0b7, 0x0001ed1a, 0x0001e991, 0x0001e61b, + 0x0001e2b8, 0x0001df67, 0x0001dc26, 0x0001d8f7, 0x0001d5d8, 0x0001d2c8, 0x0001cfc8, 0x0001ccd6, + 0x0001c9f2, 0x0001c71c, 0x0001c454, 0x0001c198, 0x0001bee9, 0x0001bc46, 0x0001b9af, 0x0001b723, + 0x0001b4a3, 0x0001b22d, 0x0001afc2, 0x0001ad61, 0x0001ab0a, 0x0001a8bc, 0x0001a678, 0x0001a43e, + 0x0001a20c, 0x00019fe3, 0x00019dc2, 0x00019baa, 0x0001999a, 0x00019791, 0x00019590, 0x00019397, + 0x000191a5, 0x00018fbb, 0x00018dd7, 0x00018bfa, 0x00018a23, 0x00018853, 0x0001868a, 0x000184c6, + 0x00018309, 0x00018152, 0x00017fa0, 0x00017df4, 0x00017c4e, 0x00017aad, 0x00017911, 0x0001777b, + 0x000175e9, 0x0001745d, 0x000172d6, 0x00017153, 0x00016fd5, 0x00016e5b, 0x00016ce7, 0x00016b76, + 0x00016a0a, 0x000168a2, 0x0001673e, 0x000165de, 0x00016483, 0x0001632b, 0x000161d7, 0x00016087, + 0x00015f3b, 0x00015df2, 0x00015cad, 0x00015b6b, 0x00015a2d, 0x000158f2, 0x000157bb, 0x00015686, + 0x00015555, 0x00015427, 0x000152fd, 0x000151d5, 0x000150b0, 0x00014f8f, 0x00014e70, 0x00014d54, + 0x00014c3b, 0x00014b24, 0x00014a11, 0x00014900, 0x000147f1, 0x000146e5, 0x000145dc, 0x000144d5, + 0x000143d1, 0x000142cf, 0x000141d0, 0x000140d3, 0x00013fd8, 0x00013ee0, 0x00013de9, 0x00013cf5, + 0x00013c03, 0x00013b14, 0x00013a26, 0x0001393b, 0x00013851, 0x0001376a, 0x00013684, 0x000135a1, + 0x000134bf, 0x000133e0, 0x00013302, 0x00013226, 0x0001314c, 0x00013074, 0x00012f9e, 0x00012ec9, + 0x00012df6, 0x00012d25, 0x00012c55, 0x00012b87, 0x00012abb, 0x000129f1, 0x00012928, 0x00012860, + 0x0001279a, 0x000126d6, 0x00012613, 0x00012552, 0x00012492, 0x000123d4, 0x00012317, 0x0001225c, + 0x000121a2, 0x000120e9, 0x00012032, 0x00011f7c, 0x00011ec7, 0x00011e14, 0x00011d62, 0x00011cb1, + 0x00011c02, 0x00011b54, 0x00011aa7, 0x000119fb, 0x00011950, 0x000118a7, 0x000117ff, 0x00011758, + 0x000116b3, 0x0001160e, 0x0001156b, 0x000114c8, 0x00011427, 0x00011387, 0x000112e8, 0x0001124a, + 0x000111ad, 0x00011111, 0x00011076, 0x00010fdc, 0x00010f44, 0x00010eac, 0x00010e15, 0x00010d7f, + 0x00010cea, 0x00010c56, 0x00010bc4, 0x00010b32, 0x00010aa0, 0x00010a10, 0x00010981, 0x000108f3, + 0x00010865, 0x000107d9, 0x0001074d, 0x000106c2, 0x00010638, 0x000105af, 0x00010527, 0x0001049f, + 0x00010419, 0x00010393, 0x0001030e, 0x0001028a, 0x00010206, 0x00010183, 0x00010102, 0x00010080 + ] + + # Transform the above LUT so it gets the correct quantization (following the reference) + ifm_scale = op.ifm.quantization.scale_f32 + ofm_scale = op.ofm.quantization.scale_f32 + zp_in = op.ifm.quantization.zero_point + zp_out = op.ofm.quantization.zero_point + + # Make sure zero point is valid + assert (-128 - zp_in) >= 0, f"Rsqrt is only defined for positive values, zeropoint is {zp_in}" + + scale = np.double(1) / np.double(np.sqrt(ifm_scale) * ofm_scale) + output_multiplier, output_shift = quantise_scale(scale) + + # Shift modification (value used in reference but Vela has opposite sign) + kshift = -20 + + ix = range(-128, 128) + quantized_min = min(ix) + quantized_max = max(ix) + + # Any value close to 0 (zero index in LUT) is mapped to the max output value + values = [quantized_max] + for x in ix: + if x == -128: + # Value already populated above + continue + x_real = x - zp_in + val = RSQRT_LUT[x_real] + val = fp_math.multiply_by_quantized_multiplier(val, output_multiplier, output_shift - kshift) + zp_out + lut_result = min(quantized_max, max(quantized_min, val)) + values.append(lut_result) + + return convert_to_lut(op, values, "rsqrt") |