author      Tim Hall <tim.hall@arm.com>     2023-01-26 17:27:00 +0000
committer   Tim Hall <tim.hall@arm.com>     2023-02-09 12:19:46 +0000
commit      1c5904891b51ff8fa90c7fafbd067b39655d1505 (patch)
tree        5d1d432424b315103931b67f5fcc601b8d99a695
parent      090f18a55fcd4f7ae8ca1ae633418d05c62cbb6e (diff)
download    ethos-u-vela-1c5904891b51ff8fa90c7fafbd067b39655d1505.tar.gz
MLBEDSW-7281: create_const_tensor OverflowError on Microsoft Windows
- Additional overflow checks are performed when running under
Microsoft Windows compared to Linux. These checks happen when
converting from Python int to NumPy int/uint
- The problem is that the LUT activation values are of int32 type,
but they are defined as Python ints. Converting these to
numpy.int32 can result in an overflow error (see the sketch below)
- The fix is to convert these values to uint32 while keeping the
operator's IFM tensor type the same (as this allows them to be
interpreted correctly)
- Fixing this highlighted another problem: convert_to_lut
always calls create_lut_tensor() with an int8 datatype, whereas it
should be using the IFM datatype
Change-Id: I781a9d850f654267aa4a67754438607c4bb95685
Signed-off-by: Tim Hall <tim.hall@arm.com>
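
For context, the following is a minimal stand-alone sketch of the platform-dependent conversion behaviour described above. It is not part of the change; the example value and the exact conditions under which the int32 conversion raises are assumptions, but the uint32 path always fits and its bit pattern can still be reinterpreted as the signed type:

    import numpy as np

    # A LUT activation value generated as a plain Python int; its bit pattern may
    # represent a negative int32 value (e.g. 0xFFFFFFFF is -1 when read as int32)
    lut_value = 0xFFFFFFFF

    try:
        np.array([lut_value], dtype=np.int32)    # can raise OverflowError (observed on Windows)
    except OverflowError as error:
        print("int32 conversion failed:", error)

    packed = np.array([lut_value], dtype=np.uint32)  # always representable as uint32
    print(packed.view(np.int32))                     # -> [-1], the intended signed value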
-rw-r--r--   ethosu/vela/graph_optimiser_util.py   13
-rw-r--r--   ethosu/vela/softmax.py                 10
-rw-r--r--   ethosu/vela/test/test_lut.py            2
3 files changed, 18 insertions, 7 deletions
diff --git a/ethosu/vela/graph_optimiser_util.py b/ethosu/vela/graph_optimiser_util.py
index 2822feb8..24a55836 100644
--- a/ethosu/vela/graph_optimiser_util.py
+++ b/ethosu/vela/graph_optimiser_util.py
@@ -417,7 +417,8 @@ def convert_depthwise_to_conv(op, arch, nng):
 
 def convert_to_lut(op, lut_values, lut_name):
     # Rewrite the operation by Add with scalar 0 + LUT activation
-    ifm = op.inputs[0]
+    ifm = op.ifm
+    ofm = op.ofm
     if ifm is None:
         return op
     assert ifm.dtype.size_in_bytes() == 1
@@ -429,7 +430,7 @@ def convert_to_lut(op, lut_values, lut_name):
     quantization = QuantizationParameters(0.0, 255.0)
     quantization.scale_f32 = ifm.quantization.scale_f32
     quantization.zero_point = 0
-    tens = create_const_tensor(op.inputs[0].name + "_scalar0", [], ifm.dtype, [0], quantization=quantization)
+    tens = create_const_tensor(ifm.name + "_scalar0", [], ifm.dtype, [0], quantization=quantization)
     op.add_input_tensor(tens)
     op.ifm_shapes.append(Shape4D(tens.shape))  # TODO no shape?
 
@@ -437,7 +438,13 @@ def convert_to_lut(op, lut_values, lut_name):
     # so even if the OFM has a different scale than the IFM, the generated OFM scale instructions
     # should be the same as the IFM
     op.forced_output_quantization = ifm.quantization
-    lut_tensor = lut.create_lut_tensor(op.name + "_values", lut_values, DataType.int8)
+
+    # the lut tensor datatype needs to match both; the ofm datatype, because these are the values output; and the
+    # datatype used to generate the lut values (which is probably the ifm datatype), because we want to avoid any
+    # potential overflow errors in create_lut_tensor() caused by converting Python int (which could represent a uint)
+    # to NumPy int. this can be guaranteed by checking that the ifm and ofm datatypes are the same
+    assert ifm.dtype == ofm.dtype
+    lut_tensor = lut.create_lut_tensor(op.name + "_values", lut_values, ofm.dtype)
     op.set_activation_lut(lut_tensor)
     op.set_ifm_ofm_shapes()
     DebugDatabase.add_optimised(op, op)
diff --git a/ethosu/vela/softmax.py b/ethosu/vela/softmax.py
index 575e1e66..5a06c1bd 100644
--- a/ethosu/vela/softmax.py
+++ b/ethosu/vela/softmax.py
@@ -270,7 +270,7 @@ class SoftMax:
             ifm2_shape=ifm_max_shape,
         )
         sub_op.set_activation_lut(
-            create_const_tensor(f"{sub_op.name}_exp_lut", [1, 1, 1, 256], DataType.int32, exp_lut, TensorPurpose.LUT)
+            create_const_tensor(f"{sub_op.name}_exp_lut", [1, 1, 1, 256], DataType.uint32, exp_lut, TensorPurpose.LUT)
         )
         ifm_exp = add_op_get_ofm(sub_op)
         # Note: activation.min/max are non-quantized values
@@ -505,8 +505,10 @@ class SoftMax:
             f"{name}_const", [1, 1, 1, 1], DataType.int32, [32767], quantization=no_scale_quant
         )
         add_op = create_add(name, mul2_ofm, const_add, mul2_ofm.quantization.clone(), dtype=DataType.int16)
+        # lut activation values are int32 type however they are defined as Python ints. If these are converted to
+        # numpy.int32 it could result in an overflow error. Therefore, they are forced to uint32 to avoid this
         add_op.set_activation_lut(
-            create_const_tensor(f"{name}_exp_lut", [1, 1, 1, 512], DataType.int32, self.EXP_LUT, TensorPurpose.LUT)
+            create_const_tensor(f"{name}_exp_lut", [1, 1, 1, 512], DataType.uint32, self.EXP_LUT, TensorPurpose.LUT)
         )
         ifm_exp = add_op_get_ofm(add_op)
 
@@ -550,11 +552,13 @@ class SoftMax:
             f"{name}_const", [1, 1, 1, 1], DataType.int32, [32768], quantization=no_scale_quant
         )
         sub11_op = create_sub(name, shifted_sum_minus_one_16, sub11_const, no_scale_quant, dtype=DataType.int16)
+        # lut activation values are int32 type however they are defined as Python ints. If these are converted to
+        # numpy.int32 it could result in an overflow error. Therefore, they are forced to uint32 to avoid this
         sub11_op.set_activation_lut(
             create_const_tensor(
                 f"{name}_one_over_one_plus_x_lut",
                 [1, 1, 1, 512],
-                DataType.int32,
+                DataType.uint32,
                 self.ONE_OVER_ONE_PLUS_X_LUT,
                 TensorPurpose.LUT,
             )
         )
diff --git a/ethosu/vela/test/test_lut.py b/ethosu/vela/test/test_lut.py
index 712be7a2..58e72bbf 100644
--- a/ethosu/vela/test/test_lut.py
+++ b/ethosu/vela/test/test_lut.py
@@ -35,7 +35,7 @@ from ethosu.vela.test import testutil
 def set_256_lut(op, key, arch):
     random.seed(key)
     values = random.choices(range(256), k=256)
-    lut_tensor = create_const_tensor(op.name + "_lut", [1, 1, 1, 256], DataType.int8, values, TensorPurpose.LUT)
+    lut_tensor = create_const_tensor(op.name + "_lut", [1, 1, 1, 256], DataType.uint8, values, TensorPurpose.LUT)
     scratch_lut_tensor = lut_tensor.clone_into_fast_storage(arch)
     op.set_activation_lut(scratch_lut_tensor)
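
The comment added to convert_to_lut() above relies on the same reinterpretation argument for the 8-bit LUT that test_lut.py now exercises with uint8. The snippet below is a stand-alone illustration of that argument, not Vela code; the variable names are made up for the example:

    import numpy as np

    # 256-entry LUT for an int8 IFM, generated as plain Python ints where negative
    # values are carried as their unsigned bit patterns (0x80..0xFF for -128..-1)
    lut_values = [(i - 128) & 0xFF for i in range(256)]

    packed = np.array(lut_values, dtype=np.uint8)  # never trips an overflow check
    recovered = packed.view(np.int8)               # reinterpret with the IFM/OFM dtype

    assert list(recovered) == [i - 128 for i in range(256)]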