author     Tim Hall <tim.hall@arm.com>    2023-01-26 17:27:00 +0000
committer  Tim Hall <tim.hall@arm.com>    2023-02-09 12:19:46 +0000
commit     1c5904891b51ff8fa90c7fafbd067b39655d1505 (patch)
tree       5d1d432424b315103931b67f5fcc601b8d99a695
parent     090f18a55fcd4f7ae8ca1ae633418d05c62cbb6e (diff)
MLBEDSW-7281: create_const_tensor OverflowError on Microsoft Windows
- Additional overflow checks are performed when running under Microsoft Windows compared to Linux. These checks happen when converting from a Python int to a NumPy int/uint
- The problem is that the LUT activation values are of int32 type, but they are defined as Python ints. Converting these to numpy.int32 can result in an OverflowError
- The fix is to convert these values to uint32 while keeping the operator's IFM tensor type the same (which allows them to be interpreted correctly)
- Fixing this highlighted another problem: convert_to_lut() always calls create_lut_tensor() with an int8 datatype, whereas it should use the IFM datatype

Change-Id: I781a9d850f654267aa4a67754438607c4bb95685
Signed-off-by: Tim Hall <tim.hall@arm.com>
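A minimal sketch of the failure mode described above, using a hypothetical 32-bit LUT entry value (the exact behaviour depends on the platform and NumPy version):

    import numpy as np

    # Hypothetical LUT entry: a bit pattern that fits uint32 but is out of range
    # for int32 when supplied as a Python int (0xFFFF8000 == 4294934528)
    lut_value = 0xFFFF8000

    try:
        # On Microsoft Windows (where C long is 32-bit) NumPy raises OverflowError
        # here; on Linux it may instead wrap silently or emit a DeprecationWarning
        np.array([lut_value], dtype=np.int32)
    except OverflowError as exc:
        print("int32 conversion failed:", exc)

    # Storing the same bit pattern as uint32 always succeeds; the operator's
    # IFM/OFM datatype still determines how the bits are interpreted downstream
    lut = np.array([lut_value], dtype=np.uint32)
    print(lut, lut.view(np.int32))  # same 32 bits, unsigned and signed views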
-rw-r--r--  ethosu/vela/graph_optimiser_util.py  13
-rw-r--r--  ethosu/vela/softmax.py               10
-rw-r--r--  ethosu/vela/test/test_lut.py          2
3 files changed, 18 insertions, 7 deletions
diff --git a/ethosu/vela/graph_optimiser_util.py b/ethosu/vela/graph_optimiser_util.py
index 2822feb8..24a55836 100644
--- a/ethosu/vela/graph_optimiser_util.py
+++ b/ethosu/vela/graph_optimiser_util.py
@@ -417,7 +417,8 @@ def convert_depthwise_to_conv(op, arch, nng):
def convert_to_lut(op, lut_values, lut_name):
# Rewrite the operation by Add with scalar 0 + LUT activation
- ifm = op.inputs[0]
+ ifm = op.ifm
+ ofm = op.ofm
if ifm is None:
return op
assert ifm.dtype.size_in_bytes() == 1
@@ -429,7 +430,7 @@ def convert_to_lut(op, lut_values, lut_name):
quantization = QuantizationParameters(0.0, 255.0)
quantization.scale_f32 = ifm.quantization.scale_f32
quantization.zero_point = 0
- tens = create_const_tensor(op.inputs[0].name + "_scalar0", [], ifm.dtype, [0], quantization=quantization)
+ tens = create_const_tensor(ifm.name + "_scalar0", [], ifm.dtype, [0], quantization=quantization)
op.add_input_tensor(tens)
op.ifm_shapes.append(Shape4D(tens.shape)) # TODO no shape?
@@ -437,7 +438,13 @@ def convert_to_lut(op, lut_values, lut_name):
# so even if the OFM has a different scale than the IFM, the generated OFM scale instructions
# should be the same as the IFM
op.forced_output_quantization = ifm.quantization
- lut_tensor = lut.create_lut_tensor(op.name + "_values", lut_values, DataType.int8)
+
+ # The lut tensor datatype needs to match both: the ofm datatype, because these are the values that are output, and
+ # the datatype used to generate the lut values (which is probably the ifm datatype), because we want to avoid any
+ # potential overflow errors in create_lut_tensor() caused by converting a Python int (which could represent a uint)
+ # to a NumPy int. This can be guaranteed by checking that the ifm and ofm datatypes are the same
+ assert ifm.dtype == ofm.dtype
+ lut_tensor = lut.create_lut_tensor(op.name + "_values", lut_values, ofm.dtype)
op.set_activation_lut(lut_tensor)
op.set_ifm_ofm_shapes()
DebugDatabase.add_optimised(op, op)
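As a purely illustrative stand-in (not Vela's implementation), the reason the table datatype has to follow the IFM/OFM datatype can be sketched with a NumPy model of an 8-bit LUT activation, where the IFM byte's unsigned bit pattern selects the table entry:

    import numpy as np

    def apply_lut(ifm, table):
        # Software model of an 8-bit LUT activation: one table entry per possible
        # IFM byte, selected by the byte's unsigned bit pattern
        assert ifm.dtype.itemsize == 1 and table.size == 256
        return table[ifm.view(np.uint8)]

    # Identity LUT generated for an int8 IFM: entry i holds the int8 value whose
    # bit pattern is i, so it only reads back correctly if the IFM/OFM are int8 too
    table = np.arange(256, dtype=np.uint8).view(np.int8)
    ifm = np.array([-128, -1, 0, 127], dtype=np.int8)
    print(apply_lut(ifm, table))  # [-128   -1    0  127]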
diff --git a/ethosu/vela/softmax.py b/ethosu/vela/softmax.py
index 575e1e66..5a06c1bd 100644
--- a/ethosu/vela/softmax.py
+++ b/ethosu/vela/softmax.py
@@ -270,7 +270,7 @@ class SoftMax:
ifm2_shape=ifm_max_shape,
)
sub_op.set_activation_lut(
- create_const_tensor(f"{sub_op.name}_exp_lut", [1, 1, 1, 256], DataType.int32, exp_lut, TensorPurpose.LUT)
+ create_const_tensor(f"{sub_op.name}_exp_lut", [1, 1, 1, 256], DataType.uint32, exp_lut, TensorPurpose.LUT)
)
ifm_exp = add_op_get_ofm(sub_op)
# Note: activation.min/max are non-quantized values
@@ -505,8 +505,10 @@ class SoftMax:
f"{name}_const", [1, 1, 1, 1], DataType.int32, [32767], quantization=no_scale_quant
)
add_op = create_add(name, mul2_ofm, const_add, mul2_ofm.quantization.clone(), dtype=DataType.int16)
+ # lut activation values are of int32 type, however they are defined as Python ints. If these are converted to
+ # numpy.int32 it could result in an overflow error. Therefore, they are forced to uint32 to avoid this
add_op.set_activation_lut(
- create_const_tensor(f"{name}_exp_lut", [1, 1, 1, 512], DataType.int32, self.EXP_LUT, TensorPurpose.LUT)
+ create_const_tensor(f"{name}_exp_lut", [1, 1, 1, 512], DataType.uint32, self.EXP_LUT, TensorPurpose.LUT)
)
ifm_exp = add_op_get_ofm(add_op)
@@ -550,11 +552,13 @@ class SoftMax:
f"{name}_const", [1, 1, 1, 1], DataType.int32, [32768], quantization=no_scale_quant
)
sub11_op = create_sub(name, shifted_sum_minus_one_16, sub11_const, no_scale_quant, dtype=DataType.int16)
+ # lut activation values are of int32 type, however they are defined as Python ints. If these are converted to
+ # numpy.int32 it could result in an overflow error. Therefore, they are forced to uint32 to avoid this
sub11_op.set_activation_lut(
create_const_tensor(
f"{name}_one_over_one_plus_x_lut",
[1, 1, 1, 512],
- DataType.int32,
+ DataType.uint32,
self.ONE_OVER_ONE_PLUS_X_LUT,
TensorPurpose.LUT,
)
diff --git a/ethosu/vela/test/test_lut.py b/ethosu/vela/test/test_lut.py
index 712be7a2..58e72bbf 100644
--- a/ethosu/vela/test/test_lut.py
+++ b/ethosu/vela/test/test_lut.py
@@ -35,7 +35,7 @@ from ethosu.vela.test import testutil
def set_256_lut(op, key, arch):
random.seed(key)
values = random.choices(range(256), k=256)
- lut_tensor = create_const_tensor(op.name + "_lut", [1, 1, 1, 256], DataType.int8, values, TensorPurpose.LUT)
+ lut_tensor = create_const_tensor(op.name + "_lut", [1, 1, 1, 256], DataType.uint8, values, TensorPurpose.LUT)
scratch_lut_tensor = lut_tensor.clone_into_fast_storage(arch)
op.set_activation_lut(scratch_lut_tensor)
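The test fix follows the same reasoning: random.choices(range(256), k=256) produces values up to 255, which fit uint8 but not int8. A quick sketch of the difference (for the int8 case, behaviour depends on the NumPy version):

    import random
    import numpy as np

    random.seed(0)
    values = random.choices(range(256), k=256)  # 0..255, as in set_256_lut()

    try:
        # Values above 127 are out of range for int8; depending on the NumPy
        # version this raises OverflowError or silently wraps with a warning
        np.array(values, dtype=np.int8)
    except OverflowError as exc:
        print("int8 conversion failed:", exc)

    print(np.array(values, dtype=np.uint8).max())  # uint8 covers the full 0..255 range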