Diffstat (limited to 'ethosu')
-rw-r--r--  ethosu/vela/graph_optimiser_util.py  13
-rw-r--r--  ethosu/vela/softmax.py               10
-rw-r--r--  ethosu/vela/test/test_lut.py          2
3 files changed, 18 insertions, 7 deletions
diff --git a/ethosu/vela/graph_optimiser_util.py b/ethosu/vela/graph_optimiser_util.py
index 2822feb8..24a55836 100644
--- a/ethosu/vela/graph_optimiser_util.py
+++ b/ethosu/vela/graph_optimiser_util.py
@@ -417,7 +417,8 @@ def convert_depthwise_to_conv(op, arch, nng):
 def convert_to_lut(op, lut_values, lut_name):
     # Rewrite the operation by Add with scalar 0 + LUT activation
-    ifm = op.inputs[0]
+    ifm = op.ifm
+    ofm = op.ofm
     if ifm is None:
         return op
     assert ifm.dtype.size_in_bytes() == 1
@@ -429,7 +430,7 @@ def convert_to_lut(op, lut_values, lut_name):
     quantization = QuantizationParameters(0.0, 255.0)
     quantization.scale_f32 = ifm.quantization.scale_f32
     quantization.zero_point = 0
-    tens = create_const_tensor(op.inputs[0].name + "_scalar0", [], ifm.dtype, [0], quantization=quantization)
+    tens = create_const_tensor(ifm.name + "_scalar0", [], ifm.dtype, [0], quantization=quantization)
     op.add_input_tensor(tens)
     op.ifm_shapes.append(Shape4D(tens.shape))  # TODO no shape?
@@ -437,7 +438,13 @@ def convert_to_lut(op, lut_values, lut_name):
     # so even if the OFM has a different scale than the IFM, the generated OFM scale instructions
     # should be the same as the IFM
     op.forced_output_quantization = ifm.quantization
-    lut_tensor = lut.create_lut_tensor(op.name + "_values", lut_values, DataType.int8)
+
+    # The LUT tensor datatype needs to match both: the OFM datatype, because these are the values output, and the
+    # datatype used to generate the LUT values (which is probably the IFM datatype), because we want to avoid any
+    # potential overflow errors in create_lut_tensor() caused by converting a Python int (which could represent a
+    # uint) to a NumPy int. This can be guaranteed by checking that the IFM and OFM datatypes are the same.
+    assert ifm.dtype == ofm.dtype
+    lut_tensor = lut.create_lut_tensor(op.name + "_values", lut_values, ofm.dtype)
     op.set_activation_lut(lut_tensor)
     op.set_ifm_ofm_shapes()
     DebugDatabase.add_optimised(op, op)
diff --git a/ethosu/vela/softmax.py b/ethosu/vela/softmax.py
index 575e1e66..5a06c1bd 100644
--- a/ethosu/vela/softmax.py
+++ b/ethosu/vela/softmax.py
@@ -270,7 +270,7 @@ class SoftMax:
             ifm2_shape=ifm_max_shape,
         )
         sub_op.set_activation_lut(
-            create_const_tensor(f"{sub_op.name}_exp_lut", [1, 1, 1, 256], DataType.int32, exp_lut, TensorPurpose.LUT)
+            create_const_tensor(f"{sub_op.name}_exp_lut", [1, 1, 1, 256], DataType.uint32, exp_lut, TensorPurpose.LUT)
         )
         ifm_exp = add_op_get_ofm(sub_op)
         # Note: activation.min/max are non-quantized values
@@ -505,8 +505,10 @@ class SoftMax:
f"{name}_const", [1, 1, 1, 1], DataType.int32, [32767], quantization=no_scale_quant
)
add_op = create_add(name, mul2_ofm, const_add, mul2_ofm.quantization.clone(), dtype=DataType.int16)
+ # lut activation values are int32 type however they are defined as Python ints. If these are converted to
+ # numpy.int32 it could result in an overflow error. Therefore, they are forced to uint32 to avoid this
add_op.set_activation_lut(
- create_const_tensor(f"{name}_exp_lut", [1, 1, 1, 512], DataType.int32, self.EXP_LUT, TensorPurpose.LUT)
+ create_const_tensor(f"{name}_exp_lut", [1, 1, 1, 512], DataType.uint32, self.EXP_LUT, TensorPurpose.LUT)
)
ifm_exp = add_op_get_ofm(add_op)
@@ -550,11 +552,13 @@ class SoftMax:
f"{name}_const", [1, 1, 1, 1], DataType.int32, [32768], quantization=no_scale_quant
)
sub11_op = create_sub(name, shifted_sum_minus_one_16, sub11_const, no_scale_quant, dtype=DataType.int16)
+ # lut activation values are int32 type however they are defined as Python ints. If these are converted to
+ # numpy.int32 it could result in an overflow error. Therefore, they are forced to uint32 to avoid this
sub11_op.set_activation_lut(
create_const_tensor(
f"{name}_one_over_one_plus_x_lut",
[1, 1, 1, 512],
- DataType.int32,
+ DataType.uint32,
self.ONE_OVER_ONE_PLUS_X_LUT,
TensorPurpose.LUT,
)
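
The uint32 changes above work around the Python-int-to-NumPy conversion problem described in the added comments. A minimal sketch of the failure mode, assuming a single illustrative LUT entry (the value below is hypothetical, not taken from EXP_LUT or ONE_OVER_ONE_PLUS_X_LUT):

    import numpy as np

    # Hypothetical LUT entry: representable as uint32 but out of range for int32.
    value = 0xFFFF0000  # 4294901760 as a plain Python int

    # uint32 keeps the value unchanged.
    print(np.array([value], dtype=np.uint32))  # [4294901760]

    # int32 cannot hold it: depending on the NumPy version this either raises
    # OverflowError or wraps around (with a deprecation warning in newer 1.x releases).
    try:
        print(np.array([value], dtype=np.int32))
    except OverflowError as exc:
        print("int32 conversion rejected:", exc)
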
diff --git a/ethosu/vela/test/test_lut.py b/ethosu/vela/test/test_lut.py
index 712be7a2..58e72bbf 100644
--- a/ethosu/vela/test/test_lut.py
+++ b/ethosu/vela/test/test_lut.py
@@ -35,7 +35,7 @@ from ethosu.vela.test import testutil
 def set_256_lut(op, key, arch):
     random.seed(key)
     values = random.choices(range(256), k=256)
-    lut_tensor = create_const_tensor(op.name + "_lut", [1, 1, 1, 256], DataType.int8, values, TensorPurpose.LUT)
+    lut_tensor = create_const_tensor(op.name + "_lut", [1, 1, 1, 256], DataType.uint8, values, TensorPurpose.LUT)
     scratch_lut_tensor = lut_tensor.clone_into_fast_storage(arch)
     op.set_activation_lut(scratch_lut_tensor)