From 608588390a90e6e337a592b62cf811d1ea8ea0fd Mon Sep 17 00:00:00 2001 From: Eric Kunze Date: Mon, 22 Jan 2024 16:54:29 -0800 Subject: Handle CAST from fp-type to narrow integer types Be explicit that the value is converted to int32 and then clamped to the output type range. Also remove use of apply_clip, replacing with either apply_clip_s or apply_clip_u Signed-off-by: Eric Kunze Change-Id: I0efc7f4d653c1bef13c9989cfe2647208dad9617 --- chapters/introduction.adoc | 2 +- chapters/pseudocode.adoc | 15 +++++++++++++++ pseudocode/operators/CAST.tosac | 4 ++-- pseudocode/operators/CLAMP.tosac | 2 +- pseudocode/operators/RESCALE.tosac | 4 ++-- 5 files changed, 21 insertions(+), 6 deletions(-) diff --git a/chapters/introduction.adoc b/chapters/introduction.adoc index 8b5be21..ae5c7b1 100644 --- a/chapters/introduction.adoc +++ b/chapters/introduction.adoc @@ -739,7 +739,7 @@ void generate_lookup_table(int16_t *table, int32_t (*reference)(int32_t)) { for (int i = -256; i <= 256; i++) { int32_t value = (*reference)(i); - table[i + 256] = static_cast(apply_clip(value, -32768, +32767)); + table[i + 256] = static_cast(apply_clip_s(value, -32768, +32767)); } } ---- diff --git a/chapters/pseudocode.adoc b/chapters/pseudocode.adoc index 0de7f39..9e3b7bd 100644 --- a/chapters/pseudocode.adoc +++ b/chapters/pseudocode.adoc @@ -262,6 +262,13 @@ in_t apply_clip_s(in_t value, in_t min_val, in_t max_val) { return value; } +in_t apply_clip_u(in_t value, in_t min_val, in_t max_val) { + REQUIRE(zero_extend(min_val) <= zero_extend(max_val)); + value = apply_max_u(value, min_val); + value = apply_min_u(value, max_val); + return value; +} + in_t apply_exp(in_t input) { return e to the power input } @@ -296,6 +303,10 @@ in_t apply_max_s(in_t a, in_t b) { if (sign_extend(a) >= sign_extend(b)) return a; else return b; } +in_t apply_max_u(in_t a, in_t b) { + if (zero_extend(a) >= zero_extend(b)) return a; else return b; +} + in_t apply_min_s(in_t a, in_t b) { if (is_floating_point(in_t)) { if (isNaN(a) || isNaN(b)) { @@ -307,6 +318,10 @@ in_t apply_min_s(in_t a, in_t b) { if (sign_extend(a) < sign_extend(b)) return a; else return b; } +in_t apply_min_u(in_t a, in_t b) { + if (zero_extend(a) < zero_extend(b)) return a; else return b; +} + in_t apply_mul_s(in_t a, in_t b) { if (is_floating_point(in_t)) return a * b; int64_t c = sign_extend(a) * sign_extend(b); diff --git a/pseudocode/operators/CAST.tosac b/pseudocode/operators/CAST.tosac index 8d816ca..fac73e3 100644 --- a/pseudocode/operators/CAST.tosac +++ b/pseudocode/operators/CAST.tosac @@ -17,11 +17,11 @@ for_each(index in shape) { } else if (out_t == fp16_t || out_t == bf16_t || out_t == fp32_t) { out = round_to_nearest_float(in); } else if (in_t == fp16_t || in_t == bf16_t || in_t == fp32_t) { - out = apply_clip(round_to_nearest_int(in), minimum, maximum); + out = truncate(apply_clip_s(round_to_nearest_int(in), minimum, maximum)); } else if (sizeof(out_t) >= sizeof(in_t)) { out = sign_extend(in); } else { - out = truncate(in); + out = truncate(in); } tensor_write(output, shape, index, out); } diff --git a/pseudocode/operators/CLAMP.tosac b/pseudocode/operators/CLAMP.tosac index 7a26d50..cfac632 100644 --- a/pseudocode/operators/CLAMP.tosac +++ b/pseudocode/operators/CLAMP.tosac @@ -10,6 +10,6 @@ ERROR_IF(max_val < min_val); for_each(index in shape) { in_out_t value = tensor_read(input, shape, index); - value = apply_clip(value, min_val, max_val); + value = apply_clip_s(value, min_val, max_val); tensor_write(output, shape, index, value); } diff --git a/pseudocode/operators/RESCALE.tosac b/pseudocode/operators/RESCALE.tosac index c29bddd..3ebb8d6 100644 --- a/pseudocode/operators/RESCALE.tosac +++ b/pseudocode/operators/RESCALE.tosac @@ -43,14 +43,14 @@ for_each(index in shape) { if (output_unsigned) { int32_t extended_out_zp = zero_extend(output_zp); result = apply_add_s(result, extended_out_zp); - out_t out = static_cast(apply_clip(result, + out_t out = static_cast(apply_clip_u(result, minimum_u, maximum_u)); } else { int32_t extended_out_zp = sign_extend(output_zp); result = apply_add_s(result, extended_out_zp); - out_t out = static_cast(apply_clip(result, + out_t out = static_cast(apply_clip_s(result, minimum_s, maximum_s)); } -- cgit v1.2.1