aboutsummaryrefslogtreecommitdiff
path: root/chapters
diff options
context:
space:
mode:
authorEric Kunze <eric.kunze@arm.com>2024-01-22 16:54:29 -0800
committerEric Kunze <eric.kunze@arm.com>2024-01-25 13:19:44 -0800
commit608588390a90e6e337a592b62cf811d1ea8ea0fd (patch)
treece73d71540cf6e58fb94b6a12b90d5c8c04cdd4c /chapters
parent0808d63e4db076fbbd02e806272ae4ccae6ed72c (diff)
downloadspecification-608588390a90e6e337a592b62cf811d1ea8ea0fd.tar.gz
Handle CAST from fp-type to narrow integer types
Be explicit that the value is converted to int32 and then clamped to the output type range. Also remove use of apply_clip, replacing with either apply_clip_s or apply_clip_u. Signed-off-by: Eric Kunze <eric.kunze@arm.com> Change-Id: I0efc7f4d653c1bef13c9989cfe2647208dad9617
Diffstat (limited to 'chapters')
-rw-r--r--chapters/introduction.adoc2
-rw-r--r--chapters/pseudocode.adoc15
2 files changed, 16 insertions, 1 deletions
diff --git a/chapters/introduction.adoc b/chapters/introduction.adoc
index 8b5be21..ae5c7b1 100644
--- a/chapters/introduction.adoc
+++ b/chapters/introduction.adoc
@@ -739,7 +739,7 @@ void generate_lookup_table(int16_t *table, int32_t (*reference)(int32_t))
{
for (int i = -256; i <= 256; i++) {
int32_t value = (*reference)(i);
- table[i + 256] = static_cast<int16_t>(apply_clip<int32_t>(value, -32768, +32767));
+ table[i + 256] = static_cast<int16_t>(apply_clip_s<int32_t>(value, -32768, +32767));
}
}
----
diff --git a/chapters/pseudocode.adoc b/chapters/pseudocode.adoc
index 0de7f39..9e3b7bd 100644
--- a/chapters/pseudocode.adoc
+++ b/chapters/pseudocode.adoc
@@ -262,6 +262,13 @@ in_t apply_clip_s<in_t>(in_t value, in_t min_val, in_t max_val) {
return value;
}
+in_t apply_clip_u<in_t>(in_t value, in_t min_val, in_t max_val) { // Clamps value into [min_val, max_val] via apply_max_u then apply_min_u.
+ REQUIRE(zero_extend<int64_t>(min_val) <= zero_extend<int64_t>(max_val)); // Bounds must be ordered once both are zero-extended to int64_t.
+ value = apply_max_u<in_t>(value, min_val); // Raise value to at least min_val.
+ value = apply_min_u<in_t>(value, max_val); // Then cap the result at max_val.
+ return value;
+}
+
in_t apply_exp<in_t>(in_t input) { // Returns e raised to the power input; the body below is a prose description, not executable code.
return e to the power input
}
@@ -296,6 +303,10 @@ in_t apply_max_s<in_t>(in_t a, in_t b) {
if (sign_extend<int64_t>(a) >= sign_extend<int64_t>(b)) return a; else return b;
}
+in_t apply_max_u<in_t>(in_t a, in_t b) { // Returns the larger of a and b, compared after zero extension to int64_t; a is returned when they compare equal.
+ if (zero_extend<int64_t>(a) >= zero_extend<int64_t>(b)) return a; else return b; // Both operands use the same int64_t extension, consistent with apply_min_u and apply_clip_u (avoids a mixed signed/unsigned comparison).
+}
+
in_t apply_min_s<in_t>(in_t a, in_t b) {
if (is_floating_point(in_t)) {
if (isNaN(a) || isNaN(b)) {
@@ -307,6 +318,10 @@ in_t apply_min_s<in_t>(in_t a, in_t b) {
if (sign_extend<int64_t>(a) < sign_extend<int64_t>(b)) return a; else return b;
}
+in_t apply_min_u<in_t>(in_t a, in_t b) { // Returns the smaller of a and b, compared after zero extension to int64_t.
+ if (zero_extend<int64_t>(a) < zero_extend<int64_t>(b)) return a; else return b; // Strict '<': b is returned when the two compare equal.
+}
+
in_t apply_mul_s<in_t>(in_t a, in_t b) {
if (is_floating_point(in_t)) return a * b;
int64_t c = sign_extend<int64_t>(a) * sign_extend<int64_t>(b);