Add the uint16_t data type

Adds an unsigned 16-bit integer data type for use with image networks. It is limited to operating only with the RESCALE operator for conversion to signed int16. The zero point in the RESCALE can be either 32768, which allows conversion with no loss of precision (by subtracting 32768), or 0, which keeps all values positive, with scaling/clipping as defined by the other RESCALE arguments.

Change-Id: Id1aebab68fa207f8f8cc235fc3fa5d050307198e
Signed-off-by: Eric Kunze <eric.kunze@arm.com>
author: Eric Kunze <eric.kunze@arm.com> 2022-05-13 14:54:06 -0700
committer: Eric Kunze <eric.kunze@arm.com> 2022-05-16 11:44:15 -0700
commit: eef012e19898ca86a8b9f0e6c1b2f30692bc6860 (patch)
tree: 4112426ff04a0e299d7fb541388a96a105558aaa
parent: 6de978203f071082afcc9090a6ca4c39e0273051 (diff)
download: specification-eef012e19898ca86a8b9f0e6c1b2f30692bc6860.tar.gz
2 files changed, 24 insertions, 6 deletions
diff --git a/chapters/introduction.adoc b/chapters/introduction.adoc
index 4263135..eafaaca 100644
--- a/chapters/introduction.adoc
+++ b/chapters/introduction.adoc
@@ -199,12 +199,12 @@ For details of interpreting the quantized data, see the <<Quantization Scaling>>
 |int4_t
 | -7
 | +7
-|Signed 4-bit two's-complement values. Excludes -8 to maintain a symmetric about zero range for weights.
+|Signed 4-bit two's-complement value. Excludes -8 to maintain a range that is symmetric about zero for weights.
 
 |int8_t
 | -128
 | +127
-|Signed 8-bit two's-complement values.
+|Signed 8-bit two's-complement value.
 
 |uint8_t
 | 0
@@ -214,7 +214,12 @@ For details of interpreting the quantized data, see the <<Quantization Scaling>>
 |int16_t
 | -32768
 | +32767
-|Signed 16-bit two's-complement values.
+|Signed 16-bit two's-complement value.
+
+|uint16_t
+| 0
+| 65535
+|Unsigned 16-bit value.
 
 |int32_t
 | -(1<<31)
diff --git a/chapters/type_conversion.adoc b/chapters/type_conversion.adoc
index 7d0682a..c19d834 100644
--- a/chapters/type_conversion.adoc
+++ b/chapters/type_conversion.adoc
@@ -96,8 +96,17 @@ Rescale quantized values into a new domain. This function scales by factor: mult
 [source,c++]
 ----
 for_each(index in shape) {
-    ERROR_IF(in_t != int8_t && in_t != uint8_t && input_zp != 0);
-    ERROR_IF(out_t != int8_t && out_t != uint8_t && output_zp != 0);
+    // uint16 values can have zero_point 0 or 32768
+    // int8/uint8 can have zero point within their valid range
+    // No other types can have zero point != 0
+    ERROR_IF(in_t != int8_t &&
+             in_t != uint8_t &&
+             in_t != uint16_t && input_zp != 0);
+    ERROR_IF(out_t != int8_t &&
+             out_t != uint8_t &&
+             out_t != uint16_t && output_zp != 0);
+    ERROR_IF(in_t == uint16_t && input_zp != 0 && input_zp != 32768);
+    ERROR_IF(out_t == uint16_t && output_zp != 0 && output_zp != 32768);
     ERROR_IF(scale32 && in_t == int48_t);
     ERROR_IF(!scale32 && double_round);
     int48_t value = tensor_read<in_t>(input, shape, index);
@@ -119,9 +128,12 @@ for_each(index in shape) {
 |Any|signed 8 to signed 8|int8_t|int8_t
 |Any|signed 8 to signed 16|int8_t|int16_t
 |Any|signed 8 to signed 32|int8_t|int32_t
+|Any|signed 8 to unsigned 8|int8_t|uint8_t
 |Any|signed 16 to signed 8|int16_t|int8_t
 |Any|signed 16 to signed 16|int16_t|int16_t
 |Any|signed 16 to signed 32|int16_t|int32_t
+|Any|signed 16 to unsigned 8|int16_t|uint8_t
+|Any|signed 16 to unsigned 16|int16_t|uint16_t
 |Any|signed 32 to signed 8|int32_t|int8_t
 |Any|signed 32 to signed 16|int32_t|int16_t
 |Any|signed 32 to signed 32|int32_t|int32_t
@@ -129,5 +141,6 @@ for_each(index in shape) {
 |Any|signed 48 to signed 16|int48_t|int16_t
 |Any|signed 48 to signed 32|int48_t|int32_t
 |Any|unsigned 8 to signed 8|uint8_t|int8_t
-|Any|signed 8 to unsigned 8|int8_t|uint8_t
+|Any|unsigned 8 to signed 16|uint8_t|int16_t
+|Any|unsigned 16 to signed 16|uint16_t|int16_t
 |===
author	Eric Kunze <eric.kunze@arm.com>	2022-05-13 14:54:06 -0700
committer	Eric Kunze <eric.kunze@arm.com>	2022-05-16 11:44:15 -0700
commit	eef012e19898ca86a8b9f0e6c1b2f30692bc6860 (patch)
tree	4112426ff04a0e299d7fb541388a96a105558aaa
parent	6de978203f071082afcc9090a6ca4c39e0273051 (diff)
download	specification-eef012e19898ca86a8b9f0e6c1b2f30692bc6860.tar.gz