aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEric Kunze <eric.kunze@arm.com>2022-05-13 14:54:06 -0700
committerEric Kunze <eric.kunze@arm.com>2022-05-16 11:44:15 -0700
commiteef012e19898ca86a8b9f0e6c1b2f30692bc6860 (patch)
tree4112426ff04a0e299d7fb541388a96a105558aaa
parent6de978203f071082afcc9090a6ca4c39e0273051 (diff)
downloadspecification-eef012e19898ca86a8b9f0e6c1b2f30692bc6860.tar.gz
Add the uint16_t data type
An unsigned 16-bit integer data type for use with image networks. It is limited to operating with the RESCALE operator for conversion to signed int16. The zero point in the RESCALE can be 0 or 32768: a zero point of 32768 allows conversion with no loss of precision (by subtracting 32768), while a zero point of 0 keeps all values positive, with scaling/clipping as defined by the other RESCALE arguments. Change-Id: Id1aebab68fa207f8f8cc235fc3fa5d050307198e Signed-off-by: Eric Kunze <eric.kunze@arm.com>
-rw-r--r--chapters/introduction.adoc11
-rw-r--r--chapters/type_conversion.adoc19
2 files changed, 24 insertions, 6 deletions
diff --git a/chapters/introduction.adoc b/chapters/introduction.adoc
index 4263135..eafaaca 100644
--- a/chapters/introduction.adoc
+++ b/chapters/introduction.adoc
@@ -199,12 +199,12 @@ For details of interpreting the quantized data, see the <<Quantization Scaling>>
|int4_t
| -7
| +7
-|Signed 4-bit two's-complement values. Excludes -8 to maintain a symmetric about zero range for weights.
+|Signed 4-bit two's-complement value. Excludes -8 to maintain a range that is symmetric about zero for weights.
|int8_t
| -128
| +127
-|Signed 8-bit two's-complement values.
+|Signed 8-bit two's-complement value.
|uint8_t
| 0
@@ -214,7 +214,12 @@ For details of interpreting the quantized data, see the <<Quantization Scaling>>
|int16_t
| -32768
| +32767
-|Signed 16-bit two's-complement values.
+|Signed 16-bit two's-complement value.
+
+|uint16_t
+| 0
+| 65535
+|Unsigned 16-bit value.
|int32_t
| -(1<<31)
diff --git a/chapters/type_conversion.adoc b/chapters/type_conversion.adoc
index 7d0682a..c19d834 100644
--- a/chapters/type_conversion.adoc
+++ b/chapters/type_conversion.adoc
@@ -96,8 +96,17 @@ Rescale quantized values into a new domain. This function scales by factor: mult
[source,c++]
----
for_each(index in shape) {
- ERROR_IF(in_t != int8_t && in_t != uint8_t && input_zp != 0);
- ERROR_IF(out_t != int8_t && out_t != uint8_t && output_zp != 0);
+ // uint16 values can have zero_point 0 or 32768
+ // int8/uint8 can have zero point within their valid range
+ // No other types can have zero point != 0
+ ERROR_IF(in_t != int8_t &&
+ in_t != uint8_t &&
+ in_t != uint16_t && input_zp != 0);
+ ERROR_IF(out_t != int8_t &&
+ out_t != uint8_t &&
+ out_t != uint16_t && output_zp != 0);
+ ERROR_IF(in_t == uint16_t && input_zp != 0 && input_zp != 32768);
+ ERROR_IF(out_t == uint16_t && output_zp != 0 && output_zp != 32768);
ERROR_IF(scale32 && in_t == int48_t);
ERROR_IF(!scale32 && double_round);
int48_t value = tensor_read<in_t>(input, shape, index);
@@ -119,9 +128,12 @@ for_each(index in shape) {
|Any|signed 8 to signed 8|int8_t|int8_t
|Any|signed 8 to signed 16|int8_t|int16_t
|Any|signed 8 to signed 32|int8_t|int32_t
+|Any|signed 8 to unsigned 8|int8_t|uint8_t
|Any|signed 16 to signed 8|int16_t|int8_t
|Any|signed 16 to signed 16|int16_t|int16_t
|Any|signed 16 to signed 32|int16_t|int32_t
+|Any|signed 16 to unsigned 8|int16_t|uint8_t
+|Any|signed 16 to unsigned 16|int16_t|uint16_t
|Any|signed 32 to signed 8|int32_t|int8_t
|Any|signed 32 to signed 16|int32_t|int16_t
|Any|signed 32 to signed 32|int32_t|int32_t
@@ -129,5 +141,6 @@ for_each(index in shape) {
|Any|signed 48 to signed 16|int48_t|int16_t
|Any|signed 48 to signed 32|int48_t|int32_t
|Any|unsigned 8 to signed 8|uint8_t|int8_t
-|Any|signed 8 to unsigned 8|int8_t|uint8_t
+|Any|unsigned 8 to signed 16|uint8_t|int16_t
+|Any|unsigned 16 to signed 16|uint16_t|int16_t
|===