diff options
author | Eric Kunze <eric.kunze@arm.com> | 2022-05-13 14:54:06 -0700 |
---|---|---|
committer | Eric Kunze <eric.kunze@arm.com> | 2022-05-16 11:44:15 -0700 |
commit | eef012e19898ca86a8b9f0e6c1b2f30692bc6860 (patch) | |
tree | 4112426ff04a0e299d7fb541388a96a105558aaa /chapters/type_conversion.adoc | |
parent | 6de978203f071082afcc9090a6ca4c39e0273051 (diff) | |
download | specification-eef012e19898ca86a8b9f0e6c1b2f30692bc6860.tar.gz |
Add the uint16_t data type
An unsigned 16-bit integer data type for use with image networks.
Limited to only operating with the RESCALE operator for conversion
to signed int16.
Zero point can be 0 or 32768 in the RESCALE: 32768 allows for no loss
of precision (by subtracting 32768), while 0 keeps all values
positive, with scaling/clipping as defined in the other RESCALE
arguments.
Change-Id: Id1aebab68fa207f8f8cc235fc3fa5d050307198e
Signed-off-by: Eric Kunze <eric.kunze@arm.com>
Diffstat (limited to 'chapters/type_conversion.adoc')
-rw-r--r-- | chapters/type_conversion.adoc | 19 |
1 files changed, 16 insertions, 3 deletions
diff --git a/chapters/type_conversion.adoc b/chapters/type_conversion.adoc
index 7d0682a..c19d834 100644
--- a/chapters/type_conversion.adoc
+++ b/chapters/type_conversion.adoc
@@ -96,8 +96,17 @@ Rescale quantized values into a new domain. This function scales by factor: mult
 [source,c++]
 ----
 for_each(index in shape) {
-    ERROR_IF(in_t != int8_t && in_t != uint8_t && input_zp != 0);
-    ERROR_IF(out_t != int8_t && out_t != uint8_t && output_zp != 0);
+    // uint16 values can have zero_point 0 or 32768
+    // int8/uint8 can have zero point within their valid range
+    // No other types can have zero point != 0
+    ERROR_IF(in_t != int8_t &&
+             in_t != uint8_t &&
+             in_t != uint16_t && input_zp != 0);
+    ERROR_IF(out_t != int8_t &&
+             out_t != uint8_t &&
+             out_t != uint16_t && output_zp != 0);
+    ERROR_IF(in_t == uint16_t && (input_zp != 0 || input_zp != 32768));
+    ERROR_IF(out_t == uint16_t && (output_zp != 0 || output_zp != 32768));
     ERROR_IF(scale32 && in_t == int48_t);
     ERROR_IF(!scale32 && double_round);
     int48_t value = tensor_read<in_t>(input, shape, index);
@@ -119,9 +128,12 @@ for_each(index in shape) {
 |Any|signed 8 to signed 8|int8_t|int8_t
 |Any|signed 8 to signed 16|int8_t|int16_t
 |Any|signed 8 to signed 32|int8_t|int32_t
+|Any|signed 8 to unsigned 8|int8_t|uint8_t
 |Any|signed 16 to signed 8|int16_t|int8_t
 |Any|signed 16 to signed 16|int16_t|int16_t
 |Any|signed 16 to signed 32|int16_t|int32_t
+|Any|signed 16 to unsigned 8|int16_t|uint8_t
+|Any|signed 16 to unsigned 16|int16_t|uint16_t
 |Any|signed 32 to signed 8|int32_t|int8_t
 |Any|signed 32 to signed 16|int32_t|int16_t
 |Any|signed 32 to signed 32|int32_t|int32_t
@@ -129,5 +141,6 @@ for_each(index in shape) {
 |Any|signed 48 to signed 16|int48_t|int16_t
 |Any|signed 48 to signed 32|int48_t|int32_t
 |Any|unsigned 8 to signed 8|uint8_t|int8_t
-|Any|signed 8 to unsigned 8|int8_t|uint8_t
+|Any|unsigned 8 to signed 16|uint8_t|int16_t
+|Any|unsigned 16 to signed 16|uint16_t|int16_t
 |===