From eef012e19898ca86a8b9f0e6c1b2f30692bc6860 Mon Sep 17 00:00:00 2001 From: Eric Kunze Date: Fri, 13 May 2022 14:54:06 -0700 Subject: Add the uint16_t data type An unsigned 16-bit integer data type for use with image networks. Limited to only operating with the RESCALE operator for conversion to signed int16. Zero point can be 0 or 32768 in the RESCALE to allow for no loss of precision (by subtracting 32768), or keeping all values as positive, (zero point=0) with scaling/clipping as defined in the other RESCALE arguments. Change-Id: Id1aebab68fa207f8f8cc235fc3fa5d050307198e Signed-off-by: Eric Kunze --- chapters/introduction.adoc | 11 ++++++++--- chapters/type_conversion.adoc | 19 ++++++++++++++++--- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/chapters/introduction.adoc b/chapters/introduction.adoc index 4263135..eafaaca 100644 --- a/chapters/introduction.adoc +++ b/chapters/introduction.adoc @@ -199,12 +199,12 @@ For details of interpreting the quantized data, see the <> |int4_t | -7 | +7 -|Signed 4-bit two's-complement values. Excludes -8 to maintain a symmetric about zero range for weights. +|Signed 4-bit two's-complement value. Excludes -8 to maintain a symmetric about zero range for weights. |int8_t | -128 | +127 -|Signed 8-bit two's-complement values. +|Signed 8-bit two's-complement value. |uint8_t | 0 @@ -214,7 +214,12 @@ For details of interpreting the quantized data, see the <> |int16_t | -32768 | +32767 -|Signed 16-bit two's-complement values. +|Signed 16-bit two's-complement value. + +|uint16_t +| 0 +| 65535 +|Unsigned 16-bit value. |int32_t | -(1<<31) diff --git a/chapters/type_conversion.adoc b/chapters/type_conversion.adoc index 7d0682a..c19d834 100644 --- a/chapters/type_conversion.adoc +++ b/chapters/type_conversion.adoc @@ -96,8 +96,17 @@ Rescale quantized values into a new domain. 
This function scales by factor: mult [source,c++] ---- for_each(index in shape) { - ERROR_IF(in_t != int8_t && in_t != uint8_t && input_zp != 0); - ERROR_IF(out_t != int8_t && out_t != uint8_t && output_zp != 0); + // uint16 values can have zero_point 0 or 32768 + // int8/uint8 can have zero point within their valid range + // No other types can have zero point != 0 + ERROR_IF(in_t != int8_t && + in_t != uint8_t && + in_t != uint16_t && input_zp != 0); + ERROR_IF(out_t != int8_t && + out_t != uint8_t && + out_t != uint16_t && output_zp != 0); + ERROR_IF(in_t == uint16_t && (input_zp != 0 && input_zp != 32768)); + ERROR_IF(out_t == uint16_t && (output_zp != 0 && output_zp != 32768)); ERROR_IF(scale32 && in_t == int48_t); ERROR_IF(!scale32 && double_round); int48_t value = tensor_read(input, shape, index); @@ -119,9 +128,12 @@ for_each(index in shape) { |Any|signed 8 to signed 8|int8_t|int8_t |Any|signed 8 to signed 16|int8_t|int16_t |Any|signed 8 to signed 32|int8_t|int32_t +|Any|signed 8 to unsigned 8|int8_t|uint8_t |Any|signed 16 to signed 8|int16_t|int8_t |Any|signed 16 to signed 16|int16_t|int16_t |Any|signed 16 to signed 32|int16_t|int32_t +|Any|signed 16 to unsigned 8|int16_t|uint8_t +|Any|signed 16 to unsigned 16|int16_t|uint16_t |Any|signed 32 to signed 8|int32_t|int8_t |Any|signed 32 to signed 16|int32_t|int16_t |Any|signed 32 to signed 32|int32_t|int32_t @@ -129,5 +141,6 @@ for_each(index in shape) { |Any|signed 48 to signed 16|int48_t|int16_t |Any|signed 48 to signed 32|int48_t|int32_t |Any|unsigned 8 to signed 8|uint8_t|int8_t -|Any|signed 8 to unsigned 8|int8_t|uint8_t +|Any|unsigned 8 to signed 16|uint8_t|int16_t +|Any|unsigned 16 to signed 16|uint16_t|int16_t |=== -- cgit v1.2.1