aboutsummaryrefslogtreecommitdiff
path: root/chapters/introduction.adoc
diff options
context:
space:
mode:
Diffstat (limited to 'chapters/introduction.adoc')
-rw-r--r--chapters/introduction.adoc141
1 files changed, 78 insertions, 63 deletions
diff --git a/chapters/introduction.adoc b/chapters/introduction.adoc
index 7039e27..d410121 100644
--- a/chapters/introduction.adoc
+++ b/chapters/introduction.adoc
@@ -194,7 +194,9 @@ The following pseudocode represents the operations that will happen to data elem
If in_t is 8-bit then out_t=int16_t. Otherwise out_t is set to the same as in_t.
If padding is specified, the size of the padding array should be 2 times the size of the shape.
The padding array represents the before and after pair for each dimension.
-....
+
+[source,c++]
+----
assert((pad == NULL) || size(pad) == 2 * size(shape));
out_t tensor_read<in_t>(in_t *address, dim_t shape, dim_t index, in_t zero_point=0, dim_t pad=NULL) {
assert(in_t == int8_t || zero_point == 0)
@@ -212,11 +214,12 @@ out_t tensor_read<in_t>(in_t *address, dim_t shape, dim_t index, in_t zero_point
}
return address[offset] - zero_point;
}
-....
+----
*Functionality of tensor write*
-....
+[source,c++]
+----
tensor_write<type>(<type> *address, dim_t shape, dim_t index, <type> value) {
unsigned offset = 0;
for (i = 0; i < rank(shape); i++) {
@@ -225,7 +228,7 @@ tensor_write<type>(<type> *address, dim_t shape, dim_t index, <type> value) {
}
address[offset] = value;
}
-....
+----
==== Broadcasting
@@ -236,18 +239,19 @@ to be the same. A RESHAPE can be done to create a compatible tensor with appropr
The following function maps an index in the output tensor to an index in the input tensor.
-....
+[source,c++]
+----
dim_t apply_broadcast(dim_t out_shape, dim_t in_shape, dim_t index) {
- assert(rank(out_shape) == rank(in_shape));
- for (i = 0; i < rank(out_shape); i++) {
- if (out_shape[i] != in_shape[i]) {
- assert(in_shape[i] == 1);
- index[i] = 0;
+ assert(rank(out_shape) == rank(in_shape));
+ for (i = 0; i < rank(out_shape); i++) {
+ if (out_shape[i] != in_shape[i]) {
+ assert(in_shape[i] == 1);
+ index[i] = 0;
+ }
}
- }
- return index;
+ return index;
}
-....
+----
=== Quantization
@@ -278,54 +282,57 @@ Most operations in TOSA do not contain quantization scaling in the operation, bu
The apply_scale functions provide a scaling of approximately (multiplier * 2^-shift^). The shift range is limited to allow a variety of implementations. The upper limit of 62 allows it to be decomposed as two right shifts of 31. The lower limit removes special cases in the rounding. These restrictions have little practical impact since the shift value to achieve a scaling of 1.0 is 30 for apply_scale_32 with multiplier=1<<30 and 14 for apply_scale_16 with multiplier=1<<14. It follows that a scaling range of 2^+12^ down to 2^-32^ is supported for both functions with a normalized multiplier. (Smaller scales can be obtained by denormalizing the multiplier.)
-....
+[source,c++]
+----
int32_t apply_scale_32(int32_t value, int32_t multiplier, uint6_t shift, bool_t double_round=false) {
- assert(multiplier >= 0);
- assert(2 <= shift && shift <= 62);
- int64_t round = 1 << (shift - 1);
- if (double_round) {
- if (shift > 31 && value >= 0) round += 1<<30;
- if (shift > 31 && value < 0) round -= 1<<30;
- }
- int64_t result = (int64_t)value * multiplier + round;
- result = result >> shift;
- assert(result >= minimum<int32_t> && result <= maximum<int32_t>);
- return (int32_t)result;
+ assert(multiplier >= 0);
+ assert(2 <= shift && shift <= 62);
+ int64_t round = 1 << (shift - 1);
+ if (double_round) {
+ if (shift > 31 && value >= 0) round += 1<<30;
+ if (shift > 31 && value < 0) round -= 1<<30;
+ }
+ int64_t result = (int64_t)value * multiplier + round;
+ result = result >> shift;
+ assert(result >= minimum<int32_t> && result <= maximum<int32_t>);
+ return (int32_t)result;
}
int32_t apply_scale_16(int48_t value, int16_t multiplier, uint6_t shift) {
- assert(multiplier >= 0);
- assert(2 <= shift && shift <= 62);
- int64_t round = (1 << (shift - 1));
- int64_t result = (int64_t)value * multiplier + round;
- result = result >> shift;
- assert(result >= minimum<int32_t> && result <= maximum<int32_t>);
- return (int32_t)result;
+ assert(multiplier >= 0);
+ assert(2 <= shift && shift <= 62);
+ int64_t round = (1 << (shift - 1));
+ int64_t result = (int64_t)value * multiplier + round;
+ result = result >> shift;
+ assert(result >= minimum<int32_t> && result <= maximum<int32_t>);
+ return (int32_t)result;
}
-....
+----
In some functions, the multiplier and shift are combined into a scale_t structure:
-....
+[source,c++]
+----
typedef struct {
- int32_t multiplier;
- uint6_t shift;
+ int32_t multiplier;
+ uint6_t shift;
} scale_t;
-....
+----
In places where a divide is required, we also use the function below to calculate an appropriate scaling value.
-....
+[source,c++]
+----
scale_t reciprocal_scale(uint32_t value) {
- assert(value > 0);
- scale_t scale;
- int k = 32 - count_leading_zeros(value - 1); // (1 << k) / 2 < value <= (1 << k)
- int64_t numerator = ((1 << 30) + 1) << k;
- scale.multiplier = numerator / value; // (1 << 30) <= multiplier < (1 << 31)
- scale.shift = 30 + k;
- return scale;
+ assert(value > 0);
+ scale_t scale;
+ int k = 32 - count_leading_zeros(value - 1); // (1 << k) / 2 < value <= (1 << k)
+ int64_t numerator = ((1 << 30) + 1) << k;
+ scale.multiplier = numerator / value; // (1 << 30) <= multiplier < (1 << 31)
+ scale.shift = 30 + k;
+ return scale;
}
-....
+----
==== Quantized Convolutions
@@ -349,7 +356,8 @@ General unary functions such as sigmoid(), tanh(), exp() for integer inputs are
This also allows for other operations with the addition of user-supplied tables (the TABLE operation).
All table lookups are based on the following reference lookup function that takes as input a table of 513 entries of 16 bits each.
-....
+[source,c++]
+----
int32_t apply_lookup(int16_t *table, int32_t value)
{
int16_t clipped_value = (int16_t)apply_clip<int32_t>(value, -32768, +32767);
@@ -360,12 +368,13 @@ int32_t apply_lookup(int16_t *table, int32_t value)
int32_t return_value = (base << 7) + (next - base) * fraction;
return return_value; // return interpolated value of 16 + 7 = 23 bits
}
-....
+----
Note that although the table lookup defined here has 16-bit precision, for 8-bit only operations an 8-bit table can be derived by applying the reference function to each of the possible 256 input values.
The following code constructs a 513-entry table based on a reference function.
-....
+[source,c++]
+----
void generate_lookup_table(int16_t *table, int32_t (*reference)(int32_t))
{
for (int i = -256; i <= 256; i++) {
@@ -373,7 +382,7 @@ void generate_lookup_table(int16_t *table, int32_t (*reference)(int32_t))
table[i + 256] = (int16_t)apply_clip<int32_t>(value, -32768, +32767)
}
}
-....
+----
=== Floating-point
@@ -416,7 +425,8 @@ This section contains general pseudocode utility functions used throughout the s
The following functions provide basic arithmetic with asserts that values stay in the valid range supported by TOSA.
-....
+[source,c++]
+----
acc_t apply_add<acc_t>(acc_t a, acc_t b) {
if (acc_t == float_t) return a + b;
int64_t c = (int64_t)a + (int64_t)b;
@@ -430,12 +440,13 @@ acc_t apply_sub<acc_t>(acc_t a, acc_t b) {
assert(c >= minimum<acc_t> && c <= maximum<acc_t>);
return (acc_t)c;
}
-....
+----
The following functions are used in the pseudocode to take maximum,
minimum, clip values to a range, or count leading zeros.
[[count_leading_zeros]]
-....
+[source,c++]
+----
<type> apply_max<type>(<type> a, <type> b) {
if (a >= b) return a; else return b;
}
@@ -464,10 +475,12 @@ int32_t count_leading_zeros(int32_t a) {
}
return acc;
}
-....
+----
The following definitions are used in pseudocode to do numeric conversions.
-....
+
+[source,c++]
+----
int round_to_nearest_int(float_t f)
Converts the floating-point value f to an integer, rounding to the nearest integer value.
@@ -482,16 +495,18 @@ out_t sign_extend(in_t input)
out_t truncate(in_t input)
output is the sizeof(out_t) least significant bits in input.
-....
+----
The following definition is used to flatten a list of lists into a single list.
-....
+
+[source,c++]
+----
in_t* flatten(in_t lists[]) {
- in_t output = [];
- for_each(list in lists) {
- for_each(element in list) {
- output.append(element);
+ in_t output = [];
+ for_each(list in lists) {
+ for_each(element in list) {
+ output.append(element);
+ }
}
- }
}
-....
+----