From 2dce0d022b5103f93bf4393e5cd9a3009fbf5add Mon Sep 17 00:00:00 2001
From: Eric Kunze <eric.kunze@arm.com>
Date: Tue, 12 Jan 2021 16:19:50 -0800
Subject: Remove aint8 type

The aint8 type represented an asymmetrically quantized range.
With the change to move scaling fully into the RESCALE operator,
the aint8 and int8 types didn't have significant differences in
their use with operators. Unifying to a single data type makes
things simpler.

Signed-off-by: Eric Kunze <eric.kunze@arm.com>
Change-Id: I3ee89cbdafdc61293ef1f1bec52398d744e716a1
---
 chapters/activation_funcs.adoc |  4 +-
 chapters/data_layout.adoc      | 16 ++++----
 chapters/data_nodes.adoc       | 10 ++---
 chapters/ewise_binary.adoc     |  8 ++--
 chapters/ewise_ternary.adoc    |  4 +-
 chapters/ewise_unary.adoc      | 10 ++---
 chapters/introduction.adoc     | 41 ++++++++++---------
 chapters/reduction.adoc        |  6 +--
 chapters/scatter_gather.adoc   |  6 +--
 chapters/tensor_ops.adoc       | 91 +++++++++++++++++++++---------------------
 chapters/type_conversion.adoc  | 22 +++++-----
 11 files changed, 112 insertions(+), 106 deletions(-)

diff --git a/chapters/activation_funcs.adoc b/chapters/activation_funcs.adoc
index 8743b13..3fc8bc0 100644
--- a/chapters/activation_funcs.adoc
+++ b/chapters/activation_funcs.adoc
@@ -1,7 +1,7 @@
 //
 // This confidential and proprietary software may be used only as
 // authorised by a licensing agreement from ARM Limited
-// (C) COPYRIGHT 2020 ARM Limited
+// (C) COPYRIGHT 2020-2021 ARM Limited
 // ALL RIGHTS RESERVED
 // The entire notice above must be reproduced on all authorised
 // copies and copies may only be made to the extent permitted
@@ -39,7 +39,7 @@ for_each (index in shape) {
 |===
 |Profile|Mode|in_t|out_t
 
-|Any|signed 8|aint8 |aint8
+|Any|signed 8|int8 |int8
 |Any|signed 16|int16|int16
 |MI, MT|float|float|float
 |===
diff --git a/chapters/data_layout.adoc b/chapters/data_layout.adoc
index e764be9..5d54e62 100644
--- a/chapters/data_layout.adoc
+++ b/chapters/data_layout.adoc
@@ -1,7 +1,7 @@
 //
 // This confidential and proprietary software may be used only as
 // authorised by a licensing agreement from ARM Limited
-// (C) COPYRIGHT 2020 ARM Limited
+// (C) COPYRIGHT 2020-2021 ARM Limited
 // ALL RIGHTS RESERVED
 // The entire notice above must be reproduced on all authorised
 // copies and copies may only be made to the extent permitted
@@ -43,7 +43,7 @@ for_each (index1 in shape) {
 |Profile|Mode|in_t
 
 |Any|Boolean|bool
-|Any|signed 8|int8/aint8
+|Any|signed 8|int8
 |Any|signed 16|int16
 |Any|signed 32|int32
 |MI, MT|float|float
@@ -91,7 +91,7 @@ for_each (index in shape) {
 |Profile|Mode|in_t
 
 |Any|Boolean|bool
-|Any|signed 8|int8/aint8
+|Any|signed 8|int8
 |Any|signed 16|int16
 |Any|signed 32|int32
 |MI, MT|float|float
@@ -127,7 +127,7 @@ for (i=0; i<tensor_size(shape); i++) {
 |Profile|Mode|in_t
 
 |Any|Boolean|bool
-|Any|signed 8|int8/aint8
+|Any|signed 8|int8
 |Any|signed 16|int16
 |Any|signed 32|int32
 |MI, MT|float|float
@@ -166,7 +166,7 @@ for_each (index in shape) {
 |Profile|Mode|in_t
 
 |Any|Boolean|bool
-|Any|signed 8|int8/aint8
+|Any|signed 8|int8
 |Any|signed 16|int16
 |Any|signed 32|int32
 |MI, MT|float|float
@@ -206,7 +206,7 @@ for_each (index in shape) {
 |Profile|Mode|in_t
 
 |Any|Boolean|bool
-|Any|signed 8|int8/aint8
+|Any|signed 8|int8
 |Any|signed 16|int16
 |Any|signed 32|int32
 |MI, MT|float|float
@@ -247,7 +247,7 @@ for_each (index in shape) {
 |Profile|Mode|in_t
 
 |Any|Boolean|bool
-|Any|signed 8|int8/aint8
+|Any|signed 8|int8
 |Any|signed 16|int16
 |Any|signed 32|int32
 |MI, MT|float|float
@@ -288,7 +288,7 @@ for_each (index in shape) {
 |Profile|Mode|in_t
 
 |Any|Boolean|bool
-|Any|signed 8|int8/aint8
+|Any|signed 8|int8
 |Any|signed 16|int16
 |Any|signed 32|int32
 |MI, MT|float|float
diff --git a/chapters/data_nodes.adoc b/chapters/data_nodes.adoc
index 6164a95..4297a9a 100644
--- a/chapters/data_nodes.adoc
+++ b/chapters/data_nodes.adoc
@@ -1,7 +1,7 @@
 //
 // This confidential and proprietary software may be used only as
 // authorised by a licensing agreement from ARM Limited
-// (C) COPYRIGHT 2020 ARM Limited
+// (C) COPYRIGHT 2020-2021 ARM Limited
 // ALL RIGHTS RESERVED
 // The entire notice above must be reproduced on all authorised
 // copies and copies may only be made to the extent permitted
@@ -28,7 +28,7 @@ A node containing constant data for use as the input to an operation. May hold d
 |Profile|Mode|out_t
 
 |Any|Boolean|bool
-|Any|signed 8|int8/aint8
+|Any|signed 8|int8
 |Any|signed 16|int16
 |Any|signed 32|int32
 |MI, MT|float|float
@@ -53,7 +53,7 @@ Returns a tensor with the same shape, type, and contents as the input.
 |Profile|Mode|in_t
 
 |Any|Boolean|bool
-|Any|signed 8|int8/aint8
+|Any|signed 8|int8
 |Any|signed 16|int16
 |Any|signed 32|int32
 |MI, MT|float|float
@@ -78,7 +78,7 @@ Returns a list of tensors with the same shape, type, and contents as the input l
 |Profile|Mode|in_t
 
 |Any|Boolean|bool
-|Any|signed 8|int8/aint8
+|Any|signed 8|int8
 |Any|signed 16|int16
 |Any|signed 32|int32
 |MI, MT|float|float
@@ -102,7 +102,7 @@ A node where data will be inserted into the network at runtime. Generally used f
 
 |Any|Boolean|bool
 |Any|unsigned 8|uint8
-|Any|signed 8|int8/aint8
+|Any|signed 8|int8
 |Any|signed 16|int16
 |Any|signed 32|int32
 |MI, MT|float|float
diff --git a/chapters/ewise_binary.adoc b/chapters/ewise_binary.adoc
index f3c9fbe..e9d76f8 100644
--- a/chapters/ewise_binary.adoc
+++ b/chapters/ewise_binary.adoc
@@ -1,7 +1,7 @@
 //
 // This confidential and proprietary software may be used only as
 // authorised by a licensing agreement from ARM Limited
-// (C) COPYRIGHT 2020 ARM Limited
+// (C) COPYRIGHT 2020-2021 ARM Limited
 // ALL RIGHTS RESERVED
 // The entire notice above must be reproduced on all authorised
 // copies and copies may only be made to the extent permitted
@@ -126,7 +126,7 @@ for_each (index in shape) {
 |===
 |Profile|Mode|in_t
 
-|Any|signed 8|aint8
+|Any|signed 8|int8
 |Any|signed 16|int16
 |Any|signed 32|int32
 |===
@@ -165,7 +165,7 @@ for_each (index in shape) {
 |===
 |Profile|Mode|in_t
 
-|Any|signed 8|aint8
+|Any|signed 8|int8
 |Any|signed 16|int16
 |Any|signed 32|int32
 |===
@@ -204,7 +204,7 @@ for_each (index in shape) {
 |===
 |Profile|Mode|in_t
 
-|Any|signed 8|aint8
+|Any|signed 8|int8
 |Any|signed 16|int16
 |Any|signed 32|int32
 |===
diff --git a/chapters/ewise_ternary.adoc b/chapters/ewise_ternary.adoc
index 8dc9172..17e966c 100644
--- a/chapters/ewise_ternary.adoc
+++ b/chapters/ewise_ternary.adoc
@@ -1,7 +1,7 @@
 //
 // This confidential and proprietary software may be used only as
 // authorised by a licensing agreement from ARM Limited
-// (C) COPYRIGHT 2020 ARM Limited
+// (C) COPYRIGHT 2020-2021 ARM Limited
 // ALL RIGHTS RESERVED
 // The entire notice above must be reproduced on all authorised
 // copies and copies may only be made to the extent permitted
@@ -54,7 +54,7 @@ for_each (index in shape) {
 |Profile|Mode|bool_t|in_t
 
 |Any|Boolean|bool|bool
-|Any|signed 8|bool|aint8/int8
+|Any|signed 8|bool|int8
 |Any|signed 16|bool|int16
 |Any|signed 32|bool|int32
 |MI, MT|float|bool|float
diff --git a/chapters/ewise_unary.adoc b/chapters/ewise_unary.adoc
index 8cdce3d..c9b0922 100644
--- a/chapters/ewise_unary.adoc
+++ b/chapters/ewise_unary.adoc
@@ -1,7 +1,7 @@
 //
 // This confidential and proprietary software may be used only as
 // authorised by a licensing agreement from ARM Limited
-// (C) COPYRIGHT 2020 ARM Limited
+// (C) COPYRIGHT 2020-2021 ARM Limited
 // ALL RIGHTS RESERVED
 // The entire notice above must be reproduced on all authorised
 // copies and copies may only be made to the extent permitted
@@ -76,7 +76,7 @@ for_each (index in shape) {
 |===
 |Profile|Mode|in_t
 
-|Any|signed 8|aint8
+|Any|signed 8|int8
 |Any|signed 16|int16
 |Any|signed 32|int32
 |===
@@ -259,8 +259,8 @@ Elementwise negation operation
 
 [source,c]
 ----
-assert(in_t == aint8_t || input_zp == 0) // Zero point only for asymmetric int8
-assert(in_t == aint8_t || output_zp == 0) // Zero point only for asymmetric int8
+assert(in_t == int8_t || input1_zp == 0) // Zero point only for int8
+assert(in_t == int8_t || output_zp == 0) // Zero point only for int8
 for_each (index in shape) {
     in_t value1 = tensor_read<in_t>(input1, shape, index)
     in_t acc = appl_sub<in_t>(0, value1 - input1_zp)
@@ -274,7 +274,7 @@ for_each (index in shape) {
 |===
 |Profile|Mode|in_t
 
-|Any|signed 8|aint8
+|Any|signed 8|int8
 |Any|signed 16|int16
 |Any|signed 32|int32
 |MI, MT|float|float
diff --git a/chapters/introduction.adoc b/chapters/introduction.adoc
index da1c1b1..408faa4 100644
--- a/chapters/introduction.adoc
+++ b/chapters/introduction.adoc
@@ -1,7 +1,7 @@
 //
 // This confidential and proprietary software may be used only as
 // authorised by a licensing agreement from ARM Limited
-// (C) COPYRIGHT 2020 ARM Limited
+// (C) COPYRIGHT 2020-2021 ARM Limited
 // ALL RIGHTS RESERVED
 // The entire notice above must be reproduced on all authorised
 // copies and copies may only be made to the extent permitted
@@ -125,7 +125,11 @@ The base inference profile of TOSA requires support for the quantized integer op
 
 ==== Number formats
 
-The following number formats are defined in TOSA. See section 2.3 for details on quantization within TOSA. The number formats supported by an operator are listed in a per-operator table of supported types.
+The following number formats are defined in TOSA. See section 2.3 for details on
+quantization within TOSA. The number formats supported by an operator are listed
+in a per-operator table of supported types. The integer types may be used to
+represent quantized data. For details of interpreting the quantized data, see
+the <<Quantization Scaling>> section.
 
 .Number formats
 [cols="1,1,1,6"]
@@ -137,40 +141,36 @@ The following number formats are defined in TOSA. See section 2.3 for details on
 | -
 |Boolean value. Size implementation defined.
 
-|aint8
-| -128
-| +127
-|Asymmetric 8-bit quantized values. Operators using this data type will require a zero point value and a scale factor. See <<Quantization Scaling>> for details on quantization parameters and their use in operators.
-
 |int4
 | -7
 | +7
-|Signed 4-bit values. These values are symmetrically quantized, with values from -7, 7 as the range. These are quantized per-channel. No zero point is used, scale factor is provided as part of the operation.
+|Signed 4-bit values.
 
 |int8
 | -128
 | +127
-|Signed 8-bit twos-complement values. These values are quantized. Symmetric per-channel or per-tensor quantization. No zero point is used, scale factor is provided in the operation.
+|Signed 8-bit twos-complement values.
 
 |uint8
 | 0
 | 255
-|Unsigned 8-bit value quantized value with zero point. This data type is only used for input/output conversion by the RESCALE operator and not supported by other operators.
+|Unsigned 8-bit value. This data type is only used for input/output conversion by the
+RESCALE operator and is not supported by other operators.
 
 |int16
 | -32768
-| +32768
-|Signed  16-bit twos-complement values. Symmetric per-tensor quantization. No zero point is used, scale factor is provided in the operation.
+| +32767
+|Signed 16-bit twos-complement values.
 
 |int32
-| (1<<31)-1
 | -(1<<31)
-|32-bit twos-complement value. No scale factor used.
+| (1<<31)-1
+|32-bit twos-complement value.
 
 |int48
-| (1<<47)-1
 | -(1<<47)
-|48-bit twos-complement value. No scale factor used.
+| (1<<47)-1
+|48-bit twos-complement value.
 
 |float
 | -infinity
@@ -196,7 +196,7 @@ The following pseudocode represents the operations that will happen to data elem
 If in_t is 8-bit then out_t=int16_t. Otherwise out_t is set to the same as in_t.
 ....
 out_t tensor_read<in_t>(in_t *address, dim_t shape, dim_t index, in_t zero_point=0, dim_t pad=NULL) {
-  assert(in_t == aint8_t || zero_point==0)
+  assert(in_t == int8_t || zero_point == 0)
   unsigned offset = 0;
   for (i = 0; i < rank(shape); i++)
     if (index[i] < 0) { assert(pad && pad[2 * i] + index[i] >= 0); return 0; }
@@ -255,7 +255,12 @@ To remove this dependency on floating point values, there are two design choices
 * Quantization parameters will be given in integer values, as multiplicands and shifts. Specific bit widths and signed/unsignedness will be provided with each operator.
 
 When compiling a network to TOSA, we expect that a compiler would lower all possible subgraphs to TOSA, keeping the quantization parameters with the tensors, and then do an additional pass where the quantization values for the operators are calculated based on the input and output tensors for the operation.
-TOSA currently supports signed 8-bit quantization, unsigned 8-bit quantization, and signed 16-bit quantization. Signed 8-bit values can be used with a zero point, where they are noted as aint8, or without a zero point, where they are noted as int8. TOSA also supports 32-bit computations, however the inputs to these 32-bit operations are not scaled, and input quantization parameters are ignored. Quantization parameters may be provided for 32-bit operations, as the output may be scaled down to 8 or 16 bits.
+
+TOSA currently supports signed 8-bit quantization, unsigned 8-bit quantization, and
+signed 16-bit quantization. 8-bit values support an optional zero point, denoting
+which value in the 8-bit range represents the value zero. Unsigned 8-bit values
+are only allowed in the RESCALE operation, to allow for compatibility with
+networks which expect unsigned 8-bit input tensors.
 
 ==== Quantization Scaling
 
diff --git a/chapters/reduction.adoc b/chapters/reduction.adoc
index 9aaf9a1..21cdec5 100644
--- a/chapters/reduction.adoc
+++ b/chapters/reduction.adoc
@@ -1,7 +1,7 @@
 //
 // This confidential and proprietary software may be used only as
 // authorised by a licensing agreement from ARM Limited
-// (C) COPYRIGHT 2020 ARM Limited
+// (C) COPYRIGHT 2020-2021 ARM Limited
 // ALL RIGHTS RESERVED
 // The entire notice above must be reproduced on all authorised
 // copies and copies may only be made to the extent permitted
@@ -129,7 +129,7 @@ for_each (index in in_shape) {
 |===
 |Profile|Mode|in_t|out_t
 
-|Any|signed 8|aint8|aint8
+|Any|signed 8|int8|int8
 |Any|signed 16|int16|int16
 |Any|signed 32|int32|int32
 |MI, MT|float|float|float
@@ -176,7 +176,7 @@ for_each (index in in_shape) {
 |===
 |Profile|Mode|in_t|out_t
 
-|Any|signed 8|aint8|aint8
+|Any|signed 8|int8|int8
 |Any|signed 16|int16|int16
 |Any|signed 32|int32|int32
 |MI, MT|float|float|float
diff --git a/chapters/scatter_gather.adoc b/chapters/scatter_gather.adoc
index 65c0fe3..a026335 100644
--- a/chapters/scatter_gather.adoc
+++ b/chapters/scatter_gather.adoc
@@ -1,7 +1,7 @@
 //
 // This confidential and proprietary software may be used only as
 // authorised by a licensing agreement from ARM Limited
-// (C) COPYRIGHT 2020 ARM Limited
+// (C) COPYRIGHT 2020-2021 ARM Limited
 // ALL RIGHTS RESERVED
 // The entire notice above must be reproduced on all authorised
 // copies and copies may only be made to the extent permitted
@@ -45,7 +45,7 @@ for_each(0<=n<N, 0<=w<W, 0<=c<C) {
 |===
 |Profile|Mode|index_t|value_t
 
-|Any|signed 8|int32|aint8
+|Any|signed 8|int32|int8
 |Any|signed 16|int32|int16
 |Any|signed 32|int32|int32
 |MI,MT|float|int32|float
@@ -106,7 +106,7 @@ for_each(0<=n<N, 0<=w<W, 0<=c<C) {
 |===
 |Profile|Mode|index_t|value_t
 
-|Any|signed 8|int32|aint8
+|Any|signed 8|int32|int8
 |Any|signed 16|int32|int16
 |Any|signed 32|int32|int32
 |MI,MT|float|int32|float
diff --git a/chapters/tensor_ops.adoc b/chapters/tensor_ops.adoc
index 7d84ae6..76f39ca 100644
--- a/chapters/tensor_ops.adoc
+++ b/chapters/tensor_ops.adoc
@@ -1,7 +1,7 @@
 //
 // This confidential and proprietary software may be used only as
 // authorised by a licensing agreement from ARM Limited
-// (C) COPYRIGHT 2020 ARM Limited
+// (C) COPYRIGHT 2020-2021 ARM Limited
 // ALL RIGHTS RESERVED
 // The entire notice above must be reproduced on all authorised
 // copies and copies may only be made to the extent permitted
@@ -53,7 +53,7 @@ for_each ( left_index in left_shape, right_index in right_shape )
 |===
 |Profile|Mode|in_t|out_t
 
-|Any|signed 8|aint8|int32
+|Any|signed 8|int8|int32
 |Any|signed 16|int16|int32
 |MI, MT|float|float|int32
 |===
@@ -87,15 +87,15 @@ This performs an average pooling over the given input tensor. A sliding window o
 
 [source,c]
 ----
-assert(in_t == aint8_t || input_zp == 0) // Zero point only for asymmetric int8
-assert(out_t == aint8_t || output_zp == 0) // Zero point only for asymmetric int8
+assert(in_t == int8_t || input_zp == 0) // Zero point only for int8
+assert(out_t == int8_t || output_zp == 0) // Zero point only for int8
 pad=concat([0,0],pad,[0,0])
-for_each ( 0<=n<N, 0<=oy<H, 0<=ox<W, 0<=c<C ) {
+for_each ( 0 <= n < N, 0 <= oy < H, 0 <= ox < W, 0 <= c < C ) {
     acc_t acc = 0;
     int count = 0;
     iy = oy * stride_y - pad_top
     ix = ox * stride_x - pad_left
-    for_each ( 0<=ky<kernel_y, 0<=kx<kernel_x) {
+    for_each ( 0 <= ky < kernel_y, 0 <= kx < kernel_x) {
         y = iy + ky
         x = ix + kx
         in_t value  = tensor_read<in_t>(input, [N,IH,IW,IC], [n,y,x,c], input_zp, pad)
@@ -117,7 +117,7 @@ for_each ( 0<=n<N, 0<=oy<H, 0<=ox<W, 0<=c<C ) {
 |===
 |Profile|Mode|in_t|acc_t|out_t
 
-|Any|signed 8|aint8|int32_t|aint8
+|Any|signed 8|int8|int32_t|int8
 |Any|signed 16|int16|int32_t|int16
 |MI, MT|float|float|float|float
 |===
@@ -153,14 +153,14 @@ Performs a 2D convolution over the given tensor input, using the weight tensor.
 
 [source,c]
 ----
-assert(in_t == aint8_t || input_zp == 0) // Zero point only for asymmetric int8
-assert(weight_t == aint8_t || weight_zp == 0)
-pad=concat([0,0],pad,[0,0])
-for_each (0<=n<N, 0<=oy<H, 0<=ox<W; 0<=oc<OC) {
+assert(in_t == int8_t || input_zp == 0) // Zero point only for int8
+assert(weight_t == int8_t || weight_zp == 0)
+pad=concat([0,0], pad, [0,0])
+for_each (0 <= n < N, 0 <= oy < H, 0 <= ox < W; 0 <= oc < OC) {
     acc_t acc = 0
     iy = oy * stride_y - pad_top
     ix = ox * stride_x - pad_left
-    for_each (0<=ky<KH, 0<=kx<KW, 0<=ic<IC) {
+    for_each (0 <= ky < KH, 0 <= kx < KW, 0 <= ic < IC) {
         y = iy + ky * dilation_y
         x = ix + kx * dilation_x
         in_t value  = tensor_read<in_t>(input, [N,IH,IW,IC], [n,y,x,ic], input_zp, pad)
@@ -177,8 +177,8 @@ for_each (0<=n<N, 0<=oy<H, 0<=ox<W; 0<=oc<OC) {
 |===
 |Profile|Mode|in_t|weight_t|acc_t
 
-|Any|signed 8x8|aint8|int8,aint8|int32
-|Any|signed 8x4|aint8|int4|int32
+|Any|signed 8x8|int8|int8|int32
+|Any|signed 8x4|int8|int4|int32
 |Any|signed 16x8|int16|int8|int48
 |MI, MT|float|float|float|float
 |===
@@ -214,15 +214,15 @@ Performs a 3D convolution over the given input tensor.
 
 [source,c]
 ----
-assert(in_t == aint8_t || input_zp == 0) // Zero point only for asymmetric int8
-assert(weight_t == aint8_t || weight_zp == 0)
-pad=concat([0,0],pad,[0,0])
-for_each (0<=n<N, 0<=od<D, 0<=oy<H, 0<=ox<W; 0<=oc<OC) {
+assert(in_t == int8_t || input_zp == 0) // Zero point only for int8
+assert(weight_t == int8_t || weight_zp == 0)
+pad=concat([0,0], pad, [0,0])
+for_each (0 <= n < N, 0 <= od < D, 0 <= oy < H, 0 <= ox < W; 0 <= oc < OC) {
     acc_t acc = 0
     id = od * stride_d - pad_d0
     iy = oy * stride_y - pad_top
     ix = ox * stride_x - pad_left
-    for_each (0<=kd<KD, 0<=ky<KH, 0<=kx<KW, 0<=ic<IC) {
+    for_each (0 <= kd < KD, 0 <= ky < KH, 0 <= kx < KW, 0 <= ic < IC) {
         d = id + kd * dilation_d
         y = iy + ky * dilation_y
         x = ix + kx * dilation_x
@@ -240,8 +240,8 @@ for_each (0<=n<N, 0<=od<D, 0<=oy<H, 0<=ox<W; 0<=oc<OC) {
 |===
 |Profile|Mode|in_t|weight_t|acc_t
 
-|Any|signed 8x8|aint8|int8,aint8|int32
-|Any|signed 8x4|aint8|int4|int32
+|Any|signed 8x8|int8|int8|int32
+|Any|signed 8x4|int8|int4|int32
 |Any|signed 16x8|int16|int8|int48
 |MI, MT|float|float|float|float
 |===
@@ -278,14 +278,14 @@ Performs 2D convolutions separately over each channel of the given tensor input,
 
 [source,c]
 ----
-assert(in_t==aint8_t || input_zp==0) // Zero point only for asymmetric int8
-assert(weight_t==aint8_t || weight_zp==0)
-pad=concat([0,0],pad,[0,0])
+assert(in_t == int8_t || input_zp == 0) // Zero point only for int8
+assert(weight_t == int8_t || weight_zp == 0)
+pad=concat([0,0], pad, [0,0])
 for_each (0 <= n<N, 0 <= oy < H, 0 <= ox < W; 0 <= c < (C * M), 0 <= m < M) {
     acc_t acc = 0
     iy = oy * stride_y - pad_top
     ix = ox * stride_x - pad_left
-    for_each (0<=ky<KH, 0<=kx<KW) {
+    for_each (0 <= ky < KH, 0 <= kx < KW) {
         y = iy + ky * dilation_y
         x = ix + kx * dilation_x
         in_t value  = tensor_read<in_t>(input, [N,H,W,C], [n,y,x,c], input_zp, pad)
@@ -302,8 +302,8 @@ for_each (0 <= n<N, 0 <= oy < H, 0 <= ox < W; 0 <= c < (C * M), 0 <= m < M) {
 |===
 |Profile|Mode|in_t|weight_t|acc_t
 
-|Any|signed 8x8|aint8|int8,aint8|int32
-|Any|signed 8x4|aint8|int4|int32
+|Any|signed 8x8|int8|int8|int32
+|Any|signed 8x4|int8|int4|int32
 |Any|signed 16x8|int16|int8|int48
 |MI, MT|float|float|float|float
 |===
@@ -336,11 +336,11 @@ Performs a fully connected network.
 
 [source,c]
 ----
-assert(in_t == aint8_t || input_zp == 0) // Zero point only for asymmetric int8
-assert(weight_t == aint8_t || weight_zp == 0)
-for_each (0<=n<N, 0<=oc<OC) {
+assert(in_t == int8_t || input_zp == 0) // Zero point only for int8
+assert(weight_t == int8_t || weight_zp == 0)
+for_each (0 <= n < N, 0 <= oc < OC) {
     acc_t acc = 0
-    for_each (0<=ic<IC) {
+    for_each (0 <= ic < IC) {
         in_t value  = tensor_read<in_t>(input, [N,IC], [n,ic], input_zp)
         weight_t weight = tensor_read<weight_t>(weight, [OC,IC], [oc,ic], weight_zp)
         acc = apply_add<acc_t>(acc, value * weight)
@@ -355,8 +355,8 @@ for_each (0<=n<N, 0<=oc<OC) {
 |===
 |Profile|Mode|in_t|weight_t|acc_t
 
-|Any|signed 8x8|aint8|int8,aint8|int32
-|Any|signed 8x4|aint8|int4|int32
+|Any|signed 8x8|int8|int8|int32
+|Any|signed 8x4|int8|int4|int32
 |Any|signed 16x8 |int16|int8|int48
 |MI, MT|float|float|float|float
 |===
@@ -387,10 +387,10 @@ Performs a two dimensional matrix multiplication. This allows both inputs to be
 
 [source,c]
 ----
-assert(in_t==aint8_t || (A_zp==0 && B_zp==0)) // Zero point only for asymmetric int8
-for_each (0<=m<M, 0<=n<N) {
+assert(in_t == int8_t || (A_zp == 0 && B_zp == 0)) // Zero point only for int8
+for_each (0 <= m < M, 0 <= n < N) {
     acc_t acc = 0
-    for_each (0<=k<K) {
+    for_each (0 <= k < K) {
         in_t value1 = tensor_read<in_t>(A, [M,K], [m,k], A_zp)
         in_t value2 = tensor_read<in_t>(B, [K,N], [k,n], B_zp)
         acc = apply_add<acc_t>(acc, value1 * value2)
@@ -404,7 +404,7 @@ for_each (0<=m<M, 0<=n<N) {
 |===
 |Profile|Mode|in_t|acc_t
 
-|Any|signed 8x8|aint8|int32
+|Any|signed 8x8|int8|int32
 |Any|signed 16x16|int16|int48
 |MI, MT|float|float|float
 |===
@@ -432,8 +432,8 @@ None
 
 [source,c]
 ----
-pad=concat([0,0],pad,[0,0])
-for_each ( 0<=n<N, 0<=oy<H, 0<=ox<W, 0<=c<C ) {
+pad=concat([0,0], pad, [0,0])
+for_each (0 <= n < N, 0 <= oy < H, 0 <= ox < W, 0 <= c < C ) {
     in_t acc = minimum_value<in_t>;
     iy = oy * stride_y - pad_top
     ix = ox * stride_x - pad_left
@@ -452,7 +452,7 @@ for_each ( 0<=n<N, 0<=oy<H, 0<=ox<W, 0<=c<C ) {
 |===
 |Profile|Mode|in_t|out_t
 
-|Any|signed 8|aint8|aint8
+|Any|signed 8|int8|int8
 |Any|16-bit|int16|int16
 |MI, MT|float|float|float
 |===
@@ -488,12 +488,13 @@ Performs a 2D transposed convolution over the given tensor input, using the weig
 
 [source,c]
 ----
-assert(in_t==aint8_t  || input_zp==0) // Zero point only for asymmetric int8
-assert(weight_t == aint8_t || weight_zp == 0)
+assert(in_t == int8_t || input_zp == 0) // Zero point only for int8
+assert(weight_t == int8_t || weight_zp == 0)
 for_each (index in out_shape) {
     tensor_write<acc_t>(output, [N,OH,OW,OC], index, bias[index[3]])
 }
-for_each (0<=n<N, 0<=iy<IH, 0<=ix<IW, 0<=oc<OC, 0<=ic<IC, 0<=ky<KH,  0<=kx<KW) {
+for_each (0 <= n < N, 0 <= iy < IH, 0 <= ix < IW, 0 <= oc < OC,
+          0 <= ic < IC, 0 <= ky < KH,  0 <= kx < KW) {
     oy = iy * stride_y - out_pad_top  + ky
     ox = ix * stride_x - out_pad_left + kx
     if (oy>=0 && oy<OH && ox>=0 && ox<OW) {
@@ -511,8 +512,8 @@ for_each (0<=n<N, 0<=iy<IH, 0<=ix<IW, 0<=oc<OC, 0<=ic<IC, 0<=ky<KH,  0<=kx<KW) {
 |===
 |Profile|Mode|in_t|weight_t|acc_t
 
-|Any|signed 8x8|aint8|int8,aint8|int32
-|Any|signed 8x4|aint8|int4|int32
+|Any|signed 8x8|int8|int8|int32
+|Any|signed 8x4|int8|int4|int32
 |Any|signed 16x8|int16|int8|int48
 |MI, MT|float|float|float|float
 |===
diff --git a/chapters/type_conversion.adoc b/chapters/type_conversion.adoc
index 2689818..37630e6 100644
--- a/chapters/type_conversion.adoc
+++ b/chapters/type_conversion.adoc
@@ -1,7 +1,7 @@
 //
 // This confidential and proprietary software may be used only as
 // authorised by a licensing agreement from ARM Limited
-// (C) COPYRIGHT 2020 ARM Limited
+// (C) COPYRIGHT 2020-2021 ARM Limited
 // ALL RIGHTS RESERVED
 // The entire notice above must be reproduced on all authorised
 // copies and copies may only be made to the extent permitted
@@ -103,8 +103,8 @@ Rescale quantized values into a new domain. This function scales by factor: mult
 [source,c]
 ....
 for_each (index in shape) {
-    assert(in_t==aint8  || in_t==uint8  || input_zp==0);
-    assert(out_t==aint8 || out_t==uint8 || output_zp==0);
+    assert(in_t == int8_t || in_t == uint8_t || input_zp == 0);
+    assert(out_t == int8_t || out_t == uint8_t || output_zp == 0);
     assert((scale32 && in_t!=int48_t) || (!scale32 && !double_round));
     int48_t value = tensor_read<in_t>(input, shape, index, input_zp);
     int c = (per_channel) ? index[dims-1] : 0;
@@ -121,18 +121,18 @@ for_each (index in shape) {
 |===
 |Profile|Mode|in_t|out_t
 
-|Any|signed 8 to signed 8|aint8|aint8
-|Any|signed 8 to signed 16|aint8|int16
-|Any|signed 8 to signed 32|aint8|int32
-|Any|signed 16 to signed 8|int16|aint8
+|Any|signed 8 to signed 8|int8|int8
+|Any|signed 8 to signed 16|int8|int16
+|Any|signed 8 to signed 32|int8|int32
+|Any|signed 16 to signed 8|int16|int8
 |Any|signed 16 to signed 16|int16|int16
 |Any|signed 16 to signed 32|int16|int32
-|Any|signed 32 to signed 8|int32|aint8
+|Any|signed 32 to signed 8|int32|int8
 |Any|signed 32 to signed 16|int32|int16
 |Any|signed 32 to signed 32|int32|int32
-|Any|signed 48 to signed 8|int48|aint8
+|Any|signed 48 to signed 8|int48|int8
 |Any|signed 48 to signed 16|int48|int16
 |Any|signed 48 to signed 32|int48|int32
-|Any|unsigned 8 to signed 8|uint8|aint8
-|Any|signed 8 to unsigned 8|aint8|uint8
+|Any|unsigned 8 to signed 8|uint8|int8
+|Any|signed 8 to unsigned 8|int8|uint8
 |===
-- 
cgit v1.2.1