aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEric Kunze <eric.kunze@arm.com>2020-10-01 18:50:46 -0700
committerEric Kunze <eric.kunze@arm.com>2020-10-01 18:50:46 -0700
commit3309a5362a13f840e84a2f67b9ba7141aae58cc4 (patch)
tree74bde543bc11bf43208fa4b3789e2def23919890
parentc92cb063a2ebfad2fd9ffd4c4a73184327b864ed (diff)
downloadspecification-3309a5362a13f840e84a2f67b9ba7141aae58cc4.tar.gz
TOSA spec 0.20.0, initial public versionv0.20.0
Change-Id: I7c528ebf7e78759ada4bb951b2471c367d3f4525
-rw-r--r--Makefile43
-rw-r--r--chapters/activation_funcs.adoc156
-rw-r--r--chapters/comparison.adoc119
-rw-r--r--chapters/control_flow.adoc48
-rw-r--r--chapters/custom.adoc20
-rw-r--r--chapters/data_layout.adoc296
-rw-r--r--chapters/data_nodes.adoc108
-rw-r--r--chapters/ewise_binary.adoc611
-rw-r--r--chapters/ewise_ternary.adoc60
-rw-r--r--chapters/ewise_unary.adoc332
-rw-r--r--chapters/image.adoc86
-rw-r--r--chapters/introduction.adoc378
-rw-r--r--chapters/notice.adoc26
-rw-r--r--chapters/operators.adoc43
-rw-r--r--chapters/reduction.adoc303
-rw-r--r--chapters/scatter_gather.adoc40
-rw-r--r--chapters/tensor_ops.adoc519
-rw-r--r--chapters/type_conversion.adoc138
-rw-r--r--tosa.css407
-rw-r--r--tosa_spec.adoc21
20 files changed, 3754 insertions, 0 deletions
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..6718528
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,43 @@
+#
+# This confidential and proprietary software may be used only as
+# authorised by a licensing agreement from ARM Limited
+# (C) COPYRIGHT 2020 ARM Limited
+# ALL RIGHTS RESERVED
+# The entire notice above must be reproduced on all authorised
+# copies and copies may only be made to the extent permitted
+# by a licensing agreement from ARM Limited.
+#
+
+TOSAREVISION=0.20.0
+MKDIR=mkdir -p
+ASCIIDOC=asciidoctor
+
+HTMLDIR=out/html
+PDFDIR=out/pdf
+
+COMMON_ARGS= -a revnumber="$(TOSAREVISION)"
+
+SPECSRC := tosa_spec.adoc
+SPECFILES = $(wildcard chapters/[A-Za-z]*.adoc) tosa.css
+
+.DELETE_ON_ERROR:
+
+.PHONY: all html pdf clean
+
+all: html pdf
+
+html: $(HTMLDIR)/tosa_spec.html
+
+pdf: $(PDFDIR)/tosa_spec.pdf
+
+clean:
+ $(RM) $(HTMLDIR)/tosa_spec.html
+ $(RM) $(PDFDIR)/tosa_spec.pdf
+
+$(HTMLDIR)/tosa_spec.html: $(SPECSRC) $(SPECFILES)
+ $(MKDIR) $(HTMLDIR)
+ $(ASCIIDOC) -b html5 -a stylesheet=tosa.css $(COMMON_ARGS) -o $@ $<
+
+$(PDFDIR)/tosa_spec.pdf: $(SPECSRC) $(SPECFILES)
+ $(MKDIR) $(PDFDIR)
+ $(ASCIIDOC) -r asciidoctor-pdf -b pdf $(COMMON_ARGS) -o $@ $(SPECSRC)
diff --git a/chapters/activation_funcs.adoc b/chapters/activation_funcs.adoc
new file mode 100644
index 0000000..65266b3
--- /dev/null
+++ b/chapters/activation_funcs.adoc
@@ -0,0 +1,156 @@
+//
+// This confidential and proprietary software may be used only as
+// authorised by a licensing agreement from ARM Limited
+// (C) COPYRIGHT 2020 ARM Limited
+// ALL RIGHTS RESERVED
+// The entire notice above must be reproduced on all authorised
+// copies and copies may only be made to the extent permitted
+// by a licensing agreement from ARM Limited.
+
+
+=== Activation Functions
+
+==== CLAMP
+Clamp to an arbitrary minimum and maximum value. Note that the maximum and minimum values are specified as signed quantized values, no scaling happens before or after this operation.
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t*|Input|shape|Input tensor from 1 to 4 dims
+|Attribute|in_t|min_val|-|minimum clip value
+|Attribute|in_t|max_val|-|maximum clip value
+|Output|out_t*|Output|shape|Output tensor of same type and shape as input
+|===
+
+*Operation Function:*
+....
+assert(dimensions(shape)<=4)
+for_each (index in shape) {
+ value = tensor_read<in_t>(input, shape, index)
+ acc = apply_clip(value, min_val, max_val)
+ tensor_write<out_t>(output, shape, index, acc)
+}
+....
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|in_t|out_t
+
+|Any|signed 8|aint8 |aint8
+|Any|signed 16|int16|int16
+|MI, MT|float|float|float
+|===
+
+==== RELUN
+
+ReLU with a scalar maximum value.
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t*|Input|shape|Input tensor
+|Attribute|in_t|max_val|-|maximum clip value
+|Output|out_t*|Output|shape|Output tensor of same type and shape as input
+|===
+
+*Operation Function:*
+
+[source,c]
+----
+for_each (index in shape) {
+ in_t value = tensor_read<in_t>(input, shape, index)
+ acc = apply_clip<in_t>(value, 0, max_val)
+ tensor_write<in_t>(output, shape, index, acc)
+}
+----
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|in_t
+
+|Any|signed 32|int32
+|MI, MT|float|float
+|===
+
+==== SIGMOID
+
+Sigmoid function: output = 1 / (1 + exp(-input))
+
+For quantized integer data types, the TABLE operator should be used instead with
+the following definition.
+
+The sigmoid table has 513 entries each of 16-bit precision and covering the input range -16.0 to +16.0 in steps of 1/16.
+
+[source,c]
+....
+int sigmoid_reference(int x) { // input x range is -256 to +256 inclusive
+ F64 v = (double)x/(double)16;
+ v = 1.0/(1.0+exp(-v));
+ return round_to_nearest(32768.0 * v);
+}
+
+generate_lookup_table(&sigmoid_table, &sigmoid_reference);
+....
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t*|Input|shape|Input tensor from 1 to 4 dims
+|Output|out_t*|Output|shape|Output tensor of same type and shape as input
+|===
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|in_t|out_t
+
+|MI, MT|float|float|float
+|===
+
+==== TANH
+
+Parameterized hyperbolic tangent.
+
+For quantized integer data types, the TABLE operator should be used instead with
+the following definition.
+
+The tanh_table has 513 entries each of 16-bit precision and covering the input range -8.0 to +8.0 in steps of 1/32. The table is specified by:
+
+[source,c]
+----
+int tanh_reference(int x) { // input x range is -256 to +256 inclusive
+ F64 v = (double)x/(double)32;
+ v = exp(-2.0*v);
+ v = (1.0-v)/(1.0+v);
+ return round_to_nearest(32768.0 * v);
+}
+
+generate_lookup_table(&tanh_table, &tanh_reference);
+----
+
+*Arguments:*
+
+|===
+
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t*|Input|shape|Input tensor from 1 to 4 dims
+|Output|out_t*|Output|shape|Output tensor of same type and shape as input
+|===
+
+*Supported Data Types:*
+
+|===
+
+|Profile|Mode|in_t|out_t
+
+|MI, MT|float|float|float
+|===
diff --git a/chapters/comparison.adoc b/chapters/comparison.adoc
new file mode 100644
index 0000000..3e69900
--- /dev/null
+++ b/chapters/comparison.adoc
@@ -0,0 +1,119 @@
+//
+// This confidential and proprietary software may be used only as
+// authorised by a licensing agreement from ARM Limited
+// (C) COPYRIGHT 2020 ARM Limited
+// ALL RIGHTS RESERVED
+// The entire notice above must be reproduced on all authorised
+// copies and copies may only be made to the extent permitted
+// by a licensing agreement from ARM Limited.
+
+=== Comparison Operators
+
+==== EQUAL
+
+Elementwise comparison operation
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t*|input1|shape1|Input tensor from 1 to 4 dims
+|Input|in_t*|input2|shape2|Input tensor with the same rank as Input 0
+|Output|out_t*|output|shape|Output tensor of same type as the input tensors, with broadcast shape if necessary
+|===
+
+*Operation Function:*
+
+[source,c]
+----
+for_each (index in shape) {
+ index1 = apply_broadcast(shape, shape1, index)
+ index2 = apply_broadcast(shape, shape2, index)
+ int32_t value1 = tensor_read<in_t>(input1, shape1, index1)
+ int32_t value2 = tensor_read<in_t>(input2, shape2, index2)
+ bool_t acc = (value1 == value2) ? True : False
+ tensor_write<out_t>(output, shape, index, acc)
+}
+----
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|in_t|out_t
+
+|Any|signed 32|int32|bool
+|MI, MT|float|float|bool
+|===
+
+==== GREATER
+
+Elementwise greater than comparison operation
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+|Input|in_t*|input1|shape1|Input tensor from 1 to 4 dims
+|Input|in_t*|input2|shape2|Input tensor with the same rank as Input 0
+|Output|out_t*|output|shape|Output tensor of same type as the input tensors, with broadcast shape if necessary
+|===
+
+*Operation Function:*
+
+[source,c]
+----
+for_each (index in shape) {
+ index1 = apply_broadcast(shape, shape1, index)
+ index2 = apply_broadcast(shape, shape2, index)
+ int32_t value1 = tensor_read<in_t>(input1, shape1, index1)
+ int32_t value2 = tensor_read<in_t>(input2, shape2, index2)
+ bool_t acc = (value1 > value2) ? True : False
+ tensor_write<out_t>(output, shape, index, acc)
+}
+----
+
+*Supported Data Types:*
+|===
+|Profile|Mode|in_t|out_t
+
+|Any|signed 32|int32|bool
+|MI, MT|float|float|bool
+|===
+
+==== GREATER_EQUAL
+
+Elementwise comparison operation
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t*|input1|shape1|Input tensor from 1 to 4 dims
+|Input|in_t*|input2|shape2|Input tensor with the same rank as Input 0
+|Output|out_t*|output|shape|Output tensor of same type as the input tensors, with broadcast shape if necessary
+|===
+
+*Operation Function:*
+
+[source,c]
+----
+for_each (index in shape) {
+ index1 = apply_broadcast(shape, shape1, index)
+ index2 = apply_broadcast(shape, shape2, index)
+ int32_t value1 = tensor_read<in_t>(input1, shape1, index1)
+ int32_t value2 = tensor_read<in_t>(input2, shape2, index2)
+ bool_t acc = (value1 >= value2) ? True : False
+ tensor_write<out_t>(output, shape, index, acc)
+}
+----
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|in_t|out_t
+
+|Any|signed 32|int32|bool
+|MI, MT|float|float|bool
+|===
diff --git a/chapters/control_flow.adoc b/chapters/control_flow.adoc
new file mode 100644
index 0000000..9efa3e7
--- /dev/null
+++ b/chapters/control_flow.adoc
@@ -0,0 +1,48 @@
+//
+// This confidential and proprietary software may be used only as
+// authorised by a licensing agreement from ARM Limited
+// (C) COPYRIGHT 2020 ARM Limited
+// ALL RIGHTS RESERVED
+// The entire notice above must be reproduced on all authorised
+// copies and copies may only be made to the extent permitted
+// by a licensing agreement from ARM Limited.
+
+=== Control Flow Operators
+
+TOSA implements two control flow operators, for conditional branching and loop based transfer of control. Both refer to region labels, which express the address of another operator in the TOSA program, to which control transfers.
+
+==== COND_IF
+
+Evaluates a Boolean condition and then takes one of two distinct execution paths. This implements the semantic If-then-else structure.
+
+*Input Operands:*
+
+* List of Input tensors – up to 4D tensor of any data layout.
+* A Boolean condition.
+
+*Output Operands:*
+
+* List of Output tensors – up to 4D tensor of any data layout. These need not have the same shapes as the list of input tensors.
+
+*Region Labels:*
+
+* Then: a reference to an operator that execution control transfers to if the Boolean condition is TRUE.
+* Else: a reference to an operator that execution control transfers to if the Boolean condition is FALSE.
+
+==== WHILE_LOOP
+
+Generates and evaluates a Bool condition and either executes a loop body or exits to another control point. This action is performed repeatedly after updating and re-evaluating the Boolean condition every iteration. This implements the semantic foreach or while iterative loop structure.
+
+*Input Operands:*
+
+* List of Input tensors – up to 4D tensor of any data layout. The list comprises both the tensors that are used to compute each iteration of the Bool condition, and the inputs to the body of the loop.
+
+*Output Operands:*
+
+* List of Output tensors – up to 4D tensor of any data layout. These must have the same dynamic shapes as the list of input tensors.
+
+*Region Labels:*
+
+* Cond: a reference to an operator that execution control transfers to in order to evaluate the current iteration of the Bool condition, and to update the condition for the next iteration.
+* Body: a reference to an operator that execution control transfers to in order to begin executing the body of the loop.
+
diff --git a/chapters/custom.adoc b/chapters/custom.adoc
new file mode 100644
index 0000000..4804e25
--- /dev/null
+++ b/chapters/custom.adoc
@@ -0,0 +1,20 @@
+//
+// This confidential and proprietary software may be used only as
+// authorised by a licensing agreement from ARM Limited
+// (C) COPYRIGHT 2020 ARM Limited
+// ALL RIGHTS RESERVED
+// The entire notice above must be reproduced on all authorised
+// copies and copies may only be made to the extent permitted
+// by a licensing agreement from ARM Limited.
+
+=== Custom Operators
+
+Hardware implementing TOSA may choose to add additional custom operators that are not expressed in the existing TOSA operations. These operators are not expected to be portable across TOSA implementations. The input and output signatures must be expressed in the corresponding TOSA node.
+
+==== CUSTOM
+Input Operands:
+
+* Num input operands – Scalar number of input operands
+* Num output operands – Scalar number of output operands
+* Operator code – untyped data consisting of the operator data
+* Affine transform description for each tensor
diff --git a/chapters/data_layout.adoc b/chapters/data_layout.adoc
new file mode 100644
index 0000000..8f318ae
--- /dev/null
+++ b/chapters/data_layout.adoc
@@ -0,0 +1,296 @@
+//
+// This confidential and proprietary software may be used only as
+// authorised by a licensing agreement from ARM Limited
+// (C) COPYRIGHT 2020 ARM Limited
+// ALL RIGHTS RESERVED
+// The entire notice above must be reproduced on all authorised
+// copies and copies may only be made to the extent permitted
+// by a licensing agreement from ARM Limited.
+
+=== Data Layout
+
+==== CONCAT
+Concatenate two tensors along a given axis. No data conversion happens during a concat operation.
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t*|input1|shape1|Input tensor from 1 to 4 dims
+|Input|in_t*|input2|shape2|Input tensor with rank matching input1
+|Attribute|int|axis|-|Axis along which concatenation is to occur.
+|Output|out_t*|output|shape|Output tensor of same type, size as the input tensor
+|===
+
+*Operation Function:*
+
+[source,c]
+----
+for_each (index1 in shape) {
+ index2 = index1
+ index2[axis] = index1[axis] - shape1[axis]
+ value = (index2[axis] < 0) ?
+ tensor_read<in_t>(input1, shape1, index1) :
+ tensor_read<in_t>(input2, shape2, index2) ;
+ tensor_write<in_t>(output, shape, index1, value);
+}
+----
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|in_t|out_t
+
+|Any|Boolean|bool|bool
+|Any|signed 8|int8/aint8|int8/aint8
+|Any|signed 16|int16|int16
+|Any|signed 32|int32|int32
+|MI, MT|float|float|float
+|===
+
+==== PAD
+
+Zero-pads a tensor along borders of each dimension.
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t*|input1|shape1|Input tensor
+|Attribute|int|padding|[rank(input1),2]|Amount of padding to be done
+|Output|out_t*|output|shape|Output tensor of same type as the input tensor
+|===
+
+*Quantization Parameters:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Attribute|in_t|input1_zp|-|Input tensor zero point
+|===
+
+*Operation Function:*
+
+[source,c]
+----
+for_each (index in shape) {
+ index1 = index
+ for (i=0; i<dimensions(shape); i++) {
+ index1[i] = index1[i] - padding[i,0]
+ }
+ value = tensor_read<in_t>(input1, shape1, index1, input1_zp, padding)
+ tensor_write<in_t>(output, shape, index, value + input1_zp);
+}
+----
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|in_t|out_t
+
+|Any|Boolean|bool|bool
+|Any|signed 8|int8/aint8|int8/aint8
+|Any|signed 16|int16|int16
+|Any|signed 32|int32|int32
+|MI, MT|float|float|float
+|===
+
+==== RESHAPE
+
+Returns a tensor with the same type/values as the input, with a new shape specified by the shape argument. Reshape may operate on tensors of any rank. No data conversion happens during a reshape operation.
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t*|input1|shape1|Input tensor from 1 to 4 dims
+|Attribute|int|new_shape|[rank(output)]|List of values, with each element giving the size of the result tensor for the given dimension. At most one dimension may be given as -1 to automatically calculate the dimension size.
+|Output|out_t*|output|shape|Output tensor of same type, size as the input tensor
+|===
+
+*Operation Function:*
+
+[source,c]
+----
+assert(tensor_size(shape1)==tensor_size(shape))
+for (i=0; i<tensor_size(shape); i++) {
+ output[i] = input[i]
+}
+----
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|in_t|out_t
+
+|Any|Boolean|bool|bool
+|Any|signed 8|int8/aint8|int8/aint8
+|Any|signed 16|int16|int16
+|Any|signed 32|int32|int32
+|MI, MT|float|float|float
+|===
+
+==== REVERSE
+
+Returns a tensor with the same type/values as the input, with the data reversed along the given axes. No data conversion happens during a reverse operation.
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t*|input|shape|Input tensor from 1 to 4 dims
+|Attribute|int|axes|[]|List of axes along which to reverse with size from 1 to rank(input1). Values are integers into the 0-based dimensions of the input tensor. An empty list is not allowed.
+|Output|out_t*|output|shape|Output tensor of same type, size as the input tensor
+|===
+
+*Operation Function:*
+
+[source,c]
+----
+for_each (index in shape) {
+ tmp_index = index;
+ for_each (axis in axes[]) {
+ tmp_index[axis] = shape[axis]-1-index[axis]
+ }
+ value = tensor_read<in_t>(input, shape, tmp_index);
+ tensor_write<in_t>(output, shape, index, value);
+}
+----
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|in_t|out_t
+
+|Any|Boolean|bool|bool
+|Any|signed 8|int8/aint8|int8/aint8
+|Any|signed 16|int16|int16
+|Any|signed 32|int32|int32
+|MI, MT|float|float|float
+|===
+
+==== SLICE
+
+Extracts a slice of the input tensor 0 on the given axis, beginning at the start coordinates, and extending for size elements in each direction. No data conversion happens during a slice operation.
+
+*Arguments:*
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t*|input1|shape1|Input tensor from 1 to 4 dims
+|Attribute|int|start|[rank(input1)]|List of integer coordinates, of length equal to the rank of input 0. Start coordinate for slicing.
+|Attribute|int|size|[rank(input1)]|List of integer size values, of length equal to the rank of input 0. Size of the input to be used.
+|Output|out_t*|output|shape|Output tensor of same type as the input tensor
+|===
+
+*Operation Function:*
+
+[source,c]
+----
+for_each (index in shape) {
+ tmp_index = index;
+ for (i=0; i<dimensions(shape); i++) {
+ tmp_index[i] = index[i] + start[i];
+ }
+ value = tensor_read<in_t>(input, shape1, tmp_index);
+ tensor_write<in_t>(output, shape, index, value);
+}
+----
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|in_t|out_t
+
+|Any|Boolean|bool|bool
+|Any|signed 8|int8/aint8|int8/aint8
+|Any|signed 16|int16|int16
+|Any|signed 32|int32|int32
+|MI, MT|float|float|float
+|===
+
+==== TILE
+
+Replicates input1 multiplies times along each dimension.
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t*|input1|shape1|Input tensor from 1 to 4 dims
+|Attribute|int|multiplies|[rank(shape1)]|Number of times to replicate input1 in each dimension
+|Output|out_t*|output|shape|Output tensor of same type, rank as the input tensor
+|===
+
+*Operation Function:*
+
+[source,c]
+----
+for_each (index in shape) {
+ tmp_index = index;
+ for (i=0; i<dimensions(shape); i++) {
+ assert(shape1[i] * multiplies[i] == shape[i])
+ tmp_index[i] = index[i] % shape1[i]
+ }
+ value = tensor_read<in_t>(input, shape1, tmp_index);
+ tensor_write<in_t>(output, shape, index, value);
+}
+----
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|in_t|out_t
+
+|Any|Boolean|bool|bool
+|Any|signed 8|int8/aint8|int8/aint8
+|Any|signed 16|int16|int16
+|Any|signed 32|int32|int32
+|MI, MT|float|float|float
+|===
+
+==== TRANSPOSE
+
+Permutes the dimensions based on perm.
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t*|input1|shape1|Input tensor from 1 to 4 dims
+|Attribute|int|perms|[rank(input1)]|List of integers of length equal to the rank of input1.
+|Output|out_t*|output|shape|Output tensor of same type, rank as the input tensor
+|===
+
+*Operation Function:*
+
+[source,c]
+----
+for_each (index in shape) {
+ tmp_index = index;
+ for (i=0; i<dimensions(shape); i++) {
+ assert(shape1[perm[i]] == shape[i])
+ tmp_index[perm[i]] = index[i]
+ }
+ value = tensor_read<in_t>(input, shape1, tmp_index);
+ tensor_write<in_t>(output, shape, index, value);
+}
+----
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|in_t|out_t
+
+|Any|Boolean|bool|bool
+|Any|signed 8|int8/aint8|int8/aint8
+|Any|signed 16|int16|int16
+|Any|signed 32|int32|int32
+|MI, MT|float|float|float
+|===
diff --git a/chapters/data_nodes.adoc b/chapters/data_nodes.adoc
new file mode 100644
index 0000000..a7886ff
--- /dev/null
+++ b/chapters/data_nodes.adoc
@@ -0,0 +1,108 @@
+//
+// This confidential and proprietary software may be used only as
+// authorised by a licensing agreement from ARM Limited
+// (C) COPYRIGHT 2020 ARM Limited
+// ALL RIGHTS RESERVED
+// The entire notice above must be reproduced on all authorised
+// copies and copies may only be made to the extent permitted
+// by a licensing agreement from ARM Limited.
+
+=== Data Nodes
+
+==== CONST
+
+A node containing constant data for use as the input to an operation. May hold data in any of the supported data formats.
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Output|out_t*|output|shape|Output tensor of same type, size as the input tensor
+|===
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|out_t
+
+|Any|Boolean|bool
+|Any|signed 8|int8/aint8
+|Any|signed 16|int16
+|Any|signed 32|int32
+|MI, MT|float|float
+|===
+
+==== PLACEHOLDER
+
+A node where data will be inserted into the network at runtime. Generally used for inputs to the network.
+
+*Arguments:*
+|===
+|Argument|Type|Name|Shape|Description
+
+|Output|out_t*|output|shape|Output tensor of same type, size as the input tensor
+|===
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|out_t
+
+|Any|Boolean|bool
+|Any|unsigned 8|uint8
+|Any|signed 8|int8/aint8
+|Any|signed 16|int16
+|Any|signed 32|int32
+|MI, MT|float|float
+|===
+
+==== IDENTITY
+
+Returns a tensor with the same shape, type, and contents as the input.
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t|input1|shape|Input tensor
+|Output|out_t*|output|shape|Output tensor of same type, size as the input tensor
+|===
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|in_t|out_t
+
+|Any|Boolean|bool|bool
+|Any|signed 8|int8/aint8|int8/aint8
+|Any|signed 16|int16|int16
+|Any|signed 32|int32|int32
+|MI, MT|float|float|float
+|===
+
+==== IDENTITYN
+
+Returns a list of tensors with the same shape, type, and contents as the input list of tensors.
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t|input1|[shape1, shape2, …]|List of input tensors
+|Output|out_t*|output|[shape1, shape2, …]|List of output tensors of same type, size as the input tensors
+|===
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|in_t|out_t
+
+|Any|Boolean|bool|bool
+|Any|signed 8|int8/aint8|int8/aint8
+|Any|signed 16|int16|int16
+|Any|signed 32|int32|int32
+|MI, MT|float|float|float
+|===
diff --git a/chapters/ewise_binary.adoc b/chapters/ewise_binary.adoc
new file mode 100644
index 0000000..92c4926
--- /dev/null
+++ b/chapters/ewise_binary.adoc
@@ -0,0 +1,611 @@
+//
+// This confidential and proprietary software may be used only as
+// authorised by a licensing agreement from ARM Limited
+// (C) COPYRIGHT 2020 ARM Limited
+// ALL RIGHTS RESERVED
+// The entire notice above must be reproduced on all authorised
+// copies and copies may only be made to the extent permitted
+// by a licensing agreement from ARM Limited.
+
+=== Elementwise Binary Operators
+
+==== ADD
+
+Elementwise addition of input1 and input2. Axis of size 1 will be broadcast, as necessary. Rank of input tensors must match.
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t*|input1|shape1|Input tensor
+|Input|in_t*|input2|shape2|Input tensor with the same rank as input1
+|Output|in_t*|output|shape|Output tensor with broadcast shape if necessary
+|===
+
+*Operation Function:*
+
+[source,c]
+----
+for_each (index in shape) {
+ index1 = apply_broadcast(shape, shape1, index)
+ index2 = apply_broadcast(shape, shape2, index)
+ in_t value1 = tensor_read<in_t>(input1, shape1, index1)
+ in_t value2 = tensor_read<in_t>(input2, shape2, index2)
+ in_t acc = apply_add<in_t>(value1, value2)
+ tensor_write<in_t>(output, shape, index, acc)
+}
+----
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|in_t
+
+|Any|signed 32|int32
+|MI, MT|float|float
+|===
+
+==== ARITHMETIC_RIGHT_SHIFT
+
+Elementwise arithmetic right shift of input1 by the amount specified in input2. Axis of size 1 will be broadcast, as necessary. Rank of input tensors must match.
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t*|input1|shape1|Input tensor
+|Input|in_t*|input2|shape2|Input tensor with the same rank as input1
+|Output|in_t*|output|shape|Output tensor with broadcast shape if necessary
+|===
+
+*Operation Function:*
+
+[source,c]
+----
+for_each (index in shape) {
+ index1 = apply_broadcast(shape, shape1, index)
+ index2 = apply_broadcast(shape, shape2, index)
+ in_t value1 = tensor_read<in_t>(input1, shape1, index1)
+ in_t value2 = tensor_read<in_t>(input2, shape2, index2)
+ assert(value2 <= 31)
+ in_t acc = value1 >> value2
+ acc = apply_clip(acc, minimum<in_t>, maximum<in_t>)
+ tensor_write<in_t>(output, shape, index, acc)
+}
+----
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|in_t
+
+|Any|signed 8|int8
+|Any|signed 16|int16
+|Any|signed 32|int32
+|===
+
+==== BITWISE_AND
+
+Elementwise bitwise AND of input tensor 0 and input tensor 1. Axis of size 1 will be broadcast as necessary. Rank of input tensors must match.
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t*|input1|shape1|Input tensor
+|Input|in_t*|input2|shape2|Input tensor with the same rank as input1
+|Output|in_t*|output|shape|Output tensor of same type as the input tensors, with broadcast shape if necessary
+|===
+
+*Operation Function:*
+
+[source,c]
+----
+for_each (index in shape) {
+ index1 = apply_broadcast(shape, shape1, index)
+ index2 = apply_broadcast(shape, shape2, index)
+ in_t value1 = tensor_read<in_t>(input1, shape1, index1)
+ in_t value2 = tensor_read<in_t>(input2, shape2, index2)
+ in_t acc = value1 & value2
+ tensor_write<in_t>(output, shape, index, acc)
+}
+----
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|in_t
+
+|Any|signed 8|aint8
+|Any|signed 16|int16
+|Any|signed 32|int32
+|===
+
+==== BITWISE_OR
+
+Elementwise bitwise OR of input1 and input2. Axis of size 1 will be broadcast as necessary. Rank of input tensors must match.
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t*|input1|shape1|Input tensor
+|Input|in_t*|input2|shape2|Input tensor with the same rank as input1
+|Output|in_t*|output|shape|Output tensor with broadcast shape if necessary
+|===
+
+*Operation Function:*
+
+[source,c]
+----
+for_each (index in shape) {
+ index1 = apply_broadcast(shape, shape1, index)
+ index2 = apply_broadcast(shape, shape2, index)
+ in_t value1 = tensor_read<in_t>(input1, shape1, index1)
+ in_t value2 = tensor_read<in_t>(input2, shape2, index2)
+ in_t acc = value1 | value2
+ tensor_write<in_t>(output, shape, index, acc)
+}
+----
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|in_t
+
+|Any|signed 8|aint8
+|Any|signed 16|int16
+|Any|signed 32|int32
+|===
+
+==== BITWISE_XOR
+
+Elementwise bitwise XOR of input1 and input2. Axis of size 1 will be broadcast as necessary. Rank of input tensors must match.
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t*|input1|shape1|Input tensor
+|Input|in_t*|input2|shape2|Input tensor with the same rank as input1
+|Output|in_t*|output|shape|Output tensor with broadcast shape if necessary
+|===
+
+*Operation Function:*
+
+[source,c]
+----
+for_each (index in shape) {
+ index1 = apply_broadcast(shape, shape1, index)
+ index2 = apply_broadcast(shape, shape2, index)
+ in_t value1 = tensor_read<in_t>(input1, shape1, index1)
+ in_t value2 = tensor_read<in_t>(input2, shape2, index2)
+ in_t acc = value1 ^ value2
+ tensor_write<in_t>(output, shape, index, acc)
+}
+----
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|in_t
+
+|Any|signed 8|aint8
+|Any|signed 16|int16
+|Any|signed 32|int32
+|===
+
+==== LOGICAL_AND
+
+Elementwise logical AND of input1 and input2. Axis of size 1 will be broadcast, as necessary. Rank of input tensors must match.
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t*|input1|shape1|Input tensor
+|Input|in_t*|input2|shape2|Input tensor with the same rank as input1
+|Output|in_t*|output|shape|Output tensor with broadcast shape if necessary
+|===
+
+*Quantization Parameters:*
+
+None
+
+*Operation Function:*
+
+[source,c]
+----
+for_each (index in shape) {
+ index1 = apply_broadcast(shape, shape1, index)
+ index2 = apply_broadcast(shape, shape2, index)
+ in_t value1 = tensor_read<in_t>(input1, shape1, index1)
+ in_t value2 = tensor_read<in_t>(input2, shape2, index2)
+ in_t acc = value1 && value2
+ tensor_write<in_t>(output, shape, index, acc)
+}
+----
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|in_t
+
+|Any|Bool|Bool
+|===
+
+==== LOGICAL_LEFT_SHIFT
+
+Elementwise left shift of input1 and input2. Axis of size 1 will be broadcast, as necessary. Rank of input tensors must match.
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t*|input1|shape1|Input tensor
+|Input|in_t*|input2|shape2|Input tensor with the same rank as input1
+|Output|in_t*|output|shape|Output tensor with broadcast shape if necessary
+|===
+
+*Operation Function:*
+
+[source,c]
+----
+for_each (index in shape) {
+ index1 = apply_broadcast(shape, shape1, index)
+ index2 = apply_broadcast(shape, shape2, index)
+ in_t value1 = tensor_read<in_t>(input1, shape1, index1)
+ in_t value2 = tensor_read<in_t>(input2, shape2, index2)
+ assert(value2 <= 31)
+ in_t acc = value1 << value2
+ tensor_write<in_t>(output, shape, index, acc)
+}
+----
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|in_t
+
+|Any|signed 8|int8
+|Any|signed 16|int16
+|Any|signed 32|int32
+|===
+
+==== LOGICAL_RIGHT_SHIFT
+
+Elementwise logical right shift of input1 by the amount specified in input2. Axis of size 1 will be broadcast, as necessary. Rank of input tensors must match.
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t*|input1|shape1|Input tensor
+|Input|in_t*|input2|shape2|Input tensor with the same rank as input1
+|Output|in_t*|output|shape|Output tensor with broadcast shape if necessary
+|===
+
+*Operation Function:*
+
+[source,c]
+----
+for_each (index in shape) {
+ index1 = apply_broadcast(shape, shape1, index)
+ index2 = apply_broadcast(shape, shape2, index)
+ in_t value1 = tensor_read<in_t>(input1, shape1, index1)
+ in_t value2 = tensor_read<in_t>(input2, shape2, index2)
+ assert(value2 <= 31)
+ in_t acc = (unsigned in_t)value1 >> value2
+ tensor_write<in_t>(output, shape, index, acc)
+}
+----
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|in_t
+
+|Any|signed 8|int8
+|Any|signed 16|int16
+|Any|signed 32|int32
+|===
+
+==== LOGICAL_OR
+
+Elementwise logical OR of input1 and input2. Axis of size 1 will be broadcast as necessary. Rank of input tensors must match.
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t*|input1|shape1|Input tensor
+|Input|in_t*|input2|shape2|Input tensor with the same rank as input1
+|Output|in_t*|output|shape|Output tensor with broadcast shape if necessary
+|===
+
+*Operation Function:*
+
+[source,c]
+----
+for_each (index in shape) {
+ index1 = apply_broadcast(shape, shape1, index)
+ index2 = apply_broadcast(shape, shape2, index)
+ in_t value1 = tensor_read<in_t>(input1, shape1, index1)
+ in_t value2 = tensor_read<in_t>(input2, shape2, index2)
+ in_t acc = value1 || value2
+ tensor_write<in_t>(output, shape, index, acc)
+}
+----
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|in_t
+
+|Any|Bool|Bool
+|===
+
+==== LOGICAL_XOR
+
+Elementwise logical XOR of input tensor 0 and input tensor 1. Axis of size 1 will be broadcast as necessary. Rank of input tensors must match.
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t*|input1|shape1|Input tensor from 1 to 4 dims
+|Input|in_t*|input2|shape2|Input tensor with the same rank as Input 0
+|Output|in_t*|output|shape|Output tensor of same type as the input tensors, with broadcast shape if necessary
+|===
+
+*Operation Function:*
+
+[source,c]
+----
+for_each (index in shape) {
+ index1 = apply_broadcast(shape, shape1, index)
+ index2 = apply_broadcast(shape, shape2, index)
+ in_t value1 = tensor_read<in_t>(input1, shape1, index1)
+ in_t value2 = tensor_read<in_t>(input2, shape2, index2)
+ in_t acc = value1 != value2
+ tensor_write<in_t>(output, shape, index, acc)
+}
+----
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|in_t
+
+|Any|Bool|Bool
+|===
+
+==== MAXIMUM
+
+Elementwise max of input1 and input2. Axis of size 1 will be broadcast, as necessary. Rank of input tensors must match.
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t*|input1|shape1|Input tensor
+|Input|in_t*|input2|shape2|Input tensor with the same rank as input1
+|Output|in_t*|output|shape|Output tensor with broadcast shape if necessary
+|===
+
+*Operation Function:*
+
+[source,c]
+----
+for_each (index in shape) {
+ index1 = apply_broadcast(shape, shape1, index)
+ index2 = apply_broadcast(shape, shape2, index)
+ in_t value1 = tensor_read<in_t>(input1, shape1, index1)
+ in_t value2 = tensor_read<in_t>(input2, shape2, index2)
+ in_t acc = apply_max(value1, value2)
+ tensor_write<in_t>(output, shape, index, acc)
+}
+----
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|in_t
+
+|Any|signed 32|int32
+|MI, MT|float|float
+|===
+
+==== MINIMUM
+
+Elementwise minimum of input tensor 0 and input tensor 1. Axis of size 1 will be broadcast, as necessary. Rank of input tensors must match.
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t*|input1|shape1|Input tensor
+|Input|in_t*|input2|shape2|Input tensor with the same rank as input1
+|Output|in_t*|output|shape|Output tensor with broadcast shape if necessary
+|===
+
+*Operation Function:*
+
+[source,c]
+----
+for_each (index in shape) {
+ index1 = apply_broadcast(shape, shape1, index)
+ index2 = apply_broadcast(shape, shape2, index)
+ in_t value1 = tensor_read<in_t>(input1, shape1, index1)
+ in_t value2 = tensor_read<in_t>(input2, shape2, index2)
+ in_t acc = apply_min(value1, value2)
+ tensor_write<in_t>(output, shape, index, acc)
+}
+----
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|in_t
+
+|Any|signed 32|int32
+|MI, MT|float|float
+|===
+
+==== MUL
+
+Elementwise multiplication (Hadamard product) of input tensor 0 and input tensor 1. Axis of size 1 will be broadcast, as necessary. Rank of input tensors must match.
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t*|input1|shape1|Input tensor
+|Input|in_t*|input2|shape2|Input tensor with the same rank as Input 0
+|Output|out_t*|output|shape|Output tensor with broadcast shape if necessary
+|===
+
+*Operation Function:*
+
+[source,c]
+----
+for_each (index in shape) {
+ index1 = apply_broadcast(shape, shape1, index)
+ index2 = apply_broadcast(shape, shape2, index)
+ in_t value1 = tensor_read<in_t>(input1, shape1, index1)
+ in_t value2 = tensor_read<in_t>(input2, shape2, index2)
+ in_t acc = value1 * value2 // takes low bits for int32_t
+ tensor_write<out_t>(output, shape, index, acc)
+}
+----
+
+*Supported Data Types:*
+|===
+|Profile|Mode|in_t|out_t
+
+|Any|signed 8|int8|int32
+|Any|signed 16|int16|int32
+|Any|signed 32|int32|int32
+|MI, MT|float|float|float
+|===
+
+==== POW
+
+Elementwise input tensor 0 value raised to the power of input 1 tensor. Axis of size 1 will be broadcast, as necessary. Rank of input tensors must match.
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t*|input1|shape1|Input tensor from 1 to 4 dims
+|Input|in_t*|input2|shape2|Input tensor with the same rank as Input 0
+|Output|in_t*|output|shape|Output tensor of same type as the input tensors, with broadcast shape if necessary
+|===
+
+*Quantization Parameters:*
+
+Only supported with floating point values.
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|in_t
+
+|MI, MT|float|float
+|===
+
+==== SUB
+
+Elementwise subtraction of input tensor 0 and input tensor 1. Axis of size 1 will be broadcast as necessary. Rank of input tensors must match.
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t*|input1|shape1|Input tensor
+|Input|in_t*|input2|shape2|Input tensor with the same rank as Input 0
+|Output|in_t*|output|shape|Output tensor with broadcast shape if necessary
+|===
+
+*Operation Function:*
+
+[source,c]
+----
+for_each (index in shape) {
+ index1 = apply_broadcast(shape, shape1, index)
+ index2 = apply_broadcast(shape, shape2, index)
+ in_t value1 = tensor_read<in_t>(input1, shape1, index1)
+ in_t value2 = tensor_read<in_t>(input2, shape2, index2)
+ in_t acc = apply_sub<in_t>(value1, value2);
+ tensor_write<in_t>(output, shape, index, acc)
+}
+----
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|in_t
+
+|Any|signed 32|int32
+|MI, MT|float|float
+|===
+
+==== TABLE
+
+Interpolated table lookup operation. Input values are scaled to create a fixed-point 9.7 value. The high 9 bits are used to index into the table. The fractional bits are used to interpolate based on the looked up value and the index+1 value in the table. The TABLE operator then returns a 16.7 interpolated value. Note that there must be 513 values to handle the full range of inputs.
+
+The TABLE operator is expected to be used as follows:
+
+* A RESCALE node is expected before the TABLE operator to scale the input to a full int16_t range for the table lookup
+* If an int16_t result is required then follow the TABLE operator with a RESCALE with a right shift of 7
+* If an int8_t result is required then follow the TABLE operator with a RESCALE with a right shift of 15
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t*|Input|shape|Input tensor
+|Input|table_t*|table|[513]|Lookup table tensor
+|Output|out_t*|output|shape|Output tensor
+|===
+
+*Quantization Parameters:*
+
+None
+
+*Operation Function:*
+
+[source,c]
+----
+assert(dimensions(shape)<=4)
+for_each (index in shape) {
+ in_t value = tensor_read<in_t>(input, shape, index)
+ out_t acc = apply_lookup(table, value)
+ tensor_write<out_t>(output, shape, index, acc)
+}
+----
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|in_t|table_t|out_t
+
+|Any|signed 16|int16|int16|int32
+|===
+
diff --git a/chapters/ewise_ternary.adoc b/chapters/ewise_ternary.adoc
new file mode 100644
index 0000000..1724777
--- /dev/null
+++ b/chapters/ewise_ternary.adoc
@@ -0,0 +1,60 @@
+//
+// This confidential and proprietary software may be used only as
+// authorised by a licensing agreement from ARM Limited
+// (C) COPYRIGHT 2020 ARM Limited
+// ALL RIGHTS RESERVED
+// The entire notice above must be reproduced on all authorised
+// copies and copies may only be made to the extent permitted
+// by a licensing agreement from ARM Limited.
+
+=== Elementwise Ternary Operators
+
+==== SELECT
+
+Elementwise select of the output based on a condition.
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|bool_t|input1|shape1|
+|Input|in_t*|input2|shape2|Input tensor from 1 to 4 dims
+|Input|in_t*|input3|shape3|Input tensor with the same rank as Input 0
+|Output|out_t*|output|shape|Output tensor of same type as the input tensors, with broadcast shape if necessary
+|===
+
+*Quantization Parameters:*
+
+None
+
+*Operation Function:*
+
+[source,c]
+----
+for_each (index in shape) {
+ index1 = apply_broadcast(shape, shape1, index)
+ index2 = apply_broadcast(shape, shape2, index)
+ index3 = apply_broadcast(shape, shape3, index)
+ bool_t value1 = tensor_read<in_t>(input1, shape1, index1)
+ in_t value2 = tensor_read<in_t>(input2, shape2, index2)
+ in_t value3 = tensor_read<in_t>(input3, shape3, index3)
+ if (value1 == True){
+ in_t acc = value2
+ } else {
+ in_t acc = value3
+ }
+ tensor_write<out_t>(output, shape, index, acc)
+}
+----
+
+*Supported Data Types:*
+|===
+|Profile|Mode|bool_t|in_t|out_t
+
+|Any|Boolean|bool|bool|bool
+|Any|signed 8|bool|aint8/int8|aint8/int8
+|Any|signed 16|bool|int16|int16
+|Any|signed 32|bool|int32|int32
+|MI, MT|float|bool|float|float
+|===
diff --git a/chapters/ewise_unary.adoc b/chapters/ewise_unary.adoc
new file mode 100644
index 0000000..34b64a9
--- /dev/null
+++ b/chapters/ewise_unary.adoc
@@ -0,0 +1,332 @@
+//
+// This confidential and proprietary software may be used only as
+// authorised by a licensing agreement from ARM Limited
+// (C) COPYRIGHT 2020 ARM Limited
+// ALL RIGHTS RESERVED
+// The entire notice above must be reproduced on all authorised
+// copies and copies may only be made to the extent permitted
+// by a licensing agreement from ARM Limited.
+
+=== Elementwise Unary Operators
+
+==== ABS
+
+Elementwise absolute value operation
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t*|input1|shape|Input tensor
+|Output|out_t*|output|shape|Output tensor of same type, size as the input tensor
+|===
+
+*Operation Function:*
+
+[source,c]
+----
+for_each (index in shape) {
+ int32_t value1 = tensor_read<in_t>(input1, shape, index)
+ if (value1 < 0)
+ value1 = apply_sub<out_t>(0, value1)
+ tensor_write<out_t>(output, shape, index, value1)
+}
+----
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|in_t|out_t
+
+|Any|signed 32|int32|int32
+|MI, MT|float|float|float
+|===
+
+==== BITWISE_NOT
+
+Elementwise bitwise NOT of input tensor.
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t*|input1|shape|Input tensor
+|Output|out_t*|output|shape|Output tensor of same type, size as the input tensor
+|===
+
+*Quantization Parameters:*
+
+None
+
+*Operation Function:*
+
+[source,c]
+----
+for_each (index in shape) {
+ int32_t value1 = tensor_read<in_t>(input1, shape, index)
+ int32_t acc = ~value1
+ tensor_write<out_t>(output, shape, index, acc)
+}
+----
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|in_t|out_t
+
+|Any|signed 8|aint8|aint8
+|Any|signed 16|int16|int16
+|Any|signed 32|int32|int32
+|===
+
+==== CEIL
+
+Elementwise ceiling operation
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t*|input1|shape|Input tensor
+|Output|out_t*|output|shape|Output tensor of same type, size as the input tensor
+|===
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|in_t|out_t
+
+|MI, MT|float|float|float
+|===
+
+==== CLZ
+
+Elementwise count leading zeros operation
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t*|input1|shape|Input tensor
+|Output|out_t*|output|shape|Output tensor of same type, size as the input tensor
+|===
+
+*Operation Function:*
+
+[source,c]
+----
+for_each (index in shape) {
+ int32_t value1 = tensor_read<in_t>(input1, shape, index)
+ if (value1 == 0) {
+ acc = 32 // input1_width
+ }
+ else {
+ mask = 1 << (32 - 1) // input1_width - 1
+ acc = 0
+ while (mask & value1 == 0) {
+ mask = mask >> 1
+ acc = acc + 1
+ }
+ }
+ tensor_write<out_t>(output, shape, index, acc)
+}
+----
+
+*Supported Data Types:*
+|===
+|Profile|Mode|in_t|out_t
+
+|Any|signed 32|int32|int32
+|===
+
+==== EXP
+
+Elementwise e to the x operation
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t*|input1|shape|Input tensor
+|Output|out_t*|output|shape|Output tensor of same type, size as the input tensor
+|===
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|Input 0|Output
+
+|Any|float|float|float
+|===
+
+==== FLOOR
+
+Elementwise floor operation
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t*|input1|shape|Input tensor
+|Output|out_t*|output|shape|Output tensor of same type, size as the input tensor
+|===
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|in_t|out_t
+
+|MI, MT|float|float|float
+|===
+
+==== LOG
+
+Elementwise natural logarithm operation
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t*|input1|shape|Input tensor
+|Output|out_t*|output|shape|Output tensor of same type, size as the input tensor
+|===
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|in_t|out_t
+
+|MI, MT|float|float|float
+|===
+
+==== LOGICAL_NOT
+
+Elementwise logical NOT of input.
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t*|input1|shape|Input tensor
+|Output|out_t*|output|shape|Output tensor of same type, size as the input tensor
+|===
+
+*Quantization Parameters:*
+
+None
+
+*Operation Function:*
+
+[source,c]
+----
+for_each (index in shape) {
+ in_t value1 = tensor_read<in_t>(input1, shape1, index)
+ in_t acc = !value1
+ tensor_write<in_t>(output, shape, index, acc)
+}
+----
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|in_t
+
+|Any|bool|bool
+|===
+
+==== NEGATE
+
+Elementwise negation operation
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t*|input1|shape|Input tensor
+|Output|out_t*|output|shape|Output tensor of same type, size as the input tensor
+|===
+
+*Quantization Parameters:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Attribute|in_t|input1_zp|-|Input 1 zero point
+|Attribute|out_t|output_zp|-|Output zero point
+|===
+
+*Operation Function:*
+
+[source,c]
+----
+assert(in_t == aint8_t || input_zp == 0) // Zero point only for asymmetric int8
+assert(out_t == aint8_t || output_zp == 0) // Zero point only for asymmetric int8
+for_each (index in shape) {
+ int32_t value1 = tensor_read<in_t>(input1, shape, index)
+ int32_t acc = apply_sub<int32_t>(0, value1 - input1_zp)
+ acc = apply_clip(acc + output_zp, minimum<out_t>, maximum<out_t>)
+ tensor_write<out_t>(output, shape, index, acc)
+}
+----
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|Input 0|Output
+
+|Any|signed 8|aint8|aint8
+|Any|signed 16|int16|int16
+|Any|signed 32|int32|int32
+|MI, MT|float|float|float
+|===
+
+==== RECIPROCAL
+
+Elementwise reciprocal operation. For integer operation, a TABLE should be used with the appropriate ranges.
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t*|input1|shape|Input tensor
+|Output|out_t*|output|shape|Output tensor of same type, size as the input tensor
+|===
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|Input 0|Output
+
+|MI, MT|float|float|float
+|===
+
+==== RSQRT
+
+Elementwise reciprocal square root operation. For integer operation, a TABLE should be used with the appropriate ranges.
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t*|input1|shape|Input tensor
+|Output|out_t*|output|shape|Output tensor of same type, size as the input tensor
+|===
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|Input 0|Output
+
+|MI, MT|float|float|float
+|=== \ No newline at end of file
diff --git a/chapters/image.adoc b/chapters/image.adoc
new file mode 100644
index 0000000..1e8974c
--- /dev/null
+++ b/chapters/image.adoc
@@ -0,0 +1,86 @@
+//
+// This confidential and proprietary software may be used only as
+// authorised by a licensing agreement from ARM Limited
+// (C) COPYRIGHT 2020 ARM Limited
+// ALL RIGHTS RESERVED
+// The entire notice above must be reproduced on all authorised
+// copies and copies may only be made to the extent permitted
+// by a licensing agreement from ARM Limited.
+
+=== Image Operators
+
+==== RESIZE
+
+Resizes a tensor. Resize is only allowed in the H and W dimensions. In expected use, stride_y is approximately (IH<<shift)/OH and stride_x is approximately (IW<<shift)/OW. OH and OW are also supplied as inputs since there may be off by one errors if calculating OH and OW from the strides.
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t*|input|[N,IH,IW,C]|Input tensor
+|Attribute|int*|output_size|[2]|[OH,OW]
+|Attribute|int16*|stride|[2]|[stride_y, stride_x]
+|Attribute|int16*|offset|[2]|[offset_y, offset_x]
+|Attribute|int|shift|Shift value
+|Attribute|mode_t|mode|-|BILINEAR or NEAREST
+|Output|out_t*|output|[N,OH,OW,C]|Output tensor
+|===
+
+*Quantization Parameters:*
+
+None
+
+*Operation Function*
+
+[source,c]
+----
+assert(0<shift && shift<=11); // prevent int32_t accumulator overflow for in_t==int8_t
+assert(-stride_y < offset_y && offset_y < (IH<<shift)-(OH-1)*stride_y)
+assert(-stride_x < offset_x && offset_x < (IW<<shift)-(OW-1)*stride_x)
+for_each (0<=n<N, 0<=oy<OH, 0<=ox<OW; 0<=c<C) {
+ y = oy * stride_y + offset_y
+ x = ox * stride_x + offset_x
+ iy = y >> shift; dy = y - (iy<<shift);
+ ix = x >> shift; dx = x - (ix<<shift);
+ iy0 = apply_max(iy,0);
+ iy1 = apply_min(iy,IH-1);
+ ix0 = apply_max(ix,0);
+ ix1 = apply_min(ix,IW-1);
+ if (mode==BILINEAR) {
+ v00 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy0,ix0,c])
+ v01 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy0,ix1,c])
+ v10 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy1,ix0,c])
+ v11 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy1,ix1,c])
+ acc_t acc = v00*((1<<shift)-dy)*((1<<shift)-dx)
+ acc = acc + v01*((1<<shift)-dy)*dx
+ acc = acc + v10*dy*((1<<shift)-dx)
+ acc = acc + v11*dy*dx
+ tensor_write<acc_t>(output, [N,OH,OW,C], [n,oy,ox,c], acc)
+ } else if (mode==NEAREST) {
+ iy = (dy>>(shift-1))!=0 ? iy1 : iy0;
+ ix = (dx>>(shift-1))!=0 ? ix1 : ix0;
+ v = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy,ix,c]);
+ tensor_write<acc_t>(output, [N,OH,OW,C], [n,oy,ox,c], v)
+ }
+}
+----
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|in_t|out_t
+
+|Any|signed 8, bilinear|int8|int32
+|Any|signed 8, nearest |int8|int8
+|Any|signed 16, bilinear|int16|int48
+|Any|signed 16, nearest |int16|int16
+|===
+
+*Scaling Modes:*
+|===
+|Mode|Description
+
+|NEAREST|Nearest Neighbor
+|BILINEAR|Bilinear interpolation
+|===
diff --git a/chapters/introduction.adoc b/chapters/introduction.adoc
new file mode 100644
index 0000000..ddc88c0
--- /dev/null
+++ b/chapters/introduction.adoc
@@ -0,0 +1,378 @@
+//
+// This confidential and proprietary software may be used only as
+// authorised by a licensing agreement from ARM Limited
+// (C) COPYRIGHT 2020 ARM Limited
+// ALL RIGHTS RESERVED
+// The entire notice above must be reproduced on all authorised
+// copies and copies may only be made to the extent permitted
+// by a licensing agreement from ARM Limited.
+
+== Introduction
+
+=== Overview
+
+Tensor Operator Set Architecture (TOSA) provides a set of operations that Arm expects to be implemented on its NPUs. Each NPU may implement the operators with a different microarchitecture, however the result at the TOSA level must be consistent. Applications or frameworks which target TOSA can also be deployed on a wide variety of IP, such as CPUs or GPUs, with defined accuracy and compatibility constraints. Most operators from the common ML frameworks (TensorFlow, PyTorch, etc.) should be expressible in TOSA. It is expected that there will be tools to lower from the ML frameworks into TOSA. TOSA is focused on inference, leaving training to the original frameworks.
+
+=== Profiles
+
+TOSA supports three profiles that enable efficient implementation on different classes of device. The Base-Inference profile is intended for embedded integer/fixed-point designs performing inference only. The Main-Inference profile is intended for general inference functionality including integer and floating-point data types. The Main-Training profile adds training operators in addition to inference operators.
+This version of the specification covers the Base and Main inference profiles. Main Training profile is expected in a later version of the specification.
+The following table summarizes the three profiles:
+
+.Profiles
+|===
+|Profile|Name|Integer Inference|Floating-point Inference|Training
+
+|Base Inference|TOSA-BI|Yes|No|No
+|Main Inference|TOSA-MI|Yes|Yes|No
+|Main Training|TOSA-MT|Yes|Yes|Yes
+|===
+
+=== Operator Selection
+
+TOSA defines a set of primitive operators to which higher level operators can be lowered in a consistent way. To remain effective and efficient to implement the set of operators must be constrained to a reasonably small set of primitive operations out of which others can be constructed. The following principles govern the selection of operators within TOSA.
+
+.Principles
+[cols="1,5,5"]
+|===
+|ID|Principle|Reason for this
+
+|P0
+|An operator shall be a primitive operation or building block that cannot be broken down into simpler whole tensor operations
+|If the operator can be broken down, then we should look at the component operators.
+
+|P1
+|An operator shall be usable as a component out of which more complex operations can be constructed
+|Single use operators have a high architectural cost and a more reusable version should be considered instead.
+
+|P2
+|Precision should be appropriate for the input and output data types
+|Precision higher than that needed to calculate the result leads to extra implementation cost
+
+|P3
+|Numerical definition of common sub-operations should be consistent between operators (for example: value scaling)
+|Consistent sub-operation definition reduces the operator implementation cost
+
+|P4
+|The valid input and output ranges for all operands shall be specified
+|Ranges are required to make consistent (numerically agreeing) implementations possible
+
+|P5
+|Integer operators shall be implementable in a bit-exact form with good efficiency on CPU, GPU and hardware targets.
+|Reduces implementation cost and gives consistent inference result
+|===
+
+=== Supported Features
+
+==== Data Layouts
+
+The following data layouts are supported in TOSA. Data layouts are specified such that the rightmost dimension is the fastest changing.
+
+.Data Layouts
+[cols="1,4,4"]
+|===
+|Name|Description of dimensions|Usage
+
+|NHWC|Batch, Height, Width, Channels|Feature maps
+|NDHWC|Batch, Depth, Height, Width, Channels|Feature maps for 3D convolution
+|OHWI|Output channels, Filter Height, Filter Width, Input channels|Weights
+|HWIM|Filter Height, Filter Width, Input channels, Channel Multiplier|Weights for depthwise convolutions
+|DOHWI|Depth, Output Channels, Filter Height, Filter Width, Input Channels|Weights for 3D convolution
+|===
+
+==== Floating point
+
+The base inference profile of TOSA requires support for the quantized integer operations. Floating point support is included in the main inference profile.
+
+==== Number formats
+
+The following number formats are defined in TOSA. See section 2.3 for details on quantization within TOSA. The number formats supported by an operator are listed in a per-operator table of supported types.
+
+.Number formats
+[cols="1,1,1,6"]
+|===
+|Format|Minimum|Maximum|Description
+
+|bool
+| -
+| -
+|Boolean value. Size implementation defined.
+
+|aint8
+| -128
+| +127
+|Asymmetric 8-bit quantized values. Operators using this data type will require a zero point value and a scale factor. See <<Quantization Scaling>> for details on quantization parameters and their use in operators.
+
+|int4
+| -7
+| +7
+|Signed 4-bit values. These values are symmetrically quantized, with values from -7, 7 as the range. These are quantized per-channel. No zero point is used, scale factor is provided as part of the operation.
+
+|int8
+| -128
+| +127
+|Signed 8-bit twos-complement values. These values are quantized. Symmetric per-channel or per-tensor quantization. No zero point is used, scale factor is provided in the operation.
+
+|uint8
+| 0
+| 255
+|Unsigned 8-bit value quantized value with zero point. This data type is only used for input/output conversion by the RESCALE operator and not supported by other operators.
+
+|int16
+| -32768
+| +32767
+|Signed 16-bit twos-complement values. Symmetric per-tensor quantization. No zero point is used , scale factor is provided in the operation.
+
+|int32
+| -(1<<31)
+| (1<<31)-1
+|32-bit twos-complement value. No scale factor used.
+
+|float
+| -infinity
+| +infinity
+|floating point number. Must have features defined in the section <<Floating Point>>. (Main inference profile)
+|===
+
+Note: In this specification minimum<type> and maximum<type> will denote the minimum and maximum values of the data as stored in memory (ignoring the zero point). The minimum and maximum values for each type is given in the preceding table.
+
+Note: Integer number formats smaller than 8 bits may be used provided that the numerical result is the same as using a sequence of 8-bit TOSA operations. For example, a convolution with low precision data must equal that of running the convolution at 8 bits and then clipping the result to the permitted output range. This ensures that a Base Inference profile TOSA implementation can calculate the same result.
+
+==== Tensor Metadata
+
+Tensors have an associated tensorinfo that contains information about the tensor including:
+
+* Data Type
+* Shape
+
+The following pseudocode represents the operations that will happen to data elements as they are read in to be processed, or have their results written out.
+
+*Functionality of tensor read*
+If in_t is 8-bit then out_t=int16_t. Otherwise out_t is set to the same as in_t.
+....
+out_t tensor_read<in_t>(in_t *address, dim_t shape, dim_t index, in_t zero_point=0, dim_t pad=NULL) {
+ assert(in_t==aint8_t || zero_point==0)
+ unsigned offset = 0;
+ for (i=0; i<dimensions(shape); i++) {
+ if (index[i]<0) { assert(pad && pad[2*i]+index[i]>=0); return 0; }
+ if (index[i]>=shape[i]) { assert(pad && index[i]<shape[i]+pad[2*i+1]); return 0; }
+ offset = offset * shape[i] + index[i]
+ }
+ return address[offset] - zero_point;
+}
+....
+
+*Functionality of tensor write*
+
+....
+tensor_write<type>(<type> *address, dim_t shape, dim_t index, <type> value) {
+ unsigned offset = 0;
+ for (i=0; i<dimensions(shape); i++) {
+ assert (index[i]>=0 && index[i]<shape[i]);
+ offset = offset * shape[i] + index[i];
+ }
+ address[offset] = value;
+}
+....
+
+==== Broadcasting
+
+In operations where broadcasting is supported, an input shape dimension can be broadcast to an output shape dimension if the dimensions are equal or the input shape dimension is 1.
+
+*Functionality of broadcast*
+
+The following function maps an index in the output tensor to an index in the input tensor.
+
+....
+dim_t apply_broadcast(dim_t out_shape, dim_t in_shape, dim_t index) {
+ assert(dimensions(out_shape)==dimensions(in_shape));
+ for (i=0; i<dimensions(out_shape); i++) {
+ if (out_shape[i] != in_shape[i]) {
+ assert(in_shape[i]==1);
+ index[i] = 0;
+ }
+ }
+ return index;
+}
+....
+
+=== Quantization
+
+==== Quantization Basics
+
+When converting the floating-point values used in training to quantized integer values used on devices for inference, we need to know the range of values to be represented by the integers. The frameworks use slightly different parameters and data types to do this conversion. For example, TensorFlow passes a min and max floating-point values for quantization. TensorFlow Lite and PyTorch use a floating-point scale factor, and an integer zero point. TFLite and PyTorch also allow for symmetric quantization where the zero point value is not used.
+In the initial quantization work, tensors were quantized with a single set of parameters for the entire tensor. Recently, frameworks have added support for different quantization parameters on a per channel basis. This per channel quantization thus carries a vector of scales and zero points to be used on each channel. TOSA will support per channel quantization, but only for the weight tensor used in convolution operators.
+Quantization parameters in floating point cause imprecision. In some instances, the software may need to calculate post-op scaling values on hardware that does not have a floating-point unit. Arm NPUs have fixed output scaling hardware that uses fixed point arithmetic to calculate the output values. When calculating these multiplicands and shift amounts, different floating-point precisions may cause results to differ.
+To remove this dependency on floating point values, there are two design choices being made:
+
+* Quantization parameters will be associated with operations rather than tensors. The operations are where the scaling is taking place, and thus can be specified such that the hardware fixed point calculations can be represented exactly, such that any hardware designed to the TOSA specification will return the same quantized values.
+* Quantization parameters will be given in integer values, as multiplicands and shifts. Specific bit widths and signed/unsignedness will be provided with each operator.
+
+When compiling a network to TOSA, we expect that a compiler would lower all possible subgraphs to TOSA, keeping the quantization parameters with the tensors, and then do an additional pass where the quantization values for the operators are calculated based on the input and output tensors for the operation.
+TOSA currently supports signed 8-bit quantization, unsigned 8-bit quantization, and signed 16-bit quantization. Signed 8-bit values can be used with a zero point, where they are noted as aint8, or without a zero point, where they are noted as int8. TOSA also supports 32-bit computations, however the inputs to these 32-bit operations are not scaled, and input quantization parameters are ignored. Quantization parameters may be provided for 32-bit operations, as the output may be scaled down to 8 or 16 bits.
+
+==== Quantization Scaling
+
+Most operations in TOSA do not contain quantization scaling in the operation, but in a separate RESCALE node that performs change in scale using a multiplier and shift value. This TOSA specification supports two precisions of multiplier: 16-bit and 32-bit. The 32-bit multiplier version supports two rounding modes to enable simpler lowering of existing frameworks that use two stage rounding. All arithmetic is designed so that it does not overflow a 64-bit accumulator and that the final result fits in 32 bits. In particular a 48-bit value can only be scaled with the 16-bit multiplier.
+
+The apply_scale functions provide a scaling of approximately (multiplier * 2^-shift^). The shift range is limited to allow a variety of implementations. The upper limit of 62 allows it to be decomposed as two right shifts of 31. The lower limit removes special cases in the rounding. These restrictions have little practical impact since the shift value to achieve a scaling of 1.0 is 30 for apply_scale_32 with multiplier=1<<30 and 14 for apply_scale_16 with scale=1<<14. It follows that a scaling range of 2^+12^ down to 2^-32^ is supported for both functions with normalized multiplier. (Smaller scales can be obtained by denormalizing the multiplier).
+
+....
+int32_t apply_scale_32(int32_t value, int32_t multiplier, uint6_t shift, bool double_round) {
+ assert(multiplier >= 0);
+ assert(2<=shift && shift<=62);
+ int64_t round = 1<<(shift-1);
+ if (double_round) {
+ if (shift>31 && value>=0) round += 1<<30;
+ if (shift>31 && value<0) round -= 1<<30;
+ }
+ int64_t result = (int64_t)value * multiplier + round;
+ result = result >> shift;
+ assert(result >= minimum<int32_t> && result <= maximum<int32_t>);
+ return (int32_t)result;
+}
+
+int32_t apply_scale_16(int48_t value, int16_t multiplier, uint6_t shift) {
+ assert(multiplier >= 0);
+ assert(2<=shift && shift<=62);
+ int64_t round = (1<<(shift-1));
+ int64_t result = (int64_t)value * multiplier + round;
+ result = result >> shift;
+ assert(result >= minimum<int32_t> && result <= maximum<int32_t>);
+ return (int32_t)result;
+}
+....
+
+In some functions, the multiplier and shift are combined into a scale_t structure:
+
+....
+// Combined scaling parameters: the scale applied is multiplier * 2^-shift.
+typedef struct {
+ int32_t multiplier; // non-negative (asserted by apply_scale_16/32)
+ uint6_t shift; // valid range 2..62 (asserted by apply_scale_16/32)
+} scale_t;
+....
+
+In places where a divide is required, we also use the function below to calculate an appropriate scaling value.
+
+....
+// reciprocal_scale: compute a scale_t approximating 1/value for use where a
+// divide is required. The numerator must be formed in 64 bits:
+// ((1<<30)+1)<<k overflows a 32-bit int for k >= 2.
+scale_t reciprocal_scale(uint32_t value) {
+ assert(value > 0)
+ scale_t scale ;
+ int k = 32-count_leading_zero(value-1); // (1<<k)/2 < value <= (1<<k)
+ int64_t numerator = (((int64_t)1<<30)+1)<<k;
+ scale.multiplier = numerator/value ; // (1<<30) <= multiplier < (1<<31)
+ scale.shift = 30+k;
+ return scale ;
+}
+....
+
+The following functions provide basic arithmetic with asserts that values stay in the valid range supported by TOSA.
+
+....
+// apply_add: checked addition. Float adds pass through unchanged; integer
+// adds are computed in 64 bits and asserted to fit the accumulator type.
+acc_t apply_add<acc_t>(acc_t a, acc_t b) {
+ if (acc_t==float) return a+b;
+ int64_t c = (int64_t)a + (int64_t)b;
+ assert( c >= minimum<acc_t> && c <= maximum<acc_t> );
+ return (acc_t)c;
+}
+
+// apply_sub: checked subtraction. Float subtracts pass through unchanged;
+// integer subtracts are computed in 64 bits and asserted to fit the
+// accumulator type.
+acc_t apply_sub<acc_t>(acc_t a, acc_t b) {
+ if (acc_t==float) return a-b;
+ int64_t c = (int64_t)a - (int64_t)b;
+ assert( c >= minimum<acc_t> && c <= maximum<acc_t> );
+ return (acc_t)c;
+}
+....
+
+The following functions are used in the pseudocode to take maximum, minimum or clip values to a range.
+
+....
+// apply_max: return the larger of a and b (a is returned on ties).
+<type> apply_max<type>(<type> a, <type> b) {
+ if (a>=b) return a ; else return b ;
+}
+
+// apply_min: return the smaller of a and b (b is returned on ties).
+<type> apply_min<type>(<type> a, <type> b) {
+ if (a<b) return a ; else return b ;
+}
+
+// apply_clip: clamp value to the inclusive range [min_val, max_val].
+<type> apply_clip<type>(<type> value, <type> min_val, <type> max_val) {
+ assert(min_val <= max_val)
+ value = apply_max(value, min_val) ;
+ value = apply_min(value, max_val) ;
+ return value ;
+}
+....
+
+==== Quantized Convolutions
+
+For convolution, the input is not required to be scaled before the convolution occurs. The convolution produces an accumulator output of type int32_t or int48_t. This accumulator output is then scaled to the final output range using the RESCALE operator. The scale applied in the RESCALE operator should be set to multiplier and shift values such that: multiplier * 2^-shift^ = (input_scale * weight_scale) / output_scale. Here, input_scale, weight_scale and output_scale are the conversion factors from integer to floating point for the input, weight and output tensor values respectively. If per-channel scaling is needed then the per-channel option of the RESCALE operation should be used.
+
+==== Elementwise operators
+When two quantized tensors are used in an operation, they must use the same scaling factor for the result to be valid. If the scaling factor for both tensors is equal, implementations will be allowed to optionally skip the scaling process. If the scaling factors are different, then the input with the smaller scaling factor is scaled to match the scaling factor of the input with the larger scaling factor.
+For each input, then, the scaled result = (result * scale + round) >> shift.
+For 8 and 16 bit activations, the scale will be calculated during compilation of the network and provided as a 16-bit scale factor and corresponding shift value. The value for round is 1 << (shift - 1). The scaled result should be 32 bits.
+Once each input has been scaled, the elementwise operation will occur. Then the result must be scaled into the proper output scaling range. The output scaling range will be supplied as a 16-bit scale factor and a 6-bit shift value (other than the comparison operators).
+This applies to the following operations:
+ADD, MAX, MIN, SUB, EQUAL, GREATER, GREATER_EQUAL
+MUL is a special case, where the inputs do not need to be scaled, all the scaling can be done during the output scaling process.
+
+==== General unary functions
+General unary functions such as sigmoid(), tanh(), exp() are expressed using a lookup table and interpolation to enable efficient implementation and extension to other operations with the addition of user supplied tables (the TABLE operation). All table lookups are based on the following reference lookup function that takes as input a table of 513 entries of 16 bits each.
+
+....
+// apply_lookup: interpolated lookup in a 513-entry table of 16-bit values.
+// After clipping, index ranges 0..511, so table[index+1] stays within the
+// 513 entries.
+int32_t apply_lookup(int16_t *table, int value)
+{
+ value = apply_clip(value, -32768, +32767)
+ index = (value+32768) >> 7 // top 9 bits select the table segment
+ fraction = value & 0x7f // low 7 bits interpolate within the segment
+ base = table[index]
+ next = table[index+1]
+ value = (base << 7) + (next - base) * fraction
+ return value; // return interpolated value of 16+7=23 bits
+}
+....
+
+Note that although the table lookup defined here has 16-bit precision, for 8-bit only operations an 8-bit table can be derived by applying the reference function to each of the possible 256 input values.
+The following code constructs a 513-entry table based on a reference function.
+
+....
+// generate_lookup_table: build the 513-entry table by sampling the
+// reference function at i = -256..256 and clamping each sample to the
+// int16 range. Uses apply_clip, the clamp helper defined in this section;
+// the bare name "clip" is not defined anywhere in this specification.
+void generate_lookup_table(int16_t *table, int (*reference)(int))
+{
+ for (int i=-256; i<=256; i++) {
+ value = (*reference)(i);
+ table[i+256] = apply_clip(value, -32768, +32767)
+ }
+}
+....
+
+=== Floating Point
+
+TOSA does not define bit-exact behaviour of the floating point type, since floating point operation results can vary according to operation order (floating point addition is not associative in general) and rounding behaviour. If a bit defined answer is required then integer operations should be used. TOSA does define that the floating point type must support the following list of features. These features ensure that detection of overflow and other exceptional conditions can be handled consistently.
+
+* The floating point type must have at least 16 total bits including the sign bit
+* The floating point type must support positive and negative infinity values
+* The floating point type must support at least one Not-a-Number encoding (NaN)
+* The floating point type must support signed zero
+* The floating point type must support handling of infinities, NaNs, zeros as in the following table
+
+.Floating point behaviour
+|===
+|Case|Result
+
+|Any input operand is a NaN | a NaN
+
+|(&#177; 0) &#215; (&#177; infinity), (&#177; infinity) &#215; (&#177; 0) | a NaN
+
+|(&#177; 0) / (&#177; 0), (&#177; infinity) / (&#177; infinity) | a NaN
+
+| (+infinity) - (+infinity), (+infinity) + (-infinity) | a NaN
+
+| Any positive overflow | + infinity
+
+| Any negative overflow | - infinity
+
+| Any positive underflow | + 0
+
+| Any negative underflow | - 0
+
+|===
diff --git a/chapters/notice.adoc b/chapters/notice.adoc
new file mode 100644
index 0000000..17affde
--- /dev/null
+++ b/chapters/notice.adoc
@@ -0,0 +1,26 @@
+<<<
+
+*Proprietary Notice*
+
+This document is protected by copyright and other related rights and the practice or implementation of the information contained in this document may be protected by one or more patents or pending patent applications. No part of this document may be reproduced in any form by any means without the express prior written permission of Arm. No license, express or implied, by estoppel or otherwise to any intellectual property rights is granted by this document unless specifically stated.
+
+Your access to the information in this document is conditional upon your acceptance that you will not use or permit others to use the information for the purposes of determining whether implementations infringe any third party patents.
+
+THIS DOCUMENT IS PROVIDED “AS IS”. ARM PROVIDES NO REPRESENTATIONS AND NO WARRANTIES, EXPRESS, IMPLIED OR STATUTORY, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTABILITY, SATISFACTORY QUALITY, NON-INFRINGEMENT OR FITNESS FOR A PARTICULAR PURPOSE WITH RESPECT TO THE DOCUMENT. For the avoidance of doubt, Arm makes no representation with respect to, and has undertaken no analysis to identify or understand the scope and content of, patents, copyrights, trade secrets, or other rights.
+
+This document may include technical inaccuracies or typographical errors.
+
+TO THE EXTENT NOT PROHIBITED BY LAW, IN NO EVENT WILL ARM BE LIABLE FOR ANY DAMAGES, INCLUDING WITHOUT LIMITATION ANY DIRECT, INDIRECT, SPECIAL, INCIDENTAL, PUNITIVE, OR CONSEQUENTIAL DAMAGES, HOWEVER CAUSED AND REGARDLESS OF THE THEORY OF LIABILITY, ARISING OUT OF ANY USE OF THIS DOCUMENT, EVEN IF ARM HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+This document consists solely of commercial items. You shall be responsible for ensuring that any use, duplication or disclosure of this document complies fully with any relevant export laws and regulations to assure that this document or any portion thereof is not exported, directly or indirectly, in violation of such export laws. Use of the word “partner” in reference to Arm’s customers is not intended to create or refer to any partnership relationship with any other company. Arm may make changes to this document at any time and without notice.
+
+If any of the provisions contained in these terms conflict with any of the provisions of any click through or signed written agreement covering this document with Arm, then the click through or signed written agreement prevails over and supersedes the conflicting provisions of these terms. This document may be translated into other languages for convenience, and you agree that if there is any conflict between the English version of this document and any translation, the terms of the English version of the Agreement shall prevail.
+
+The Arm corporate logo and words marked with ® or ™ are registered trademarks or trademarks of Arm Limited (or its subsidiaries) in the US and/or elsewhere. All rights reserved. Other brands and names mentioned in this document may be the trademarks of their respective owners. Please follow Arm’s trademark usage guidelines at http://www.arm.com/company/policies/trademarks.
+
+Copyright © 2020 Arm Limited (or its affiliates). All rights reserved. +
+Arm Limited. Company 02557590 registered in England. +
+110 Fulbourn Road, Cambridge, England CB1 9NJ. +
+LES-PRE-20349 +
+
+<<< \ No newline at end of file
diff --git a/chapters/operators.adoc b/chapters/operators.adoc
new file mode 100644
index 0000000..db252c2
--- /dev/null
+++ b/chapters/operators.adoc
@@ -0,0 +1,43 @@
+//
+// This confidential and proprietary software may be used only as
+// authorised by a licensing agreement from ARM Limited
+// (C) COPYRIGHT 2020 ARM Limited
+// ALL RIGHTS RESERVED
+// The entire notice above must be reproduced on all authorised
+// copies and copies may only be made to the extent permitted
+// by a licensing agreement from ARM Limited.
+
+== Operators
+
+=== Operator Parameters
+
+Each operator has a set of input and output operands. The operands will be referenced during operator execution. Operators may also have a set of operator attributes. Operator attributes are expected to be constant values during compilation and may not be provided at execution time.
+For elementwise operations, the scaling of the output can be independent from the data type of the input. Thus 8-bit operations may output 16-bit data values.
+
+include::tensor_ops.adoc[]
+
+include::activation_funcs.adoc[]
+
+include::ewise_binary.adoc[]
+
+include::ewise_unary.adoc[]
+
+include::ewise_ternary.adoc[]
+
+include::comparison.adoc[]
+
+include::reduction.adoc[]
+
+include::data_layout.adoc[]
+
+include::scatter_gather.adoc[]
+
+include::image.adoc[]
+
+include::type_conversion.adoc[]
+
+include::data_nodes.adoc[]
+
+include::custom.adoc[]
+
+include::control_flow.adoc[]
diff --git a/chapters/reduction.adoc b/chapters/reduction.adoc
new file mode 100644
index 0000000..bef68a3
--- /dev/null
+++ b/chapters/reduction.adoc
@@ -0,0 +1,303 @@
+//
+// This confidential and proprietary software may be used only as
+// authorised by a licensing agreement from ARM Limited
+// (C) COPYRIGHT 2020 ARM Limited
+// ALL RIGHTS RESERVED
+// The entire notice above must be reproduced on all authorised
+// copies and copies may only be made to the extent permitted
+// by a licensing agreement from ARM Limited.
+
+=== Reduction Operators
+
+==== REDUCE_ANY
+
+Reduce a tensor along the given axes with a logical OR operation
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t*|input|in_shape|Input tensor from 1 to 4 dims
+|Attribute|int|axes|[]|List of axes with size from 1 to rank(input1). Values are integers into the 0-based dimensions of the input tensor. An empty list is not allowed.
+|Attribute|int|keep_dims|-|If 1, axes reduced will be retained with length=1.
+|Output|out_t*|output|out_shape|Output tensor. Same rank as the input tensor if keep_dims set, else one less than input tensor rank.
+|===
+
+*Operation Function:*
+
+[source,c]
+----
+// Initialize the output to the OR identity (false), then OR in every input
+// element, with the reduced axes collapsed to index 0.
+tmp_shape = in_shape;
+for_each (axis in axes[]) {
+ tmp_shape[axis]=1;
+}
+if (keep_dims) assert(tmp_shape == out_shape)
+for_each (index in tmp_shape) {
+ tensor_write<in_t>(output, tmp_shape, index, false)
+}
+for_each (index in in_shape) {
+ tmp_index = index;
+ for_each (axis in axes[]) {
+ tmp_index[axis]=0;
+ }
+ value = tensor_read<in_t>(input, in_shape, index)
+ acc = tensor_read<in_t>(output, tmp_shape, tmp_index)
+ acc = acc || value
+ tensor_write<in_t>(output, tmp_shape, tmp_index, acc)
+}
+----
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|in_t|out_t
+
+|Any|Boolean|bool|bool
+|===
+
+==== REDUCE_ALL
+
+Reduce a tensor along the given axes with a logical AND operation
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t*|input|in_shape|Input tensor from 1 to 4 dims
+|Attribute|int|axes|[]|List of axes with size from 1 to rank(input1). Values are integers into the 0-based dimensions of the input tensor. An empty list is not allowed.
+|Attribute|int|keep_dims|-|If 1, axes reduced will be retained with length=1.
+|Output|out_t*|output|out_shape|Output tensor. Same rank as the input tensor if keep_dims set, else one less than input tensor rank.
+|===
+
+*Operation Function:*
+
+[source,c]
+----
+// Initialize the output to the AND identity (true), then AND in every input
+// element, with the reduced axes collapsed to index 0.
+tmp_shape = in_shape;
+for_each (axis in axes[]) {
+ tmp_shape[axis]=1;
+}
+if (keep_dims) assert(tmp_shape == out_shape)
+for_each (index in tmp_shape) {
+ tensor_write<in_t>(output, tmp_shape, index, true)
+}
+for_each (index in in_shape) {
+ tmp_index = index;
+ for_each (axis in axes[]) {
+ tmp_index[axis]=0;
+ }
+ value = tensor_read<in_t>(input, in_shape, index)
+ acc = tensor_read<in_t>(output, tmp_shape, tmp_index)
+ acc = acc && value
+ tensor_write<in_t>(output, tmp_shape, tmp_index, acc)
+}
+----
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|in_t|out_t
+
+|Any|Boolean|bool|bool
+|===
+
+==== REDUCE_MAX
+
+Reduce a tensor along the given axes with a maximum operation
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t*|input|in_shape|Input tensor from 1 to 4 dims
+|Attribute|int|axes|[]|List of axes with size from 1 to rank(input1). Values are integers into the 0-based dimensions of the input tensor. An empty list is not allowed.
+|Attribute|int|keep_dims|-|If 1, axes reduced will be retained with length=1.
+|Output|out_t*|output|out_shape|Output tensor. Same rank as the input tensor if keep_dims set, else one less than input tensor rank.
+|===
+
+*Operation Function:*
+
+[source,c]
+----
+// Initialize the output to the smallest representable value (the max
+// identity), then fold every input element in with apply_max.
+tmp_shape = in_shape;
+for_each (axis in axes[]) {
+ tmp_shape[axis]=1;
+}
+if (keep_dims) assert(tmp_shape == out_shape)
+for_each (index in tmp_shape) {
+ tensor_write<in_t>(output, tmp_shape, index, minimum<in_t>)
+}
+for_each (index in in_shape) {
+ tmp_index = index;
+ for_each (axis in axes[]) {
+ tmp_index[axis]=0;
+ }
+ value = tensor_read<in_t>(input, in_shape, index)
+ acc = tensor_read<in_t>(output, tmp_shape, tmp_index)
+ acc = apply_max<in_t>(acc, value)
+ tensor_write<in_t>(output, tmp_shape, tmp_index, acc)
+}
+----
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|in_t|out_t
+
+|Any|signed 8|aint8|aint8
+|Any|signed 16|int16|int16
+|Any|signed 32|int32|int32
+|MI, MT|float|float|float
+|===
+
+==== REDUCE_MIN
+
+Reduce a tensor along the given axes with a minimum operation
+
+*Arguments:*
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t*|input|in_shape|Input tensor from 1 to 4 dims
+|Attribute|int|axes|[]|List of axes with size from 1 to rank(input1). Values are integers into the 0-based dimensions of the input tensor. An empty list is not allowed.
+|Attribute|int|keep_dims|-|If 1, axes reduced will be retained with length=1.
+|Output|out_t*|output|out_shape|Output tensor. Same rank as the input tensor if keep_dims set, else one less than input tensor rank.
+|===
+
+*Quantization Parameters:*
+
+Quantization is ignored when doing the REDUCE_MIN operation. The input and output must maintain the same parameters.
+
+*Operation Function:*
+
+[source,c]
+----
+// Initialize the output to the largest representable value (the min
+// identity), then fold every input element in with apply_min.
+tmp_shape = in_shape;
+for_each (axis in axes[]) {
+ tmp_shape[axis]=1;
+}
+if (keep_dims) assert(tmp_shape == out_shape)
+for_each (index in tmp_shape) {
+ tensor_write<in_t>(output, tmp_shape, index, maximum<in_t>)
+}
+for_each (index in in_shape) {
+ tmp_index = index;
+ for_each (axis in axes[]) {
+ tmp_index[axis]=0;
+ }
+ value = tensor_read<in_t>(input, in_shape, index)
+ acc = tensor_read<in_t>(output, tmp_shape, tmp_index)
+ acc = apply_min<in_t>(acc, value)
+ tensor_write<in_t>(output, tmp_shape, tmp_index, acc)
+}
+----
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|in_t|out_t
+
+|Any|signed 8|aint8|aint8
+|Any|signed 16|int16|int16
+|Any|signed 32|int32|int32
+|MI, MT|float|float|float
+|===
+
+==== REDUCE_PRODUCT
+
+Reduce a tensor along the given axes by computing the product of the axes.
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t*|input|in_shape|Input tensor from 1 to 4 dims
+|Attribute|int|axes|[]|List of axes with size from 1 to rank(input1). Values are integers into the 0-based dimensions of the input tensor. An empty list is not allowed.
+|Attribute|int|keep_dims|-|If 1, axes reduced will be retained with length=1.
+|Output|out_t*|output|out_shape|Output tensor. Same rank as the input tensor if keep_dims set, else one less than input tensor rank.
+|===
+
+*Operation Function:*
+
+[source,c]
+----
+// Initialize the output to the multiplicative identity (1.0; this operator
+// is float-only per the data-type table), then multiply every input
+// element in.
+tmp_shape = in_shape;
+for_each (axis in axes[]) {
+ tmp_shape[axis]=1;
+}
+if (keep_dims) assert(tmp_shape == out_shape)
+for_each (index in tmp_shape) {
+ tensor_write<in_t>(output, tmp_shape, index, 1.0)
+}
+for_each (index in in_shape) {
+ tmp_index = index;
+ for_each (axis in axes[]) {
+ tmp_index[axis]=0;
+ }
+ value = tensor_read<in_t>(input, in_shape, index)
+ acc = tensor_read<in_t>(output, tmp_shape, tmp_index)
+ acc = acc * value
+ tensor_write<in_t>(output, tmp_shape, tmp_index, acc)
+}
+----
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|in_t|out_t
+
+|MI, MT|float|float|float
+|===
+
+==== REDUCE_SUM
+
+Reduce a tensor along the given axes by computing the sum of the axes.
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t*|input|in_shape|Input tensor from 1 to 4 dims
+|Attribute|int|axes|[]|List of axes with size from 1 to rank(input1). Values are integers into the 0-based dimensions of the input tensor. An empty list is not allowed.
+|Attribute|int|keep_dims|-|If 1, axes reduced will be retained with length=1.
+|Output|out_t*|output|out_shape|Output tensor. Same rank as the input tensor if keep_dims set, else one less than input tensor rank.
+|===
+
+*Operation Function:*
+
+[source,c]
+----
+// Initialize the output to the additive identity (0), then fold every
+// input element in with the overflow-checked apply_add.
+tmp_shape = in_shape;
+for_each (axis in axes[]) {
+ tmp_shape[axis]=1;
+}
+if (keep_dims) assert(tmp_shape == out_shape)
+for_each (index in tmp_shape) {
+ tensor_write<in_t>(output, tmp_shape, index, 0)
+}
+for_each (index in in_shape) {
+ tmp_index = index;
+ for_each (axis in axes[]) {
+ tmp_index[axis]=0;
+ }
+ value = tensor_read<in_t>(input, in_shape, index)
+ acc = tensor_read<in_t>(output, tmp_shape, tmp_index)
+ acc = apply_add<in_t>(acc, value)
+ tensor_write<in_t>(output, tmp_shape, tmp_index, acc)
+}
+----
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|in_t|out_t
+
+|Any|signed 32|int32|int32
+|MI, MT|float|float|float
+|===
+
diff --git a/chapters/scatter_gather.adoc b/chapters/scatter_gather.adoc
new file mode 100644
index 0000000..29ba391
--- /dev/null
+++ b/chapters/scatter_gather.adoc
@@ -0,0 +1,40 @@
+//
+// This confidential and proprietary software may be used only as
+// authorised by a licensing agreement from ARM Limited
+// (C) COPYRIGHT 2020 ARM Limited
+// ALL RIGHTS RESERVED
+// The entire notice above must be reproduced on all authorised
+// copies and copies may only be made to the extent permitted
+// by a licensing agreement from ARM Limited.
+
+=== Scatter/Gather Operators
+
+==== GATHER
+
+Generate a tensor for which each element in the output is a subtensor of the values tensor along the given axis, based on the value of indices.
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|index_t*|indices|shape|Input tensor from 1 to 4 dims
+|Input|value_t*|values|shape|Input tensor from 1 to 4 dims. Must be of rank at least axis + 1
+|Attribute|int|axis|-|Scalar value denoting which dimension to be used for striding.
+|Output|out_t*|output|shape|Output tensor
+|===
+
+*Quantization Parameters:*
+
+None
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|index_t|value_t|out_t
+
+|Any|signed 8|int8/int16|aint8|aint8
+|Any|signed 16|int8/int16|int16|int16
+|Any|signed 32|int8/int16|int32|int32
+|===
+
diff --git a/chapters/tensor_ops.adoc b/chapters/tensor_ops.adoc
new file mode 100644
index 0000000..2ea4ba8
--- /dev/null
+++ b/chapters/tensor_ops.adoc
@@ -0,0 +1,519 @@
+//
+// This confidential and proprietary software may be used only as
+// authorised by a licensing agreement from ARM Limited
+// (C) COPYRIGHT 2020 ARM Limited
+// ALL RIGHTS RESERVED
+// The entire notice above must be reproduced on all authorised
+// copies and copies may only be made to the extent permitted
+// by a licensing agreement from ARM Limited.
+
+=== Tensor Operators
+
+==== ARGMAX
+
+This returns the index with the largest value across the given axis of the input tensor.
+
+*Arguments*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t*|input|input_shape|Input tensor dimension k \<=4
+|Attribute|int|axis|-|Axis in range 0 to k-1
+|Output|out_t*|output|output_shape|Output tensor dimension k-1
+|===
+
+*Quantization Parameters:*
+
+None
+
+*Operation Function:*
+
+[source,c]
+----
+// For each position outside the reduced axis, scan along the axis and
+// record the index of the largest value; ties keep the earliest index.
+assert(axis >= 0 && axis < k && k <=4)
+left_shape = input_shape[0:axis-1]
+right_shape = input_shape[axis+1:k-1]
+assert( concat(left_shape, right_shape) == output_shape )
+for_each ( left_index in left_shape, right_index in right_shape ) {
+ in_t max_value = minimum<in_t>
+ int32_t max_index = 0;
+ for (i=0; i<input_shape[axis]; i++) {
+ index = concat(left_index, [i], right_index)
+ value = tensor_read<in_t>(input, input_shape, index)
+ if (value > max_value) { max_value = value; max_index=i; }
+ }
+ index = concat(left_index, right_index)
+ tensor_write<int32_t>(output, output_shape, index, max_index)
+}
+----
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|in_t|out_t
+
+|Any|signed 8|aint8|int32
+|Any|signed 16|int16|int32
+|MI, MT|float|float|int32
+|===
+
+==== AVG_POOL2D
+
+This performs an average pooling over the given input tensor. A sliding window of size given by <kernel size> is passed over the input tensor, with the mean value being placed in the output tensor.
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t *|input|[N,H,W,C]|Input tensor 4D
+|Attribute|int *|kernel|[2]|[kernel_y, kernel_x]
+|Attribute|int *|stride|[2]|[stride_y, stride_x]
+|Attribute|int *|pad|[4]|[pad_top, pad_bottom, pad_left, pad_right]
+|Output|out_t *|output|[N,H,W,C]|Output tensor 4D
+|===
+
+*Quantization Parameters:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Attribute|in_t|input_zp|-|Input tensor zero point
+|Attribute|out_t|output_zp|-|Output tensor zero point
+|===
+
+*Operation Function:*
+
+[source,c]
+----
+assert(in_t == aint8_t || input_zp == 0) // Zero point only for asymmetric int8
+assert(out_t == aint8_t || output_zp == 0) // Zero point only for asymmetric int8
+// Expand the [4]-entry spatial pad to cover the batch and channel dims.
+pad=concat([0,0],pad,[0,0])
+for_each ( 0<=n<N, 0<=oy<H, 0<=ox<W, 0<=c<C ) {
+ acc_t acc = 0;
+ int count = 0;
+ iy = oy * stride_y - pad_top
+ ix = ox * stride_x - pad_left
+ for_each ( 0<=ky<kernel_y, 0<=kx<kernel_x) {
+ y = iy + ky
+ x = ix + kx
+ value = tensor_read<in_t>(input, [N,IH,IW,IC], [n,y,x,c], input_zp, pad)
+ acc = apply_add<32>(acc, value)
+ // Only positions inside the unpadded input contribute to the divisor.
+ if (0<=y<IH and 0<=x<IW) count++
+ }
+ if (is_float(out_t)) {
+ // Divide the accumulated sum (not the last element read) by the count.
+ acc = acc / (float)count;
+ } else {
+ scale = reciprocal_scale(count)
+ acc = apply_scale_32(acc, scale.multiplier, scale.shift, false)
+ acc = apply_clip(acc + output_zp, output_min, output_max)
+ }
+ // Index with the channel loop variable c; the loop declares no oc/OC.
+ tensor_write<out_t>(output, [N,H,W,C], [n,oy,ox,c], acc)
+}
+----
+
+*Supported Data Types:*
+|===
+|Profile|Mode|in_t|acc_t|out_t
+
+|Any|signed 8|aint8|int32_t|aint8
+|Any|signed 16|int16|int32_t|int16
+|MI, MT|float|float|float|float
+|===
+
+==== CONV2D
+
+Performs a 2D convolution over the given tensor input, using the weight tensor.
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t*|input|[N,IH,IW,IC]|Input tensor
+|Attribute|weight_t*|weight|[OC,KH,KW,IC]|Weight kernel size KH x KW
+|Attribute|acc_t*|bias|[OC]|Per output channel bias data.
+|Attribute|int*|pad|[4]|[pad_top, pad_bottom, pad_left, pad_right]
+|Attribute|int*|stride|[2]|[stride_y, stride_x]
+|Attribute|int*|dilation|[2]|[dilation_y, dilation_x]
+|Output|out_t*|output|[N,H,W,OC]|Output tensor
+|===
+
+*Quantization Parameters:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Attribute|in_t|input_zp|-|Input tensor zero point
+|Attribute|weight_t|weight_zp|-|Weight zero point
+|===
+
+*Operation Function*
+
+[source,c]
+----
+assert(in_t == aint8_t || input_zp == 0) // Zero point only for asymmetric int8
+assert(weight_t == aint8_t || weight_zp == 0)
+// Expand the [4]-entry spatial pad to cover the batch and channel dims.
+pad=concat([0,0],pad,[0,0])
+for_each (0<=n<N, 0<=oy<H, 0<=ox<W, 0<=oc<OC) {
+ acc_t acc = 0
+ iy = oy * stride_y - pad_top
+ ix = ox * stride_x - pad_left
+ for_each (0<=ky<KH, 0<=kx<KW, 0<=ic<IC) {
+ y = iy + ky * dilation_y
+ x = ix + kx * dilation_x
+ value = tensor_read<in_t>(input, [N,IH,IW,IC], [n,y,x,ic], input_zp, pad)
+ weight = tensor_read<weight_t>(weight, [OC,KH,KW,IC], [oc,ky,kx,ic], weight_zp)
+ acc = apply_add<acc_t>(acc, value * weight)
+ }
+ acc = apply_add<acc_t>(acc, bias[oc])
+ tensor_write<acc_t>(output, [N,H,W,OC], [n,oy,ox,oc], acc)
+}
+----
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|in_t|weight_t|acc_t
+
+|Any|signed 8x8|aint8|int8,aint8|int32
+|Any|signed 8x4|aint8|int4|int32
+|Any|signed 16x8|int16|int8|int48
+|MI, MT|float|float|float|float
+|===
+
+==== CONV3D
+
+Performs a 3D convolution over the given input tensor.
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t*|input|[N,ID,IH,IW,IC]|Input tensor
+|Attribute|weight_t*|weight|[OC,KD,KH,KW,IC]|Weight kernel size KDxKHxKW
+|Attribute|acc_t*|bias|[OC]|Per output channel bias data.
+|Attribute|int*|pad|[6]|[pad_d0, pad_d1, pad_top, pad_bottom, pad_left, pad_right]
+|Attribute|int*|stride|[3]|[stride_d, stride_y, stride_x]
+|Attribute|int*|dilation|[3]|[dilation_d, dilation_y, dilation_x]
+|Output|out_t*|output|[N,D,H,W,OC]|Output tensor
+|===
+
+*Quantization Parameters:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Attribute|in_t|input_zp|-|Input tensor zero point
+|Attribute|weight_t|weight_zp|-|Weight zero point
+|===
+
+*Operation Function*
+
+[source,c]
+----
+assert(in_t == aint8_t || input_zp == 0) // Zero point only for asymmetric int8
+assert(weight_t == aint8_t || weight_zp == 0)
+// Expand the [6]-entry spatial pad to cover the batch and channel dims.
+pad=concat([0,0],pad,[0,0])
+for_each (0<=n<N, 0<=od<D, 0<=oy<H, 0<=ox<W, 0<=oc<OC) {
+ acc_t acc = 0
+ id = od * stride_d - pad_d0
+ iy = oy * stride_y - pad_top
+ ix = ox * stride_x - pad_left
+ for_each (0<=kd<KD, 0<=ky<KH, 0<=kx<KW, 0<=ic<IC) {
+ d = id + kd * dilation_d
+ y = iy + ky * dilation_y
+ x = ix + kx * dilation_x
+ value = tensor_read<in_t>(input, [N,ID,IH,IW,IC], [n,d,y,x,ic], input_zp, pad)
+ weight = tensor_read<weight_t>(weight,[OC,KD,KH,KW,IC],[oc,kd,ky,kx,ic], weight_zp)
+ acc = apply_add<acc_t>(acc, value * weight)
+ }
+ acc = apply_add<acc_t>(acc, bias[oc])
+ tensor_write<acc_t>(output, [N,D,H,W,OC], [n,od,oy,ox,oc], acc)
+}
+----
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|in_t|weight_t|acc_t
+
+|Any|signed 8x8|aint8|int8,aint8|int32
+|Any|signed 8x4|aint8|int4|int32
+|Any|signed 16x8|int16|int8|int48
+|MI, MT|float|float|float|float
+|===
+
+
+==== DEPTHWISE_CONV2D
+
+Performs 2D convolutions separately over each channel of the given tensor input, using the weight tensor.
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t*|input|[N,H,W,C]|Input tensor
+|Attribute|weight_t*|weight|[KH,KW,C,M]|Weight kernel size KH x KW
+|Attribute|acc_t*|bias|[C*M]|Per output channel bias data.
+|Attribute|int*|pad|[4]|[pad_top, pad_bottom, pad_left, pad_right]
+|Attribute|int*|stride|[2]|[stride_y, stride_x]
+|Attribute|int*|dilation|[2]|[dilation_y, dilation_x]
+|Output|out_t*|output|[N,H,W,C*M]|Output tensor
+|===
+
+*Quantization Parameters:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Attribute|in_t|input_zp|-|Input tensor zero point
+|Attribute|weight_t|weight_zp|-|Weight zero point
+|===
+
+*Operation Function*
+
+[source,c]
+----
+assert(in_t==aint8_t || input_zp==0) // Zero point only for asymmetric int8
+assert(weight_t==aint8_t || weight_zp==0)
+pad=concat([0,0],pad,[0,0])
+// c ranges over the C input channels: both the input [N,H,W,C] and the
+// weight [KH,KW,C,M] are indexed with c, so a bound of C*M would read out
+// of range. The output channel is derived as c*M+m.
+for_each (0<=n<N, 0<=oy<H, 0<=ox<W, 0<=c<C, 0<=m<M) {
+ acc_t acc = 0
+ iy = oy * stride_y - pad_top
+ ix = ox * stride_x - pad_left
+ for_each (0<=ky<KH, 0<=kx<KW) {
+ y = iy + ky * dilation_y
+ x = ix + kx * dilation_x
+ value = tensor_read<in_t>(input, [N,H,W,C], [n,y,x,c], input_zp, pad)
+ weight = tensor_read<weight_t>(weight, [KH,KW,C,M], [ky,kx,c,m], weight_zp)
+ acc = apply_add<acc_t>(acc, value * weight)
+ }
+ acc = apply_add<acc_t>(acc, bias[(c*M) + m])
+ tensor_write<acc_t>(output, [N,H,W,C*M], [n,oy,ox,c*M+m], acc)
+}
+----
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|in_t|weight_t|acc_t
+
+|Any|signed 8x8|aint8|int8,aint8|int32
+|Any|signed 8x4|aint8|int4|int32
+|Any|signed 16x8|int16|int8|int48
+|MI, MT|float|float|float|float
+|===
+
+==== FULLY_CONNECTED
+
+Performs a fully connected network.
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t*|input|[N,IC]|Input tensor
+|Attribute|weight_t*|weight|[OC,IC]|Weights
+|Attribute|acc_t*|bias|[OC]|Per output channel bias data.
+|Output|out_t*|output|[N,OC]|Output tensor
+|===
+
+*Quantization Parameters:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Attribute|in_t|input_zp|-|Input tensor zero point
+|Attribute|weight_t|weight_zp|-|Weight zero point
+|===
+
+*Operation Function*
+
+[source,c]
+----
+// Per batch row n: acc[n,oc] = sum over ic of input[n,ic] * weight[oc,ic],
+// plus bias[oc], accumulated with the overflow-checked apply_add.
+assert(in_t == aint8_t || input_zp == 0) // Zero point only for asymmetric int8
+assert(weight_t == aint8_t || weight_zp == 0)
+for_each (0<=n<N, 0<=oc<OC) {
+ acc_t acc = 0
+ for_each (0<=ic<IC) {
+ value = tensor_read<in_t>(input, [N,IC], [n,ic], input_zp)
+ weight = tensor_read<weight_t>(weight, [OC,IC], [oc,ic], weight_zp)
+ acc = apply_add<acc_t>(acc, value * weight)
+ }
+ acc = apply_add<acc_t>(acc, bias[oc])
+ tensor_write<acc_t>(output, [N,OC], [n,oc], acc)
+}
+----
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|in_t|weight_t|acc_t
+
+|Any|signed 8x8|aint8|int8,aint8|int32
+|Any|signed 8x4|aint8|int4|int32
+|Any|signed 16x8 |int16|int8|int48
+|MI, MT|float|float|float|float
+|===
+
+==== MATMUL
+Performs a two dimensional matrix multiplication. This allows both inputs to be activations, rather than reserving weights as an attribute in the FULLY_CONNECTED operator.
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t*|A|[M,K]|Input tensor A
+|Input|in_t*|B|[K,N]|Input tensor B
+|Output|out_t*|C|[M,N]|Output tensor C
+|===
+
+*Quantization Parameters:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Attribute|in_t|A_zp|-|Input tensor A zero point
+|Attribute|in_t|B_zp|-|Input tensor B zero point
+|===
+
+*Operation Function*
+
+[source,c]
+----
+assert(in_t==aint8_t || (A_zp==0 && B_zp==0)) // Zero point only for asymmetric int8
+for_each (0<=m<M, 0<=n<N) {
+ acc_t acc = 0
+ for_each (0<=k<K) {
+ value1 = tensor_read<in_t>(A, [M,K], [m,k], A_zp)
+ value2 = tensor_read<in_t>(B, [K,N], [k,n], B_zp)
+ // Accumulate with the checked helper, consistent with the other
+ // tensor operators (CONV2D, FULLY_CONNECTED).
+ acc = apply_add<acc_t>(acc, value1 * value2)
+ }
+ tensor_write<acc_t>(C, [M,N], [m,n], acc)
+}
+----
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|in_t|acc_t
+
+|Any|signed 8x8|aint8|int32
+|Any|signed 16x16|int16|int48
+|MI, MT|float|float|float
+|===
+
+==== MAX_POOL2D
+This performs a max pooling over the given input tensor. A sliding window of size given by <kernel size> is passed over the input tensor, with the maximum value being placed in the output tensor.
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t*|input|[N,H,W,C]|Input tensor 4D
+|Attribute|int*|kernel|[2]|[kernel_y, kernel_x]
+|Attribute|int*|stride|[2]|[stride_y, stride_x]
+|Attribute|int*|pad|[4]|[pad_top, pad_bottom, pad_left, pad_right]
+|Output|out_t*|output|[N,H,W,C]|Output tensor 4D
+|===
+
+*Quantization Parameters:*
+
+None
+
+*Operation Function:*
+
+[source,c]
+----
+// Expand the [4]-entry spatial pad to cover the batch and channel dims.
+pad=concat([0,0],pad,[0,0])
+for_each ( 0<=n<N, 0<=oy<H, 0<=ox<W, 0<=c<C ) {
+ // Accumulator has the input type so the float mode is representable;
+ // start at the smallest representable value (the max identity).
+ in_t acc = minimum<in_t>;
+ iy = oy * stride_y - pad_top
+ ix = ox * stride_x - pad_left
+ for_each ( 0<=ky<kernel_y, 0<=kx<kernel_x ) {
+ y = iy + ky
+ x = ix + kx
+ value = tensor_read<in_t>(input, [N,IH,IW,IC], [n,y,x,c], pad)
+ acc = apply_max(acc, value)
+ }
+ // Index with the channel loop variable c; the loop declares no oc/OC.
+ tensor_write<out_t>(output, [N,H,W,C], [n,oy,ox,c], acc)
+}
+----
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|in_t|out_t
+
+|Any|signed 8|aint8|aint8
+|Any|16-bit|int16|int16
+|MI, MT|float|float|float
+|===
+
+==== TRANSPOSE_CONV2D
+
+Performs a 2D transposed convolution over the given tensor input, using the weights tensor.
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t*|input|[N,IH,IW,IC]|Input tensor
+|Attribute|weight_t*|weight|[OC,KH,KW,IC]|Weight kernel size KH x KW
+|Attribute|acc_t*|bias|[OC]|Per output channel bias data.
+|Attribute|int*|outpad|[2]|[outpad_top, outpad_left]
+|Attribute|int*|stride|[2]|[stride_y, stride_x]
+|Attribute|int*|dilation|[2]|[dilation_y, dilation_x]
+|Attribute|int*|out_shape|[4]|[N,OH,OW,OC]
+|Output|out_t*|output|[N,OH,OW,OC]|Output tensor
+|===
+
+*Quantization Parameters:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Attribute|in_t|input_zp|-|Input tensor zero point
+|Attribute|weight_t|weight_zp|-|Weight zero point
+|===
+
+*Operation Function*
+
+[source,c]
+----
+// Scatter-style transposed convolution: the output is first initialized
+// with the per-channel bias, then every (input element, weight tap) pair
+// accumulates value*weight into the output position it maps to, via a
+// read-modify-write of the output tensor. Out-of-range positions are
+// skipped by the bounds check.
+assert(in_t==aint8_t || input_zp==0) // Zero point only for asymmetric int8
+assert(weight_t == aint8_t || weight_zp == 0)
+for_each (index in out_shape) {
+ tensor_write<acc_t>(output, [N,OH,OW,OC], index, bias[index[3]])
+}
+for_each (0<=n<N, 0<=iy<IH, 0<=ix<IW, 0<=oc<OC, 0<=ic<IC, 0<=ky<KH, 0<=kx<KW) {
+ oy = iy * stride_y - outpad_top + ky
+ ox = ix * stride_x - outpad_left + kx
+ if (oy>=0 && oy<OH && ox>=0 && ox<OW) {
+ acc = tensor_read<acc_t>(output, [N,OH,OW,OC], [n,oy,ox,oc])
+ value = tensor_read<in_t>(input, [N,IH,IW,IC], [n,iy,ix,ic], input_zp)
+ weight = tensor_read<weight_t>(weight, [OC,KH,KW,IC], [oc,ky,kx,ic], weight_zp)
+ acc = apply_add<acc_t>(acc, value * weight)
+ tensor_write<acc_t>(output, [N,OH,OW,OC], [n,oy,ox,oc], acc)
+ }
+}
+----
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|in_t|weight_t|acc_t
+
+|Any|signed 8x8|aint8|int8,aint8|int32
+|Any|signed 8x4|aint8|int4|int32
+|Any|signed 16x8|int16|int8|int48
+|MI, MT|float|float|float|float
+|===
diff --git a/chapters/type_conversion.adoc b/chapters/type_conversion.adoc
new file mode 100644
index 0000000..6d3bcd8
--- /dev/null
+++ b/chapters/type_conversion.adoc
@@ -0,0 +1,138 @@
+//
+// This confidential and proprietary software may be used only as
+// authorised by a licensing agreement from ARM Limited
+// (C) COPYRIGHT 2020 ARM Limited
+// ALL RIGHTS RESERVED
+// The entire notice above must be reproduced on all authorised
+// copies and copies may only be made to the extent permitted
+// by a licensing agreement from ARM Limited.
+
+=== Type Conversion
+
+==== CAST
+
+Casts a tensor from one data type to another.
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t|input|shape|Input tensor
+|Output|out_t|output|shape|Output tensor
+|===
+
+*Operation Function:*
+
+[source,c]
+....
+for_each (index in shape) {
+ in_t in = tensor_read<in_t>(input, shape, index);
+ out_t out;
+ if (out_t==bool) {
+ out = (in!=0) ? true : false;
+ } else if (in_t==bool) {
+ out = (in) ? 1 : 0;
+ } else if (out_t==float) {
+ out = round_to_nearest_float(in);
+ } else if (in_t==float) {
+ out = apply_clip(round_to_nearest_int(in), minimum<out_t>, maximum<out_t>);
+ } else if (sizeof(out_t)>=sizeof(in_t)) {
+ out = sign_extend(in);
+ } else {
+ out = truncate(in);
+ }
+ tensor_write<out_t>(output, shape, index, out)
+}
+....
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|in_t|out_t
+
+|Any|bool to signed 8|bool|int8
+|Any|bool to signed 16|bool|int16
+|Any|bool to signed 32|bool|int32
+|Any|signed 8 to bool|int8|bool
+|Any|signed 8 to signed 16|int8|int16
+|Any|signed 8 to signed 32|int8|int32
+|Any|signed 8 to float|int8|float
+|Any|signed 16 to bool|int16|bool
+|Any|signed 16 to signed 8|int16|int8
+|Any|signed 16 to signed 32|int16|int32
+|Any|signed 16 to float|int16|float
+|Any|signed 32 to bool|int32|bool
+|Any|signed 32 to signed 8|int32|int8
+|Any|signed 32 to signed 16|int32|int16
+|Any|signed 32 to float|int32|float
+|Any|float to signed 8|float|int8
+|Any|float to signed 16|float|int16
+|Any|float to signed 32|float|int32
+|===
+
+==== RESCALE
+
+Rescale quantized values into a new domain. This function scales by factor: multiplier * 2^-shift^.
+
+*Arguments:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Input|in_t*|input|shape|Input tensor from 1 to 4 dims
+|Output|out_t*|output|shape|Output tensor with the same shape as input
+|===
+
+*Quantization Parameters:*
+
+|===
+|Argument|Type|Name|Shape|Description
+
+|Attribute|in_t|input_zp|-|Input tensor zero point
+|Attribute|out_t|output_zp|-|Output tensor zero point
+|Attribute|mul_t|multiplier[NC]|-|Scaling multiplier array
+|Attribute|uint6_t|shift[NC] |-|Scaling shift array
+|Attribute|bool|scale32|-|if (scale32) mul_t=int32_t else mul_t=int16_t
+|Attribute|bool|double_round|-|Select double round mode
+|Attribute|bool|per_channel|-|if (per_channel) NC=shape[dims-1] else NC=1
+|===
+
+*Operation Function:*
+
+[source,c]
+....
+for_each (index in shape) {
+ assert(in_t==aint8 || in_t==uint8 || input_zp==0);
+ assert(out_t==aint8 || out_t==uint8 || output_zp==0);
+ assert((scale32 && in_t!=int48_t) || (!scale32 && !double_round));
+ int48_t value = tensor_read<in_t>(input, shape, index, input_zp);
+ int c = (per_channel) ? index[dims-1] : 0;
+ int32_t result = (scale32) ?
+ apply_scale_32(value, multiplier[c], shift[c], double_round) :
+ apply_scale_16(value, multiplier[c], shift[c]);
+ result = apply_clip(result + output_zp, minimum<out_t>, maximum<out_t>)
+ tensor_write<out_t>(output, shape, index, result)
+}
+....
+
+*Supported Data Types:*
+
+|===
+|Profile|Mode|in_t|out_t
+
+|Any|signed 8 to signed 8|aint8|aint8
+|Any|signed 8 to signed 16|aint8|int16
+|Any|signed 8 to signed 32|aint8|int32
+|Any|signed 16 to signed 8|int16|aint8
+|Any|signed 16 to signed 16|int16|int16
+|Any|signed 16 to signed 32|int16|int32
+|Any|signed 32 to signed 8|int32|aint8
+|Any|signed 32 to signed 16|int32|int16
+|Any|signed 32 to signed 32|int32|int32
+|Any|signed 48 to signed 8|int48|aint8
+|Any|signed 48 to signed 16|int48|int16
+|Any|signed 48 to signed 32|int48|int32
+|Any|unsigned 8 to signed 8|uint8|aint8
+|Any|signed 8 to unsigned 8|aint8|uint8
+|===
diff --git a/tosa.css b/tosa.css
new file mode 100644
index 0000000..829f79b
--- /dev/null
+++ b/tosa.css
@@ -0,0 +1,407 @@
+/* Asciidoctor default stylesheet | MIT License | https://asciidoctor.org */
+/* Remove comment around @import statement below when using as a custom stylesheet */
+article,aside,details,figcaption,figure,footer,header,hgroup,main,nav,section,summary{display:block}
+audio,canvas,video{display:inline-block}
+audio:not([controls]){display:none;height:0}
+[hidden],template{display:none}
+script{display:none!important}
+html{font-family:sans-serif;-ms-text-size-adjust:100%;-webkit-text-size-adjust:100%}
+body{margin:0}
+a{background:transparent}
+a:focus{outline:thin dotted}
+a:active,a:hover{outline:0}
+h1{font-size:2em;margin:.67em 0}
+abbr[title]{border-bottom:1px dotted}
+b,strong{font-weight:bold}
+dfn{font-style:italic}
+hr{-moz-box-sizing:content-box;box-sizing:content-box;height:0}
+mark{background:#ff0;color:#000}
+code,kbd,pre,samp{font-family:monospace;font-size:1em}
+pre{white-space:pre-wrap}
+q{quotes:"\201C" "\201D" "\2018" "\2019"}
+small{font-size:80%}
+sub,sup{font-size:75%;line-height:0;position:relative;vertical-align:baseline}
+sup{top:-.5em}
+sub{bottom:-.25em}
+img{border:0}
+svg:not(:root){overflow:hidden}
+figure{margin:0}
+fieldset{border:1px solid silver;margin:0 2px;padding:.35em .625em .75em}
+legend{border:0;padding:0}
+button,input,select,textarea{font-family:inherit;font-size:100%;margin:0}
+button,input{line-height:normal}
+button,select{text-transform:none}
+button,html input[type="button"],input[type="reset"],input[type="submit"]{-webkit-appearance:button;cursor:pointer}
+button[disabled],html input[disabled]{cursor:default}
+input[type="checkbox"],input[type="radio"]{box-sizing:border-box;padding:0}
+input[type="search"]{-webkit-appearance:textfield;-moz-box-sizing:content-box;-webkit-box-sizing:content-box;box-sizing:content-box}
+input[type="search"]::-webkit-search-cancel-button,input[type="search"]::-webkit-search-decoration{-webkit-appearance:none}
+button::-moz-focus-inner,input::-moz-focus-inner{border:0;padding:0}
+textarea{overflow:auto;vertical-align:top}
+table{border-collapse:collapse;border-spacing:0}
+*,*:before,*:after{-moz-box-sizing:border-box;-webkit-box-sizing:border-box;box-sizing:border-box}
+html,body{font-size:100%}
+body{background:#fff;color:rgba(0,0,0,.8);padding:0;margin:0;font-family:"Helvetica Neue", "Helvetica", Helvetica, Arial, sans-serif; font-weight: normal;font-style:normal;line-height:1;position:relative;cursor:auto}
+a:hover{cursor:pointer}
+img,object,embed{max-width:100%;height:auto}
+object,embed{height:100%}
+img{-ms-interpolation-mode:bicubic}
+.left{float:left!important}
+.right{float:right!important}
+.text-left{text-align:left!important}
+.text-right{text-align:right!important}
+.text-center{text-align:center!important}
+.text-justify{text-align:justify!important}
+.hide{display:none}
+body{-webkit-font-smoothing:antialiased}
+img,object,svg{display:inline-block;vertical-align:middle}
+textarea{height:auto;min-height:50px}
+select{width:100%}
+.center{margin-left:auto;margin-right:auto}
+.spread{width:100%}
+p.lead,.paragraph.lead>p,#preamble>.sectionbody>.paragraph:first-of-type p{font-size:1.21875em;line-height:1.6}
+.subheader,.admonitionblock td.content>.title,.audioblock>.title,.exampleblock>.title,.imageblock>.title,.listingblock>.title,.literalblock>.title,.stemblock>.title,.openblock>.title,.paragraph>.title,.quoteblock>.title,table.tableblock>.title,.verseblock>.title,.videoblock>.title,.dlist>.title,.olist>.title,.ulist>.title,.qlist>.title,.hdlist>.title{line-height:1.45;color:#002b49;font-weight:400;margin-top:0;margin-bottom:.25em}
+div,dl,dt,dd,ul,ol,li,h1,h2,h3,#toctitle,.sidebarblock>.content>.title,h4,h5,h6,pre,form,p,blockquote,th,td{margin:0;padding:0;direction:ltr}
+a{color:#2156a5;text-decoration:underline;line-height:inherit}
+a:hover,a:focus{color:#1d4b8f}
+a img{border:none}
+p{font-family:inherit;font-weight:400;font-size:1em;line-height:1.6;margin-bottom:1.25em;text-rendering:optimizeLegibility}
+p aside{font-size:.875em;line-height:1.35;font-style:italic}
+h1,h2,h3,#toctitle,.sidebarblock>.content>.title,h4,h5,h6{font-family:"Open Sans","DejaVu Sans",sans-serif;font-weight:300;font-style:normal;color:#002b49;text-rendering:optimizeLegibility;margin-top:1em;margin-bottom:.5em;line-height:1.0125em}
+h1 small,h2 small,h3 small,#toctitle small,.sidebarblock>.content>.title small,h4 small,h5 small,h6 small{font-size:60%;color:#e99b8f;line-height:0}
+h1{font-size:2.125em}
+h2{font-size:1.6875em}
+h3,#toctitle,.sidebarblock>.content>.title{font-size:1.375em}
+h4,h5{font-size:1.125em}
+h6{font-size:1em}
+hr{border:solid #ddddd8;border-width:1px 0 0;clear:both;margin:1.25em 0 1.1875em;height:0}
+em,i{font-style:italic;line-height:inherit}
+strong,b{font-weight:bold;line-height:inherit}
+small{font-size:60%;line-height:inherit}
+code{font-family:Consolas,"Liberation Mono", Courier, monospace;font-weight:normal;color:rgba(0,0,0,.9)}
+ul,ol,dl{font-size:1em;line-height:1.6;margin-bottom:1.25em;list-style-position:outside;font-family:inherit}
+ul,ol,ul.no-bullet,ol.no-bullet{margin-left:1.5em}
+ul li ul,ul li ol{margin-left:1.25em;margin-bottom:0;font-size:1em}
+ul.square li ul,ul.circle li ul,ul.disc li ul{list-style:inherit}
+ul.square{list-style-type:square}
+ul.circle{list-style-type:circle}
+ul.disc{list-style-type:disc}
+ul.no-bullet{list-style:none}
+ol li ul,ol li ol{margin-left:1.25em;margin-bottom:0}
+dl dt{margin-bottom:.3125em;font-weight:bold}
+dl dd{margin-bottom:1.25em}
+abbr,acronym{text-transform:uppercase;font-size:90%;color:rgba(0,0,0,.8);border-bottom:1px dotted #ddd;cursor:help}
+abbr{text-transform:none}
+blockquote{margin:0 0 1.25em;padding:.5625em 1.25em 0 1.1875em;border-left:1px solid #ddd}
+blockquote cite{display:block;font-size:.9375em;color:rgba(0,0,0,.6)}
+blockquote cite:before{content:"\2014 \0020"}
+blockquote cite a,blockquote cite a:visited{color:rgba(0,0,0,.6)}
+blockquote,blockquote p{line-height:1.6;color:rgba(0,0,0,.85)}
+@media only screen and (min-width:768px){h1,h2,h3,#toctitle,.sidebarblock>.content>.title,h4,h5,h6{line-height:1.2}
+h1{font-size:2.75em}
+h2{font-size:2.3125em}
+h3,#toctitle,.sidebarblock>.content>.title{font-size:1.6875em}
+h4{font-size:1.4375em}}
+table{background:#fff;margin-bottom:1.25em;border:solid 1px #dedede}
+table thead,table tfoot{background:#f7f8f7;font-weight:bold}
+table thead tr th,table thead tr td,table tfoot tr th,table tfoot tr td{padding:.5em .625em .625em;font-size:inherit;color:rgba(0,0,0,.8);text-align:left}
+table tr th,table tr td{padding:.5625em .625em;font-size:inherit;color:rgba(0,0,0,.8)}
+table tr.even,table tr.alt,table tr:nth-of-type(even){background:#f8f8f7}
+table thead tr th,table tfoot tr th,table tbody tr td,table tr td,table tfoot tr td{display:table-cell;line-height:1.6}
+body{tab-size:4}
+h1,h2,h3,#toctitle,.sidebarblock>.content>.title,h4,h5,h6{line-height:1.2;word-spacing:-.05em}
+h1 strong,h2 strong,h3 strong,#toctitle strong,.sidebarblock>.content>.title strong,h4 strong,h5 strong,h6 strong{font-weight:400}
+.clearfix:before,.clearfix:after,.float-group:before,.float-group:after{content:" ";display:table}
+.clearfix:after,.float-group:after{clear:both}
+*:not(pre)>code{font-size:.9375em;font-style:normal!important;letter-spacing:0;padding:.1em .5ex;word-spacing:-.15em;background-color:#f7f7f8;-webkit-border-radius:4px;border-radius:4px;line-height:1.45;text-rendering:optimizeSpeed}
+pre,pre>code{line-height:1.45;color:rgba(0,0,0,.9);font-family:Consolas, "Liberation Mono", Courier, monospace;font-weight:normal;text-rendering:optimizeSpeed}
+.keyseq{color:rgba(51,51,51,.8)}
+kbd{font-family:"Droid Sans Mono","DejaVu Sans Mono",monospace;display:inline-block;color:rgba(0,0,0,.8);font-size:.65em;line-height:1.45;background-color:#f7f7f7;border:1px solid #ccc;-webkit-border-radius:3px;border-radius:3px;-webkit-box-shadow:0 1px 0 rgba(0,0,0,.2),0 0 0 .1em white inset;box-shadow:0 1px 0 rgba(0,0,0,.2),0 0 0 .1em #fff inset;margin:0 .15em;padding:.2em .5em;vertical-align:middle;position:relative;top:-.1em;white-space:nowrap}
+.keyseq kbd:first-child{margin-left:0}
+.keyseq kbd:last-child{margin-right:0}
+.menuseq,.menu{color:rgba(0,0,0,.8)}
+b.button:before,b.button:after{position:relative;top:-1px;font-weight:400}
+b.button:before{content:"[";padding:0 3px 0 2px}
+b.button:after{content:"]";padding:0 2px 0 3px}
+p a>code:hover{color:rgba(0,0,0,.9)}
+#header,#content,#footnotes,#footer{width:100%;margin-left:auto;margin-right:auto;margin-top:0;margin-bottom:0;max-width:62.5em;*zoom:1;position:relative;padding-left:.9375em;padding-right:.9375em}
+#header:before,#header:after,#content:before,#content:after,#footnotes:before,#footnotes:after,#footer:before,#footer:after{content:" ";display:table}
+#header:after,#content:after,#footnotes:after,#footer:after{clear:both}
+#content{margin-top:1.25em}
+#content:before{content:none}
+#header>h1:first-child{color:rgba(0,0,0,.85);margin-top:2.25rem;margin-bottom:0}
+#header>h1:first-child+#toc{margin-top:8px;border-top:1px solid #ddddd8}
+#header>h1:only-child,body.toc2 #header>h1:nth-last-child(2){border-bottom:1px solid #ddddd8;padding-bottom:8px}
+#header .details{border-bottom:1px solid #ddddd8;line-height:1.45;padding-top:.25em;padding-bottom:.25em;padding-left:.25em;color:rgba(0,0,0,.6);display:-ms-flexbox;display:-webkit-flex;display:flex;-ms-flex-flow:row wrap;-webkit-flex-flow:row wrap;flex-flow:row wrap}
+#header .details span:first-child{margin-left:-.125em}
+#header .details span.email a{color:rgba(0,0,0,.85)}
+#header .details br{display:none}
+#header .details br+span:before{content:"\00a0\2013\00a0"}
+#header .details br+span.author:before{content:"\00a0\22c5\00a0";color:rgba(0,0,0,.85)}
+#header .details br+span#revremark:before{content:"\00a0|\00a0"}
+#header #revnumber{text-transform:capitalize}
+#header #revnumber:after{content:"\00a0"}
+#content>h1:first-child:not([class]){color:rgba(0,0,0,.85);border-bottom:1px solid #ddddd8;padding-bottom:8px;margin-top:0;padding-top:1rem;margin-bottom:1.25rem}
+#toc{border-bottom:1px solid #efefed;padding-bottom:.5em}
+#toc>ul{margin-left:.125em}
+#toc ul.sectlevel0>li>a{font-style:italic}
+#toc ul.sectlevel0 ul.sectlevel1{margin:.5em 0}
+#toc ul{font-family:"Noto",sans-serif;list-style-type:none}
+#toc li{line-height:1.3334;margin-top:.3334em}
+#toc a{text-decoration:none}
+#toc a:active{text-decoration:underline}
+#toctitle{color:#002b49;font-size:1.2em}
+@media only screen and (min-width:768px){#toctitle{font-size:1.375em}
+body.toc2{padding-left:15em;padding-right:0}
+#toc.toc2{margin-top:0!important;background-color:#f8f8f7;position:fixed;width:15em;left:0;top:0;border-right:1px solid #efefed;border-top-width:0!important;border-bottom-width:0!important;z-index:1000;padding:1.25em 1em;height:100%;overflow:auto}
+#toc.toc2 #toctitle{margin-top:0;margin-bottom:.8rem;font-size:1.2em}
+#toc.toc2>ul{font-size:.9em;margin-bottom:0}
+#toc.toc2 ul ul{margin-left:0;padding-left:1em}
+#toc.toc2 ul.sectlevel0 ul.sectlevel1{padding-left:0;margin-top:.5em;margin-bottom:.5em}
+body.toc2.toc-right{padding-left:0;padding-right:15em}
+body.toc2.toc-right #toc.toc2{border-right-width:0;border-left:1px solid #efefed;left:auto;right:0}}
+@media only screen and (min-width:1280px){body.toc2{padding-left:20em;padding-right:0}
+#toc.toc2{width:20em}
+#toc.toc2 #toctitle{font-size:1.375em}
+#toc.toc2>ul{font-size:.95em}
+#toc.toc2 ul ul{padding-left:1.25em}
+body.toc2.toc-right{padding-left:0;padding-right:20em}}
+#content #toc{border-style:solid;border-width:1px;border-color:#e0e0dc;margin-bottom:1.25em;padding:1.25em;background:#f8f8f7;-webkit-border-radius:4px;border-radius:4px}
+#content #toc>:first-child{margin-top:0}
+#content #toc>:last-child{margin-bottom:0}
+#footer{max-width:100%;background-color:rgba(0,0,0,.8);padding:1.25em}
+#footer-text{color:rgba(255,255,255,.8);line-height:1.44}
+.sect1{padding-bottom:.625em}
+@media only screen and (min-width:768px){.sect1{padding-bottom:1.25em}}
+.sect1+.sect1{border-top:1px solid #efefed}
+#content h1>a.anchor,h2>a.anchor,h3>a.anchor,#toctitle>a.anchor,.sidebarblock>.content>.title>a.anchor,h4>a.anchor,h5>a.anchor,h6>a.anchor{position:absolute;z-index:1001;width:1.5ex;margin-left:-1.5ex;display:block;text-decoration:none!important;visibility:hidden;text-align:center;font-weight:400}
+#content h1>a.anchor:before,h2>a.anchor:before,h3>a.anchor:before,#toctitle>a.anchor:before,.sidebarblock>.content>.title>a.anchor:before,h4>a.anchor:before,h5>a.anchor:before,h6>a.anchor:before{content:"\00A7";font-size:.85em;display:block;padding-top:.1em}
+#content h1:hover>a.anchor,#content h1>a.anchor:hover,h2:hover>a.anchor,h2>a.anchor:hover,h3:hover>a.anchor,#toctitle:hover>a.anchor,.sidebarblock>.content>.title:hover>a.anchor,h3>a.anchor:hover,#toctitle>a.anchor:hover,.sidebarblock>.content>.title>a.anchor:hover,h4:hover>a.anchor,h4>a.anchor:hover,h5:hover>a.anchor,h5>a.anchor:hover,h6:hover>a.anchor,h6>a.anchor:hover{visibility:visible}
+#content h1>a.link,h2>a.link,h3>a.link,#toctitle>a.link,.sidebarblock>.content>.title>a.link,h4>a.link,h5>a.link,h6>a.link{color:#ba3925;text-decoration:none}
+#content h1>a.link:hover,h2>a.link:hover,h3>a.link:hover,#toctitle>a.link:hover,.sidebarblock>.content>.title>a.link:hover,h4>a.link:hover,h5>a.link:hover,h6>a.link:hover{color:#a53221}
+.audioblock,.imageblock,.literalblock,.listingblock,.stemblock,.videoblock{margin-bottom:1.25em}
+.admonitionblock td.content>.title,.audioblock>.title,.exampleblock>.title,.imageblock>.title,.listingblock>.title,.literalblock>.title,.stemblock>.title,.openblock>.title,.paragraph>.title,.quoteblock>.title,table.tableblock>.title,.verseblock>.title,.videoblock>.title,.dlist>.title,.olist>.title,.ulist>.title,.qlist>.title,.hdlist>.title{text-rendering:optimizeLegibility;text-align:left;font-family:"Noto Serif","DejaVu Serif",serif;font-size:1rem}
+table.tableblock>caption.title{white-space:nowrap;overflow:visible;max-width:0}
+.paragraph.lead>p,#preamble>.sectionbody>.paragraph:first-of-type p{color:rgba(0,0,0,.85)}
+table.tableblock #preamble>.sectionbody>.paragraph:first-of-type p{font-size:inherit}
+.admonitionblock>table{border-collapse:separate;border:0;background:none;width:100%}
+.admonitionblock>table td.icon{text-align:center;width:80px}
+.admonitionblock>table td.icon img{max-width:none}
+.admonitionblock>table td.icon .title{font-weight:bold;font-family:"Open Sans","DejaVu Sans",sans-serif;text-transform:uppercase}
+.admonitionblock>table td.content{padding-left:1.125em;padding-right:1.25em;border-left:1px solid #ddddd8;color:rgba(0,0,0,.6)}
+.admonitionblock>table td.content>:last-child>:last-child{margin-bottom:0}
+.exampleblock>.content{border-style:solid;border-width:1px;border-color:#e6e6e6;margin-bottom:1.25em;padding:1.25em;background:#fff;-webkit-border-radius:4px;border-radius:4px}
+.exampleblock>.content>:first-child{margin-top:0}
+.exampleblock>.content>:last-child{margin-bottom:0}
+.sidebarblock{border-style:solid;border-width:1px;border-color:#e0e0dc;margin-bottom:1.25em;padding:1.25em;background:#f8f8f7;-webkit-border-radius:4px;border-radius:4px}
+.sidebarblock>:first-child{margin-top:0}
+.sidebarblock>:last-child{margin-bottom:0}
+.sidebarblock>.content>.title{color:#002b49;margin-top:0;text-align:center}
+.exampleblock>.content>:last-child>:last-child,.exampleblock>.content .olist>ol>li:last-child>:last-child,.exampleblock>.content .ulist>ul>li:last-child>:last-child,.exampleblock>.content .qlist>ol>li:last-child>:last-child,.sidebarblock>.content>:last-child>:last-child,.sidebarblock>.content .olist>ol>li:last-child>:last-child,.sidebarblock>.content .ulist>ul>li:last-child>:last-child,.sidebarblock>.content .qlist>ol>li:last-child>:last-child{margin-bottom:0}
+.literalblock pre,.listingblock pre:not(.highlight),.listingblock pre[class="highlight"],.listingblock pre[class^="highlight "],.listingblock pre.CodeRay,.listingblock pre.prettyprint{background:#f7f7f8}
+.sidebarblock .literalblock pre,.sidebarblock .listingblock pre:not(.highlight),.sidebarblock .listingblock pre[class="highlight"],.sidebarblock .listingblock pre[class^="highlight "],.sidebarblock .listingblock pre.CodeRay,.sidebarblock .listingblock pre.prettyprint{background:#f2f1f1}
+.literalblock pre,.literalblock pre[class],.listingblock pre,.listingblock pre[class]{-webkit-border-radius:4px;border-radius:4px;word-wrap:break-word;padding:1em;font-size:.8125em}
+.literalblock pre.nowrap,.literalblock pre[class].nowrap,.listingblock pre.nowrap,.listingblock pre[class].nowrap{overflow-x:auto;white-space:pre;word-wrap:normal}
+@media only screen and (min-width:768px){.literalblock pre,.literalblock pre[class],.listingblock pre,.listingblock pre[class]{font-size:.90625em}}
+@media only screen and (min-width:1280px){.literalblock pre,.literalblock pre[class],.listingblock pre,.listingblock pre[class]{font-size:1em}}
+.literalblock.output pre{color:#f7f7f8;background-color:rgba(0,0,0,.9)}
+.listingblock pre.highlightjs{padding:0}
+.listingblock pre.highlightjs>code{padding:1em;-webkit-border-radius:4px;border-radius:4px}
+.listingblock pre.prettyprint{border-width:0}
+.listingblock>.content{position:relative}
+.listingblock code[data-lang]:before{display:none;content:attr(data-lang);position:absolute;font-size:.75em;top:.425rem;right:.5rem;line-height:1;text-transform:uppercase;color:#999}
+.listingblock:hover code[data-lang]:before{display:block}
+.listingblock.terminal pre .command:before{content:attr(data-prompt);padding-right:.5em;color:#999}
+.listingblock.terminal pre .command:not([data-prompt]):before{content:"$"}
+table.pyhltable{border-collapse:separate;border:0;margin-bottom:0;background:none}
+table.pyhltable td{vertical-align:top;padding-top:0;padding-bottom:0;line-height:1.45}
+table.pyhltable td.code{padding-left:.75em;padding-right:0}
+pre.pygments .lineno,table.pyhltable td:not(.code){color:#999;padding-left:0;padding-right:.5em;border-right:1px solid #ddddd8}
+pre.pygments .lineno{display:inline-block;margin-right:.25em}
+table.pyhltable .linenodiv{background:none!important;padding-right:0!important}
+.quoteblock{margin:0 1em 1.25em 1.5em;display:table}
+.quoteblock>.title{margin-left:-1.5em;margin-bottom:.75em}
+.quoteblock blockquote,.quoteblock blockquote p{color:rgba(0,0,0,.85);font-size:1.15rem;line-height:1.75;word-spacing:.1em;letter-spacing:0;font-style:italic;text-align:justify}
+.quoteblock blockquote{margin:0;padding:0;border:0}
+.quoteblock blockquote:before{content:"\201c";float:left;font-size:2.75em;font-weight:bold;line-height:.6em;margin-left:-.6em;color:#002b49;text-shadow:0 1px 2px rgba(0,0,0,.1)}
+.quoteblock blockquote>.paragraph:last-child p{margin-bottom:0}
+.quoteblock .attribution{margin-top:.5em;margin-right:.5ex;text-align:right}
+.quoteblock .quoteblock{margin-left:0;margin-right:0;padding:.5em 0;border-left:3px solid rgba(0,0,0,.6)}
+.quoteblock .quoteblock blockquote{padding:0 0 0 .75em}
+.quoteblock .quoteblock blockquote:before{display:none}
+.verseblock{margin:0 1em 1.25em 1em}
+.verseblock pre{font-family:"Open Sans","DejaVu Sans",sans;font-size:1.15rem;color:rgba(0,0,0,.85);font-weight:300;text-rendering:optimizeLegibility}
+.verseblock pre strong{font-weight:400}
+.verseblock .attribution{margin-top:1.25rem;margin-left:.5ex}
+.quoteblock .attribution,.verseblock .attribution{font-size:.9375em;line-height:1.45;font-style:italic}
+.quoteblock .attribution br,.verseblock .attribution br{display:none}
+.quoteblock .attribution cite,.verseblock .attribution cite{display:block;letter-spacing:-.025em;color:rgba(0,0,0,.6)}
+.quoteblock.abstract{margin:0 0 1.25em 0;display:block}
+.quoteblock.abstract blockquote,.quoteblock.abstract blockquote p{text-align:left;word-spacing:0}
+.quoteblock.abstract blockquote:before,.quoteblock.abstract blockquote p:first-of-type:before{display:none}
+table.tableblock{max-width:100%;border-collapse:separate}
+table.tableblock td>.paragraph:last-child p>p:last-child,table.tableblock th>p:last-child,table.tableblock td>p:last-child{margin-bottom:0}
+table.tableblock,th.tableblock,td.tableblock{border:0 solid #dedede}
+table thead tr th{background:rgba(0,145,189,0.6); color:white}
+table.grid-all th.tableblock,table.grid-all td.tableblock{border-width:0 1px 1px 0}
+table.grid-all tfoot>tr>th.tableblock,table.grid-all tfoot>tr>td.tableblock{border-width:1px 1px 0 0}
+table.grid-cols th.tableblock,table.grid-cols td.tableblock{border-width:0 1px 0 0}
+table.grid-all *>tr>.tableblock:last-child,table.grid-cols *>tr>.tableblock:last-child{border-right-width:0}
+table.grid-rows th.tableblock,table.grid-rows td.tableblock{border-width:0 0 1px 0}
+table.grid-all tbody>tr:last-child>th.tableblock,table.grid-all tbody>tr:last-child>td.tableblock,table.grid-all thead:last-child>tr>th.tableblock,table.grid-rows tbody>tr:last-child>th.tableblock,table.grid-rows tbody>tr:last-child>td.tableblock,table.grid-rows thead:last-child>tr>th.tableblock{border-bottom-width:0}
+table.grid-rows tfoot>tr>th.tableblock,table.grid-rows tfoot>tr>td.tableblock{border-width:1px 0 0 0}
+table.frame-all{border-width:1px}
+table.frame-sides{border-width:0 1px}
+table.frame-topbot{border-width:1px 0}
+th.halign-left,td.halign-left{text-align:left}
+th.halign-right,td.halign-right{text-align:right}
+th.halign-center,td.halign-center{text-align:center}
+th.valign-top,td.valign-top{vertical-align:top}
+th.valign-bottom,td.valign-bottom{vertical-align:bottom}
+th.valign-middle,td.valign-middle{vertical-align:middle}
+table thead th,table tfoot th{font-weight:bold}
+tbody tr th{display:table-cell;line-height:1.6;background:#f7f8f7}
+tbody tr th,tbody tr th p,tfoot tr th,tfoot tr th p{color:rgba(0,0,0,.8);font-weight:bold}
+p.tableblock>code:only-child{background:none;padding:0}
+p.tableblock{font-size:1em}
+td>div.verse{white-space:pre}
+ol{margin-left:1.75em}
+ul li ol{margin-left:1.5em}
+dl dd{margin-left:1.125em}
+dl dd:last-child,dl dd:last-child>:last-child{margin-bottom:0}
+ol>li p,ul>li p,ul dd,ol dd,.olist .olist,.ulist .ulist,.ulist .olist,.olist .ulist{margin-bottom:.625em}
+ul.unstyled,ol.unnumbered,ul.checklist,ul.none{list-style-type:none}
+ul.unstyled,ol.unnumbered,ul.checklist{margin-left:.625em}
+ul.checklist li>p:first-child>.fa-square-o:first-child,ul.checklist li>p:first-child>.fa-check-square-o:first-child{width:1em;font-size:.85em}
+ul.checklist li>p:first-child>input[type="checkbox"]:first-child{width:1em;position:relative;top:1px}
+ul.inline{margin:0 auto .625em auto;margin-left:-1.375em;margin-right:0;padding:0;list-style:none;overflow:hidden}
+ul.inline>li{list-style:none;float:left;margin-left:1.375em;display:block}
+ul.inline>li>*{display:block}
+.unstyled dl dt{font-weight:400;font-style:normal}
+ol.arabic{list-style-type:decimal}
+ol.decimal{list-style-type:decimal-leading-zero}
+ol.loweralpha{list-style-type:lower-alpha}
+ol.upperalpha{list-style-type:upper-alpha}
+ol.lowerroman{list-style-type:lower-roman}
+ol.upperroman{list-style-type:upper-roman}
+ol.lowergreek{list-style-type:lower-greek}
+.hdlist>table,.colist>table{border:0;background:none}
+.hdlist>table>tbody>tr,.colist>table>tbody>tr{background:none}
+td.hdlist1,td.hdlist2{vertical-align:top;padding:0 .625em}
+td.hdlist1{font-weight:bold;padding-bottom:1.25em}
+.literalblock+.colist,.listingblock+.colist{margin-top:-.5em}
+.colist>table tr>td:first-of-type{padding:0 .75em;line-height:1}
+.colist>table tr>td:last-of-type{padding:.25em 0}
+.thumb,.th{line-height:0;display:inline-block;border:solid 4px #fff;-webkit-box-shadow:0 0 0 1px #ddd;box-shadow:0 0 0 1px #ddd}
+.imageblock.left,.imageblock[style*="float: left"]{margin:.25em .625em 1.25em 0}
+.imageblock.right,.imageblock[style*="float: right"]{margin:.25em 0 1.25em .625em}
+.imageblock>.title{margin-bottom:0}
+.imageblock.thumb,.imageblock.th{border-width:6px}
+.imageblock.thumb>.title,.imageblock.th>.title{padding:0 .125em}
+.image.left,.image.right{margin-top:.25em;margin-bottom:.25em;display:inline-block;line-height:0}
+.image.left{margin-right:.625em}
+.image.right{margin-left:.625em}
+a.image{text-decoration:none;display:inline-block}
+a.image object{pointer-events:none}
+sup.footnote,sup.footnoteref{font-size:.875em;position:static;vertical-align:super}
+sup.footnote a,sup.footnoteref a{text-decoration:none}
+sup.footnote a:active,sup.footnoteref a:active{text-decoration:underline}
+#footnotes{padding-top:.75em;padding-bottom:.75em;margin-bottom:.625em}
+#footnotes hr{width:20%;min-width:6.25em;margin:-.25em 0 .75em 0;border-width:1px 0 0 0}
+#footnotes .footnote{padding:0 .375em 0 .225em;line-height:1.3334;font-size:.875em;margin-left:1.2em;text-indent:-1.05em;margin-bottom:.2em}
+#footnotes .footnote a:first-of-type{font-weight:bold;text-decoration:none}
+#footnotes .footnote:last-of-type{margin-bottom:0}
+#content #footnotes{margin-top:-.625em;margin-bottom:0;padding:.75em 0}
+.gist .file-data>table{border:0;background:#fff;width:100%;margin-bottom:0}
+.gist .file-data>table td.line-data{width:99%}
+div.unbreakable{page-break-inside:avoid}
+.big{font-size:larger}
+.small{font-size:smaller}
+.underline{text-decoration:underline}
+.overline{text-decoration:overline}
+.line-through{text-decoration:line-through}
+.aqua{color:#00bfbf}
+.aqua-background{background-color:#00fafa}
+.black{color:#000}
+.black-background{background-color:#000}
+.blue{color:#0000bf}
+.blue-background{background-color:#0000fa}
+.fuchsia{color:#bf00bf}
+.fuchsia-background{background-color:#fa00fa}
+.gray{color:#606060}
+.gray-background{background-color:#7d7d7d}
+.green{color:#006000}
+.green-background{background-color:#007d00}
+.lime{color:#00bf00}
+.lime-background{background-color:#00fa00}
+.maroon{color:#600000}
+.maroon-background{background-color:#7d0000}
+.navy{color:#000060}
+.navy-background{background-color:#00007d}
+.olive{color:#606000}
+.olive-background{background-color:#7d7d00}
+.purple{color:#600060}
+.purple-background{background-color:#7d007d}
+.red{color:#bf0000}
+.red-background{background-color:#fa0000}
+.silver{color:#909090}
+.silver-background{background-color:#bcbcbc}
+.teal{color:#006060}
+.teal-background{background-color:#007d7d}
+.white{color:#bfbfbf}
+.white-background{background-color:#fafafa}
+.yellow{color:#bfbf00}
+.yellow-background{background-color:#fafa00}
+span.icon>.fa{cursor:default}
+.admonitionblock td.icon [class^="fa icon-"]{font-size:2.5em;text-shadow:1px 1px 2px rgba(0,0,0,.5);cursor:default}
+.admonitionblock td.icon .icon-note:before{content:"\f05a";color:#19407c}
+.admonitionblock td.icon .icon-tip:before{content:"\f0eb";text-shadow:1px 1px 2px rgba(155,155,0,.8);color:#111}
+.admonitionblock td.icon .icon-warning:before{content:"\f071";color:#bf6900}
+.admonitionblock td.icon .icon-caution:before{content:"\f06d";color:#bf3400}
+.admonitionblock td.icon .icon-important:before{content:"\f06a";color:#bf0000}
+.conum[data-value]{display:inline-block;color:#fff!important;background-color:rgba(0,0,0,.8);-webkit-border-radius:100px;border-radius:100px;text-align:center;font-size:.75em;width:1.67em;height:1.67em;line-height:1.67em;font-family:"Open Sans","DejaVu Sans",sans-serif;font-style:normal;font-weight:bold}
+.conum[data-value] *{color:#fff!important}
+.conum[data-value]+b{display:none}
+.conum[data-value]:after{content:attr(data-value)}
+pre .conum[data-value]{position:relative;top:-.125em}
+b.conum *{color:inherit!important}
+.conum:not([data-value]):empty{display:none}
+dt,th.tableblock,td.content,div.footnote{text-rendering:optimizeLegibility}
+h1,h2,p,td.content,span.alt{letter-spacing:-.01em}
+p strong,td.content strong,div.footnote strong{letter-spacing:-.005em}
+p,blockquote,dt,td.content,span.alt{font-size:1.0625rem}
+p{margin-bottom:1.25rem}
+.sidebarblock p,.sidebarblock dt,.sidebarblock td.content,p.tableblock{font-size:1em}
+.exampleblock>.content{background-color:#fffef7;border-color:#e0e0dc;-webkit-box-shadow:0 1px 4px #e0e0dc;box-shadow:0 1px 4px #e0e0dc}
+.print-only{display:none!important}
+@media print{@page{margin:1.25cm .75cm}
+*{-webkit-box-shadow:none!important;box-shadow:none!important;text-shadow:none!important}
+a{color:inherit!important;text-decoration:underline!important}
+a.bare,a[href^="#"],a[href^="mailto:"]{text-decoration:none!important}
+a[href^="http:"]:not(.bare):after,a[href^="https:"]:not(.bare):after{content:"(" attr(href) ")";display:inline-block;font-size:.875em;padding-left:.25em}
+abbr[title]:after{content:" (" attr(title) ")"}
+pre,blockquote,tr,img,object,svg{page-break-inside:avoid}
+thead{display:table-header-group}
+svg{max-width:100%}
+p,blockquote,dt,td.content{font-size:1em;orphans:3;widows:3}
+h2,h3,#toctitle,.sidebarblock>.content>.title{page-break-after:avoid}
+#toc,.sidebarblock,.exampleblock>.content{background:none!important}
+#toc{border-bottom:1px solid #ddddd8!important;padding-bottom:0!important}
+.sect1{padding-bottom:0!important}
+.sect1+.sect1{border:0!important}
+#header>h1:first-child{margin-top:1.25rem}
+body.book #header{text-align:center}
+body.book #header>h1:first-child{border:0!important;margin:2.5em 0 1em 0}
+body.book #header .details{border:0!important;display:block;padding:0!important}
+body.book #header .details span:first-child{margin-left:0!important}
+body.book #header .details br{display:block}
+body.book #header .details br+span:before{content:none!important}
+body.book #toc{border:0!important;text-align:left!important;padding:0!important;margin:0!important}
+body.book #toc,body.book #preamble,body.book h1.sect0,body.book .sect1>h2{page-break-before:always}
+.listingblock code[data-lang]:before{display:block}
+#footer{background:none!important;padding:0 .9375em}
+#footer-text{color:rgba(0,0,0,.6)!important;font-size:.9em}
+.hide-on-print{display:none!important}
+.print-only{display:block!important}
+.hide-for-print{display:none!important}
+.show-for-print{display:inherit!important}}
diff --git a/tosa_spec.adoc b/tosa_spec.adoc
new file mode 100644
index 0000000..78926b1
--- /dev/null
+++ b/tosa_spec.adoc
@@ -0,0 +1,21 @@
+//
+// This confidential and proprietary software may be used only as
+// authorised by a licensing agreement from ARM Limited
+// (C) COPYRIGHT 2020 ARM Limited
+// ALL RIGHTS RESERVED
+// The entire notice above must be reproduced on all authorised
+// copies and copies may only be made to the extent permitted
+// by a licensing agreement from ARM Limited.
+
+= TOSA {revnumber} specification
+:numbered:
+:toc: left
+:toclevels: 3
+:source-highlighter: coderay
+
+include::chapters/notice.adoc[]
+
+include::chapters/introduction.adoc[]
+
+include::chapters/operators.adoc[]
+