From a9101530d8ea7a3cb470b722bc6cf8745ab283ac Mon Sep 17 00:00:00 2001
From: Eric Kunze <eric.kunze@arm.com>
Date: Thu, 17 Jun 2021 18:01:09 -0700
Subject: Replace assert with REQUIRE()

REQUIRE is a direct replacement for the asserts, and uses
the unpredictable() function in pseudocode to describe the required
conditions for operators

Change-Id: I35dc81e083d8e41f16728d992bdb8b06b0271226
Signed-off-by: Eric Kunze <eric.kunze@arm.com>
---
 chapters/control_flow.adoc    |  18 +++---
 chapters/data_layout.adoc     |   8 +--
 chapters/ewise_binary.adoc    |  12 ++--
 chapters/ewise_unary.adoc     |   4 +-
 chapters/image.adoc           |  18 +++---
 chapters/introduction.adoc    | 119 +++++----------------------------------
 chapters/operators.adoc       |   8 ++-
 chapters/pseudocode.adoc      | 128 ++++++++++++++++++++++++++++++++++++++++++
 chapters/reduction.adoc       |  24 ++++----
 chapters/scatter_gather.adoc  |   6 +-
 chapters/tensor_ops.adoc      |  30 +++++-----
 chapters/type_conversion.adoc |   6 +-
 tosa_spec.adoc                |   3 +-
 13 files changed, 213 insertions(+), 171 deletions(-)
 create mode 100644 chapters/pseudocode.adoc

diff --git a/chapters/control_flow.adoc b/chapters/control_flow.adoc
index 611d2e5..2759464 100644
--- a/chapters/control_flow.adoc
+++ b/chapters/control_flow.adoc
@@ -31,10 +31,10 @@ Evaluates a Boolean condition and then takes one of two distinct execution paths
 
 [source,c++]
 ----
-assert(tensor_list_shape(input_list)==tosa_input_shape(then_graph));
-assert(tensor_list_shape(input_list)==tosa_input_shape(else_graph));
-assert(tensor_list_shape(output_list)==tosa_output_shape(then_graph));
-assert(tensor_list_shape(output_list)==tosa_output_shape(else_graph));
+REQUIRE(tensor_list_shape(input_list) == tosa_input_shape(then_graph));
+REQUIRE(tensor_list_shape(input_list) == tosa_input_shape(else_graph));
+REQUIRE(tensor_list_shape(output_list) == tosa_output_shape(then_graph));
+REQUIRE(tensor_list_shape(output_list) == tosa_output_shape(else_graph));
 
 if (condition) {
     tosa_execute_graph(then_graph, input_list, output_list);
@@ -62,11 +62,11 @@ Generates and evaluates a Bool condition and either executes a loop body or exit
 
 [source,c++]
 ----
-assert(tensor_list_shape(input_list)==tosa_list_shape(output_list));
-assert(tensor_list_shape(input_list)==tosa_input_shape(cond_graph));
-assert(tensor_list_shape(input_list)==tosa_input_shape(body_graph));
-assert(tensor_list_shape(input_list)==tosa_output_shape(body_graph));
-assert(tosa_output_shape(cond_graph)==tosa_list_shape([bool_t]));
+REQUIRE(tensor_list_shape(input_list) == tosa_list_shape(output_list));
+REQUIRE(tensor_list_shape(input_list) == tosa_input_shape(cond_graph));
+REQUIRE(tensor_list_shape(input_list) == tosa_input_shape(body_graph));
+REQUIRE(tensor_list_shape(input_list) == tosa_output_shape(body_graph));
+REQUIRE(tosa_output_shape(cond_graph) == tosa_list_shape([bool_t]));
 
 // The iteration number 'i' is included to give unique names to variables
 // in each iteration of the loop and is not required by implementations
diff --git a/chapters/data_layout.adoc b/chapters/data_layout.adoc
index de90322..09df5be 100644
--- a/chapters/data_layout.adoc
+++ b/chapters/data_layout.adoc
@@ -121,7 +121,7 @@ Returns a tensor with the same type/values as the input, with a new shape specif
 
 [source,c++]
 ----
-assert(tensor_size(shape1) == tensor_size(shape));
+REQUIRE(tensor_size(shape1) == tensor_size(shape));
 for(i = 0; i < tensor_size(shape); i++) {
     output[i] = input[i];
 }
@@ -157,7 +157,7 @@ Returns a tensor with the same type/values as the input, with the data reversed
 
 [source,c++]
 ----
-assert(0 <= axis && axis < rank(shape));
+REQUIRE(0 <= axis && axis < rank(shape));
 for_each(index in shape) {
     tmp_index = index;
     tmp_index[axis] = shape[axis] - 1 - index[axis];
@@ -240,7 +240,7 @@ Replicates input1 multiplies times along each dimension.
 for_each(index in shape) {
     tmp_index = index;
     for(i = 0; i < rank(shape); i++) {
-        assert(shape1[i] * multiplies[i] == shape[i]);
+        REQUIRE(shape1[i] * multiplies[i] == shape[i]);
         tmp_index[i] = index[i] % shape1[i];
     }
     in_t value = tensor_read<in_t>(input, shape1, tmp_index);
@@ -281,7 +281,7 @@ Permutes the dimensions based on perm.
 for_each(index in shape) {
     tmp_index = index;
     for(i = 0; i < rank(shape); i++) {
-        assert(shape1[perm[i]] == shape[i])
+        REQUIRE(shape1[perm[i]] == shape[i])
         tmp_index[perm[i]] = index[i]
     }
     in_t value = tensor_read<in_t>(input, shape1, tmp_index);
diff --git a/chapters/ewise_binary.adoc b/chapters/ewise_binary.adoc
index 45075e9..d94676c 100644
--- a/chapters/ewise_binary.adoc
+++ b/chapters/ewise_binary.adoc
@@ -71,7 +71,7 @@ for_each(index in shape) {
     index2 = apply_broadcast(shape, shape2, index);
     in_t value1 = tensor_read<in_t>(input1, shape1, index1);
     in_t value2 = tensor_read<in_t>(input2, shape2, index2);
-    assert(0 <= value2 && value2 <= 31);
+    REQUIRE(0 <= value2 && value2 <= 31);
     in_t acc = value1 >> value2;
     if (round == true && value2 > 0 && (value1 >> (value2 - 1)) & 1 != 0) {
         acc = acc + 1;
@@ -235,8 +235,8 @@ for_each(index in shape) {
     index2 = apply_broadcast(shape, shape2, index);
     in_t value1 = tensor_read<in_t>(input1, shape1, index1);
     in_t value2 = tensor_read<in_t>(input2, shape2, index2);
-    assert(value2 != 0);
-    assert((int64_t)value1 / value2 <= maximum<in_t>);
+    REQUIRE(value2 != 0);
+    REQUIRE((int64_t)value1 / value2 <= maximum<in_t>);
     in_t acc = value1 / value2;
     tensor_write<in_t>(output, shape, index, acc);
 }
@@ -314,7 +314,7 @@ for_each(index in shape) {
     index2 = apply_broadcast(shape, shape2, index);
     in_t value1 = tensor_read<in_t>(input1, shape1, index1);
     in_t value2 = tensor_read<in_t>(input2, shape2, index2);
-    assert(0 <= value2 && value2 <= 31);
+    REQUIRE(0 <= value2 && value2 <= 31);
     in_t acc = value1 << value2;
     tensor_write<in_t>(output, shape, index, acc);
 }
@@ -354,7 +354,7 @@ for_each(index in shape) {
     index2 = apply_broadcast(shape, shape2, index);
     in_t value1 = tensor_read<in_t>(input1, shape1, index1);
     in_t value2 = tensor_read<in_t>(input2, shape2, index2);
-    assert(0 <= value2 && value2 <= 31);
+    REQUIRE(0 <= value2 && value2 <= 31);
     in_t acc = (unsigned in_t)value1 >> value2;
     tensor_write<in_t>(output, shape, index, acc);
 }
@@ -540,7 +540,7 @@ Axis of size 1 will be broadcast, as necessary. Rank of input tensors must match
 
 [source,c++]
 ----
-assert(in_t == int32_t || shift == 0);
+REQUIRE(in_t == int32_t || shift == 0);
 for_each(index in shape) {
     index1 = apply_broadcast(shape, shape1, index);
     index2 = apply_broadcast(shape, shape2, index);
diff --git a/chapters/ewise_unary.adoc b/chapters/ewise_unary.adoc
index 21b14ef..304ef72 100644
--- a/chapters/ewise_unary.adoc
+++ b/chapters/ewise_unary.adoc
@@ -259,8 +259,8 @@ Elementwise negation operation
 
 [source,c++]
 ----
-assert(in_t == int8_t || input1_zp == 0) // Zero point only for int8_t
-assert(in_t == int8_t || output_zp == 0) // Zero point only for int8_t
+REQUIRE(in_t == int8_t || input1_zp == 0); // Zero point only for int8_t
+REQUIRE(in_t == int8_t || output_zp == 0); // Zero point only for int8_t
 for_each(index in shape) {
     acc_t acc = tensor_read<in_t>(input1, shape, index, input1_zp);
     acc = apply_sub<acc_t>(0, acc);
diff --git a/chapters/image.adoc b/chapters/image.adoc
index 3bf4109..e098bac 100644
--- a/chapters/image.adoc
+++ b/chapters/image.adoc
@@ -1,7 +1,7 @@
 //
 // This confidential and proprietary software may be used only as
 // authorised by a licensing agreement from ARM Limited
-// (C) COPYRIGHT 2020 ARM Limited
+// (C) COPYRIGHT 2020-2021 ARM Limited
 // ALL RIGHTS RESERVED
 // The entire notice above must be reproduced on all authorised
 // copies and copies may only be made to the extent permitted
@@ -55,21 +55,21 @@ None
 ----
 // Ensure image size is supported by GPU APIs and that for integer
 // implementations, position * stride does not overflow int32_t.
-assert(max(OH,OW,IH,IW) < 16384);
+REQUIRE(max(OH,OW,IH,IW) < 16384);
 if (resize_t == float_t) {
     // The shift attribute is not used for floating point
-    assert(shift == 0);
+    REQUIRE(shift == 0);
 } else {
     // if in_t=int8_t ensure that an int32_t accumulator can be used
-    assert(0 < shift && shift <= 11);
+    REQUIRE(0 < shift && shift <= 11);
     // set a consistent lower limit of 1/16 downscale
     // independent of the shift value to simplify implementations
-    assert(0 < stride_x && stride_x < (16 << shift));
-    assert(0 < stride_y && stride_y < (16 << shift));
+    REQUIRE(0 < stride_x && stride_x < (16 << shift));
+    REQUIRE(0 < stride_y && stride_y < (16 << shift));
     // offset range is similarly limited to maximum 16 pixels irrespective
     // of shift. Both stride and offset fit in int16_t when shift=11.
-    assert((-16 << shift) < offset_x && offset_x < (16 << shift));
-    assert((-16 << shift) < offset_y && offset_y < (16 << shift));
+    REQUIRE((-16 << shift) < offset_x && offset_x < (16 << shift));
+    REQUIRE((-16 << shift) < offset_y && offset_y < (16 << shift));
 }
 for_each(0 <= n < N, 0 <= oy < OH, 0 <= ox < OW; 0 <= c < C) {
     unit = (resize_t == float_t) ? 1.0 : (1 << shift);
@@ -86,7 +86,7 @@ for_each(0 <= n < N, 0 <= oy < OH, 0 <= ox < OW; 0 <= c < C) {
     iy1 = apply_min(iy+1, IH-1);
     ix0 = apply_max(ix, 0);
     ix1 = apply_min(ix+1, IW-1);
-    assert(ix0 <= ix1 && iy0 <= iy1);
+    REQUIRE(ix0 <= ix1 && iy0 <= iy1);
     if (mode==BILINEAR) {
         v00 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy0,ix0,c]);
         v01 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy0,ix1,c]);
diff --git a/chapters/introduction.adoc b/chapters/introduction.adoc
index d410121..33ebea1 100644
--- a/chapters/introduction.adoc
+++ b/chapters/introduction.adoc
@@ -197,17 +197,17 @@ The padding array represents the before and after pair for each dimension.
 
 [source,c++]
 ----
-assert((pad ==  NULL) || size(pad) == 2 * size(shape));
+REQUIRE((pad ==  NULL) || size(pad) == 2 * size(shape));
 out_t tensor_read<in_t>(in_t *address, dim_t shape, dim_t index, in_t zero_point=0, dim_t pad=NULL) {
-    assert(in_t == int8_t || zero_point == 0)
+    REQUIRE(in_t == int8_t || zero_point == 0);
     unsigned offset = 0;
     for (i = 0; i < rank(shape); i++) {
         if (index[i] < 0) {
-            assert(pad && pad[2 * i] + index[i] >= 0);
+            REQUIRE(pad && pad[2 * i] + index[i] >= 0);
             return 0;
         }
         if (index[i] >= shape[i]) {
-            assert(pad && index[i] < shape[i] + pad[2 * i + 1]);
+            REQUIRE(pad && index[i] < shape[i] + pad[2 * i + 1]);
             return 0;
         }
         offset = offset * shape[i] + index[i];
@@ -223,7 +223,7 @@ out_t tensor_read<in_t>(in_t *address, dim_t shape, dim_t index, in_t zero_point
 tensor_write<type>(<type> *address, dim_t shape, dim_t index, <type> value) {
     unsigned offset = 0;
     for (i = 0; i < rank(shape); i++) {
-        assert (index[i] >= 0 && index[i] < shape[i]);
+        REQUIRE(index[i] >= 0 && index[i] < shape[i]);
         offset = offset * shape[i] + index[i];
     }
     address[offset] = value;
@@ -242,10 +242,10 @@ The following function maps an index in the output tensor to an index in the inp
 [source,c++]
 ----
 dim_t apply_broadcast(dim_t out_shape, dim_t in_shape, dim_t index) {
-    assert(rank(out_shape) == rank(in_shape));
+    REQUIRE(rank(out_shape) == rank(in_shape));
     for (i = 0; i < rank(out_shape); i++) {
         if (out_shape[i] != in_shape[i]) {
-            assert(in_shape[i] == 1);
+            REQUIRE(in_shape[i] == 1);
             index[i] = 0;
         }
     }
@@ -285,8 +285,8 @@ The apply_scale functions provide a scaling of approximately (multiplier * 2^-sh
 [source,c++]
 ----
 int32_t apply_scale_32(int32_t value, int32_t multipler, uint6_t shift, bool_t double_round=false) {
-    assert(multiplier >= 0);
-    assert(2 <= shift && shift <= 62);
+    REQUIRE(multiplier >= 0);
+    REQUIRE(2 <= shift && shift <= 62);
     int64_t round = 1 << (shift - 1);
     if (double_round) {
         if (shift > 31 && value >= 0) round += 1<<30;
@@ -294,17 +294,17 @@ int32_t apply_scale_32(int32_t value, int32_t multipler, uint6_t shift, bool_t d
     }
     int64_t result = (int64_t)value * multiplier + round;
     result = result >> shift;
-    assert(result >= minimum<int32_t> && result <= maximum<int32_t>);
+    REQUIRE(result >= minimum<int32_t> && result <= maximum<int32_t>);
     return (int32_t)result;
 }
 
 int32_t apply_scale_16(int48_t value, int16_t multipler, uint6_t shift) {
-    assert(multiplier >= 0);
-    assert(2 <= shift && shift <= 62);
+    REQUIRE(multiplier >= 0);
+    REQUIRE(2 <= shift && shift <= 62);
     int64_t round = (1 << (shift - 1));
     int64_t result = (int64_t)value * multiplier + round;
     result = result >> shift;
-    assert(result >= minimum<int32_t> && result <= maximum<int32_t>);
+    REQUIRE(result >= minimum<int32_t> && result <= maximum<int32_t>);
     return (int32_t)result;
 }
 ----
@@ -324,7 +324,7 @@ In places where a divide is required, we also use the function below to calculat
 [source,c++]
 ----
 scale_t reciprocal_scale(uint32_t value) {
-    assert(value > 0);
+    REQUIRE(value > 0);
     scale_t scale;
     int k = 32 - count_leading_zeros(value - 1); // (1 << k) / 2 < value <= (1 << k)
     int64_t numerator = ((1 << 30) + 1) << k;
@@ -419,94 +419,3 @@ These features ensure that detection of overflow and other exceptional condition
 
 |===
 
-=== General Pseudocode Helpers
-
-This section contains general pseudocode utility functions used throughout the specification.
-
-The following functions provide basic arithmetic with asserts that values stay in the valid range supported by TOSA.
-
-[source,c++]
-----
-acc_t apply_add<acc_t>(acc_t a, acc_t b) {
-    if (acc_t == float_t) return a + b;
-    int64_t c = (int64_t)a + (int64_t)b;
-    assert(c >= minimum<acc_t> && c <= maximum<acc_t>);
-    return (acc_t)c;
-}
-
-acc_t apply_sub<acc_t>(acc_t a, acc_t b) {
-    if (acc_t == float_t) return a - b;
-    int64_t c = (int64_t)a - (int64_t)b;
-    assert(c >= minimum<acc_t> && c <= maximum<acc_t>);
-    return (acc_t)c;
-}
-----
-
-The following functions are used in the pseudocode to take maximum,
-minimum, clip values to a range, or count leading zeros.
-[[count_leading_zeros]]
-[source,c++]
-----
-<type> apply_max<type>(<type> a, <type> b) {
-    if (a >= b) return a; else return b;
-}
-
-<type> apply_min<type>(<type> a, <type> b) {
-    if (a < b) return a; else return b;
-}
-
-<type> apply_clip<type>(<type> value, <type> min_val, <type> max_val) {
-    assert(min_val <= max_val);
-    value = apply_max(value, min_val);
-    value = apply_min(value, max_val);
-    return value;
-}
-
-int32_t count_leading_zeros(int32_t a) {
-    int32_t acc = 32;
-    if (a != 0) {
-        uint32_t mask;
-        mask = 1 << (32 - 1); // width of int32_t - 1
-        acc = 0;
-        while ((mask & a) == 0) {
-            mask = mask >> 1;
-            acc = acc + 1;
-        }
-    }
-    return acc;
-}
-----
-
-The following definitions are used in pseudocode to do numeric conversions.
-
-[source,c++]
-----
-int round_to_nearest_int(float_t f)
-  Converts the floating-point value to f, with rounding to the nearest integer value.
-
-float_t round_to_nearest_float(in_t f)
-  Converts the input value into floating-point, rounding to the nearest representable value.
-  The behavior for ties is implementation dependent.
-
-out_t sign_extend(in_t input)
-  Only valid for twos complement integer values where out_t has more bits than in_t.
-  Output = input
-  Replicate the top bit of input for all bits between the top bit of input and the top bit of output.
-
-out_t truncate(in_t input)
-  output is the sizeof(out_t) least significant bits in input.
-----
-
-The following definition is used to flatten a list of lists into a single list
-
-[source,c++]
-----
-in_t* flatten(in_t lists[]) {
-    in_t output = [];
-    for_each(list in lists) {
-        for_each(element in list) {
-            output.append(element);
-        }
-    }
-}
-----
diff --git a/chapters/operators.adoc b/chapters/operators.adoc
index 896931a..75084d1 100644
--- a/chapters/operators.adoc
+++ b/chapters/operators.adoc
@@ -38,9 +38,13 @@ The following function denotes the execution of a TOSA graph, on an input tensor
 [source,c++]
 ----
 tosa_execute_graph(tosa_graph_t graph, tosa_list_t input_list, tosa_list_t output_list) {
-    assert(tensor_list_shape(input_list)==tosa_input_shape(graph));
-    assert(tensor_list_shape(output_list)==tosa_output_shape(graph));
+    REQUIRE(tensor_list_shape(input_list) == tosa_input_shape(graph));
+    REQUIRE(tensor_list_shape(output_list) == tosa_output_shape(graph));
     <Execute TOSA graph operators as defined in this specification>
+    if (tosa_graph_result_unpredictable == true) {
+        // Result of TOSA graph execution is unpredictable due to calling
+        // the unpredictable() function during execution.
+    }
 }
 ----
 
diff --git a/chapters/pseudocode.adoc b/chapters/pseudocode.adoc
new file mode 100644
index 0000000..901211a
--- /dev/null
+++ b/chapters/pseudocode.adoc
@@ -0,0 +1,128 @@
+//
+// This confidential and proprietary software may be used only as
+// authorised by a licensing agreement from ARM Limited
+// (C) COPYRIGHT 2021 ARM Limited
+// ALL RIGHTS RESERVED
+// The entire notice above must be reproduced on all authorised
+// copies and copies may only be made to the extent permitted
+// by a licensing agreement from ARM Limited.
+
+== TOSA Pseudocode
+
+The TOSA pseudocode provides precise descriptions of TOSA operations.
+Each operator contains pseudocode describing the operator's functionality.
+This section contains pseudocode functions shared across multiple operators in the specification.
+
+=== Operator Validation Helpers
+
+
+The following functions are used to define the valid conditions for TOSA operators.
+The REQUIRE function defines the conditions required by the TOSA operator.
+When a call to unpredictable() is made, processing defined in the pseudocode for this operator may or may not be executed.
+Once unpredictable() is called, the whole TOSA graph is considered unpredictable, even if the unpredictable result does not propagate to the graph output.
+
+[source,c++]
+----
+void unpredictable() {
+    // Behavior of this TOSA operator cannot be relied on if this is called.
+    tosa_graph_result_unpredictable = true;
+}
+
+void REQUIRE(condition) {
+    if (not condition) {
+        unpredictable();
+    }
+}
+----
+
+=== General Pseudocode Helpers
+
+This section contains general pseudocode utility functions used throughout the specification.
+
+The following functions provide basic arithmetic while defining requirements such that values stay in the valid range.
+
+[source,c++]
+----
+acc_t apply_add<acc_t>(acc_t a, acc_t b) {
+    if (acc_t == float_t) return a + b;
+    int64_t c = (int64_t)a + (int64_t)b;
+    REQUIRE(c >= minimum<acc_t> && c <= maximum<acc_t>);
+    return (acc_t)c;
+}
+
+acc_t apply_sub<acc_t>(acc_t a, acc_t b) {
+    if (acc_t == float_t) return a - b;
+    int64_t c = (int64_t)a - (int64_t)b;
+    REQUIRE(c >= minimum<acc_t> && c <= maximum<acc_t>);
+    return (acc_t)c;
+}
+----
+
+The following functions are used in the pseudocode to take maximum,
+minimum, clip values to a range, or count leading zeros.
+[[count_leading_zeros]]
+[source,c++]
+----
+<type> apply_max<type>(<type> a, <type> b) {
+    if (a >= b) return a; else return b;
+}
+
+<type> apply_min<type>(<type> a, <type> b) {
+    if (a < b) return a; else return b;
+}
+
+<type> apply_clip<type>(<type> value, <type> min_val, <type> max_val) {
+    REQUIRE(min_val <= max_val);
+    value = apply_max(value, min_val);
+    value = apply_min(value, max_val);
+    return value;
+}
+
+int32_t count_leading_zeros(int32_t a) {
+    int32_t acc = 32;
+    if (a != 0) {
+        uint32_t mask;
+        mask = 1 << (32 - 1); // width of int32_t - 1
+        acc = 0;
+        while ((mask & a) == 0) {
+            mask = mask >> 1;
+            acc = acc + 1;
+        }
+    }
+    return acc;
+}
+----
+
+The following definitions are used in pseudocode to do numeric conversions.
+
+[source,c++]
+----
+int round_to_nearest_int(float_t f)
+  Converts the floating-point value f to an integer, rounding to the nearest integer value.
+
+float_t round_to_nearest_float(in_t f)
+  Converts the input value into floating-point, rounding to the nearest representable value.
+  The behavior for ties is implementation dependent.
+
+out_t sign_extend(in_t input)
+  Only valid for twos complement integer values where out_t has more bits than in_t.
+  Output = input
+  Replicate the top bit of input for all bits between the top bit of input and the top bit of output.
+
+out_t truncate(in_t input)
+  output is the sizeof(out_t) least significant bits in input.
+----
+
+The following definition is used to flatten a list of lists into a single list
+
+[source,c++]
+----
+in_t* flatten(in_t lists[]) {
+    in_t output = [];
+    for_each(list in lists) {
+        for_each(element in list) {
+            output.append(element);
+        }
+    }
+}
+----
diff --git a/chapters/reduction.adoc b/chapters/reduction.adoc
index b84b7f0..391591f 100644
--- a/chapters/reduction.adoc
+++ b/chapters/reduction.adoc
@@ -27,8 +27,8 @@ Reduce a tensor along the given axis with a logical AND operation
 
 [source,c]
 ----
-assert(0 <= axis && axis < rank(shape1));
-assert(shape[axis] == 1);
+REQUIRE(0 <= axis && axis < rank(shape1));
+REQUIRE(shape[axis] == 1);
 for_each(index in shape) {
     tensor_write<in_t>(output, shape, index, true);
 }
@@ -68,8 +68,8 @@ Reduce a tensor along the given axis with a logical OR operation
 
 [source,c]
 ----
-assert(0 <= axis && axis < rank(shape1));
-assert(shape[axis] == 1);
+REQUIRE(0 <= axis && axis < rank(shape1));
+REQUIRE(shape[axis] == 1);
 for_each(index in shape) {
     tensor_write<in_t>(output, shape, index, false);
 }
@@ -109,8 +109,8 @@ Reduce a tensor along the given axis with a maximum operation
 
 [source,c]
 ----
-assert(0 <= axis && axis < rank(shape1));
-assert(shape[axis] == 1);
+REQUIRE(0 <= axis && axis < rank(shape1));
+REQUIRE(shape[axis] == 1);
 for_each(index in shape) {
     tensor_write<in_t>(output, shape, index, minimum<in_t>);
 }
@@ -156,8 +156,8 @@ Quantization is ignored when doing the REDUCE_MIN operation. The input and outpu
 
 [source,c]
 ----
-assert(0 <= axis && axis < rank(shape1));
-assert(shape[axis]==1);
+REQUIRE(0 <= axis && axis < rank(shape1));
+REQUIRE(shape[axis] == 1);
 for_each(index in shape) {
     tensor_write<in_t>(output, shape, index, maximum<in_t>);
 }
@@ -200,8 +200,8 @@ Reduce a tensor along the given axis by computing the product of the axis.
 
 [source,c]
 ----
-assert(0 <= axis && axis < rank(shape1));
-assert(shape[axis] == 1);
+REQUIRE(0 <= axis && axis < rank(shape1));
+REQUIRE(shape[axis] == 1);
 for_each(index in shape) {
     tensor_write<in_t>(output, shape, index, 1.0);
 }
@@ -241,8 +241,8 @@ Reduce a tensor along the given axis by computing the sum of the axis.
 
 [source,c]
 ----
-assert(0 <= axis && axis < rank(shape1));
-assert(shape[axis] == 1);
+REQUIRE(0 <= axis && axis < rank(shape1));
+REQUIRE(shape[axis] == 1);
 for_each(index in shape) {
     tensor_write<in_t>(output, shape, index, 0);
 }
diff --git a/chapters/scatter_gather.adoc b/chapters/scatter_gather.adoc
index a657f55..6fedb45 100644
--- a/chapters/scatter_gather.adoc
+++ b/chapters/scatter_gather.adoc
@@ -34,7 +34,7 @@ None
 ----
 for_each(0 <= n < N, 0 <= w < W, 0 <= c < C) {
     index_t k = tensor_read<index_t>(indices, [N,W], [n,w]);
-    assert(0 <= k && k < K);
+    REQUIRE(0 <= k && k < K);
     value_t value = tensor_read<value_t>(values, [N,K,C], [n,k,c]);
     tensor_write<value_t>(output, [N,W,C], [n,w,c], value);
 }
@@ -93,8 +93,8 @@ for_each(0 <= n < N, 0 <= k < K, 0 <= c < C) {
 // Now perform the SCATTER operation, modifying the positions from the indices tensor
 for_each(0 <= n < N, 0 <= w < W, 0 <= c < C) {
     index_t k = tensor_read<index_t>(indices, [N,W], [n,w]);
-    assert(0 <= k && k < K);
-    assert(output_modified[n,k,c] == false);
+    REQUIRE(0 <= k && k < K);
+    REQUIRE(output_modified[n,k,c] == false);
     value_t value = tensor_read<value_t>(input, [N,W,C], [n,w,c]);
     tensor_write<value_t>(values_out, [N,K,C], [n, k, c], value);
     output_modified[n,k,c] = true;
diff --git a/chapters/tensor_ops.adoc b/chapters/tensor_ops.adoc
index b2c220e..6780b1c 100644
--- a/chapters/tensor_ops.adoc
+++ b/chapters/tensor_ops.adoc
@@ -31,7 +31,7 @@ None
 
 [source,c++]
 ----
-assert(axis >= 0 && axis < rank(shape1) && rank(shape1) <= 4);
+REQUIRE(axis >= 0 && axis < rank(shape1) && rank(shape1) <= 4);
 if (axis == 0) {
     left_shape = [];
 } else {
@@ -42,7 +42,7 @@ if (axis == rank(shape1)-1) {
 } else {
     right_shape = shape1[axis+1:rank(shape1) - 1];
 }
-assert(flatten(left_shape, right_shape) == shape);
+REQUIRE(flatten(left_shape, right_shape) == shape);
 for_each(left_index in left_shape) {
     for_each(right_index in right_shape) {
         in_t max_value = minimum_value<in_t>;
@@ -97,8 +97,8 @@ This performs an average pooling over the given input tensor. A sliding window o
 
 [source,c++]
 ----
-assert(in_t == int8_t || input_zp == 0); // Zero point only for int8_t
-assert(in_t == int8_t || output_zp == 0); // Zero point only for int8_t
+REQUIRE(in_t == int8_t || input_zp == 0); // Zero point only for int8_t
+REQUIRE(in_t == int8_t || output_zp == 0); // Zero point only for int8_t
 pad = flatten([0,0], pad, [0,0]);
 for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W, 0 <= c < C ) {
     in_t output_val;
@@ -164,8 +164,8 @@ Performs a 2D convolution over the given tensor input, using the weight tensor.
 
 [source,c++]
 ----
-assert(in_t == int8_t || input_zp == 0); // Zero point only for int8_t
-assert(weight_t == int8_t || weight_zp == 0);
+REQUIRE(in_t == int8_t || input_zp == 0); // Zero point only for int8_t
+REQUIRE(weight_t == int8_t || weight_zp == 0);
 pad = flatten([0,0], pad, [0,0]);
 for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W; 0 <= oc < OC) {
     acc_t acc = 0;
@@ -225,8 +225,8 @@ Performs a 3D convolution over the given input tensor.
 
 [source,c++]
 ----
-assert(in_t == int8_t || input_zp == 0); // Zero point only for int8_t
-assert(weight_t == int8_t || weight_zp == 0);
+REQUIRE(in_t == int8_t || input_zp == 0); // Zero point only for int8_t
+REQUIRE(weight_t == int8_t || weight_zp == 0);
 pad = flatten([0,0], pad, [0,0]);
 for_each(0 <= n < N, 0 <= od < D, 0 <= oy < H, 0 <= ox < W; 0 <= oc < OC) {
     acc_t acc = 0;
@@ -289,8 +289,8 @@ Performs 2D convolutions separately over each channel of the given tensor input,
 
 [source,c++]
 ----
-assert(in_t == int8_t || input_zp == 0); // Zero point only for int8_t
-assert(weight_t == int8_t || weight_zp == 0);
+REQUIRE(in_t == int8_t || input_zp == 0); // Zero point only for int8_t
+REQUIRE(weight_t == int8_t || weight_zp == 0);
 pad = flatten([0,0], pad, [0,0]);
 for_each(0 <= n<N, 0 <= oy < H, 0 <= ox < W; 0 <= c < (C * M), 0 <= m < M) {
     acc_t acc = 0;
@@ -347,8 +347,8 @@ Performs a fully connected network.
 
 [source,c++]
 ----
-assert(in_t == int8_t || input_zp == 0); // Zero point only for int8_t
-assert(weight_t == int8_t || weight_zp == 0);
+REQUIRE(in_t == int8_t || input_zp == 0); // Zero point only for int8_t
+REQUIRE(weight_t == int8_t || weight_zp == 0);
 for_each(0 <= n < N, 0 <= oc < OC) {
     acc_t acc = 0;
     for_each(0 <= ic < IC) {
@@ -398,7 +398,7 @@ Performs two dimensional matrix multiplications. This allows both inputs to be a
 
 [source,c++]
 ----
-assert(in_t == int8_t || (A_zp == 0 && B_zp == 0)); // Zero point only for int8_t
+REQUIRE(in_t == int8_t || (A_zp == 0 && B_zp == 0)); // Zero point only for int8_t
 for_each(0 <= n < N, 0 <= h < H, 0 <= w < W) {
     acc_t acc = 0;
     for_each(0 <= c < C) {
@@ -499,8 +499,8 @@ Performs a 2D transposed convolution over the given tensor input, using the weig
 
 [source,c++]
 ----
-assert(in_t == int8_t  || input_zp == 0); // Zero point only allowed for int8_t
-assert(weight_t == int8_t || weight_zp == 0);
+REQUIRE(in_t == int8_t  || input_zp == 0); // Zero point only allowed for int8_t
+REQUIRE(weight_t == int8_t || weight_zp == 0);
 for_each(index in out_shape) {
     tensor_write<acc_t>(output, [N,OH,OW,OC], index, bias[index[3]])
 }
diff --git a/chapters/type_conversion.adoc b/chapters/type_conversion.adoc
index 2ca505e..35cebe7 100644
--- a/chapters/type_conversion.adoc
+++ b/chapters/type_conversion.adoc
@@ -103,9 +103,9 @@ Rescale quantized values into a new domain. This function scales by factor: mult
 [source,c++]
 ----
 for_each(index in shape) {
-    assert(in_t == int8_t  || in_t == uint8_t  || input_zp == 0);
-    assert(out_t == int8_t || out_t == uint8_t || output_zp == 0);
-    assert((scale32 && in_t != int48_t_t) || (!scale32 && !double_round));
+    REQUIRE(in_t == int8_t  || in_t == uint8_t  || input_zp == 0);
+    REQUIRE(out_t == int8_t || out_t == uint8_t || output_zp == 0);
+    REQUIRE((scale32 && in_t != int48_t) || (!scale32 && !double_round));
     int48_t value = tensor_read<in_t>(input, shape, index, input_zp);
     int c = (per_channel) ? index[dims-1] : 0;
     int32_t result = (scale32) ?
diff --git a/tosa_spec.adoc b/tosa_spec.adoc
index 0f96d5c..130d98e 100644
--- a/tosa_spec.adoc
+++ b/tosa_spec.adoc
@@ -1,7 +1,7 @@
 //
 // This confidential and proprietary software may be used only as
 // authorised by a licensing agreement from ARM Limited
-// (C) COPYRIGHT 2020 ARM Limited
+// (C) COPYRIGHT 2020-2021 ARM Limited
 // ALL RIGHTS RESERVED
 // The entire notice above must be reproduced on all authorised
 // copies and copies may only be made to the extent permitted
@@ -19,3 +19,4 @@ include::chapters/introduction.adoc[]
 
 include::chapters/operators.adoc[]
 
+include::chapters/pseudocode.adoc[]
-- 
cgit v1.2.1