From ca2a854e3d46f91ecaa446d4b2311112cc2326fd Mon Sep 17 00:00:00 2001 From: Dominic Symes Date: Fri, 19 Mar 2021 13:56:27 +0000 Subject: Add definition of TOSA compliance Signed-off-by: Dominic Symes Change-Id: I2e25d0467843adb078d5ab9fd681af40b2ffa52e --- chapters/control_flow.adoc | 18 ++++++------ chapters/data_layout.adoc | 2 +- chapters/ewise_binary.adoc | 3 +- chapters/ewise_unary.adoc | 4 +-- chapters/image.adoc | 14 +++++----- chapters/introduction.adoc | 64 ++++++++++++++++++++++++++++++++++++++++--- chapters/operators.adoc | 22 +++++++++++---- chapters/pseudocode.adoc | 33 ++++++++++++++++------ chapters/reduction.adoc | 24 ++++++++-------- chapters/tensor_ops.adoc | 30 ++++++++++---------- chapters/type_conversion.adoc | 9 +++--- 11 files changed, 152 insertions(+), 71 deletions(-) diff --git a/chapters/control_flow.adoc b/chapters/control_flow.adoc index 2759464..c9d4e15 100644 --- a/chapters/control_flow.adoc +++ b/chapters/control_flow.adoc @@ -31,10 +31,10 @@ Evaluates a Boolean condition and then takes one of two distinct execution paths [source,c++] ---- -REQUIRE(tensor_list_shape(input_list) == tosa_input_shape(then_graph)); -REQUIRE(tensor_list_shape(input_list) == tosa_input_shape(else_graph)); -REQUIRE(tensor_list_shape(output_list) == tosa_output_shape(then_graph)); -REQUIRE(tensor_list_shape(output_list) == tosa_output_shape(else_graph)); +ERROR_IF(tensor_list_shape(input_list) != tosa_input_shape(then_graph)); +ERROR_IF(tensor_list_shape(input_list) != tosa_input_shape(else_graph)); +ERROR_IF(tensor_list_shape(output_list) != tosa_output_shape(then_graph)); +ERROR_IF(tensor_list_shape(output_list) != tosa_output_shape(else_graph)); if (condition) { tosa_execute_graph(then_graph, input_list, output_list); @@ -62,11 +62,11 @@ Generates and evaluates a Bool condition and either executes a loop body or exit [source,c++] ---- -REQUIRE(tensor_list_shape(input_list) == tosa_list_shape(output_list)); -REQUIRE(tensor_list_shape(input_list) == 
tosa_input_shape(cond_graph)); -REQUIRE(tensor_list_shape(input_list) == tosa_input_shape(body_graph)); -REQUIRE(tensor_list_shape(input_list) == tosa_output_shape(body_graph)); -REQUIRE(tosa_output_shape(cond_graph) == tosa_list_shape([bool_t])); +ERROR_IF(tensor_list_shape(input_list) != tosa_list_shape(output_list)); +ERROR_IF(tensor_list_shape(input_list) != tosa_input_shape(cond_graph)); +ERROR_IF(tensor_list_shape(input_list) != tosa_input_shape(body_graph)); +ERROR_IF(tensor_list_shape(input_list) != tosa_output_shape(body_graph)); +ERROR_IF(tosa_output_shape(cond_graph) != tosa_list_shape([bool_t])); // The iteration number 'i' is included to give unique names to variables // in each iteration of the loop and is not required by implementations diff --git a/chapters/data_layout.adoc b/chapters/data_layout.adoc index 09df5be..e625085 100644 --- a/chapters/data_layout.adoc +++ b/chapters/data_layout.adoc @@ -121,7 +121,7 @@ Returns a tensor with the same type/values as the input, with a new shape specif [source,c++] ---- -REQUIRE(tensor_size(shape1) == tensor_size(shape)); +ERROR_IF(tensor_size(shape1) != tensor_size(shape)); for(i = 0; i < tensor_size(shape); i++) { output[i] = input[i]; } diff --git a/chapters/ewise_binary.adoc b/chapters/ewise_binary.adoc index d94676c..2b1eadd 100644 --- a/chapters/ewise_binary.adoc +++ b/chapters/ewise_binary.adoc @@ -540,13 +540,12 @@ Axis of size 1 will be broadcast, as necessary. 
Rank of input tensors must match [source,c++] ---- -REQUIRE(in_t == int32_t || shift == 0); for_each(index in shape) { index1 = apply_broadcast(shape, shape1, index); index2 = apply_broadcast(shape, shape2, index); in_t value1 = tensor_read(input1, shape1, index1); in_t value2 = tensor_read(input2, shape2, index2); - if (shift>0) { + if (in_t == int32_t && shift > 0) { out_t acc = apply_scale_32(value1, value2, shift); } else { out_t acc = value1 * value2; // low 32-bits of result for int32_t diff --git a/chapters/ewise_unary.adoc b/chapters/ewise_unary.adoc index 304ef72..036c6e3 100644 --- a/chapters/ewise_unary.adoc +++ b/chapters/ewise_unary.adoc @@ -259,8 +259,8 @@ Elementwise negation operation [source,c++] ---- -REQUIRE(in_t == int8_t || input1_zp == 0) // Zero point only for int8_t -REQUIRE(in_t == int8_t || output_zp == 0) // Zero point only for int8_t +ERROR_IF(in_t != int8_t && input1_zp != 0) // Zero point only for int8_t +ERROR_IF(in_t != int8_t && output_zp != 0) // Zero point only for int8_t for_each(index in shape) { acc_t acc = tensor_read(input1, shape, index, input1_zp); acc = apply_sub(0, acc); diff --git a/chapters/image.adoc b/chapters/image.adoc index e098bac..f997992 100644 --- a/chapters/image.adoc +++ b/chapters/image.adoc @@ -55,21 +55,21 @@ None ---- // Ensure image size is supported by GPU APIs and that for integer // implementations, position * stride does not overflow int32_t. 
-REQUIRE(max(OH,OW,IH,IW) < 16384); +ERROR_IF(max(OH,OW,IH,IW) >= 16384); if (resize_t == float_t) { // The shift attribute is not used for floating point - REQUIRE(shift == 0); + ERROR_IF(shift != 0); } else { // if in_t=int8_t ensure that an int32_t accumulator can be used - REQUIRE(0 < shift && shift <= 11); + ERROR_IF(shift < 1 || shift > 11); // set a consistent lower limit of 1/16 downscale // independent of the shift value to simplify implementations - REQUIRE(0 < stride_x && stride_x < (16 << shift)); - REQUIRE(0 < stride_y && stride_y < (16 << shift)); + ERROR_IF(stride_x <= 0 || stride_x >= (16 << shift)); + ERROR_IF(stride_y <= 0 || stride_y >= (16 << shift)); // offset range is similarly limited to maximum 16 pixels irrespective // of shift. Both stride and offset fit in int16_t when shift=11. - REQUIRE((-16 << shift) < offset_x && offset_x < (16 << shift)); - REQUIRE((-16 << shift) < offset_y && offset_y < (16 << shift)); + ERROR_IF(offset_x <= (-16 << shift) || offset_x >= (16 << shift)); + ERROR_IF(offset_y <= (-16 << shift) || offset_y >= (16 << shift)); } for_each(0 <= n < N, 0 <= oy < OH, 0 <= ox < OW; 0 <= c < C) { unit = (resize_t == float_t) ? 1.0 : (1 << shift); diff --git a/chapters/introduction.adoc b/chapters/introduction.adoc index 33ebea1..72c0298 100644 --- a/chapters/introduction.adoc +++ b/chapters/introduction.adoc @@ -67,6 +67,62 @@ The following table summarizes the three profiles: |Main Training|TOSA-MT|Yes|Yes|Yes |=== +=== Compliance + +This section defines when a TOSA implementation is compliant to a given TOSA specification profile. +The term conformant will mean the same as compliant. + +==== Baseline Inference Profile + +The <> section of this specification defines a TOSA graph and the behaviour defined for a TOSA graph. +This behaviour is captured in the pseudo-code function tosa_execute_graph(). 
+For a given input graph (with attributes) and input tensors there are three possible tosa_graph_result values after executing the graph: + +* tosa_unpredictable: The result of the graph on the given inputs cannot be relied upon. +* tosa_error: The graph does not meet the specification and is recognised as an illegal graph. +* tosa_valid: The result is defined and predictable and the list of output tensors defines the result. + +An implementation is compliant to the TOSA Baseline Inference Profile if it matches the above results as follows: + +* For tosa_unpredictable, the implementation can return whatever result it chooses (including error) +* For tosa_error, the implementation must return an error result (and there is no requirement on how much of the graph is executed, if any) +* For tosa_valid, the implementation must execute the entire graph without error and return the result defined by this specification. + +In terms of pseudo-code, if *graph* is a TOSA graph consisting of Baseline Inference Profile operators and *input_list* is a list of input tensors then the following test must pass.
+ +[source,c++] ---- +bool tosa_test_compliance(tosa_graph_t graph, tosa_list_t input_list) { + shape_list_t output_list_spec = tosa_allocate_list(tosa_output_shape(graph)); + shape_list_t output_list_test = tosa_allocate_list(tosa_output_shape(graph)); + tosa_graph_result = tosa_valid; // result starts as valid + tosa_execute_graph(graph, input_list, output_list_spec); + if (tosa_graph_result == tosa_unpredictable) { + return true; // No requirement to match an unpredictable result + } + result_test = execute_implementation_under_test(graph, input_list, output_list_test); + if (tosa_graph_result == tosa_error) { + return result_test == tosa_error; // result must be an error + } + if (exact_tensor_match(output_list_spec, output_list_test)) { + // Predictable bit-exact value match required + return true; + } + return false; +} +---- + +==== Main Inference and Main Training Profile + +An implementation is compliant to the Main Inference or Main Training profiles if the following all hold for that respective profile: + +* For a graph returning tosa_error the implementation must also return an error +* For a graph returning tosa_valid the implementation must execute the entire graph without error +* For a graph returning tosa_valid and consisting only of integer operators the results must match exactly +* The implementation must report the maximum relative error on a set of standard graphs that contain floating point operators. These graphs will be provided as a future appendix to this specification. + +Note that for graphs containing floating point there is no strict precision requirement that must be met, but that the precision achieved must be reported. + === Operator Selection TOSA defines a set of primitive operators to which higher level operators can be lowered in a consistent way. To remain effective and efficient to implement the set of operators must be constrained to a reasonably small set of primitive operations out of which others can be constructed.
The following principles govern the selection of operators within TOSA. @@ -197,9 +253,9 @@ The padding array represents the before and after pair for each dimension. [source,c++] ---- -REQUIRE((pad == NULL) || size(pad) == 2 * size(shape)); +ERROR_IF((pad != NULL) && size(pad) != 2 * size(shape)); out_t tensor_read(in_t *address, dim_t shape, dim_t index, in_t zero_point=0, dim_t pad=NULL) { - REQUIRE(in_t == int8_t || zero_point == 0) + ERROR_IF(in_t != int8_t && zero_point != 0) unsigned offset = 0; for (i = 0; i < rank(shape); i++) { if (index[i] < 0) { @@ -242,10 +298,10 @@ The following function maps an index in the output tensor to an index in the inp [source,c++] ---- dim_t apply_broadcast(dim_t out_shape, dim_t in_shape, dim_t index) { - REQUIRE(rank(out_shape) == rank(in_shape)); + ERROR_IF(rank(out_shape) != rank(in_shape)); for (i = 0; i < rank(out_shape); i++) { if (out_shape[i] != in_shape[i]) { - REQUIRE(in_shape[i] == 1); + ERROR_IF(in_shape[i] != 1); index[i] = 0; } } diff --git a/chapters/operators.adoc b/chapters/operators.adoc index 75084d1..264063a 100644 --- a/chapters/operators.adoc +++ b/chapters/operators.adoc @@ -19,6 +19,14 @@ An operator processes input operands to produce output operands. An operator can === Operator Graphs +A TOSA graph is a collection of TOSA operators where: + +* The output of an operator in the graph may be connected to one or more inputs of other operators in the graph +* When an output is connected to an input the tensor list shapes must match +* The attributes of the operators are defined and considered part of the graph +* The attributes must be in the valid range permitted for the operator +* The tensor dimensions must be in the valid range permitted for the operator + Some operators, such as control flow operators, take a graph of other operators as an attribute. 
The type tosa_graph_t will denote a graph of operators and the following functions define the tensor shape list for the graph input and outputs: [source,c++] @@ -38,12 +46,14 @@ The following function denotes the execution of a TOSA graph, on an input tensor [source,c++] ---- tosa_execute_graph(tosa_graph_t graph, tosa_list_t input_list, tosa_list_t output_list) { - REQUIRE(tensor_list_shape(input_list)==tosa_input_shape(graph)); - REQUIRE(tensor_list_shape(output_list)==tosa_output_shape(graph)); - - if (tosa_graph_result_unpredictable == true) { - // Result of TOSA graph execution is unpredictable due to calling - // the unpredictable() function during execution. + ERROR_IF(tensor_list_shape(input_list) != tosa_input_shape(graph)); + ERROR_IF(tensor_list_shape(output_list) != tosa_output_shape(graph)); + for_each(operator in graph order) { + ERROR_IF(operator input tensors do not meet requirement of operator Arguments inputs) + ERROR_IF(operator attributes do not meet requirement of operator Arguments attributes) + ERROR_IF(operator output tensors do not meet requirement of operator Arguments outputs) + ERROR_IF(operator data types do not meet requirement of operator Supported Data Types) + } } ---- diff --git a/chapters/pseudocode.adoc b/chapters/pseudocode.adoc index 901211a..d9d8836 100644 --- a/chapters/pseudocode.adoc +++ b/chapters/pseudocode.adoc @@ -15,22 +15,37 @@ This section contains pseudocode functions shared across multiple operators in t === Operator Validation Helpers - The following functions are used to define the valid conditions for TOSA operators. + The REQUIRE function defines the conditions required by the TOSA operator. -When a call to unpredictable() is made, processing defined in the pseudocode for this operator may or may not be executed. -Once unpredictable is called, the whole TOSA graph is considered unpredictable, even if the unpredictable result does not propagate to the graph output. 
+If the conditions are not met then the result of the TOSA graph is marked as unpredictable. +Once the tosa_graph_result is set to tosa_unpredictable, the whole graph is considered unpredictable. + +The ERROR_IF function defines a condition that must set an error if the condition holds and the graph is not unpredictable. +Note that if a graph contains both unpredictable and error statements then the result of tosa_execute_graph() is tosa_unpredictable. +This condition is captured in the ERROR_IF function. + +*Implementation Notes* + +* An implementation is not required to detect unpredictable behaviour. If tosa_execute_graph() returns tosa_unpredictable then the tosa_test_compliance() function does not require any specific output from an implementation. +* An implementation is required to detect errors in a graph that does not have unpredictable behaviour (see tosa_test_compliance). +* An acceptable implementation is to stop and report an error on the first ERROR_IF condition that occurs. This satisfies tosa_test_compliance() even if the tosa_execute_graph() was tosa_unpredictable. +* If the tosa_execute_graph() result is tosa_unpredictable or tosa_error, then there is no requirement on the implementation to execute any portion of the TOSA graph. [source,c++] ---- -void unpredictable() { - // Behavior of this TOSA operator cannot be relied on if this is called. - tosa_graph_result_unpredictable = true; +void REQUIRE(condition) { + // Unpredictable overrides any previous result + if (!(condition)) { + tosa_graph_result = tosa_unpredictable; + } } -void REQUIRE(condition) { - if (not condition) { - unpredictable(); +void ERROR_IF(condition) { + // Error encodes a predictable error state and so is not registered + // if the graph is marked as unpredictable.
+ if (tosa_graph_result != tosa_unpredictable && condition) { + tosa_graph_result = tosa_error; } } ---- diff --git a/chapters/reduction.adoc b/chapters/reduction.adoc index 391591f..c39fecf 100644 --- a/chapters/reduction.adoc +++ b/chapters/reduction.adoc @@ -27,8 +27,8 @@ Reduce a tensor along the given axis with a logical AND operation [source,c] ---- -REQUIRE(0 <= axis && axis < rank(shape1)); -REQUIRE(shape[axis] == 1); +ERROR_IF(axis < 0 || axis >= rank(shape1)); +ERROR_IF(shape[axis] != 1); for_each(index in shape) { tensor_write(output, shape, index, true); } @@ -68,8 +68,8 @@ Reduce a tensor along the given axis with a logical OR operation [source,c] ---- -REQUIRE(0 <= axis && axis < rank(shape1)); -REQUIRE(shape[axis] == 1); +ERROR_IF(axis < 0 || axis >= rank(shape1)); +ERROR_IF(shape[axis] != 1); for_each(index in shape) { tensor_write(output, shape, index, false); } @@ -109,8 +109,8 @@ Reduce a tensor along the given axis with a maximum operation [source,c] ---- -REQUIRE(0 <= axis && axis < rank(shape1)); -REQUIRE(shape[axis] == 1); +ERROR_IF(axis < 0 || axis >= rank(shape1)); +ERROR_IF(shape[axis] != 1); for_each(index in shape) { tensor_write(output, shape, index, minimum); } @@ -156,8 +156,8 @@ Quantization is ignored when doing the REDUCE_MIN operation. The input and outpu [source,c] ---- -REQUIRE(0 <= axis && axis < rank(shape1)); -REQUIRE(shape[axis]==1); +ERROR_IF(axis < 0 || axis >= rank(shape1)); +ERROR_IF(shape[axis] != 1); for_each(index in shape) { tensor_write(output, shape, index, maximum); } @@ -200,8 +200,8 @@ Reduce a tensor along the given axis by computing the product of the axis. [source,c] ---- -REQUIRE(0 <= axis && axis < rank(shape1)); -REQUIRE(shape[axis] == 1); +ERROR_IF(axis < 0 || axis >= rank(shape1)); +ERROR_IF(shape[axis] != 1); for_each(index in shape) { tensor_write(output, shape, index, 1.0); } @@ -241,8 +241,8 @@ Reduce a tensor along the given axis by computing the sum of the axis. 
[source,c] ---- -REQUIRE(0 <= axis && axis < rank(shape1)); -REQUIRE(shape[axis] == 1); +ERROR_IF(axis < 0 || axis >= rank(shape1)); +ERROR_IF(shape[axis] != 1); for_each(index in shape) { tensor_write(output, shape, index, 0); } diff --git a/chapters/tensor_ops.adoc b/chapters/tensor_ops.adoc index 6780b1c..9a1c035 100644 --- a/chapters/tensor_ops.adoc +++ b/chapters/tensor_ops.adoc @@ -31,7 +31,7 @@ None [source,c++] ---- -REQUIRE(axis >= 0 && axis < rank(shape1) && rank(shape1) <= 4); +ERROR_IF(axis < 0 || axis >= rank(shape1) || rank(shape1) > 4); if (axis == 0) { left_shape = []; } else { @@ -42,7 +42,7 @@ if (axis == rank(shape1)-1) { } else { right_shape = shape1[axis+1:rank(shape1) - 1]; } -REQUIRE(flatten(left_shape, right_shape) == shape); +ERROR_IF(flatten(left_shape, right_shape) != shape); for_each(left_index in left_shape) { for_each(right_index in right_shape) { in_t max_value = minimum_value; @@ -97,8 +97,8 @@ This performs an average pooling over the given input tensor. A sliding window o [source,c++] ---- -REQUIRE(in_t == int8_t || input_zp == 0); // Zero point only for int8_t -REQUIRE(in_t == int8_t || output_zp == 0); // Zero point only for int8_t +ERROR_IF(in_t != int8_t && input_zp != 0); // Zero point only for int8_t +ERROR_IF(in_t != int8_t && output_zp != 0); // Zero point only for int8_t pad = flatten([0,0], pad, [0,0]); for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W, 0 <= c < C ) { in_t output_val; @@ -164,8 +164,8 @@ Performs a 2D convolution over the given tensor input, using the weight tensor. [source,c++] ---- -REQUIRE(in_t == int8_t || input_zp == 0); // Zero point only for int8_t -REQUIRE(weight_t == int8_t || weight_zp == 0); +ERROR_IF(in_t != int8_t && input_zp != 0); // Zero point only for int8_t +ERROR_IF(weight_t != int8_t && weight_zp != 0); pad = flatten([0,0], pad, [0,0]); for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W; 0 <= oc < OC) { acc_t acc = 0; @@ -225,8 +225,8 @@ Performs a 3D convolution over the given input tensor. 
[source,c++] ---- -REQUIRE(in_t == int8_t || input_zp == 0); // Zero point only for int8_t -REQUIRE(weight_t == int8_t || weight_zp == 0); +ERROR_IF(in_t != int8_t && input_zp != 0); // Zero point only for int8_t +ERROR_IF(weight_t != int8_t && weight_zp != 0); pad = flatten([0,0], pad, [0,0]); for_each(0 <= n < N, 0 <= od < D, 0 <= oy < H, 0 <= ox < W; 0 <= oc < OC) { acc_t acc = 0; @@ -289,8 +289,8 @@ Performs 2D convolutions separately over each channel of the given tensor input, [source,c++] ---- -REQUIRE(in_t == int8_t || input_zp == 0); // Zero point only for int8_t -REQUIRE(weight_t == int8_t || weight_zp == 0); +ERROR_IF(in_t != int8_t && input_zp != 0); // Zero point only for int8_t +ERROR_IF(weight_t != int8_t && weight_zp != 0); pad = flatten([0,0], pad, [0,0]); for_each(0 <= n(output, [N,OH,OW,OC], index, bias[index[3]]) } diff --git a/chapters/type_conversion.adoc b/chapters/type_conversion.adoc index 35cebe7..b4d372d 100644 --- a/chapters/type_conversion.adoc +++ b/chapters/type_conversion.adoc @@ -93,7 +93,7 @@ Rescale quantized values into a new domain. This function scales by factor: mult |Attribute|out_t|output_zp|-|Output tensor zero point |Input (MT profile) Attribute (BI/MI profiles)|mul_t|multiplier[NC]|-|Scaling multiplier array |Input (MT profile) Attribute (BI/MI profiles)|uint6_t|shift[NC] |-|Scaling shift array -|Input (MT profile) Attribute (BI/MI profiles)|bool_t|scale32|-|if (scale32) mul_t=int32_t else mul_t=int16_t +|Attribute|bool_t|scale32|-|if (scale32) mul_t=int32_t else mul_t=int16_t |Attribute|bool_t|double_round|-|Select double round mode |Attribute|bool_t|per_channel|-|if (per_channel) NC=shape[dims-1] else NC=1 |=== @@ -103,9 +103,10 @@ Rescale quantized values into a new domain. 
This function scales by factor: mult [source,c++] ---- for_each(index in shape) { - REQUIRE(in_t == int8_t || in_t == uint8_t || input_zp == 0); - REQUIRE(out_t == int8_t || out_t == uint8_t || output_zp == 0); - REQUIRE((scale32 && in_t != int48_t_t) || (!scale32 && !double_round)); + ERROR_IF(in_t != int8_t && in_t != uint8_t && input_zp != 0); + ERROR_IF(out_t != int8_t && out_t != uint8_t && output_zp != 0); + ERROR_IF(scale32 && int_t == int48_t); + ERROR_IF(!scale32 && double_round); int48_t value = tensor_read(input, shape, index, input_zp); int c = (per_channel) ? index[dims-1] : 0; int32_t result = (scale32) ? -- cgit v1.2.1