1 files changed, 180 insertions, 47 deletions
diff --git a/docs/user_guide/operator_list.dox b/docs/user_guide/operator_list.dox
index 05cc892d40..e7f1823f8b 100644
--- a/docs/user_guide/operator_list.dox
+++ b/docs/user_guide/operator_list.dox
@@ -1,5 +1,5 @@
 ///
-/// Copyright (c) 2021 Arm Limited.
+/// Copyright (c) 2021-2024 Arm Limited.
 ///
 /// SPDX-License-Identifier: MIT
 ///
@@ -52,9 +52,10 @@ Compute Library supports the following data layouts (fast changing dimension fro
   <ul>
     <li>NHWC: The native layout of Compute Library that delivers the best performance where channels are in the fastest changing dimension
     <li>NCHW: Legacy layout where width is in the fastest changing dimension
+    <li>NDHWC: New data layout for supporting 3D operators
     <li>All: Agnostic to any specific data layout
   </ul>
-where N = batches, C = channels, H = height, W = width
+where N = batches, C = channels, H = height, W = width, D = depth
 
 <table>
 <caption id="multi_row"></caption>
@@ -108,6 +109,26 @@ where N = batches, C = channels, H = height, W = width
     <tr><td>F32<td>F32
     </table>
 <tr>
+  <td rowspan="1">AddMulAdd
+  <td rowspan="1" style="width:200px;"> Performs a fused Add + Mul + Add [+ Relu-based-Activation] operation.
+  <td rowspan="1">
+      <ul>
+       <li>n/a
+      </ul>
+  <td>NEAddMulAdd
+  <td>
+      <ul>
+       <li>All
+      </ul>
+  <td>
+    <table>
+    <tr><th>input1<th>input2<th>bn_mul<th>bn_add<th>add_output<th>final_output
+    <tr><td>QASYMM8<td>QASYMM8<td>QASYMM8<td>QASYMM8<td>QASYMM8<td>QASYMM8
+    <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>QASYMM8_SIGNED
+    <tr><td>F16<td>F16<td>F16<td>F16<td>F16<td>F16
+    <tr><td>F32<td>F32<td>F32<td>F32<td>F32<td>F32
+    </table>
+<tr>
   <td rowspan="2">ArgMinMaxLayer
   <td rowspan="2" style="width:200px;"> Function to calculate the index of the minimum or maximum values in a tensor based on an axis.
   <td rowspan="2">
@@ -125,7 +146,7 @@ where N = batches, C = channels, H = height, W = width
     <tr><th>src<th>dst
     <tr><td>QASYMM8<td>U32, S32
     <tr><td>QASYMM8_SIGNED<td>U32, S32
-    <tr><td>S32<td>U32, S32
+    <tr><td>S32<td>U32, S32, S64
     <tr><td>F16<td>U32, S32
     <tr><td>F32<td>U32, S32
     </table>
@@ -164,9 +185,6 @@ where N = batches, C = channels, H = height, W = width
     <tr><td>QSYMM16<td>QSYMM16<td>QASYMM16
     <tr><td>QSYMM16<td>QSYMM16<td>S32
     <tr><td>U8<td>U8<td>U8
-    <tr><td>U8<td>U8<td>S16
-    <tr><td>U8<td>S16<td>S16
-    <tr><td>S16<td>U8<td>S16
     <tr><td>S16<td>S16<td>S16
     <tr><td>S32<td>S32<td>S32
     <tr><td>F16<td>F16<td>F16
@@ -192,9 +210,6 @@ where N = batches, C = channels, H = height, W = width
     <tr><td>QSYMM16<td>QSYMM16<td>QASYMM16
     <tr><td>QSYMM16<td>QSYMM16<td>S32
     <tr><td>U8<td>U8<td>U8
-    <tr><td>U8<td>U8<td>S16
-    <tr><td>U8<td>S16<td>S16
-    <tr><td>S16<td>U8<td>S16
     <tr><td>S16<td>S16<td>S16
     <tr><td>S32<td>S32<td>S32
     <tr><td>F16<td>F16<td>F16
@@ -442,12 +457,15 @@ where N = batches, C = channels, H = height, W = width
     <table>
     <tr><th>src<th>dst
     <tr><td>U8<td>S8, U16, S16, U32, S32, F16, F32
+    <tr><td>S8<td>U8, U16, S16, U32, S32, F16, F32
     <tr><td>U16<td>U8, S8, S16, U32, S32, F16, F32
     <tr><td>S16<td>U8, S8, U16, U32, S32, F16, F32
     <tr><td>U32<td>U8, S8, U16, S16, S32, F16, F32
     <tr><td>S32<td>U8, S8, U16, S16, U32, F16, F32
-    <tr><td>F16<td>U8, S8, U16, S16, U32, F32
-    <tr><td>F32<td>U8, S8, U16, S16, U32, F16
+    <tr><td>U64<td>U8, S8, U16, S16, U32, S32, F16, F32
+    <tr><td>S64<td>U8, S8, U16, S16, U32, S32, F16, F32
+    <tr><td>F16<td>U8, S8, U16, S16, U32, S32, F32
+    <tr><td>F32<td>U8, S8, U16, S16, U32, S32, F16
     </table>
 <tr>
   <td rowspan="2">ChannelShuffleLayer
@@ -460,6 +478,7 @@ where N = batches, C = channels, H = height, W = width
   <td>
       <ul>
        <li>NCHW
+       <li>NHWC
       </ul>
   <td>
     <table>
@@ -471,6 +490,7 @@ where N = batches, C = channels, H = height, W = width
   <td>
       <ul>
        <li>NCHW
+       <li>NHWC
       </ul>
   <td>
     <table>
@@ -604,6 +624,40 @@ where N = batches, C = channels, H = height, W = width
     <tr><td>QASYMM8_SIGNED<td>QSYMM8_PER_CHANNEL<td>S32<td>QASYMM8_SIGNED
     </table>
 <tr>
+  <td rowspan="2">Conv3D
+  <td rowspan="2" style="width:200px;"> Function to compute a 3D convolution layer.
+  <td rowspan="2">
+      <ul>
+       <li>ANEURALNETWORKS_CONV_3D
+      </ul>
+  <td>NEConv3D
+  <td>
+      <ul>
+       <li>NDHWC
+      </ul>
+  <td>
+    <table>
+    <tr><th>src0<th>src1<th>src2<th>dst
+    <tr><td>F16<td>F16<td>F16<td>F16
+    <tr><td>F32<td>F32<td>F32<td>F32
+    <tr><td>QASYMM8<td>QASYMM8<td>S32<td>QASYMM8
+    <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>S32<td>QASYMM8_SIGNED
+    </table>
+<tr>
+  <td>CLConv3D
+  <td>
+      <ul>
+       <li>NDHWC
+      </ul>
+  <td>
+    <table>
+    <tr><th>src0<th>src1<th>src2<th>dst
+    <tr><td>F16<td>F16<td>F16<td>F16
+    <tr><td>F32<td>F32<td>F32<td>F32
+    <tr><td>QASYMM8<td>QASYMM8<td>S32<td>QASYMM8
+    <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>S32<td>QASYMM8_SIGNED
+    </table>
+<tr>
   <td rowspan="2">Copy
   <td rowspan="2" style="width:200px;"> Function to copy a tensor.
   <td rowspan="2">
@@ -1408,9 +1462,9 @@ where N = batches, C = channels, H = height, W = width
     <tr><td>All<td>All
     </table>
 <tr>
-  <td rowspan="2">FillBorder
-  <td rowspan="2" style="width:200px;"> Function to fill the borders within the XY-planes.
-  <td rowspan="2">
+  <td rowspan="1">FillBorder
+  <td rowspan="1" style="width:200px;"> Function to fill the borders within the XY-planes.
+  <td rowspan="1">
       <ul>
        <li>n/a
       </ul>
@@ -1425,17 +1479,6 @@ where N = batches, C = channels, H = height, W = width
     <tr><td>All<td>All
     </table>
 <tr>
-  <td>CLFillBorder
-  <td>
-      <ul>
-       <li>All
-      </ul>
-  <td>
-    <table>
-    <tr><th>src<th>dst
-    <tr><td>All<td>All
-    </table>
-<tr>
   <td rowspan="2">FlattenLayer
   <td rowspan="2" style="width:200px;"> Reshape a tensor to be 1D
   <td rowspan="2">
@@ -1730,6 +1773,7 @@ where N = batches, C = channels, H = height, W = width
     <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>S32<td>S32
     <tr><td>QASYMM8_SIGNED<td>QSYMM8_PER_CHANNEL<td>S32<td>S32
     <tr><td>QASYMM8_SIGNED<td>QSYMM8<td>S32<td>S32
+    <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>F32<td>F32
     </table>
 <tr>
   <td>CLGEMMLowpMatrixMultiplyCore
@@ -2032,6 +2076,41 @@ where N = batches, C = channels, H = height, W = width
     <tr><td>QASYMM8<td>S32<td>QSYMM16<td>QASYMM8<td>QSYMM16<td>QASYMM8
     </table>
 <tr>
+  <td rowspan="2">MatMul
+  <td rowspan="2" style="width:200px;"> Computes a matrix multiplication in batches.
+  <td rowspan="2">
+      <ul>
+       <li>ANEURALNETWORKS_BATCH_MATMUL
+      </ul>
+  <td>NEMatMul
+  <td>
+      <ul>
+       <li>All
+      </ul>
+  <td>
+    <table>
+    <tr><th>lhs<th>rhs<th>dst
+    <tr><td>F32<td>F32<td>F32
+    <tr><td>F16<td>F16<td>F16
+    <tr><td>BFLOAT16<td>BFLOAT16<td>BFLOAT16
+    <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>QASYMM8_SIGNED
+    <tr><td>QASYMM8<td>QASYMM8<td>QASYMM8
+    </table>
+<tr>
+  <td>CLMatMul
+  <td>
+      <ul>
+       <li>All
+      </ul>
+  <td>
+    <table>
+    <tr><th>lhs<th>rhs<th>dst
+    <tr><td>F32<td>F32<td>F32
+    <tr><td>F16<td>F16<td>F16
+    <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>QASYMM8_SIGNED
+    <tr><td>QASYMM8<td>QASYMM8<td>QASYMM8
+    </table>
+<tr>
   <td rowspan="2">MaxUnpoolingLayer
   <td rowspan="2" style="width:200px;"> Function to perform MaxUnpooling.
   <td rowspan="2">
@@ -2132,6 +2211,27 @@ where N = batches, C = channels, H = height, W = width
     <tr><td>F16<td>F16
     </table>
 <tr>
+  <td rowspan="1">NormalizePlanarYUVLayer
+  <td rowspan="1" style="width:200px;"> Function to compute a planar YUV normalization layer.
+  <td rowspan="1">
+      <ul>
+       <li>n/a
+      </ul>
+  <td>CLNormalizePlanarYUVLayer
+  <td>
+      <ul>
+       <li>NHWC
+       <li>NCHW
+      </ul>
+  <td>
+    <table>
+    <tr><th>src<th>dst
+    <tr><td>F32<td>F32
+    <tr><td>F16<td>F16
+    <tr><td>QASYMM8<td>QASYMM8
+    <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED
+    </table>
+<tr>
   <td rowspan="2">PadLayer
   <td rowspan="2" style="width:200px;"> Function to pad a tensor.
   <td rowspan="2">
@@ -2280,6 +2380,40 @@ where N = batches, C = channels, H = height, W = width
     <tr><td>F32<td>F32
     </table>
 <tr>
+  <td rowspan="2">Pooling3dLayer
+  <td rowspan="2" style="width:200px;"> Function to perform pooling 3D with the specified pooling operation.
+  <td rowspan="2">
+      <ul>
+       <li>n/a
+      </ul>
+  <td>NEPooling3dLayer
+  <td>
+      <ul>
+       <li>NDHWC
+      </ul>
+  <td>
+    <table>
+    <tr><th>src<th>dst
+    <tr><td>F16<td>F16
+    <tr><td>F32<td>F32
+    <tr><td>QASYMM8<td>QASYMM8
+    <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED
+    </table>
+<tr>
+  <td>CLPooling3dLayer
+  <td>
+      <ul>
+       <li>NDHWC
+      </ul>
+  <td>
+    <table>
+    <tr><th>src<th>dst
+    <tr><td>F16<td>F16
+    <tr><td>F32<td>F32
+    <tr><td>QASYMM8<td>QASYMM8
+    <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED
+    </table>
+<tr>
   <td rowspan="2">PReluLayer
   <td rowspan="2" style="width:200px;"> Function to compute the activation layer with the PRELU activation function.
   <td rowspan="2">
@@ -2525,6 +2659,23 @@ where N = batches, C = channels, H = height, W = width
     <tr><td>S32<td>S32
     </table>
 <tr>
+  <td rowspan="1">ReorderLayer
+  <td rowspan="1" style="width:200px;"> Reorders a tensor to a different weights format.
+  <td rowspan="1">
+      <ul>
+       <li>n/a
+      </ul>
+  <td>NEReorderLayer
+  <td>
+      <ul>
+       <li>NCHW
+      </ul>
+  <td>
+    <table>
+    <tr><th>src<th>dst
+    <tr><td>F32<td>F32
+    </table>
+<tr>
   <td rowspan="2">ReorgLayer
   <td rowspan="2" style="width:200px;"> Performs a reorganization layer of input tensor to the output tensor.
   <td rowspan="2">
@@ -2598,7 +2749,7 @@ where N = batches, C = channels, H = height, W = width
   <td>
     <table>
     <tr><th>src0<th>src1<th>dst
-    <tr><td>All<td>U32<td>All
+    <tr><td>All<td>U32, S32<td>All
     </table>
 <tr>
   <td>CLReverse
@@ -2609,7 +2760,7 @@ where N = batches, C = channels, H = height, W = width
   <td>
     <table>
     <tr><th>src0<th>src1<th>dst
-    <tr><td>All<td>U32<td>All
+    <tr><td>All<td>U32, S32<td>All
     </table>
 <tr>
   <td rowspan="2">RNNLayer
@@ -2730,6 +2881,7 @@ where N = batches, C = channels, H = height, W = width
     <tr><td>F16<td>F16
     <tr><td>F32<td>F32
     <tr><td>U8<td>U8
+    <tr><td>S8<td>S8
     <tr><td>S16<td>S16
     </table>
 <tr>
@@ -3100,26 +3252,7 @@ where N = batches, C = channels, H = height, W = width
     <tr><td>F16<td>F16<td>F16<td>F16
     <tr><td>F32<td>F32<td>F32<td>F32
     </table>
-<tr>
-  <td rowspan="1">WinogradInputTransform
-  <td rowspan="1" style="width:200px;"> Function to perform a Winograd transform on the input tensor.
-  <td rowspan="1">
-      <ul>
-       <li>n/a
-      </ul>
-  <td>CLWinogradInputTransform
-  <td>
-      <ul>
-       <li>NHWC
-       <li>NCHW
-      </ul>
-  <td>
-    <table>
-    <tr><th>src<th>dst
-    <tr><td>F16<td>F16
-    <tr><td>F32<td>F32
-    </table>
 </table>
 
 */
-} // namespace
-\ No newline at end of file
+} // namespace