COMPMID-433 - Port NEGEMM to support 16 bit fixed point

Change-Id: I82de74d7027bbc8a00a4d6671e968785280d5f6c Reviewed-on: http://mpd-gerrit.cambridge.arm.com/79498 Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com> Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com> Reviewed-by: Moritz Pflanzer <moritz.pflanzer@arm.com> Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
author: Gian Marco Iodice <gianmarco.iodice@arm.com> 2017-06-30 12:21:00 +0100
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-09-17 14:15:39 +0100
commit: bdb6b0bb156588dc39fd5084d4c91d05b5148610 (patch)
tree: bb3c3645dd9abbf20462dace7828bb7ec459dc4d /arm_compute
parent: ac69aa137e360340fe9f148f019d93af6c3d8336 (diff)
download: ComputeLibrary-bdb6b0bb156588dc39fd5084d4c91d05b5148610.tar.gz
5 files changed, 7 insertions, 7 deletions
diff --git a/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h b/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h
index b9884ffb57..84b82d0ffc 100644
--- a/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h
+++ b/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h
@@ -56,7 +56,7 @@ public:
     NEGEMMInterleave4x4Kernel();
     /** Initialise the kernel's input and output.
      *
-     * @param[in]  input  Input tensor. Data types supported: U8/S8/QS8/U16/S16/F16/U32/S32/F32
+     * @param[in]  input  Input tensor. Data types supported: U8/S8/QS8/QS16/U16/S16/F16/U32/S32/F32
      * @param[out] output Output tensor which stores the interleaved matrix. Data type supported: same as @p input.
      */
     void configure(const ITensor *input, ITensor *output);
@@ -67,7 +67,7 @@ public:
 private:
     /** Common signature for all the transpose functions
      *
-     * @param[in]  input  An input tensor. Data types supported: U8/S8/QS8/U16/S16/F16/U32/S32/F32
+     * @param[in]  input  An input tensor. Data types supported: U8/S8/QS8/U16/S16/QS16/F16/U32/S32/F32
      * @param[out] output The output tensor. Data type supported: same as @p input
      * @param[in]  window Region on which to execute the kernel.
      */
diff --git a/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h b/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h
index 1ab52fa2f2..5cdcc95ee9 100644
--- a/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h
+++ b/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h
@@ -55,7 +55,7 @@ public:
      *
      * @note The input and output tensor must have the same dimensions
      *
-     * @param[in]      input  Input tensor (Matrix C). Data types supported: QS8/F16/F32
+     * @param[in]      input  Input tensor (Matrix C). Data types supported: QS8/QS16/F16/F32
      * @param[in, out] output Output tensor. If this kernel is used to finalize the GEMM result, output contains the result obtained by the kernel @ref NEGEMMMatrixMultiplyKernel. Data type supported: the same as @p input.
      * @param[in]      beta   Weight of matrix C
      */
@@ -67,7 +67,7 @@ public:
 private:
     /** Common signature for all the matrix addition functions
      *
-     * @param[in]  input  An input tensor. Data types supported: QS8/F16/F32
+     * @param[in]  input  An input tensor. Data types supported: QS8/QS16/F16/F32
      * @param[out] output The output tensor. Data type supported: same as @p input
      * @param[in]  window Region on which to execute the kernel.
      * @param[in]  beta   Weight of matrix C
diff --git a/arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h b/arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h
index a684945828..e82fc6f5d7 100644
--- a/arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h
+++ b/arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h
@@ -54,7 +54,7 @@ public:
      * @note If the output tensor is a matrix, the input matrices @p input0 and @p input1 should be the output of the kernels: @ref NEGEMMInterleave4x4Kernel and @ref NEGEMMTranspose1xWKernel
      *       These two kernels change the layout of the original matrices to be more cache-friendly.
      *
-     * @param[in]  input0 Input tensor containing the interleaved Matrix A or the vector A. Data types supported: F16/F32
+     * @param[in]  input0 Input tensor containing the interleaved Matrix A or the vector A. Data types supported: QS8/QS16/F16/F32
      * @param[in]  input1 Input tensor containing the transposed Matrix B if the first input tensor A is not a vector.
      *                    If the output tensor is a vector, input1 must contain the matrix B not reshaped. Data type supported: same as @p input0
      * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0.
diff --git a/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h b/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h
index 5d8a3697cb..22c07e5c9a 100644
--- a/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h
+++ b/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h
@@ -70,7 +70,7 @@ class NEGEMMTranspose1xWKernel : public INESimpleKernel
 public:
     /** Initialise the kernel's input and output.
      *
-     * @param[in]  input  Input tensor. Data types supported: U8/S8/QS8/U16/S16/F16/U32/S32/F32
+     * @param[in]  input  Input tensor. Data types supported: U8/S8/QS8/U16/S16/QS16/F16/U32/S32/F32
      * @param[out] output Output tensor. Data type supported: same as @p input.
      */
     void configure(const ITensor *input, ITensor *output);
diff --git a/arm_compute/runtime/NEON/functions/NEGEMM.h b/arm_compute/runtime/NEON/functions/NEGEMM.h
index a40aa910a5..3c8d7cf9b7 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMM.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMM.h
@@ -52,7 +52,7 @@ public:
      * @note GEMM: General Matrix Multiply - [alpha * A * B + beta * C].
      * @note GEMM: The tensors a, b, c, d must have the same data type. You should not mix data types when calling this function.
      *
-     * @param[in]  a     First input tensor  (Matrix A or Vector A). Data type supported: QS8/F16/F32
+     * @param[in]  a     First input tensor  (Matrix A or Vector A). Data type supported: QS8/QS16/F16/F32
      * @param[in]  b     Second input tensor (Matrix B). Data type supported: same as @p a
      * @param[in]  c     Third input tensor  (Matrix C). It can be a nullptr if just the multiplication between @p a and @p b is needed. Data type supported: same as @p a
      * @param[out] d     Output tensor. Data type supported: same as @p a
author	Gian Marco Iodice <gianmarco.iodice@arm.com>	2017-06-30 12:21:00 +0100
committer	Anthony Barbier <anthony.barbier@arm.com>	2018-09-17 14:15:39 +0100
commit	bdb6b0bb156588dc39fd5084d4c91d05b5148610 (patch)
tree	bb3c3645dd9abbf20462dace7828bb7ec459dc4d /arm_compute
parent	ac69aa137e360340fe9f148f019d93af6c3d8336 (diff)
download	ComputeLibrary-bdb6b0bb156588dc39fd5084d4c91d05b5148610.tar.gz