From bdb6b0bb156588dc39fd5084d4c91d05b5148610 Mon Sep 17 00:00:00 2001
From: Gian Marco Iodice <gianmarco.iodice@arm.com>
Date: Fri, 30 Jun 2017 12:21:00 +0100
Subject: COMPMID-433 - Port NEGEMM to support 16 bit fixed point

Change-Id: I82de74d7027bbc8a00a4d6671e968785280d5f6c
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/79498
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Moritz Pflanzer <moritz.pflanzer@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
---
 arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h  | 4 ++--
 arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h | 4 ++--
 arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h | 2 +-
 arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h   | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'arm_compute/core/NEON/kernels')

diff --git a/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h b/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h
index b9884ffb57..84b82d0ffc 100644
--- a/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h
+++ b/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h
@@ -56,7 +56,7 @@ public:
     NEGEMMInterleave4x4Kernel();
     /** Initialise the kernel's input and output.
      *
-     * @param[in]  input  Input tensor. Data types supported: U8/S8/QS8/U16/S16/F16/U32/S32/F32
+     * @param[in]  input  Input tensor. Data types supported: U8/S8/QS8/QS16/U16/S16/F16/U32/S32/F32
      * @param[out] output Output tensor which stores the interleaved matrix. Data type supported: same as @p input.
      */
     void configure(const ITensor *input, ITensor *output);
@@ -67,7 +67,7 @@ public:
 private:
     /** Common signature for all the transpose functions
      *
-     * @param[in]  input  An input tensor. Data types supported: U8/S8/QS8/U16/S16/F16/U32/S32/F32
+     * @param[in]  input  An input tensor. Data types supported: U8/S8/QS8/U16/S16/QS16/F16/U32/S32/F32
      * @param[out] output The output tensor. Data type supported: same as @p input
      * @param[in]  window Region on which to execute the kernel.
      */
diff --git a/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h b/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h
index 1ab52fa2f2..5cdcc95ee9 100644
--- a/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h
+++ b/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h
@@ -55,7 +55,7 @@ public:
      *
      * @note The input and output tensor must have the same dimensions
      *
-     * @param[in]      input  Input tensor (Matrix C). Data types supported: QS8/F16/F32
+     * @param[in]      input  Input tensor (Matrix C). Data types supported: QS8/QS16/F16/F32
      * @param[in, out] output Output tensor. If this kernel is used to finalize the GEMM result, output contains the result obtained by the kernel @ref NEGEMMMatrixMultiplyKernel. Data type supported: the same as @p input.
      * @param[in]      beta   Weight of matrix C
      */
@@ -67,7 +67,7 @@ public:
 private:
     /** Common signature for all the matrix addition functions
      *
-     * @param[in]  input  An input tensor. Data types supported: QS8/F16/F32
+     * @param[in]  input  An input tensor. Data types supported: QS8/QS16/F16/F32
      * @param[out] output The output tensor. Data type supported: same as @p input
      * @param[in]  window Region on which to execute the kernel.
      * @param[in]  beta   Weight of matrix C
diff --git a/arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h b/arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h
index a684945828..e82fc6f5d7 100644
--- a/arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h
+++ b/arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h
@@ -54,7 +54,7 @@ public:
      * @note If the output tensor is a matrix, the input matrices @p input0 and @p input1 should be the output of the kernels: @ref NEGEMMInterleave4x4Kernel and @ref NEGEMMTranspose1xWKernel
      *       These two kernels change the layout of the original matrices to be more cache-friendly.
      *
-     * @param[in]  input0 Input tensor containing the interleaved Matrix A or the vector A. Data types supported: F16/F32
+     * @param[in]  input0 Input tensor containing the interleaved Matrix A or the vector A. Data types supported: QS8/QS16/F16/F32
      * @param[in]  input1 Input tensor containing the transposed Matrix B if the first input tensor A is not a vector.
      *                    If the output tensor is a vector, input1 must contain the matrix B not reshaped. Data type supported: same as @p input0
      * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0.
diff --git a/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h b/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h
index 5d8a3697cb..22c07e5c9a 100644
--- a/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h
+++ b/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h
@@ -70,7 +70,7 @@ class NEGEMMTranspose1xWKernel : public INESimpleKernel
 public:
     /** Initialise the kernel's input and output.
      *
-     * @param[in]  input  Input tensor. Data types supported: U8/S8/QS8/U16/S16/F16/U32/S32/F32
+     * @param[in]  input  Input tensor. Data types supported: U8/S8/QS8/U16/S16/QS16/F16/U32/S32/F32
      * @param[out] output Output tensor. Data type supported: same as @p input.
      */
     void configure(const ITensor *input, ITensor *output);
-- 
cgit v1.2.1