aboutsummaryrefslogtreecommitdiff
path: root/arm_compute
diff options
context:
space:
mode:
authorGian Marco Iodice <gianmarco.iodice@arm.com>2017-06-30 12:21:00 +0100
committerAnthony Barbier <anthony.barbier@arm.com>2018-09-17 14:15:39 +0100
commitbdb6b0bb156588dc39fd5084d4c91d05b5148610 (patch)
treebb3c3645dd9abbf20462dace7828bb7ec459dc4d /arm_compute
parentac69aa137e360340fe9f148f019d93af6c3d8336 (diff)
downloadComputeLibrary-bdb6b0bb156588dc39fd5084d4c91d05b5148610.tar.gz
COMPMID-433 - Port NEGEMM to support 16 bit fixed point
Change-Id: I82de74d7027bbc8a00a4d6671e968785280d5f6c Reviewed-on: http://mpd-gerrit.cambridge.arm.com/79498 Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com> Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com> Reviewed-by: Moritz Pflanzer <moritz.pflanzer@arm.com> Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'arm_compute')
-rw-r--r--arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h4
-rw-r--r--arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h4
-rw-r--r--arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h2
-rw-r--r--arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h2
-rw-r--r--arm_compute/runtime/NEON/functions/NEGEMM.h2
5 files changed, 7 insertions, 7 deletions
diff --git a/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h b/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h
index b9884ffb57..84b82d0ffc 100644
--- a/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h
+++ b/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h
@@ -56,7 +56,7 @@ public:
NEGEMMInterleave4x4Kernel();
/** Initialise the kernel's input and output.
*
- * @param[in] input Input tensor. Data types supported: U8/S8/QS8/U16/S16/F16/U32/S32/F32
+ * @param[in] input Input tensor. Data types supported: U8/S8/QS8/QS16/U16/S16/F16/U32/S32/F32
* @param[out] output Output tensor which stores the interleaved matrix. Data type supported: same as @p input.
*/
void configure(const ITensor *input, ITensor *output);
@@ -67,7 +67,7 @@ public:
private:
/** Common signature for all the transpose functions
*
- * @param[in] input An input tensor. Data types supported: U8/S8/QS8/U16/S16/F16/U32/S32/F32
+ * @param[in] input An input tensor. Data types supported: U8/S8/QS8/U16/S16/QS16/F16/U32/S32/F32
* @param[out] output The output tensor. Data type supported: same as @p input
* @param[in] window Region on which to execute the kernel.
*/
diff --git a/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h b/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h
index 1ab52fa2f2..5cdcc95ee9 100644
--- a/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h
+++ b/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h
@@ -55,7 +55,7 @@ public:
*
* @note The input and output tensor must have the same dimensions
*
- * @param[in] input Input tensor (Matrix C). Data types supported: QS8/F16/F32
+ * @param[in] input Input tensor (Matrix C). Data types supported: QS8/QS16/F16/F32
* @param[in, out] output Output tensor. If this kernel is used to finalize the GEMM result, output contains the result obtained by the kernel @ref NEGEMMMatrixMultiplyKernel. Data type supported: the same as @p input.
* @param[in] beta Weight of matrix C
*/
@@ -67,7 +67,7 @@ public:
private:
/** Common signature for all the matrix addition functions
*
- * @param[in] input An input tensor. Data types supported: QS8/F16/F32
+ * @param[in] input An input tensor. Data types supported: QS8/QS16/F16/F32
* @param[out] output The output tensor. Data type supported: same as @p input
* @param[in] window Region on which to execute the kernel.
* @param[in] beta Weight of matrix C
diff --git a/arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h b/arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h
index a684945828..e82fc6f5d7 100644
--- a/arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h
+++ b/arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h
@@ -54,7 +54,7 @@ public:
* @note If the output tensor is a matrix, the input matrices @p input0 and @p input1 should be the output of the kernels: @ref NEGEMMInterleave4x4Kernel and @ref NEGEMMTranspose1xWKernel
* These two kernels change the layout of the original matrices to be more cache-friendly.
*
- * @param[in] input0 Input tensor containing the interleaved Matrix A or the vector A. Data types supported: F16/F32
+ * @param[in] input0 Input tensor containing the interleaved Matrix A or the vector A. Data types supported: QS8/QS16/F16/F32
* @param[in] input1 Input tensor containing the transposed Matrix B if the first input tensor A is not a vector.
* If the output tensor is a vector, input1 must contain the matrix B not reshaped. Data type supported: same as @p input0
* @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0.
diff --git a/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h b/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h
index 5d8a3697cb..22c07e5c9a 100644
--- a/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h
+++ b/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h
@@ -70,7 +70,7 @@ class NEGEMMTranspose1xWKernel : public INESimpleKernel
public:
/** Initialise the kernel's input and output.
*
- * @param[in] input Input tensor. Data types supported: U8/S8/QS8/U16/S16/F16/U32/S32/F32
+ * @param[in] input Input tensor. Data types supported: U8/S8/QS8/U16/S16/QS16/F16/U32/S32/F32
* @param[out] output Output tensor. Data type supported: same as @p input.
*/
void configure(const ITensor *input, ITensor *output);
diff --git a/arm_compute/runtime/NEON/functions/NEGEMM.h b/arm_compute/runtime/NEON/functions/NEGEMM.h
index a40aa910a5..3c8d7cf9b7 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMM.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMM.h
@@ -52,7 +52,7 @@ public:
* @note GEMM: General Matrix Multiply - [alpha * A * B + beta * C].
* @note GEMM: The tensors a, b, c, d must have the same data type. You should not mix data types when calling this function.
*
- * @param[in] a First input tensor (Matrix A or Vector A). Data type supported: QS8/F16/F32
+ * @param[in] a First input tensor (Matrix A or Vector A). Data type supported: QS8/QS16/F16/F32
* @param[in] b Second input tensor (Matrix B). Data type supported: same as @p a
* @param[in] c Third input tensor (Matrix C). It can be a nullptr if just the multiplication between @p a and @p b is needed. Data type supported: same as @p a
* @param[out] d Output tensor. Data type supported: same as @p a