From bdb6b0bb156588dc39fd5084d4c91d05b5148610 Mon Sep 17 00:00:00 2001 From: Gian Marco Iodice Date: Fri, 30 Jun 2017 12:21:00 +0100 Subject: COMPMID-433 - Port NEGEMM to support 16 bit fixed point Change-Id: I82de74d7027bbc8a00a4d6671e968785280d5f6c Reviewed-on: http://mpd-gerrit.cambridge.arm.com/79498 Reviewed-by: Georgios Pinitas Tested-by: Kaizen Reviewed-by: Moritz Pflanzer Reviewed-by: Anthony Barbier --- arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h | 4 ++-- arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h | 4 ++-- arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h | 2 +- arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arm_compute/core/NEON/kernels') diff --git a/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h b/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h index b9884ffb57..84b82d0ffc 100644 --- a/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h +++ b/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h @@ -56,7 +56,7 @@ public: NEGEMMInterleave4x4Kernel(); /** Initialise the kernel's input and output. * - * @param[in] input Input tensor. Data types supported: U8/S8/QS8/U16/S16/F16/U32/S32/F32 + * @param[in] input Input tensor. Data types supported: U8/S8/QS8/QS16/U16/S16/F16/U32/S32/F32 * @param[out] output Output tensor which stores the interleaved matrix. Data type supported: same as @p input. */ void configure(const ITensor *input, ITensor *output); @@ -67,7 +67,7 @@ public: private: /** Common signature for all the transpose functions * - * @param[in] input An input tensor. Data types supported: U8/S8/QS8/U16/S16/F16/U32/S32/F32 + * @param[in] input An input tensor. Data types supported: U8/S8/QS8/U16/S16/QS16/F16/U32/S32/F32 * @param[out] output The output tensor. Data type supported: same as @p input * @param[in] window Region on which to execute the kernel. */ diff --git a/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h b/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h index 1ab52fa2f2..5cdcc95ee9 100644 --- a/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h +++ b/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h @@ -55,7 +55,7 @@ public: * * @note The input and output tensor must have the same dimensions * - * @param[in] input Input tensor (Matrix C). Data types supported: QS8/F16/F32 + * @param[in] input Input tensor (Matrix C). Data types supported: QS8/QS16/F16/F32 * @param[in, out] output Output tensor. If this kernel is used to finalize the GEMM result, output contains the result obtained by the kernel @ref NEGEMMMatrixMultiplyKernel. Data type supported: the same as @p input. * @param[in] beta Weight of matrix C */ @@ -67,7 +67,7 @@ public: private: /** Common signature for all the matrix addition functions * - * @param[in] input An input tensor. Data types supported: QS8/F16/F32 + * @param[in] input An input tensor. Data types supported: QS8/QS16/F16/F32 * @param[out] output The output tensor. Data type supported: same as @p input * @param[in] window Region on which to execute the kernel. * @param[in] beta Weight of matrix C diff --git a/arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h b/arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h index a684945828..e82fc6f5d7 100644 --- a/arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h +++ b/arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h @@ -54,7 +54,7 @@ public: * @note If the output tensor is a matrix, the input matrices @p input0 and @p input1 should be the output of the kernels: @ref NEGEMMInterleave4x4Kernel and @ref NEGEMMTranspose1xWKernel * These two kernels change the layout of the original matrices to be more cache-friendly. * - * @param[in] input0 Input tensor containing the interleaved Matrix A or the vector A. Data types supported: F16/F32 + * @param[in] input0 Input tensor containing the interleaved Matrix A or the vector A. Data types supported: QS8/QS16/F16/F32 * @param[in] input1 Input tensor containing the transposed Matrix B if the first input tensor A is not a vector. * If the output tensor is a vector, input1 must contain the matrix B not reshaped. Data type supported: same as @p input0 * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0. diff --git a/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h b/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h index 5d8a3697cb..22c07e5c9a 100644 --- a/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h +++ b/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h @@ -70,7 +70,7 @@ class NEGEMMTranspose1xWKernel : public INESimpleKernel public: /** Initialise the kernel's input and output. * - * @param[in] input Input tensor. Data types supported: U8/S8/QS8/U16/S16/F16/U32/S32/F32 + * @param[in] input Input tensor. Data types supported: U8/S8/QS8/U16/S16/QS16/F16/U32/S32/F32 * @param[out] output Output tensor. Data type supported: same as @p input. */ void configure(const ITensor *input, ITensor *output); -- cgit v1.2.1