diff options
author | Gian Marco Iodice <gianmarco.iodice@arm.com> | 2017-06-22 12:09:49 +0100 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-09-17 14:14:20 +0100 |
commit | 9f89baebb81e6a01ec06fe916564da45eb204f34 (patch) | |
tree | edd21a058a2701f0e89456717e10011ab44d473a /arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h | |
parent | 10c672c2e21bb77b7234d9d3611267400dce7ae0 (diff) | |
download | ComputeLibrary-9f89baebb81e6a01ec06fe916564da45eb204f34.tar.gz |
COMPMID-411 - Ported CLGEMMInterleave4x4Kernel and CLGEMMTranspose1xWKernel to support 8 bit fixed point
Change-Id: If236c9047ed536e808a0ed26e97e1799ca938e03
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/78529
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Moritz Pflanzer <moritz.pflanzer@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Diffstat (limited to 'arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h')
-rw-r--r-- | arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h | 8 |
1 file changed, 3 insertions, 5 deletions
diff --git a/arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h b/arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h
index 8d44a4c4fa..9657a2af45 100644
--- a/arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h
+++ b/arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h
@@ -30,7 +30,7 @@ namespace arm_compute
 {
 class ICLTensor;
 
-/** OpenCL kernel which transposes the elements of a matrix in chunks of 1x4 if the input data type is F32 or in chunks of 1x8 if the input data type is F16.
+/** OpenCL kernel which transposes the elements of a matrix in chunks of 1xW, where W is equal to (16 / element size of the tensor)
  *
  * Following an example of how the transposition1xW works when the input data type is F32
  *
@@ -62,9 +62,7 @@ class ICLTensor;
  * \end{array} \right)
  * @f]
  *
- * @note If the input data type is F32, the output matrix will have the following shape: [ height * 4, width / 4 ]
- * @note If the input data type is F16, the output matrix will have the following shape: [ height * 8, width / 8 ]
- * @note If the input data type is U8, the output matrix will have the following shape: [ height * 16, width / 16 ]
+ * @note The output matrix will have the following shape: [ height * W, ceil(width / W) ], where W = (16 / element size of the tensor)
  *
  */
 class CLGEMMTranspose1xWKernel : public ICLSimple2DKernel
@@ -72,7 +70,7 @@ class CLGEMMTranspose1xWKernel : public ICLSimple2DKernel
 public:
     /** Initialise the kernel's input and output.
      *
-     * @param[in] input Input tensor. Data types supported: U8/F16/F32
+     * @param[in] input Input tensor. Data types supported: U8/S8/QS8/U16/S16/F16/U32/S32/F32
      * @param[out] output Output tensor. Data type supported: same as @p input
      */
     void configure(const ICLTensor *input, ICLTensor *output);