COMPMID-411 - Ported CLGEMMInterleave4x4Kernel and CLGEMMTranspose1xWKernel to support 8 bit fixed point

Change-Id: If236c9047ed536e808a0ed26e97e1799ca938e03
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/78529
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Moritz Pflanzer <moritz.pflanzer@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
author: Gian Marco Iodice <gianmarco.iodice@arm.com> 2017-06-22 12:09:49 +0100
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-09-17 14:14:20 +0100
commit: 9f89baebb81e6a01ec06fe916564da45eb204f34 (patch)
tree: edd21a058a2701f0e89456717e10011ab44d473a /arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h
parent: 10c672c2e21bb77b7234d9d3611267400dce7ae0 (diff)
download: ComputeLibrary-9f89baebb81e6a01ec06fe916564da45eb204f34.tar.gz
1 files changed, 3 insertions, 5 deletions
diff --git a/arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h b/arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h
index 8d44a4c4fa..9657a2af45 100644
--- a/arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h
+++ b/arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h
@@ -30,7 +30,7 @@ namespace arm_compute
 {
 class ICLTensor;
 
-/** OpenCL kernel which transposes the elements of a matrix in chunks of 1x4 if the input data type is F32 or in chunks of 1x8 if the input data type is F16.
+/** OpenCL kernel which transposes the elements of a matrix in chunks of 1xW, where W is equal to (16 / element size of the tensor)
  *
  * Following an example of how the transposition1xW works when the input data type is F32
  *
@@ -62,9 +62,7 @@ class ICLTensor;
  * \end{array} \right)
  * @f]
  *
- * @note If the input data type is F32, the output matrix will have the following shape: [ height * 4, width / 4 ]
- * @note If the input data type is F16, the output matrix will have the following shape: [ height * 8, width / 8 ]
- * @note If the input data type is U8, the output matrix will have the following shape: [ height * 16, width / 16 ]
+ * @note The output matrix will have the following shape: [ height * W, ceil(width / W) ], where W = (16 / element size of the tensor)
  *
  */
 class CLGEMMTranspose1xWKernel : public ICLSimple2DKernel
@@ -72,7 +70,7 @@ class CLGEMMTranspose1xWKernel : public ICLSimple2DKernel
 public:
     /** Initialise the kernel's input and output.
      *
-     * @param[in]  input  Input tensor. Data types supported: U8/F16/F32
+     * @param[in]  input  Input tensor. Data types supported: U8/S8/QS8/U16/S16/F16/U32/S32/F32
      * @param[out] output Output tensor. Data type supported: same as @p input
      */
     void configure(const ICLTensor *input, ICLTensor *output);
author	Gian Marco Iodice <gianmarco.iodice@arm.com>	2017-06-22 12:09:49 +0100
committer	Anthony Barbier <anthony.barbier@arm.com>	2018-09-17 14:14:20 +0100
commit	9f89baebb81e6a01ec06fe916564da45eb204f34 (patch)
tree	edd21a058a2701f0e89456717e10011ab44d473a /arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h
parent	10c672c2e21bb77b7234d9d3611267400dce7ae0 (diff)
download	ComputeLibrary-9f89baebb81e6a01ec06fe916564da45eb204f34.tar.gz