aboutsummaryrefslogtreecommitdiff
path: root/arm_compute
diff options
context:
space:
mode:
authorGian Marco Iodice <gianmarco.iodice@arm.com>2017-06-22 12:09:49 +0100
committerAnthony Barbier <anthony.barbier@arm.com>2018-09-17 14:14:20 +0100
commit9f89baebb81e6a01ec06fe916564da45eb204f34 (patch)
treeedd21a058a2701f0e89456717e10011ab44d473a /arm_compute
parent10c672c2e21bb77b7234d9d3611267400dce7ae0 (diff)
downloadComputeLibrary-9f89baebb81e6a01ec06fe916564da45eb204f34.tar.gz
COMPMID-411 - Ported CLGEMMInterleave4x4Kernel and CLGEMMTranspose1xWKernel to support 8 bit fixed point
Change-Id: If236c9047ed536e808a0ed26e97e1799ca938e03 Reviewed-on: http://mpd-gerrit.cambridge.arm.com/78529 Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com> Reviewed-by: Moritz Pflanzer <moritz.pflanzer@arm.com> Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Diffstat (limited to 'arm_compute')
-rw-r--r--arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h2
-rw-r--r--arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h8
-rw-r--r--arm_compute/runtime/CL/CLFunctions.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLGEMMInterleave4x4.h2
-rw-r--r--arm_compute/runtime/CL/functions/CLGEMMTranspose1xW.h47
5 files changed, 53 insertions, 7 deletions
diff --git a/arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h b/arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h
index 3ac7b3c4fa..9466b16a91 100644
--- a/arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h
+++ b/arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h
@@ -64,7 +64,7 @@ public:
CLGEMMInterleave4x4Kernel &operator=(CLGEMMInterleave4x4Kernel &&) = default;
/** Initialise the kernel's input and output.
*
- * @param[in] input Input tensor. Data types supported: U8/S8/U16/S16/F16/U32/S32/F32
+ * @param[in] input Input tensor. Data types supported: U8/S8/QS8/U16/S16/F16/U32/S32/F32
* @param[out] output Output tensor. Data type supported: same as @p input
*/
void configure(const ICLTensor *input, ICLTensor *output);
diff --git a/arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h b/arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h
index 8d44a4c4fa..9657a2af45 100644
--- a/arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h
+++ b/arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h
@@ -30,7 +30,7 @@ namespace arm_compute
{
class ICLTensor;
-/** OpenCL kernel which transposes the elements of a matrix in chunks of 1x4 if the input data type is F32 or in chunks of 1x8 if the input data type is F16.
+/** OpenCL kernel which transposes the elements of a matrix in chunks of 1xW, where W is equal to (16 / element size of the tensor)
*
* Following an example of how the transposition1xW works when the input data type is F32
*
@@ -62,9 +62,7 @@ class ICLTensor;
* \end{array} \right)
* @f]
*
- * @note If the input data type is F32, the output matrix will have the following shape: [ height * 4, width / 4 ]
- * @note If the input data type is F16, the output matrix will have the following shape: [ height * 8, width / 8 ]
- * @note If the input data type is U8, the output matrix will have the following shape: [ height * 16, width / 16 ]
+ * @note The output matrix will have the following shape: [ height * W, ceil(width / W) ], where W = (16 / element size of the tensor)
*
*/
class CLGEMMTranspose1xWKernel : public ICLSimple2DKernel
@@ -72,7 +70,7 @@ class CLGEMMTranspose1xWKernel : public ICLSimple2DKernel
public:
/** Initialise the kernel's input and output.
*
- * @param[in] input Input tensor. Data types supported: U8/F16/F32
+ * @param[in] input Input tensor. Data types supported: U8/S8/QS8/U16/S16/F16/U32/S32/F32
* @param[out] output Output tensor. Data type supported: same as @p input
*/
void configure(const ICLTensor *input, ICLTensor *output);
diff --git a/arm_compute/runtime/CL/CLFunctions.h b/arm_compute/runtime/CL/CLFunctions.h
index 82929ba139..862b2c1c40 100644
--- a/arm_compute/runtime/CL/CLFunctions.h
+++ b/arm_compute/runtime/CL/CLFunctions.h
@@ -54,6 +54,7 @@
#include "arm_compute/runtime/CL/functions/CLGEMM.h"
#include "arm_compute/runtime/CL/functions/CLGEMMInterleave4x4.h"
#include "arm_compute/runtime/CL/functions/CLGEMMLowp.h"
+#include "arm_compute/runtime/CL/functions/CLGEMMTranspose1xW.h"
#include "arm_compute/runtime/CL/functions/CLGaussian3x3.h"
#include "arm_compute/runtime/CL/functions/CLGaussian5x5.h"
#include "arm_compute/runtime/CL/functions/CLGaussianPyramid.h"
diff --git a/arm_compute/runtime/CL/functions/CLGEMMInterleave4x4.h b/arm_compute/runtime/CL/functions/CLGEMMInterleave4x4.h
index b80136b328..8c755aeab2 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMInterleave4x4.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMInterleave4x4.h
@@ -40,7 +40,7 @@ class CLGEMMInterleave4x4 : public ICLSimpleFunction
public:
/** Initialise the kernel's inputs, output
*
- * @param[in] input First input tensor. Data types supported: U8/S8/U16/S16/F16/U32/S32/F32
+ * @param[in] input First input tensor. Data types supported: U8/S8/QS8/U16/S16/F16/U32/S32/F32
* @param[out] output Output tensor. Data type supported: same as @p input
*/
void configure(const ICLTensor *input, ICLTensor *output);
diff --git a/arm_compute/runtime/CL/functions/CLGEMMTranspose1xW.h b/arm_compute/runtime/CL/functions/CLGEMMTranspose1xW.h
new file mode 100644
index 0000000000..866c17b51e
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLGEMMTranspose1xW.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLGEMMTRANSPOSE1XW_H__
+#define __ARM_COMPUTE_CLGEMMTRANSPOSE1XW_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+namespace arm_compute
+{
+/** Basic function to execute CLGEMMTranspose1xWKernel. This function calls the following OpenCL kernels:
+ *
+ * -# @ref CLGEMMTranspose1xWKernel
+ *
+ */
+class CLGEMMTranspose1xW : public ICLSimpleFunction
+{
+public:
+ /** Initialise the kernel's inputs, output
+ *
+ * @param[in] input First input tensor. Data type supported: U8/S8/QS8/U16/S16/F16/U32/S32/F32/
+ * @param[out] output Output tensor. Data type supported: same as @p input
+ */
+ void configure(const ICLTensor *input, ICLTensor *output);
+};
+}
+#endif /*__ARM_COMPUTE_CLGEMMTRANSPOSE1XW_H__ */