COMPMID-477 - Optimized batched case in CLConvolutionLayer

Change-Id: I4ef18f49f1da0cb816aaa0762466b940792c15ed
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/84162
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
author: Gian Marco Iodice <gianmarco.iodice@arm.com> 2017-08-15 11:45:22 +0100
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:35:24 +0000
commit: edfa9f463bed084f8b0953557202b2a1e56da817 (patch)
tree: 5d1e92926d112fde05dcbc61324d96f73f692390 /arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h
parent: dc460f13ee65e27b2a428e44c2d80afb1f516a99 (diff)
download: ComputeLibrary-edfa9f463bed084f8b0953557202b2a1e56da817.tar.gz
1 files changed, 9 insertions, 9 deletions
diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h
index dec63e0679..a768a19914 100644
--- a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h
+++ b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h
@@ -30,10 +30,10 @@ namespace arm_compute
 {
 class ICLTensor;
 
-/** OpenCL kernel to multiply two input matrices "A" and "B" or to multiply a vector "A" by a matrix "B". All elements of the output matrix/vector will be multiplied by alpha
+/** OpenCL kernel to multiply two input matrices "A" and "B". All elements of the output matrix will be multiplied by alpha
  *
- * @note If the output tensor is a matrix, the implementation assumes that the input tensors @p input0 and @p input1 are both matrices and reshaped respectively with @ref CLGEMMInterleave4x4Kernel" and @ref CLGEMMTranspose1xWKernel
- * @note If the output tensor is a vector and the data type is F32, the implementation assumes that the first input tensor @p input0 is a vector and the second input tensor @p input1 a matrix. The implementation also assumes that both tensors have not been reshaped
+ * @note If the input tensors @p input0 and @p input1 have been reshaped respectively with @ref CLGEMMInterleave4x4Kernel and @ref CLGEMMTranspose1xWKernel,
+ *       the flag @p is_interleaved_transposed must be set to true
  *
  * @attention The second input tensor must have at least 2 dimensions (matrix)
  *
@@ -53,13 +53,13 @@ public:
     CLGEMMMatrixMultiplyKernel &operator=(CLGEMMMatrixMultiplyKernel &&) = default;
     /** Initialise the kernel's input, output and alpha
      *
-     * @param[in]  input0 Input tensor containing the interleaved Matrix A or the vector A. Data types supported: QS8/QS16/F16/F32
-     * @param[in]  input1 Input tensor containing the transposed Matrix B if the first input tensor A is not a vector.
-     *                    If the output tensor is a vector, input1 must contain the matrix B not reshaped. Data type supported: same as @p input0
-     * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0
-     * @param[in]  alpha  Weight of the matrix product
+     * @param[in]  input0                    Input tensor containing the Matrix A. Data types supported: QS8/QS16/F16/F32
+     * @param[in]  input1                    Input tensor containing the Matrix B. Data type supported: same as @p input0
+     * @param[out] output                    Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0
+     * @param[in]  alpha                     Weight of the matrix product
+     * @param[in]  is_interleaved_transposed (Optional) True if input0 and input1 have been reshaped respectively using @ref CLGEMMInterleave4x4Kernel and @ref CLGEMMTranspose1xWKernel
      */
-    void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, float alpha);
+    void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, float alpha, bool is_interleaved_transposed = true);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
author	Gian Marco Iodice <gianmarco.iodice@arm.com>	2017-08-15 11:45:22 +0100
committer	Anthony Barbier <anthony.barbier@arm.com>	2018-11-02 16:35:24 +0000
commit	edfa9f463bed084f8b0953557202b2a1e56da817 (patch)
tree	5d1e92926d112fde05dcbc61324d96f73f692390 /arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h
parent	dc460f13ee65e27b2a428e44c2d80afb1f516a99 (diff)
download	ComputeLibrary-edfa9f463bed084f8b0953557202b2a1e56da817.tar.gz