From edfa9f463bed084f8b0953557202b2a1e56da817 Mon Sep 17 00:00:00 2001 From: Gian Marco Iodice Date: Tue, 15 Aug 2017 11:45:22 +0100 Subject: COMPMID-477 - Optimized batched case in CLConvolutionLayer Change-Id: I4ef18f49f1da0cb816aaa0762466b940792c15ed Reviewed-on: http://mpd-gerrit.cambridge.arm.com/84162 Tested-by: Kaizen Reviewed-by: Anthony Barbier --- .../core/CL/kernels/CLGEMMMatrixMultiplyKernel.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h') diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h index dec63e0679..a768a19914 100644 --- a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h +++ b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h @@ -30,10 +30,10 @@ namespace arm_compute { class ICLTensor; -/** OpenCL kernel to multiply two input matrices "A" and "B" or to multiply a vector "A" by a matrix "B". All elements of the output matrix/vector will be multiplied by alpha +/** OpenCL kernel to multiply two input matrices "A" and "B" . All elements of the output matrix will be multiplied by alpha * - * @note If the output tensor is a matrix, the implementation assumes that the input tensors @p input0 and @p input1 are both matrices and reshaped respectively with @ref CLGEMMInterleave4x4Kernel" and @ref CLGEMMTranspose1xWKernel - * @note If the output tensor is a vector and the data type is F32, the implementation assumes that the first input tensor @p input0 is a vector and the second input tensor @p input1 a matrix. The implementation also assumes that both tensors have not been reshaped + * @note If the input tensors @p input0 and @p input1 have been reshaped respectively with @ref CLGEMMInterleave4x4Kernel" and @ref CLGEMMTranspose1xWKernel, + * the flag @p is_interleaved_transposed must be set to true * * @attention The second input tensor must have at least 2 dimensions (matrix) * @@ -53,13 +53,13 @@ public: CLGEMMMatrixMultiplyKernel &operator=(CLGEMMMatrixMultiplyKernel &&) = default; /** Initialise the kernel's input, output and alpha * - * @param[in] input0 Input tensor containing the interleaved Matrix A or the vector A. Data types supported: QS8/QS16/F16/F32 - * @param[in] input1 Input tensor containing the transposed Matrix B if the first input tensor A is not a vector. - * If the output tensor is a vector, input1 must contain the matrix B not reshaped. Data type supported: same as @p input0 - * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0 - * @param[in] alpha Weight of the matrix product + * @param[in] input0 Input tensor containing the Matrix A. Data types supported: QS8/QS16/F16/F32 + * @param[in] input1 Input tensor containing the Matrix B. Data type supported: same as @p input0 + * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0 + * @param[in] alpha Weight of the matrix product + * @param[in] is_interleaved_transposed (Optional) True if input0 and input1 have been reshaped respectively using @ref CLGEMMInterleave4x4Kernel and @ref CLGEMMTranspose1xWKernel */ - void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, float alpha); + void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, float alpha, bool is_interleaved_transposed = true); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; -- cgit v1.2.1