aboutsummaryrefslogtreecommitdiff
path: root/arm_compute/runtime/CL/functions/CLGEMM.h
diff options
context:
space:
mode:
authorGian Marco Iodice <gianmarco.iodice@arm.com>2018-12-12 10:18:04 +0000
committerGian Marco Iodice <gianmarco.iodice@arm.com>2018-12-14 14:57:48 +0000
commitbf9731edfa0439cad4d70efc3065e71e199c62b8 (patch)
tree71340a3d04a6294744c642ed6e4a56c0e8a77592 /arm_compute/runtime/CL/functions/CLGEMM.h
parent92e278d5f462c930af1947883a5f48c10586ae9c (diff)
downloadComputeLibrary-bf9731edfa0439cad4d70efc3065e71e199c62b8.tar.gz
COMPMID-1687: Optimize CLGEMMMatrixMultiplyKernel for Mali-G76 - Part1
The current implementation is limited just to FP32 Change-Id: I185ab57e483e879d7c301e9cc3033efc8b41e244 Reviewed-on: https://review.mlplatform.org/389 Reviewed-by: Anthony Barbier <Anthony.barbier@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Diffstat (limited to 'arm_compute/runtime/CL/functions/CLGEMM.h')
-rw-r--r--arm_compute/runtime/CL/functions/CLGEMM.h40
1 files changed, 25 insertions, 15 deletions
diff --git a/arm_compute/runtime/CL/functions/CLGEMM.h b/arm_compute/runtime/CL/functions/CLGEMM.h
index c4513f29d9..7d47194e56 100644
--- a/arm_compute/runtime/CL/functions/CLGEMM.h
+++ b/arm_compute/runtime/CL/functions/CLGEMM.h
@@ -27,6 +27,9 @@
#include "arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMMatrixAdditionKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
+#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h"
+#include "arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
+#include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h"
#include "arm_compute/runtime/CL/CLMemoryGroup.h"
#include "arm_compute/runtime/CL/CLTensor.h"
@@ -39,9 +42,12 @@ class ICLTensor;
/** Basic function to execute GEMM on OpenCL. This function calls the following OpenCL kernels:
*
- * -# @ref CLGEMMInterleave4x4Kernel (only if the reshaped GEMM is selected by the heuristic model)
- * -# @ref CLGEMMTranspose1xWKernel (only if the reshaped GEMM is selected by the heuristic model)
- * -# @ref CLGEMMMatrixMultiplyKernel
+ * -# @ref CLGEMMInterleave4x4Kernel (only if the reshaped GEMM is selected by the heuristic model and the GPU target is NOT Mali-G76)
+ * -# @ref CLGEMMReshapeLHSMatrixKernel (only if the reshaped GEMM is selected by the heuristic model and the GPU target IS Mali-G76)
+ * -# @ref CLGEMMTranspose1xWKernel (only if the reshaped GEMM is selected by the heuristic model and the GPU target is NOT Mali-G76)
+ * -# @ref CLGEMMReshapeRHSMatrixKernel (only if the reshaped GEMM is selected by the heuristic model and the GPU target IS Mali-G76)
+ * -# @ref CLGEMMMatrixMultiplyKernel (if GPU target is NOT G76 or if the reshaped GEMM is NOT selected)
+ * -# @ref CLGEMMMatrixMultiplyReshapedKernel (only if the reshaped GEMM is selected by the heuristic model and the GPU target IS Mali-G76)
* -# @ref CLGEMMMatrixAdditionKernel (if c != nullptr and beta != 0.0)
*
*/
@@ -100,18 +106,22 @@ public:
void prepare() override;
private:
- CLMemoryGroup _memory_group;
- CLGEMMInterleave4x4Kernel _interleave_kernel;
- CLGEMMTranspose1xWKernel _transpose_kernel;
- CLGEMMMatrixMultiplyKernel _mm_kernel;
- CLGEMMMatrixAdditionKernel _ma_kernel;
- CLTensor _tmp_a;
- CLTensor _tmp_b;
- const ICLTensor *_original_b;
- bool _is_interleaved_transposed;
- bool _run_addition;
- bool _reshape_b_only_on_first_run;
- bool _is_prepared;
+ CLMemoryGroup _memory_group;
+ CLGEMMInterleave4x4Kernel _interleave_kernel; // TODO - COMPMID-1835: Remove this kernel and use CLGEMMReshapeLHSMatrixKernel
+ CLGEMMTranspose1xWKernel _transpose_kernel; // TODO - COMPMID-1836: Remove this kernel and use CLGEMMReshapeRHSMatrixKernel
+ CLGEMMMatrixMultiplyKernel _mm_kernel;
+ CLGEMMMatrixAdditionKernel _ma_kernel;
+ CLGEMMReshapeLHSMatrixKernel _reshape_lhs_kernel;
+ CLGEMMReshapeRHSMatrixKernel _reshape_rhs_kernel;
+ CLGEMMMatrixMultiplyReshapedKernel _mm_reshaped_kernel;
+ CLTensor _tmp_a;
+ CLTensor _tmp_b;
+ const ICLTensor *_original_b;
+ bool _is_interleaved_transposed;
+ bool _run_addition;
+ bool _reshape_b_only_on_first_run;
+ bool _is_prepared;
+ bool _is_G76_path; // TODO: To be removed once completed COMPMID-1835 and COMPMID-1836
};
}