diff options
author | ramelg01 <ramy.elgammal@arm.com> | 2021-11-11 10:05:00 +0000 |
---|---|---|
committer | Gian Marco Iodice <gianmarco.iodice@arm.com> | 2021-11-20 17:38:07 +0000 |
commit | 9cca592c13f1e688a35698641069bcd37a525f0c (patch) | |
tree | 8f69b654c5f543d918ec5d61140af30bbadbd390 /src/core/CL/cl_kernels/common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_native.cl | |
parent | e330fb41d85d7058f74902ce1d47b2dc00b10a52 (diff) | |
download | ComputeLibrary-9cca592c13f1e688a35698641069bcd37a525f0c.tar.gz |
Improve start-up timer for GeMM (floating-point):
- Pass M,N,K at runtime as kernel parameters
- Add a guard macro to compile only kernel of interest
- Move reshpaing kernels to gemm_utils.cl
- Remove the fallback reshaping kernel with Y-Padding support
Resolves: COMPMID-4888
Signed-off-by: Ramy Elgammal <ramy.elgammal@arm.com>
Change-Id: Ida3851326f0b77e410633271de9ecca106e37931
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6662
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels/common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_native.cl')
-rw-r--r-- | src/core/CL/cl_kernels/common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_native.cl | 13 |
1 files changed, 9 insertions, 4 deletions
diff --git a/src/core/CL/cl_kernels/common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_native.cl b/src/core/CL/cl_kernels/common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_native.cl index 4665d612f5..d8453ed80a 100644 --- a/src/core/CL/cl_kernels/common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_native.cl +++ b/src/core/CL/cl_kernels/common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_native.cl @@ -27,7 +27,7 @@ #include "repeat.h" /** (EXPERIMENTAL_POST_OPS) gemm_mm_native kernel */ -#if defined(M0) && defined(N0) && defined(K0) && defined(K) && defined(DATA_TYPE) && defined(PARTIAL_STORE_M0) && defined(PARTIAL_STORE_N0) +#if defined(M0) && defined(N0) && defined(K0) && defined(DATA_TYPE) && defined(PARTIAL_STORE_M0) && defined(PARTIAL_STORE_N0) #if defined(P2_ELTWISE_OP) && defined(P2_ELTWISE_ARG1_HEIGHT) && defined(P2_ELTWISE_ARG1_WIDTH) #define VFMA(a, b, c) \ @@ -107,6 +107,7 @@ #error "M0 not supported" #endif // M0 not supported +#if defined(GEMM_MM_NATIVE_POST_ACT_ELTWISE_OP_ACT) /** This OpenCL kernel computes the matrix multiplication between 2 matrices plus 3 post ops: * Post op 1: activation (optional) * Post op 2: elementwise op @@ -140,8 +141,11 @@ __kernel void gemm_mm_native_post_act_eltwise_op_act(IMAGE_DECLARATION(lhs), #if defined(BETA) uint bias_stride_z, #endif //defined(BETA) - uint dst_stride_z, - uint eltwise_operand_stride_z + uint dst_stride_z, + uint eltwise_operand_stride_z, + const int M, + const int N, + const int K #if defined(REINTERPRET_INPUT_AS_3D) , uint lhs_cross_plane_pad @@ -360,5 +364,6 @@ __kernel void gemm_mm_native_post_act_eltwise_op_act(IMAGE_DECLARATION(lhs), // Store output block STORE_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, c, dst_addr, dst_stride_y, zout, PARTIAL_STORE_M0, PARTIAL_STORE_N0, cond_y, cond_x); } +#endif // defined(GEMM_MM_NATIVE_POST_ACT_ELTWISE_OP_ACT) #endif // defined(P2_ELTWISE_OP) && defined(P2_ELTWISE_ARG1_HEIGHT) && defined(P2_ELTWISE_ARG1_WIDTH) -#endif // defined(M0) && defined(N0) && defined(K0) && defined(K) && defined(DATA_TYPE) && defined(PARTIAL_STORE_M0) && defined(PARTIAL_STORE_N0) +#endif // defined(M0) && defined(N0) && defined(K0) && defined(DATA_TYPE) && defined(PARTIAL_STORE_M0) && defined(PARTIAL_STORE_N0) |