aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/cl_kernels/common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_native.cl
diff options
context:
space:
mode:
authorramelg01 <ramy.elgammal@arm.com>2021-11-11 10:05:00 +0000
committerGian Marco Iodice <gianmarco.iodice@arm.com>2021-11-20 17:38:07 +0000
commit9cca592c13f1e688a35698641069bcd37a525f0c (patch)
tree8f69b654c5f543d918ec5d61140af30bbadbd390 /src/core/CL/cl_kernels/common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_native.cl
parente330fb41d85d7058f74902ce1d47b2dc00b10a52 (diff)
downloadComputeLibrary-9cca592c13f1e688a35698641069bcd37a525f0c.tar.gz
Improve start-up timer for GeMM (floating-point):
- Pass M,N,K at runtime as kernel parameters - Add a guard macro to compile only kernel of interest - Move reshpaing kernels to gemm_utils.cl - Remove the fallback reshaping kernel with Y-Padding support Resolves: COMPMID-4888 Signed-off-by: Ramy Elgammal <ramy.elgammal@arm.com> Change-Id: Ida3851326f0b77e410633271de9ecca106e37931 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6662 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels/common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_native.cl')
-rw-r--r--src/core/CL/cl_kernels/common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_native.cl13
1 files changed, 9 insertions, 4 deletions
diff --git a/src/core/CL/cl_kernels/common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_native.cl b/src/core/CL/cl_kernels/common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_native.cl
index 4665d612f5..d8453ed80a 100644
--- a/src/core/CL/cl_kernels/common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_native.cl
+++ b/src/core/CL/cl_kernels/common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_native.cl
@@ -27,7 +27,7 @@
#include "repeat.h"
/** (EXPERIMENTAL_POST_OPS) gemm_mm_native kernel */
-#if defined(M0) && defined(N0) && defined(K0) && defined(K) && defined(DATA_TYPE) && defined(PARTIAL_STORE_M0) && defined(PARTIAL_STORE_N0)
+#if defined(M0) && defined(N0) && defined(K0) && defined(DATA_TYPE) && defined(PARTIAL_STORE_M0) && defined(PARTIAL_STORE_N0)
#if defined(P2_ELTWISE_OP) && defined(P2_ELTWISE_ARG1_HEIGHT) && defined(P2_ELTWISE_ARG1_WIDTH)
#define VFMA(a, b, c) \
@@ -107,6 +107,7 @@
#error "M0 not supported"
#endif // M0 not supported
+#if defined(GEMM_MM_NATIVE_POST_ACT_ELTWISE_OP_ACT)
/** This OpenCL kernel computes the matrix multiplication between 2 matrices plus 3 post ops:
* Post op 1: activation (optional)
* Post op 2: elementwise op
@@ -140,8 +141,11 @@ __kernel void gemm_mm_native_post_act_eltwise_op_act(IMAGE_DECLARATION(lhs),
#if defined(BETA)
uint bias_stride_z,
#endif //defined(BETA)
- uint dst_stride_z,
- uint eltwise_operand_stride_z
+ uint dst_stride_z,
+ uint eltwise_operand_stride_z,
+ const int M,
+ const int N,
+ const int K
#if defined(REINTERPRET_INPUT_AS_3D)
,
uint lhs_cross_plane_pad
@@ -360,5 +364,6 @@ __kernel void gemm_mm_native_post_act_eltwise_op_act(IMAGE_DECLARATION(lhs),
// Store output block
STORE_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, c, dst_addr, dst_stride_y, zout, PARTIAL_STORE_M0, PARTIAL_STORE_N0, cond_y, cond_x);
}
+#endif // defined(GEMM_MM_NATIVE_POST_ACT_ELTWISE_OP_ACT)
#endif // defined(P2_ELTWISE_OP) && defined(P2_ELTWISE_ARG1_HEIGHT) && defined(P2_ELTWISE_ARG1_WIDTH)
-#endif // defined(M0) && defined(N0) && defined(K0) && defined(K) && defined(DATA_TYPE) && defined(PARTIAL_STORE_M0) && defined(PARTIAL_STORE_N0)
+#endif // defined(M0) && defined(N0) && defined(K0) && defined(DATA_TYPE) && defined(PARTIAL_STORE_M0) && defined(PARTIAL_STORE_N0)