From b0f342ec315397e4b87d3a9cc3d12f3645c153bc Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Tue, 21 May 2019 13:32:43 +0100 Subject: COMPMID-2171: Fuse bias addition with CLGEMMMatrixMultiplyReshapedOnlyRHSKernel Change-Id: I1d1e1f28fe7022309d72900893e8368820ca0f89 Signed-off-by: giuros01 Reviewed-on: https://review.mlplatform.org/c/1259 Comments-Addressed: Arm Jenkins Reviewed-by: Michele Di Giorgio Reviewed-by: Gian Marco Iodice Tested-by: Arm Jenkins --- src/core/CL/cl_kernels/gemm_helpers.h | 174 ++++++++++++++++++++++++++++++---- 1 file changed, 157 insertions(+), 17 deletions(-) (limited to 'src/core/CL/cl_kernels/gemm_helpers.h') diff --git a/src/core/CL/cl_kernels/gemm_helpers.h b/src/core/CL/cl_kernels/gemm_helpers.h index 2c76992b31..cd2d39b433 100644 --- a/src/core/CL/cl_kernels/gemm_helpers.h +++ b/src/core/CL/cl_kernels/gemm_helpers.h @@ -360,69 +360,69 @@ #define CONVERT_STORE_BLOCK(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) CONVERT_STORE_BLOCK_STR(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) #define SCALE_ROW_1(DATA_TYPE, BASENAME, SCALE) \ - BASENAME##0 = BASENAME##0 * (DATA_TYPE)SCALE; + BASENAME##0 *= (DATA_TYPE)SCALE; #define SCALE_ROW_2(DATA_TYPE, BASENAME, SCALE) \ SCALE_ROW_1(DATA_TYPE, BASENAME, SCALE) \ - BASENAME##1 = BASENAME##1 * (DATA_TYPE)SCALE; + BASENAME##1 *= (DATA_TYPE)SCALE; #define SCALE_ROW_3(DATA_TYPE, BASENAME, SCALE) \ SCALE_ROW_2(DATA_TYPE, BASENAME, SCALE) \ - BASENAME##2 = BASENAME##2 * (DATA_TYPE)SCALE; + BASENAME##2 *= (DATA_TYPE)SCALE; #define SCALE_ROW_4(DATA_TYPE, BASENAME, SCALE) \ SCALE_ROW_3(DATA_TYPE, BASENAME, SCALE) \ - BASENAME##3 = BASENAME##3 * (DATA_TYPE)SCALE; + BASENAME##3 *= (DATA_TYPE)SCALE; #define SCALE_ROW_5(DATA_TYPE, BASENAME, SCALE) \ SCALE_ROW_4(DATA_TYPE, BASENAME, SCALE) \ - BASENAME##4 = BASENAME##4 * (DATA_TYPE)SCALE; + BASENAME##4 *= (DATA_TYPE)SCALE; #define SCALE_ROW_6(DATA_TYPE, BASENAME, SCALE) \ SCALE_ROW_5(DATA_TYPE, BASENAME, SCALE) \ - BASENAME##5 = BASENAME##5 * 
(DATA_TYPE)SCALE; + BASENAME##5 *= (DATA_TYPE)SCALE; #define SCALE_ROW_7(DATA_TYPE, BASENAME, SCALE) \ SCALE_ROW_6(DATA_TYPE, BASENAME, SCALE) \ - BASENAME##6 = BASENAME##6 * (DATA_TYPE)SCALE; + BASENAME##6 *= (DATA_TYPE)SCALE; #define SCALE_ROW_8(DATA_TYPE, BASENAME, SCALE) \ SCALE_ROW_7(DATA_TYPE, BASENAME, SCALE) \ - BASENAME##7 = BASENAME##7 * (DATA_TYPE)SCALE; + BASENAME##7 *= (DATA_TYPE)SCALE; #define SCALE_ROW_9(DATA_TYPE, BASENAME, SCALE) \ SCALE_ROW_8(DATA_TYPE, BASENAME, SCALE) \ - BASENAME##8 = BASENAME##8 * (DATA_TYPE)SCALE; + BASENAME##8 *= (DATA_TYPE)SCALE; #define SCALE_ROW_10(DATA_TYPE, BASENAME, SCALE) \ SCALE_ROW_9(DATA_TYPE, BASENAME, SCALE) \ - BASENAME##9 = BASENAME##9 * (DATA_TYPE)SCALE; + BASENAME##9 *= (DATA_TYPE)SCALE; #define SCALE_ROW_11(DATA_TYPE, BASENAME, SCALE) \ SCALE_ROW_10(DATA_TYPE, BASENAME, SCALE) \ - BASENAME##A = BASENAME##A * (DATA_TYPE)SCALE; + BASENAME##A *= (DATA_TYPE)SCALE; #define SCALE_ROW_12(DATA_TYPE, BASENAME, SCALE) \ SCALE_ROW_11(DATA_TYPE, BASENAME, SCALE) \ - BASENAME##B = BASENAME##B * (DATA_TYPE)SCALE; + BASENAME##B *= (DATA_TYPE)SCALE; #define SCALE_ROW_13(DATA_TYPE, BASENAME, SCALE) \ SCALE_ROW_12(DATA_TYPE, BASENAME, SCALE) \ - BASENAME##C = BASENAME##C * (DATA_TYPE)SCALE; + BASENAME##C *= (DATA_TYPE)SCALE; #define SCALE_ROW_14(DATA_TYPE, BASENAME, SCALE) \ SCALE_ROW_13(DATA_TYPE, BASENAME, SCALE) \ - BASENAME##D = BASENAME##D * (DATA_TYPE)SCALE; + BASENAME##D *= (DATA_TYPE)SCALE; #define SCALE_ROW_15(DATA_TYPE, BASENAME, SCALE) \ SCALE_ROW_14(DATA_TYPE, BASENAME, SCALE) \ - BASENAME##E = BASENAME##E * (DATA_TYPE)SCALE; + BASENAME##E *= (DATA_TYPE)SCALE; #define SCALE_ROW_16(DATA_TYPE, BASENAME, SCALE) \ SCALE_ROW_15(DATA_TYPE, BASENAME, SCALE) \ - BASENAME##F = BASENAME##F * (DATA_TYPE)SCALE; + BASENAME##F *= (DATA_TYPE)SCALE; -// SCALE_ROW_n scales the variables BASENAME##0 to BASENAME##(n-1) by SCALE +// SCALE_BLOCK_n scales the variables BASENAME##0 to BASENAME##(n-1) by SCALE #define SCALE_BLOCK_STR(N, 
DATA_TYPE, BASENAME, SCALE) SCALE_ROW_##N(DATA_TYPE, BASENAME, SCALE) /** Scale elements stored in variables BASENAME##0 to BASENAME##(N-1) by SCALE * Supported cases N=1,2,3..16, for variables BASENAME[0..N] @@ -479,3 +479,143 @@ #define TRANSPOSE_K0XN0(K0, N0, BASENAME, B) \ CONCAT(TRANSPOSE_K0X, N0) \ (K0, BASENAME, B); + +#define ADD_ROW_1(BASENAME, BIAS) \ + BASENAME##0 += BIAS##0; + +#define ADD_ROW_2(BASENAME, BIAS) \ + ADD_ROW_1(BASENAME, BIAS) \ + BASENAME##1 += BIAS##1; + +#define ADD_ROW_3(BASENAME, BIAS) \ + ADD_ROW_2(BASENAME, BIAS) \ + BASENAME##2 += BIAS##2; + +#define ADD_ROW_4(BASENAME, BIAS) \ + ADD_ROW_3(BASENAME, BIAS) \ + BASENAME##3 += BIAS##3; + +#define ADD_ROW_5(BASENAME, BIAS) \ + ADD_ROW_4(BASENAME, BIAS) \ + BASENAME##4 += BIAS##4; + +#define ADD_ROW_6(BASENAME, BIAS) \ + ADD_ROW_5(BASENAME, BIAS) \ + BASENAME##5 += BIAS##5; + +#define ADD_ROW_7(BASENAME, BIAS) \ + ADD_ROW_6(BASENAME, BIAS) \ + BASENAME##6 += BIAS##6; + +#define ADD_ROW_8(BASENAME, BIAS) \ + ADD_ROW_7(BASENAME, BIAS) \ + BASENAME##7 += BIAS##7; + +#define ADD_ROW_9(BASENAME, BIAS) \ + ADD_ROW_8(BASENAME, BIAS) \ + BASENAME##8 += BIAS##8; + +#define ADD_ROW_10(BASENAME, BIAS) \ + ADD_ROW_9(BASENAME, BIAS) \ + BASENAME##9 += BIAS##9; + +#define ADD_ROW_11(BASENAME, BIAS) \ + ADD_ROW_10(BASENAME, BIAS) \ + BASENAME##A += BIAS##A; + +#define ADD_ROW_12(BASENAME, BIAS) \ + ADD_ROW_11(BASENAME, BIAS) \ + BASENAME##B += BIAS##B; + +#define ADD_ROW_13(BASENAME, BIAS) \ + ADD_ROW_12(BASENAME, BIAS) \ + BASENAME##C += BIAS##C; + +#define ADD_ROW_14(BASENAME, BIAS) \ + ADD_ROW_13(BASENAME, BIAS) \ + BASENAME##D += BIAS##D; + +#define ADD_ROW_15(BASENAME, BIAS) \ + ADD_ROW_14(BASENAME, BIAS) \ + BASENAME##E += BIAS##E; + +#define ADD_ROW_16(BASENAME, BIAS) \ + ADD_ROW_15(BASENAME, BIAS) \ + BASENAME##F += BIAS##F; + +// ADD_ROW_n add the variables BIAS##0... 
BIAS##(n-1) to BASENAME##0 to BASENAME##(n-1) +#define ADD_BLOCK_STR(N, BASENAME, BIAS) ADD_ROW_##N(BASENAME, BIAS) +/** Add elements stored in variables BIAS##0 ... BIAS##(N-1) to BASENAME##0 ... BASENAME##(N-1) + * Supported cases N=1,2,3..16, for variables BASENAME[0..N-1] + */ +#define ADD_BLOCK(N, BASENAME, BIAS) ADD_BLOCK_STR(N, BASENAME, BIAS) + +#define ADD_ROW_BROADCAST_1(BASENAME, BIAS) \ + BASENAME##0 += BIAS; + +#define ADD_ROW_BROADCAST_2(BASENAME, BIAS) \ + ADD_ROW_BROADCAST_1(BASENAME, BIAS) \ + BASENAME##1 += BIAS; + +#define ADD_ROW_BROADCAST_3(BASENAME, BIAS) \ + ADD_ROW_BROADCAST_2(BASENAME, BIAS) \ + BASENAME##2 += BIAS; + +#define ADD_ROW_BROADCAST_4(BASENAME, BIAS) \ + ADD_ROW_BROADCAST_3(BASENAME, BIAS) \ + BASENAME##3 += BIAS; + +#define ADD_ROW_BROADCAST_5(BASENAME, BIAS) \ + ADD_ROW_BROADCAST_4(BASENAME, BIAS) \ + BASENAME##4 += BIAS; + +#define ADD_ROW_BROADCAST_6(BASENAME, BIAS) \ + ADD_ROW_BROADCAST_5(BASENAME, BIAS) \ + BASENAME##5 += BIAS; + +#define ADD_ROW_BROADCAST_7(BASENAME, BIAS) \ + ADD_ROW_BROADCAST_6(BASENAME, BIAS) \ + BASENAME##6 += BIAS; + +#define ADD_ROW_BROADCAST_8(BASENAME, BIAS) \ + ADD_ROW_BROADCAST_7(BASENAME, BIAS) \ + BASENAME##7 += BIAS; + +#define ADD_ROW_BROADCAST_9(BASENAME, BIAS) \ + ADD_ROW_BROADCAST_8(BASENAME, BIAS) \ + BASENAME##8 += BIAS; + +#define ADD_ROW_BROADCAST_10(BASENAME, BIAS) \ + ADD_ROW_BROADCAST_9(BASENAME, BIAS) \ + BASENAME##9 += BIAS; + +#define ADD_ROW_BROADCAST_11(BASENAME, BIAS) \ + ADD_ROW_BROADCAST_10(BASENAME, BIAS) \ + BASENAME##A += BIAS; + +#define ADD_ROW_BROADCAST_12(BASENAME, BIAS) \ + ADD_ROW_BROADCAST_11(BASENAME, BIAS) \ + BASENAME##B += BIAS; + +#define ADD_ROW_BROADCAST_13(BASENAME, BIAS) \ + ADD_ROW_BROADCAST_12(BASENAME, BIAS) \ + BASENAME##C += BIAS; + +#define ADD_ROW_BROADCAST_14(BASENAME, BIAS) \ + ADD_ROW_BROADCAST_13(BASENAME, BIAS) \ + BASENAME##D += BIAS; + +#define ADD_ROW_BROADCAST_15(BASENAME, BIAS) \ + ADD_ROW_BROADCAST_14(BASENAME, BIAS) \ + BASENAME##E += BIAS; + +#define ADD_ROW_BROADCAST_16(BASENAME, 
BIAS) \ + ADD_ROW_BROADCAST_15(BASENAME, BIAS) \ + BASENAME##F += BIAS; + +// ADD_ROW_BROADCAST_n adds the variable BIAS to each of BASENAME##0 to BASENAME##(n-1) +#define ADD_BLOCK_BROADCAST_STR(N, BASENAME, BIAS) ADD_ROW_BROADCAST_##N(BASENAME, BIAS) +/** Add the single variable BIAS (broadcast) to each of BASENAME##0 ... BASENAME##(N-1) + * Supported cases N=1,2,3..16, for variables BASENAME[0..N-1] + */ +#define ADD_BLOCK_BROADCAST(N, BASENAME, BIAS) ADD_BLOCK_BROADCAST_STR(N, BASENAME, BIAS) -- cgit v1.2.1