diff options
Diffstat (limited to 'src/core/CL/cl_kernels/load_store_utility.h')
-rw-r--r-- | src/core/CL/cl_kernels/load_store_utility.h | 84 |
1 files changed, 47 insertions, 37 deletions
diff --git a/src/core/CL/cl_kernels/load_store_utility.h b/src/core/CL/cl_kernels/load_store_utility.h index 56b1538c6f..4daf0adc89 100644 --- a/src/core/CL/cl_kernels/load_store_utility.h +++ b/src/core/CL/cl_kernels/load_store_utility.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020 Arm Limited. + * Copyright (c) 2020, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -223,8 +223,10 @@ * @param[in] Z The offset in z-axis direction * @{ */ -#define STORE_BLOCK_STR(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) STORE_ROW_##M0(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) -#define STORE_BLOCK(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) STORE_BLOCK_STR(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) +#define STORE_BLOCK_STR(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \ + STORE_ROW_##M0(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) +#define STORE_BLOCK(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \ + STORE_BLOCK_STR(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) /** @} */ // end of group STORE_BLOCK /** Convert and store a block of the given size M0xN0 @@ -245,8 +247,10 @@ * @param[in] Z The offset in z-axis direction * @{ */ -#define CONVERT_STORE_BLOCK_STR(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) CONVERT_STORE_ROW_##M0(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) -#define CONVERT_STORE_BLOCK(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) CONVERT_STORE_BLOCK_STR(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) +#define CONVERT_STORE_BLOCK_STR(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \ + CONVERT_STORE_ROW_##M0(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) +#define CONVERT_STORE_BLOCK(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \ + CONVERT_STORE_BLOCK_STR(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) /** @} */ // end of group CONVERT_STORE_BLOCK /** Partially store the 0 to (n-1)th rows of the given variables @@ -365,8 +369,10 @@ * @param[in] Z The offset in z-axis direction * @{ */ -#define STORE_BLOCK_PARTIAL_STR(STORE_M0, STORE_N0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) STORE_ROW_PARTIAL_##STORE_M0(N0, STORE_N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) -#define STORE_BLOCK_PARTIAL(STORE_M0, STORE_N0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) STORE_BLOCK_PARTIAL_STR(STORE_M0, STORE_N0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) +#define STORE_BLOCK_PARTIAL_STR(STORE_M0, STORE_N0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \ + STORE_ROW_PARTIAL_##STORE_M0(N0, STORE_N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) +#define STORE_BLOCK_PARTIAL(STORE_M0, STORE_N0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \ + STORE_BLOCK_PARTIAL_STR(STORE_M0, STORE_N0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) /** Store a block that can be partial in both x and y dimensions * * @note in cases @p PARTIAL_STORE_N0 != 1, 2, 3, 4, 8, 16, extra vstore(s) will be invoked, thus incurring small performance penalty. @@ -388,22 +394,23 @@ * @param[in] PARTIAL_COND_Y Condition on the y axis to perform the partial store Y. True to use PARTIAL_STORE_M0 rather than M0. * @param[in] PARTIAL_COND_X Condition on the x axis to perform the partial store X. True to use PARTIAL_STORE_N0 rather than N0. */ -#define STORE_BLOCK_PARTIAL_IN_X_AND_Y(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z, PARTIAL_STORE_M0, PARTIAL_STORE_N0, PARTIAL_COND_Y, PARTIAL_COND_X) \ - if(!(PARTIAL_COND_X) && !(PARTIAL_COND_Y)) \ - { \ - STORE_BLOCK_PARTIAL(M0, N0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z); \ - } \ - else if((PARTIAL_COND_Y) && !(PARTIAL_COND_X)) \ - { \ - STORE_BLOCK_PARTIAL(PARTIAL_STORE_M0, N0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z); \ - } \ - else if(!(PARTIAL_COND_Y) && (PARTIAL_COND_X)) \ - { \ - STORE_BLOCK_PARTIAL(M0, PARTIAL_STORE_N0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z); \ - } \ - else \ - { \ - STORE_BLOCK_PARTIAL(PARTIAL_STORE_M0, PARTIAL_STORE_N0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z); \ +#define STORE_BLOCK_PARTIAL_IN_X_AND_Y(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z, PARTIAL_STORE_M0, \ + PARTIAL_STORE_N0, PARTIAL_COND_Y, PARTIAL_COND_X) \ + if (!(PARTIAL_COND_X) && !(PARTIAL_COND_Y)) \ + { \ + STORE_BLOCK_PARTIAL(M0, N0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z); \ + } \ + else if ((PARTIAL_COND_Y) && !(PARTIAL_COND_X)) \ + { \ + STORE_BLOCK_PARTIAL(PARTIAL_STORE_M0, N0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z); \ + } \ + else if (!(PARTIAL_COND_Y) && (PARTIAL_COND_X)) \ + { \ + STORE_BLOCK_PARTIAL(M0, PARTIAL_STORE_N0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z); \ + } \ + else \ + { \ + STORE_BLOCK_PARTIAL(PARTIAL_STORE_M0, PARTIAL_STORE_N0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z); \ } /** Store a block that can only be partial in x but not y. * @@ -425,7 +432,7 @@ * @param[in] PARTIAL_COND_X Condition on the x axis to perform the partial store X. True to use PARTIAL_STORE_N0 rather than N0. */ #define STORE_BLOCK_PARTIAL_IN_X(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z, PARTIAL_STORE_N0, PARTIAL_COND_X) \ - if(!(PARTIAL_COND_X)) \ + if (!(PARTIAL_COND_X)) \ { \ STORE_BLOCK_PARTIAL(M0, N0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z); \ } \ @@ -453,7 +460,7 @@ * @param[in] PARTIAL_COND_Y Condition on the y axis to perform the partial store Y. True to use PARTIAL_STORE_M0 rather than M0. */ #define STORE_BLOCK_PARTIAL_IN_Y(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z, PARTIAL_STORE_M0, PARTIAL_COND_Y) \ - if(!(PARTIAL_COND_Y)) \ + if (!(PARTIAL_COND_Y)) \ { \ STORE_BLOCK_PARTIAL(M0, N0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z); \ } \ @@ -463,8 +470,6 @@ } /** @} */ // end of group STORE_BLOCK_PARTIAL -#if defined(PARTIAL_STORE_M0) && defined(PARTIAL_STORE_N0) - /** Boundary-aware GEMM block store * @name STORE_BLOCK_BOUNDARY_AWARE * This macro assumes the following schemes to achieve boundary-awareness: @@ -516,32 +521,37 @@ * @param[in] PARTIAL_COND_X Condition on the x axis to perform the partial store X. True to use PARTIAL_STORE_N0 rather than N0. * @{ */ +#if defined(PARTIAL_STORE_M0) && defined(PARTIAL_STORE_N0) #if PARTIAL_STORE_M0 == 0 && PARTIAL_STORE_N0 == 0 // Case1: No partial blocks in either x or y -#define STORE_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z, PARTIAL_STORE_M0, PARTIAL_STORE_N0, PARTIAL_COND_Y, PARTIAL_COND_X) \ +#define STORE_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z, PARTIAL_STORE_M0, PARTIAL_STORE_N0, \ + PARTIAL_COND_Y, PARTIAL_COND_X) \ STORE_BLOCK(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) #elif PARTIAL_STORE_M0 > 0 && PARTIAL_STORE_N0 == 0 // Case2: Partial blocks in y -#define STORE_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z, PARTIAL_STORE_M0, PARTIAL_STORE_N0, PARTIAL_COND_Y, PARTIAL_COND_X) \ +#define STORE_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z, PARTIAL_STORE_M0, PARTIAL_STORE_N0, \ + PARTIAL_COND_Y, PARTIAL_COND_X) \ STORE_BLOCK_PARTIAL_IN_Y(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z, PARTIAL_STORE_M0, PARTIAL_COND_Y) #elif PARTIAL_STORE_M0 == 0 && PARTIAL_STORE_N0 > 0 // Case3: Partial blocks in x -#define STORE_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z, PARTIAL_STORE_M0, PARTIAL_STORE_N0, PARTIAL_COND_Y, PARTIAL_COND_X) \ +#define STORE_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z, PARTIAL_STORE_M0, PARTIAL_STORE_N0, \ + PARTIAL_COND_Y, PARTIAL_COND_X) \ STORE_BLOCK_PARTIAL_IN_X(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z, PARTIAL_STORE_N0, PARTIAL_COND_X) #else // PARTIAL_STORE_M0 == 0 && PARTIAL_STORE_N0 == 0 // Case4: Partial blocks in both x and y -#define STORE_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z, PARTIAL_STORE_M0, PARTIAL_STORE_N0, PARTIAL_COND_Y, PARTIAL_COND_X) \ - STORE_BLOCK_PARTIAL_IN_X_AND_Y(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z, PARTIAL_STORE_M0, PARTIAL_STORE_N0, PARTIAL_COND_Y, PARTIAL_COND_X) +#define STORE_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z, PARTIAL_STORE_M0, PARTIAL_STORE_N0, \ + PARTIAL_COND_Y, PARTIAL_COND_X) \ + STORE_BLOCK_PARTIAL_IN_X_AND_Y(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z, PARTIAL_STORE_M0, PARTIAL_STORE_N0, \ + PARTIAL_COND_Y, PARTIAL_COND_X) #endif // PARTIAL_STORE_M0 == 0 && PARTIAL_STORE_N0 == 0 #endif // defined(PARTIAL_STORE_M0) && defined(PARTIAL_STORE_N0) /** @} */ // end of group STORE_BLOCK_BOUNDARY_AWARE -#if defined(PARTIAL_STORE_M0) /** Compute the start m0 row (LHS, BIAS and DST) in a boundary-aware way so as to avoid padding * @name COMPUTE_M0_START_ROW * If there're any partial blocks in y dimension, they are placed at the beginning of the rows. @@ -558,16 +568,16 @@ * @param[in] PARTIAL_STORE_M0 The partial size in y, for partial blocks. Supported: [0, @p M0) * @{ */ +#if defined(PARTIAL_STORE_M0) #define COMPUTE_M0_START_ROW(y, M0, PARTIAL_STORE_M0) \ ((uint)(max(0, (int)(y * M0) - (int)((M0 - PARTIAL_STORE_M0) % M0)))) #else // defined(PARTIAL_STORE_M0) -#define COMPUTE_M0_START_ROW(y, M0, PARTIAL_STORE_M0) \ - ((uint)(y * M0)) +#define COMPUTE_M0_START_ROW(y, M0, PARTIAL_STORE_M0) ((uint)(y * M0)) #endif // defined(PARTIAL_STORE_M0) /** @} */ // end of group COMPUTE_M0_START_ROW /** Store a vector that can only be partial in x. - * + * @name STORE_VECTOR_SELECT * @note in case @p vec_size or @p leftover != 1, 2, 3, 4, 8, 16, extra vstore(s) will be invoked, thus incurring small performance penalty. * * The data to store is expected to end in a 0. @@ -583,4 +593,4 @@ */ #define STORE_VECTOR_SELECT(basename, data_type, ptr, vec_size, leftover, cond) \ STORE_BLOCK_PARTIAL_IN_X(1, vec_size, data_type, basename, ptr, 0, 0, leftover, cond) -/** @} */ // end of group STORE_VECTOR_SELECT
\ No newline at end of file +/** @} */ // end of group STORE_VECTOR_SELECT |