diff options
Diffstat (limited to 'src/core/CL/cl_kernels/tile_helpers.h')
-rw-r--r-- | src/core/CL/cl_kernels/tile_helpers.h | 86 |
1 file changed, 76 insertions, 10 deletions
diff --git a/src/core/CL/cl_kernels/tile_helpers.h b/src/core/CL/cl_kernels/tile_helpers.h index acc174d04f..507e172dfb 100644 --- a/src/core/CL/cl_kernels/tile_helpers.h +++ b/src/core/CL/cl_kernels/tile_helpers.h @@ -130,8 +130,44 @@ uint name##_offset_first_element_in_bytes #define TENSOR4D_T_STR(name, type) TENSOR4D_T_##type(name) + +/** Legacy tensor 4D arguments. Expands, through TENSOR4D_T_STR, to TENSOR4D_T_##type(name) + * + * @param[in] name Tensor name. The tensor name is the prefix of the tensor components + * @param[in] type Tensor type (BUFFER or IMAGE) + */ #define TENSOR4D_T(name, type) TENSOR4D_T_STR(name, type) +#define TENSOR4D_RO_T_IMAGE(name) \ + __read_only image2d_t name##_img, \ + TENSOR4D_T_BUFFER(name) + +#define TENSOR4D_RO_T_BUFFER(name) TENSOR4D_T_BUFFER(name) + +#define TENSOR4D_RO_T_STR(name, type) TENSOR4D_RO_T_##type(name) + +/** Read-Only (RO) tensor 4D arguments. With type=IMAGE, a __read_only image2d_t name##_img argument is declared ahead of the TENSOR4D_T_BUFFER(name) arguments; with type=BUFFER, only TENSOR4D_T_BUFFER(name) is emitted. + * + * @param[in] name Tensor name. The tensor name is the prefix of the tensor components + * @param[in] type Tensor type (BUFFER or IMAGE) + */ +#define TENSOR4D_RO_T(name, type) TENSOR4D_RO_T_STR(name, type) + +#define TENSOR4D_WO_T_IMAGE(name) \ + __write_only image2d_t name##_img, \ + TENSOR4D_T_BUFFER(name) + +#define TENSOR4D_WO_T_BUFFER(name) TENSOR4D_T_BUFFER(name) + +#define TENSOR4D_WO_T_STR(name, type) TENSOR4D_WO_T_##type(name) + +/** Write-Only (WO) tensor 4D arguments. With type=IMAGE, a __write_only image2d_t name##_img argument is declared ahead of the TENSOR4D_T_BUFFER(name) arguments; with type=BUFFER, only TENSOR4D_T_BUFFER(name) is emitted. + * + * @param[in] name Tensor name. 
The tensor name is the prefix of the tensor components + * @param[in] type Tensor type (BUFFER or IMAGE) + */ +#define TENSOR4D_WO_T(name, type) TENSOR4D_WO_T_STR(name, type) + #define TENSOR3D_T_IMAGE(name) \ __read_only image2d_t name##_img, \ __global uchar *name##_ptr, \ @@ -457,6 +493,25 @@ (0, (__global DATA_TYPE *)(TENSOR##_ptr + TENSOR##_offset_first_element_in_bytes + (X) * sizeof(DATA_TYPE) + (Y) * (STRIDE_Y))) #define V_LOAD_IMAGE(DATA_TYPE, WIDTH, TENSOR, X, Y, STRIDE_Y) READ_IMAGE2D(DATA_TYPE, CONVERT_VECTOR_SIZE_TO_PIXEL_UNIT(WIDTH), TENSOR##_img, (X) / 4, (Y)) +/** Store a vector in global memory (tensor) + * + * @param[in] DATA_TYPE Data type + * @param[in] WIDTH Number of dst columns + * @param[in] TENSOR_TYPE Type of cl_type used to store the tensor in global memory (BUFFER=cl_buffer, IMAGE=cl_image). + * In case of cl_image, only WIDTH multiples of 4 are supported (4, 8, 16) + * @param[in] TENSOR Tensor basename + * @param[in] X Starting X position. With TENSOR_TYPE=IMAGE, (X) / 4 is the value handed to WRITE_IMAGE2D + * @param[in] Y Starting Y position + * @param[in] STRIDE_Y Stride Y (in bytes). Not referenced by V_STORE_IMAGE + * @param[in] VALUES Values to store in memory + */ +#define V_STORE(DATA_TYPE, WIDTH, TENSOR_TYPE, TENSOR, X, Y, STRIDE_Y, VALUES) V_STORE_STR(DATA_TYPE, WIDTH, TENSOR_TYPE, TENSOR, X, Y, STRIDE_Y, VALUES) +#define V_STORE_STR(DATA_TYPE, WIDTH, TENSOR_TYPE, TENSOR, X, Y, STRIDE_Y, VALUES) V_STORE_##TENSOR_TYPE(DATA_TYPE, WIDTH, TENSOR, X, Y, STRIDE_Y, VALUES) +#define V_STORE_BUFFER(DATA_TYPE, WIDTH, TENSOR, X, Y, STRIDE_Y, VALUES) \ + VSTORE(WIDTH) \ + (VALUES, 0, (__global DATA_TYPE *)(TENSOR##_ptr + TENSOR##_offset_first_element_in_bytes + (X) * sizeof(DATA_TYPE) + (Y) * (STRIDE_Y))) +#define V_STORE_IMAGE(DATA_TYPE, WIDTH, TENSOR, X, Y, STRIDE_Y, VALUES) WRITE_IMAGE2D(DATA_TYPE, CONVERT_VECTOR_SIZE_TO_PIXEL_UNIT(WIDTH), TENSOR##_img, (X) / 4, (Y), VALUES) + /** Load a tile from global memory (tensor) * * @param[in] DATA_TYPE Data type @@ -658,7 +713,8 @@ * @param[in] DATA_TYPE Data type * @param[in] TILE_AREA Number 
of elements to load from Y (height) dimension * Number of elements to load from X (width) dimension * @param[in] TILE_CHANNELS Number of elements to load from C (channel) dimension - * @param[in] TENSOR_TYPE Type of cl_type used to store the tensor in global memory (BUFFER=cl_buffer, IMAGE=cl_image). Currently BUFFER only is supported + * @param[in] TENSOR_TYPE Type of cl_type used to store the tensor in global memory (BUFFER=cl_buffer, IMAGE=cl_image). + * When TENSOR_TYPE=IMAGE, the yi[0].s[_i] >= 0 out-of-bound check is skipped (see T_LOAD2D_INDIRECT_IMAGE) * In case of cl_image, only TILE_CHANNELS multiples of 4 are supported (4, 8, 16) * @param[in] TENSOR Tensor basename * @param[in] C Starting C index @@ -667,15 +723,25 @@ * 16 is the maximum indirect buffer size. * @param[out] dst Output tile */ -#define T_LOAD2D_INDIRECT(DATA_TYPE, TILE_AREA, TILE_CHANNELS, TENSOR_TYPE, TENSOR, C, STRIDE_Y, yi, dst) \ - ({ \ - LOOP_UNROLLING(int, _i, 0, 1, TILE_AREA, \ - { \ - if(yi[0].s[_i] >= 0) \ - { \ - dst[_i].v = V_LOAD(DATA_TYPE, TILE_CHANNELS, TENSOR_TYPE, TENSOR, C, yi[0].s[_i], STRIDE_Y); \ - } \ - }) \ +#define T_LOAD2D_INDIRECT(DATA_TYPE, TILE_AREA, TILE_CHANNELS, TENSOR_TYPE, TENSOR, C, STRIDE_Y, yi, dst) T_LOAD2D_INDIRECT_STR(DATA_TYPE, TILE_AREA, TILE_CHANNELS, TENSOR_TYPE, TENSOR, C, STRIDE_Y, yi, dst) +#define T_LOAD2D_INDIRECT_STR(DATA_TYPE, TILE_AREA, TILE_CHANNELS, TENSOR_TYPE, TENSOR, C, STRIDE_Y, yi, dst) T_LOAD2D_INDIRECT_##TENSOR_TYPE(DATA_TYPE, TILE_AREA, TILE_CHANNELS, TENSOR_TYPE, TENSOR, C, STRIDE_Y, yi, dst) +#define T_LOAD2D_INDIRECT_BUFFER(DATA_TYPE, TILE_AREA, TILE_CHANNELS, TENSOR_TYPE, TENSOR, C, STRIDE_Y, yi, dst) \ + ({ \ + LOOP_UNROLLING(int, _i, 0, 1, TILE_AREA, \ + { \ + if(yi[0].s[_i] >= 0) \ + { \ + dst[_i].v = V_LOAD(DATA_TYPE, TILE_CHANNELS, TENSOR_TYPE, TENSOR, C, yi[0].s[_i], STRIDE_Y); \ + } \ + }) \ + }) + +#define T_LOAD2D_INDIRECT_IMAGE(DATA_TYPE, TILE_AREA, TILE_CHANNELS, TENSOR_TYPE, TENSOR, C, STRIDE_Y, yi, dst) \ + ({ \ + LOOP_UNROLLING(int, 
_i, 0, 1, TILE_AREA, \ + { \ + dst[_i].v = V_LOAD(DATA_TYPE, TILE_CHANNELS, TENSOR_TYPE, TENSOR, C, yi[0].s[_i], STRIDE_Y); \ + }) \ }) /** Load a tile from global memory (tensor) when the tensor is stored using a NDHWC layout using indirect X, Y and Z coordinates |