From e7b333ea718364ebeafd032461eb7b13aa8f3354 Mon Sep 17 00:00:00 2001 From: Michele Di Giorgio Date: Wed, 15 Jan 2020 10:30:51 +0000 Subject: COMPMID-3007: Nightly CLGEMM fails in creating kernel on firefly64 Change-Id: Icdbe61761ba7955f6032b06fa72de0df438c28a5 Signed-off-by: Michele Di Giorgio Reviewed-on: https://review.mlplatform.org/c/2588 Tested-by: Arm Jenkins Reviewed-by: Georgios Pinitas Comments-Addressed: Arm Jenkins Reviewed-by: Giorgio Arena --- src/core/CL/cl_kernels/gemmlowp.cl | 34 +++++++++++------------ src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp | 4 +-- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/src/core/CL/cl_kernels/gemmlowp.cl b/src/core/CL/cl_kernels/gemmlowp.cl index 74ea96551d..239e039e10 100644 --- a/src/core/CL/cl_kernels/gemmlowp.cl +++ b/src/core/CL/cl_kernels/gemmlowp.cl @@ -990,8 +990,6 @@ __kernel void gemmlowp_mm_native(IMAGE_DECLARATION(lhs), } #endif // defined(M0) && defined(N0) && defined(K0) && defined(K) -#endif // defined(DATA_TYPE) && defined(ACC_DATA_TYPE) - #if defined(COLS_A) /** OpenCL kernel used to compute the row-vectors of sums of all the entries in each row of Matrix A. * @@ -1000,7 +998,7 @@ __kernel void gemmlowp_mm_native(IMAGE_DECLARATION(lhs), * * @attention The number of matrix A columns needs to be passed at compile time using -DCOLS_A * @note The input data type must be passed at compile time using -DDATA_TYPE (i.e. -DDATA_TYPE=uchar) - * @note The data type for the accumulation must be passed at compile time using -DDATA_ACC_TYPE (i.e. -DDATA_ACC_TYPE=uint) + * @note The data type for the accumulation must be passed at compile time using -DACC_DATA_TYPE (i.e. -DACC_DATA_TYPE=uint) * * @param[in] src_ptr Pointer to the source tensor. Supported data type: QASYMM8 * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) @@ -1024,9 +1022,9 @@ __kernel void gemmlowp_matrix_a_reduction(TENSOR3D_DECLARATION(src), Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src); Image dst = CONVERT_TO_IMAGE_STRUCT(dst); - VEC_DATA_TYPE(DATA_ACC_TYPE, 4) - sum_row_32 = (VEC_DATA_TYPE(DATA_ACC_TYPE, 4))0; - DATA_ACC_TYPE sum_row = 0; + VEC_DATA_TYPE(ACC_DATA_TYPE, 4) + sum_row_32 = (VEC_DATA_TYPE(ACC_DATA_TYPE, 4))0; + ACC_DATA_TYPE sum_row = 0; __global const DATA_TYPE *matrix_a = (__global const DATA_TYPE *)(src.ptr + get_global_id(0) * src_stride_y + get_global_id(1) * src_stride_z); @@ -1037,14 +1035,14 @@ __kernel void gemmlowp_matrix_a_reduction(TENSOR3D_DECLARATION(src), { const VEC_DATA_TYPE(DATA_TYPE, 16) a0 = vload16(0, matrix_a + i); - sum_row_32 += CONVERT(a0.s0123, VEC_DATA_TYPE(DATA_ACC_TYPE, 4)) + CONVERT(a0.s4567, VEC_DATA_TYPE(DATA_ACC_TYPE, 4)) + CONVERT(a0.s89AB, VEC_DATA_TYPE(DATA_ACC_TYPE, 4)) + CONVERT(a0.sCDEF, - VEC_DATA_TYPE(DATA_ACC_TYPE, 4)); + sum_row_32 += CONVERT(a0.s0123, VEC_DATA_TYPE(ACC_DATA_TYPE, 4)) + CONVERT(a0.s4567, VEC_DATA_TYPE(ACC_DATA_TYPE, 4)) + CONVERT(a0.s89AB, VEC_DATA_TYPE(ACC_DATA_TYPE, 4)) + CONVERT(a0.sCDEF, + VEC_DATA_TYPE(ACC_DATA_TYPE, 4)); } // This for loop performs the leftover accumulations for(; i < COLS_A; ++i) { - sum_row += (DATA_ACC_TYPE)matrix_a[i]; + sum_row += (ACC_DATA_TYPE)matrix_a[i]; } sum_row += sum_row_32.s0 + sum_row_32.s1 + sum_row_32.s2 + sum_row_32.s3; @@ -1060,7 +1058,7 @@ __kernel void gemmlowp_matrix_a_reduction(TENSOR3D_DECLARATION(src), * * @attention The number of matrix A columns needs to be passed at compile time using -DCOLS_A * @note The input data type must be passed at compile time using -DDATA_TYPE (i.e. -DDATA_TYPE=uchar) - * @note The data type for the accumulation must be passed at compile time using -DDATA_ACC_TYPE (i.e. -DDATA_ACC_TYPE=uint) + * @note The data type for the accumulation must be passed at compile time using -DACC_DATA_TYPE (i.e. -DACC_DATA_TYPE=uint) * * @param[in] src_ptr Pointer to the source tensor. Supported data type: QASYMM8 * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) @@ -1084,7 +1082,7 @@ __kernel void gemmlowp_matrix_a_reduction_dot8(TENSOR3D_DECLARATION(src), Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src); Image dst = CONVERT_TO_IMAGE_STRUCT(dst); - DATA_ACC_TYPE sum_row = 0; + ACC_DATA_TYPE sum_row = 0; __global const DATA_TYPE *matrix_a = (__global const DATA_TYPE *)(src.ptr + get_global_id(0) * src_stride_y + get_global_id(1) * src_stride_z); @@ -1112,7 +1110,7 @@ __kernel void gemmlowp_matrix_a_reduction_dot8(TENSOR3D_DECLARATION(src), // This for loop performs the leftover accumulations for(; i < COLS_A; ++i) { - sum_row += (DATA_ACC_TYPE)matrix_a[i]; + sum_row += (ACC_DATA_TYPE)matrix_a[i]; } *((__global int *)dst.ptr) = (int)sum_row; @@ -1128,7 +1126,7 @@ __kernel void gemmlowp_matrix_a_reduction_dot8(TENSOR3D_DECLARATION(src), * * @attention The number of matrix B columns and rows needs to be passed at compile time using -DCOLS_B and -DROWS_B * @note The input data type must be passed at compile time using -DDATA_TYPE (i.e. -DDATA_TYPE=uchar) - * @note The data type for the accumulation must be passed at compile time using -DDATA_ACC_TYPE (i.e. -DDATA_ACC_TYPE=uint) + * @note The data type for the accumulation must be passed at compile time using -DACC_DATA_TYPE (i.e. -DACC_DATA_TYPE=uint) * * @param[in] src_ptr Pointer to the source tensor. Supported data type: QASYMM8 * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) @@ -1152,8 +1150,8 @@ __kernel void gemmlowp_matrix_b_reduction(TENSOR3D_DECLARATION(src), Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src); Image dst = CONVERT_TO_IMAGE_STRUCT(dst); - VEC_DATA_TYPE(DATA_ACC_TYPE, 16) - sum_col_32 = (VEC_DATA_TYPE(DATA_ACC_TYPE, 16))0; + VEC_DATA_TYPE(ACC_DATA_TYPE, 16) + sum_col_32 = (VEC_DATA_TYPE(ACC_DATA_TYPE, 16))0; __global const DATA_TYPE *matrix_b = (__global const DATA_TYPE *)(src.ptr + get_global_id(1) * src_stride_z); @@ -1170,7 +1168,7 @@ __kernel void gemmlowp_matrix_b_reduction(TENSOR3D_DECLARATION(src), const VEC_DATA_TYPE(DATA_TYPE, 16) b3 = vload16(0, matrix_b + 3 * src_stride_y); - sum_col_32 += CONVERT(b0, VEC_DATA_TYPE(DATA_ACC_TYPE, 16)) + CONVERT(b1, VEC_DATA_TYPE(DATA_ACC_TYPE, 16)) + CONVERT(b2, VEC_DATA_TYPE(DATA_ACC_TYPE, 16)) + CONVERT(b3, VEC_DATA_TYPE(DATA_ACC_TYPE, + sum_col_32 += CONVERT(b0, VEC_DATA_TYPE(ACC_DATA_TYPE, 16)) + CONVERT(b1, VEC_DATA_TYPE(ACC_DATA_TYPE, 16)) + CONVERT(b2, VEC_DATA_TYPE(ACC_DATA_TYPE, 16)) + CONVERT(b3, VEC_DATA_TYPE(ACC_DATA_TYPE, 16)); matrix_b += 4 * src_stride_y; @@ -1182,7 +1180,7 @@ __kernel void gemmlowp_matrix_b_reduction(TENSOR3D_DECLARATION(src), const VEC_DATA_TYPE(DATA_TYPE, 16) b0 = vload16(0, matrix_b); - sum_col_32 += CONVERT(b0, VEC_DATA_TYPE(DATA_ACC_TYPE, 16)); + sum_col_32 += CONVERT(b0, VEC_DATA_TYPE(ACC_DATA_TYPE, 16)); matrix_b += src_stride_y; } @@ -1191,6 +1189,8 @@ __kernel void gemmlowp_matrix_b_reduction(TENSOR3D_DECLARATION(src), } #endif // defined(COLS_B) && defined(ROWS_B) +#endif // defined(DATA_TYPE) && defined(ACC_DATA_TYPE) + #if defined(K_OFFSET) /* Helper function used to calculate the offset contribution after matrix multiplication. diff --git a/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp b/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp index 7900c83f3d..b51d9fb886 100644 --- a/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp @@ -113,7 +113,7 @@ void CLGEMMLowpMatrixAReductionKernel::configure(const ICLTensor *mtx_a, ICLTens CLBuildOptions build_opts; build_opts.add_option("-DCOLS_A=" + support::cpp11::to_string(mtx_a->info()->dimension(0))); build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(mtx_a->info()->data_type())); - build_opts.add_option("-DDATA_ACC_TYPE=" + get_cl_dot8_acc_type_from_data_type(mtx_a->info()->data_type())); + build_opts.add_option("-DACC_DATA_TYPE=" + get_cl_dot8_acc_type_from_data_type(mtx_a->info()->data_type())); const bool is_dot8_supported = dot8_supported(CLKernelLibrary::get().get_device()); @@ -181,7 +181,7 @@ void CLGEMMLowpMatrixBReductionKernel::configure(const ICLTensor *mtx_b, ICLTens build_opts.add_option("-DCOLS_B=" + support::cpp11::to_string(mtx_b->info()->dimension(0))); build_opts.add_option("-DROWS_B=" + support::cpp11::to_string(mtx_b->info()->dimension(1))); build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(mtx_b->info()->data_type())); - build_opts.add_option("-DDATA_ACC_TYPE=" + get_cl_dot8_acc_type_from_data_type(mtx_b->info()->data_type())); + build_opts.add_option("-DACC_DATA_TYPE=" + get_cl_dot8_acc_type_from_data_type(mtx_b->info()->data_type())); // Create kernel _kernel = static_cast(CLKernelLibrary::get().create_kernel("gemmlowp_matrix_b_reduction", build_opts.options())); -- cgit v1.2.1