about summary refs log tree commit diff
diff options
context:
space:
mode:
authorMichele Di Giorgio <michele.digiorgio@arm.com>2020-01-15 10:30:51 +0000
committerMichele Di Giorgio <michele.digiorgio@arm.com>2020-01-15 14:04:03 +0000
commite7b333ea718364ebeafd032461eb7b13aa8f3354 (patch)
tree409a56ee79ad708b90dcc934f61aad52ac78ca35
parentb66aa3b0f0fd81ae4eb383734045a55351776c7e (diff)
downloadComputeLibrary-e7b333ea718364ebeafd032461eb7b13aa8f3354.tar.gz
COMPMID-3007: Nightly CLGEMM fails in creating kernel on firefly64
Change-Id: Icdbe61761ba7955f6032b06fa72de0df438c28a5
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-on: https://review.mlplatform.org/c/2588
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
-rw-r--r--src/core/CL/cl_kernels/gemmlowp.cl34
-rw-r--r--src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp4
2 files changed, 19 insertions, 19 deletions
diff --git a/src/core/CL/cl_kernels/gemmlowp.cl b/src/core/CL/cl_kernels/gemmlowp.cl
index 74ea96551d..239e039e10 100644
--- a/src/core/CL/cl_kernels/gemmlowp.cl
+++ b/src/core/CL/cl_kernels/gemmlowp.cl
@@ -990,8 +990,6 @@ __kernel void gemmlowp_mm_native(IMAGE_DECLARATION(lhs),
}
#endif // defined(M0) && defined(N0) && defined(K0) && defined(K)
-#endif // defined(DATA_TYPE) && defined(ACC_DATA_TYPE)
-
#if defined(COLS_A)
/** OpenCL kernel used to compute the row-vectors of sums of all the entries in each row of Matrix A.
*
@@ -1000,7 +998,7 @@ __kernel void gemmlowp_mm_native(IMAGE_DECLARATION(lhs),
*
* @attention The number of matrix A columns needs to be passed at compile time using -DCOLS_A
* @note The input data type must be passed at compile time using -DDATA_TYPE (i.e. -DDATA_TYPE=uchar)
- * @note The data type for the accumulation must be passed at compile time using -DDATA_ACC_TYPE (i.e. -DDATA_ACC_TYPE=uint)
+ * @note The data type for the accumulation must be passed at compile time using -DACC_DATA_TYPE (i.e. -DACC_DATA_TYPE=uint)
*
* @param[in] src_ptr Pointer to the source tensor. Supported data type: QASYMM8
* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
@@ -1024,9 +1022,9 @@ __kernel void gemmlowp_matrix_a_reduction(TENSOR3D_DECLARATION(src),
Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src);
Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
- VEC_DATA_TYPE(DATA_ACC_TYPE, 4)
- sum_row_32 = (VEC_DATA_TYPE(DATA_ACC_TYPE, 4))0;
- DATA_ACC_TYPE sum_row = 0;
+ VEC_DATA_TYPE(ACC_DATA_TYPE, 4)
+ sum_row_32 = (VEC_DATA_TYPE(ACC_DATA_TYPE, 4))0;
+ ACC_DATA_TYPE sum_row = 0;
__global const DATA_TYPE *matrix_a = (__global const DATA_TYPE *)(src.ptr + get_global_id(0) * src_stride_y + get_global_id(1) * src_stride_z);
@@ -1037,14 +1035,14 @@ __kernel void gemmlowp_matrix_a_reduction(TENSOR3D_DECLARATION(src),
{
const VEC_DATA_TYPE(DATA_TYPE, 16) a0 = vload16(0, matrix_a + i);
- sum_row_32 += CONVERT(a0.s0123, VEC_DATA_TYPE(DATA_ACC_TYPE, 4)) + CONVERT(a0.s4567, VEC_DATA_TYPE(DATA_ACC_TYPE, 4)) + CONVERT(a0.s89AB, VEC_DATA_TYPE(DATA_ACC_TYPE, 4)) + CONVERT(a0.sCDEF,
- VEC_DATA_TYPE(DATA_ACC_TYPE, 4));
+ sum_row_32 += CONVERT(a0.s0123, VEC_DATA_TYPE(ACC_DATA_TYPE, 4)) + CONVERT(a0.s4567, VEC_DATA_TYPE(ACC_DATA_TYPE, 4)) + CONVERT(a0.s89AB, VEC_DATA_TYPE(ACC_DATA_TYPE, 4)) + CONVERT(a0.sCDEF,
+ VEC_DATA_TYPE(ACC_DATA_TYPE, 4));
}
// This for loop performs the leftover accumulations
for(; i < COLS_A; ++i)
{
- sum_row += (DATA_ACC_TYPE)matrix_a[i];
+ sum_row += (ACC_DATA_TYPE)matrix_a[i];
}
sum_row += sum_row_32.s0 + sum_row_32.s1 + sum_row_32.s2 + sum_row_32.s3;
@@ -1060,7 +1058,7 @@ __kernel void gemmlowp_matrix_a_reduction(TENSOR3D_DECLARATION(src),
*
* @attention The number of matrix A columns needs to be passed at compile time using -DCOLS_A
* @note The input data type must be passed at compile time using -DDATA_TYPE (i.e. -DDATA_TYPE=uchar)
- * @note The data type for the accumulation must be passed at compile time using -DDATA_ACC_TYPE (i.e. -DDATA_ACC_TYPE=uint)
+ * @note The data type for the accumulation must be passed at compile time using -DACC_DATA_TYPE (i.e. -DACC_DATA_TYPE=uint)
*
* @param[in] src_ptr Pointer to the source tensor. Supported data type: QASYMM8
* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
@@ -1084,7 +1082,7 @@ __kernel void gemmlowp_matrix_a_reduction_dot8(TENSOR3D_DECLARATION(src),
Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src);
Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
- DATA_ACC_TYPE sum_row = 0;
+ ACC_DATA_TYPE sum_row = 0;
__global const DATA_TYPE *matrix_a = (__global const DATA_TYPE *)(src.ptr + get_global_id(0) * src_stride_y + get_global_id(1) * src_stride_z);
@@ -1112,7 +1110,7 @@ __kernel void gemmlowp_matrix_a_reduction_dot8(TENSOR3D_DECLARATION(src),
// This for loop performs the leftover accumulations
for(; i < COLS_A; ++i)
{
- sum_row += (DATA_ACC_TYPE)matrix_a[i];
+ sum_row += (ACC_DATA_TYPE)matrix_a[i];
}
*((__global int *)dst.ptr) = (int)sum_row;
@@ -1128,7 +1126,7 @@ __kernel void gemmlowp_matrix_a_reduction_dot8(TENSOR3D_DECLARATION(src),
*
* @attention The number of matrix B columns and rows needs to be passed at compile time using -DCOLS_B and -DROWS_B
* @note The input data type must be passed at compile time using -DDATA_TYPE (i.e. -DDATA_TYPE=uchar)
- * @note The data type for the accumulation must be passed at compile time using -DDATA_ACC_TYPE (i.e. -DDATA_ACC_TYPE=uint)
+ * @note The data type for the accumulation must be passed at compile time using -DACC_DATA_TYPE (i.e. -DACC_DATA_TYPE=uint)
*
* @param[in] src_ptr Pointer to the source tensor. Supported data type: QASYMM8
* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
@@ -1152,8 +1150,8 @@ __kernel void gemmlowp_matrix_b_reduction(TENSOR3D_DECLARATION(src),
Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src);
Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
- VEC_DATA_TYPE(DATA_ACC_TYPE, 16)
- sum_col_32 = (VEC_DATA_TYPE(DATA_ACC_TYPE, 16))0;
+ VEC_DATA_TYPE(ACC_DATA_TYPE, 16)
+ sum_col_32 = (VEC_DATA_TYPE(ACC_DATA_TYPE, 16))0;
__global const DATA_TYPE *matrix_b = (__global const DATA_TYPE *)(src.ptr + get_global_id(1) * src_stride_z);
@@ -1170,7 +1168,7 @@ __kernel void gemmlowp_matrix_b_reduction(TENSOR3D_DECLARATION(src),
const VEC_DATA_TYPE(DATA_TYPE, 16)
b3 = vload16(0, matrix_b + 3 * src_stride_y);
- sum_col_32 += CONVERT(b0, VEC_DATA_TYPE(DATA_ACC_TYPE, 16)) + CONVERT(b1, VEC_DATA_TYPE(DATA_ACC_TYPE, 16)) + CONVERT(b2, VEC_DATA_TYPE(DATA_ACC_TYPE, 16)) + CONVERT(b3, VEC_DATA_TYPE(DATA_ACC_TYPE,
+ sum_col_32 += CONVERT(b0, VEC_DATA_TYPE(ACC_DATA_TYPE, 16)) + CONVERT(b1, VEC_DATA_TYPE(ACC_DATA_TYPE, 16)) + CONVERT(b2, VEC_DATA_TYPE(ACC_DATA_TYPE, 16)) + CONVERT(b3, VEC_DATA_TYPE(ACC_DATA_TYPE,
16));
matrix_b += 4 * src_stride_y;
@@ -1182,7 +1180,7 @@ __kernel void gemmlowp_matrix_b_reduction(TENSOR3D_DECLARATION(src),
const VEC_DATA_TYPE(DATA_TYPE, 16)
b0 = vload16(0, matrix_b);
- sum_col_32 += CONVERT(b0, VEC_DATA_TYPE(DATA_ACC_TYPE, 16));
+ sum_col_32 += CONVERT(b0, VEC_DATA_TYPE(ACC_DATA_TYPE, 16));
matrix_b += src_stride_y;
}
@@ -1191,6 +1189,8 @@ __kernel void gemmlowp_matrix_b_reduction(TENSOR3D_DECLARATION(src),
}
#endif // defined(COLS_B) && defined(ROWS_B)
+#endif // defined(DATA_TYPE) && defined(ACC_DATA_TYPE)
+
#if defined(K_OFFSET)
/* Helper function used to calculate the offset contribution after matrix multiplication.
diff --git a/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp b/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp
index 7900c83f3d..b51d9fb886 100644
--- a/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp
@@ -113,7 +113,7 @@ void CLGEMMLowpMatrixAReductionKernel::configure(const ICLTensor *mtx_a, ICLTens
CLBuildOptions build_opts;
build_opts.add_option("-DCOLS_A=" + support::cpp11::to_string(mtx_a->info()->dimension(0)));
build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(mtx_a->info()->data_type()));
- build_opts.add_option("-DDATA_ACC_TYPE=" + get_cl_dot8_acc_type_from_data_type(mtx_a->info()->data_type()));
+ build_opts.add_option("-DACC_DATA_TYPE=" + get_cl_dot8_acc_type_from_data_type(mtx_a->info()->data_type()));
const bool is_dot8_supported = dot8_supported(CLKernelLibrary::get().get_device());
@@ -181,7 +181,7 @@ void CLGEMMLowpMatrixBReductionKernel::configure(const ICLTensor *mtx_b, ICLTens
build_opts.add_option("-DCOLS_B=" + support::cpp11::to_string(mtx_b->info()->dimension(0)));
build_opts.add_option("-DROWS_B=" + support::cpp11::to_string(mtx_b->info()->dimension(1)));
build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(mtx_b->info()->data_type()));
- build_opts.add_option("-DDATA_ACC_TYPE=" + get_cl_dot8_acc_type_from_data_type(mtx_b->info()->data_type()));
+ build_opts.add_option("-DACC_DATA_TYPE=" + get_cl_dot8_acc_type_from_data_type(mtx_b->info()->data_type()));
// Create kernel
_kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("gemmlowp_matrix_b_reduction", build_opts.options()));