about | summary | refs | log | tree | commit | diff
path: root/src/core/CL/cl_kernels/gemm.cl
diff options
context:
space:
mode:
author: Georgios Pinitas <georgios.pinitas@arm.com> 2017-10-20 18:52:20 +0100
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:35:24 +0000
commit: 96880cf00707d394938ec7fe31c21c79a2ac3f0c (patch)
tree: e7022f32813dd4df943b9e3b9d0948019d9b2af2 /src/core/CL/cl_kernels/gemm.cl
parent: 0a7a8d19d7c172a05402578b1e8ddd8ecb825643 (diff)
download: ComputeLibrary-96880cf00707d394938ec7fe31c21c79a2ac3f0c.tar.gz
COMPMID-640: FullyConnectedLayer failures on both NEON/CL
Change-Id: Idd830cff054114123229c189e423b753b8064146
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/92623
Reviewed-by: Robert Hughes <robert.hughes@arm.com>
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels/gemm.cl')
-rw-r--r-- src/core/CL/cl_kernels/gemm.cl | 6
1 file changed, 3 insertions, 3 deletions
diff --git a/src/core/CL/cl_kernels/gemm.cl b/src/core/CL/cl_kernels/gemm.cl
index 35a2e4704f..7f2a08bc2c 100644
--- a/src/core/CL/cl_kernels/gemm.cl
+++ b/src/core/CL/cl_kernels/gemm.cl
@@ -1040,7 +1040,7 @@ __kernel void gemm_mm_floating_point(IMAGE_DECLARATION(src0),
VECTOR_TYPE acc3 = 0.0f;
#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
- for(; src_addr.s0 <= (end_row_vec_a - 2 * sizeof(DATA_TYPE)); src_addr += (int2)(2 * sizeof(DATA_TYPE), 2 * src1_stride_y))
+ for(; src_addr.s0 <= (end_row_vec_a - 2 * (int)sizeof(DATA_TYPE)); src_addr += (int2)(2 * sizeof(DATA_TYPE), 2 * src1_stride_y))
{
// Load values from matrix A
VEC_DATA_TYPE(DATA_TYPE, 2)
@@ -1344,7 +1344,7 @@ __kernel void gemm_mm_qs16(IMAGE_DECLARATION(src0),
#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
// This for loop performs 4 accumulations per iteration
- for(; src_addr.s0 <= (end_row_vec_a - 2 * sizeof(short)); src_addr += (int2)(2 * sizeof(short), 2 * src1_stride_y))
+ for(; src_addr.s0 <= (end_row_vec_a - 2 * (int)sizeof(short)); src_addr += (int2)(2 * sizeof(short), 2 * src1_stride_y))
{
short2 a0 = vload2(0, (__global short *)(src0_ptr + src_addr.s0 + 0 * src0_stride_y));
#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
@@ -1629,7 +1629,7 @@ __kernel void gemm_lc_vm_f32(IMAGE_DECLARATION(src0),
float4 acc = 0.0f;
- for(; src_addr.s0 <= (end_row_vec_a - 2 * sizeof(float)); src_addr += (int2)(2 * sizeof(float), 2 * src1_stride_y))
+ for(; src_addr.s0 <= (end_row_vec_a - 2 * (int)sizeof(float)); src_addr += (int2)(2 * sizeof(float), 2 * src1_stride_y))
{
float2 a0 = vload2(0, (__global float *)(src0_ptr + src_addr.s0));
float4 b0 = vload4(0, (__global float *)(src1_ptr + src_addr.s1));