From dbbe4a31f8eac4bceffa3805640c658bed25ce18 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Wed, 18 Jul 2018 18:06:32 +0100 Subject: COMPMID-1412; (Nightly) OCLGrind failures in input_transform CL NHWC Change-Id: I14d19be90b6c56f2259a2c94fd793829cfb55328 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/140538 Tested-by: Jenkins Reviewed-by: Pablo Tello Reviewed-by: Giorgio Arena --- src/core/CL/cl_kernels/winograd_input_transform.cl | 150 ++++++++++----------- 1 file changed, 75 insertions(+), 75 deletions(-) diff --git a/src/core/CL/cl_kernels/winograd_input_transform.cl b/src/core/CL/cl_kernels/winograd_input_transform.cl index fcd1b3b9ce..da18e4ab5b 100644 --- a/src/core/CL/cl_kernels/winograd_input_transform.cl +++ b/src/core/CL/cl_kernels/winograd_input_transform.cl @@ -616,8 +616,8 @@ __kernel void winograd_input_transform_4x4_3x3_stepz1_nhwc( // Clamp coordinates. This clamp is valid for all rows int4 y_coord0 = (int4)(y * OUTPUT_TILE_W) + (int4)(0, 1, 2, 3) - (int4)PAD_LEFT; int2 y_coord1 = (int2)(y * OUTPUT_TILE_W) + (int2)(4, 5) - (int2)PAD_LEFT; - y_coord0 = clamp(y_coord0, -1, SRC_DIM_1); - y_coord1 = clamp(y_coord1, -1, SRC_DIM_1); + y_coord0 = clamp(y_coord0, (int4) - 1, (int4)SRC_DIM_1); + y_coord1 = clamp(y_coord1, (int2) - 1, (int2)SRC_DIM_1); int z_coord; int4 valid_y0; @@ -625,17 +625,17 @@ __kernel void winograd_input_transform_4x4_3x3_stepz1_nhwc( #if !defined(WINOGRAD_INPUT_TRANSFORM_HORIZONTAL) && !defined(WINOGRAD_INPUT_TRANSFORM_VERTICAL) // Row4 - z_coord = (z * 4) - PAD_TOP + 4; + z_coord = (z * 4) - (int)PAD_TOP + 4; // If z < 0, set y to -1 - valid_y0 = select(y_coord0, -1, (int4)z_coord < 0); - valid_y1 = select(y_coord1, -1, (int2)z_coord < 0); + valid_y0 = select(y_coord0, (int4) - 1, (int4)z_coord < 0); + valid_y1 = select(y_coord1, (int2) - 1, (int2)z_coord < 0); // If z >= SRC_DIM_2, set y to SRC_DIM_2 - valid_y0 = select(valid_y0, SRC_DIM_1, (int4)z_coord >= SRC_DIM_2); - valid_y1 = select(valid_y1, SRC_DIM_1, (int2)z_coord >= SRC_DIM_2); + valid_y0 = select(valid_y0, (int4)SRC_DIM_1, (int4)z_coord >= (int)SRC_DIM_2); + valid_y1 = select(valid_y1, (int2)SRC_DIM_1, (int2)z_coord >= (int)SRC_DIM_2); // Clamp z coordinate - z_coord = clamp(z_coord, 0, SRC_DIM_2 - 1); + z_coord = clamp(z_coord, 0, (int)SRC_DIM_2 - 1); float d40 = *(__global float *)(src_addr + valid_y0.s0 * (int)src_stride_y + z_coord * src_stride_z); float d41 = *(__global float *)(src_addr + valid_y0.s1 * (int)src_stride_y + z_coord * src_stride_z); @@ -661,14 +661,14 @@ __kernel void winograd_input_transform_4x4_3x3_stepz1_nhwc( #if !defined(WINOGRAD_INPUT_TRANSFORM_VERTICAL) // Row0 - z_coord = (z * OUTPUT_TILE_H) - PAD_TOP + 0; + z_coord = (z * (int)OUTPUT_TILE_H) - (int)PAD_TOP + 0; #if PAD_TOP != 0 - valid_y0 = select(y_coord0, -1, (int4)z_coord < 0); - valid_y1 = select(y_coord1, -1, (int2)z_coord < 0); - valid_y0 = select(valid_y0, SRC_DIM_1, (int4)z_coord >= SRC_DIM_2); - valid_y1 = select(valid_y1, SRC_DIM_1, (int2)z_coord >= SRC_DIM_2); - z_coord = clamp(z_coord, 0, SRC_DIM_2 - 1); + valid_y0 = select(y_coord0, (int4) - 1, (int4)z_coord < 0); + valid_y1 = select(y_coord1, (int2) - 1, (int2)z_coord < 0); + valid_y0 = select(valid_y0, (int)SRC_DIM_1, (int4)z_coord >= (int)SRC_DIM_2); + valid_y1 = select(valid_y1, (int)SRC_DIM_1, (int2)z_coord >= (int)SRC_DIM_2); + z_coord = clamp(z_coord, 0, (int)SRC_DIM_2 - 1); #else // PAD_TOP != 0 valid_y0 = y_coord0; valid_y1 = y_coord1; @@ -689,8 +689,8 @@ __kernel void winograd_input_transform_4x4_3x3_stepz1_nhwc( valid_y0 = select(valid_y0, (int4)SRC_DIM_1, z_coords0 >= (int4)SRC_DIM_2); valid_y1 = select(valid_y1, (int2)SRC_DIM_1, z_coords1 >= (int2)SRC_DIM_2); - z_coords0 = clamp((int4)z_coords0, (int4)0, (int4)(SRC_DIM_2 - 1)); - z_coords1 = clamp((int2)z_coords1, (int2)0, (int2)(SRC_DIM_2 - 1)); + z_coords0 = clamp((int4)z_coords0, (int4)0, (int4)((int)SRC_DIM_2 - 1)); + z_coords1 = clamp((int2)z_coords1, (int2)0, (int2)((int)SRC_DIM_2 - 1)); float d00 = *(__global float *)(src_addr + valid_y0.s0 * (int)src_stride_y + z_coords0.s0 * src_stride_z); float d01 = *(__global float *)(src_addr + valid_y0.s1 * (int)src_stride_y + z_coords0.s1 * src_stride_z); @@ -709,12 +709,12 @@ __kernel void winograd_input_transform_4x4_3x3_stepz1_nhwc( #if !defined(WINOGRAD_INPUT_TRANSFORM_HORIZONTAL) && !defined(WINOGRAD_INPUT_TRANSFORM_VERTICAL) // Row2 - z_coord = (z * OUTPUT_TILE_H) - PAD_TOP + 2; - valid_y0 = select(y_coord0, -1, (int4)z_coord < 0); - valid_y1 = select(y_coord1, -1, (int2)z_coord < 0); - valid_y0 = select(valid_y0, SRC_DIM_1, (int4)z_coord >= SRC_DIM_2); - valid_y1 = select(valid_y1, SRC_DIM_1, (int2)z_coord >= SRC_DIM_2); - z_coord = clamp(z_coord, 0, SRC_DIM_2 - 1); + z_coord = (z * (int)OUTPUT_TILE_H) - (int)PAD_TOP + 2; + valid_y0 = select(y_coord0, (int4) - 1, (int4)z_coord < 0); + valid_y1 = select(y_coord1, (int2) - 1, (int2)z_coord < 0); + valid_y0 = select(valid_y0, (int4)SRC_DIM_1, (int4)z_coord >= (int)SRC_DIM_2); + valid_y1 = select(valid_y1, (int2)SRC_DIM_1, (int2)z_coord >= (int)SRC_DIM_2); + z_coord = clamp(z_coord, 0, (int)SRC_DIM_2 - 1); float d20 = *(__global float *)(src_addr + valid_y0.s0 * (int)src_stride_y + z_coord * src_stride_z); float d21 = *(__global float *)(src_addr + valid_y0.s1 * (int)src_stride_y + z_coord * src_stride_z); @@ -782,7 +782,7 @@ __kernel void winograd_input_transform_4x4_3x3_stepz1_nhwc( #if !defined(WINOGRAD_INPUT_TRANSFORM_HORIZONTAL) && !defined(WINOGRAD_INPUT_TRANSFORM_VERTICAL) // Row1 - z_coord = (z * OUTPUT_TILE_H) - PAD_TOP + 1; + z_coord = (z * (int)OUTPUT_TILE_H) - (int)PAD_TOP + 1; // Row1 can never be out of bounds valid_y0 = y_coord0; valid_y1 = y_coord1; @@ -795,13 +795,13 @@ __kernel void winograd_input_transform_4x4_3x3_stepz1_nhwc( float d15 = *(__global float *)(src_addr + valid_y1.s1 * (int)src_stride_y + z_coord * src_stride_z); // Row3 - z_coord = (z * OUTPUT_TILE_H) - PAD_TOP + 3; - valid_y0 = select(y_coord0, -1, (int4)z_coord < 0); - valid_y1 = select(y_coord1, -1, (int2)z_coord < 0); - valid_y0 = select(valid_y0, SRC_DIM_1, (int4)z_coord >= SRC_DIM_2); - valid_y1 = select(valid_y1, SRC_DIM_1, (int2)z_coord >= SRC_DIM_2); - z_coord = clamp(z_coord, 0, SRC_DIM_2 - 1); - z_coord = clamp(z_coord, 0, SRC_DIM_2 - 1); + z_coord = (z * (int)OUTPUT_TILE_H) - (int)PAD_TOP + 3; + valid_y0 = select(y_coord0, (int4) - 1, (int4)z_coord < 0); + valid_y1 = select(y_coord1, (int2) - 1, (int2)z_coord < 0); + valid_y0 = select(valid_y0, (int4)SRC_DIM_1, (int4)z_coord >= (int)SRC_DIM_2); + valid_y1 = select(valid_y1, (int2)SRC_DIM_1, (int2)z_coord >= (int)SRC_DIM_2); + z_coord = clamp(z_coord, 0, (int)SRC_DIM_2 - 1); + z_coord = clamp(z_coord, 0, (int)SRC_DIM_2 - 1); float d30 = *(__global float *)(src_addr + valid_y0.s0 * (int)src_stride_y + z_coord * src_stride_z); float d31 = *(__global float *)(src_addr + valid_y0.s1 * (int)src_stride_y + z_coord * src_stride_z); @@ -918,13 +918,13 @@ __kernel void winograd_input_transform_4x4_3x3_stepz1_nhwc( dst_addr += dst_plane_stride; // Row5 - z_coord = (z * OUTPUT_TILE_H) - PAD_TOP + 5; - valid_y0 = select(y_coord0, -1, (int4)z_coord < 0); - valid_y1 = select(y_coord1, -1, (int2)z_coord < 0); - valid_y0 = select(valid_y0, SRC_DIM_1, (int4)z_coord >= SRC_DIM_2); - valid_y1 = select(valid_y1, SRC_DIM_1, (int2)z_coord >= SRC_DIM_2); - z_coord = clamp(z_coord, 0, SRC_DIM_2 - 1); - z_coord = clamp(z_coord, 0, SRC_DIM_2 - 1); + z_coord = (z * (int)OUTPUT_TILE_H) - (int)PAD_TOP + 5; + valid_y0 = select(y_coord0, (int4) - 1, (int4)z_coord < 0); + valid_y1 = select(y_coord1, (int2) - 1, (int2)z_coord < 0); + valid_y0 = select(valid_y0, (int4)SRC_DIM_1, (int4)z_coord >= (int)SRC_DIM_2); + valid_y1 = select(valid_y1, (int2)SRC_DIM_1, (int2)z_coord >= (int)SRC_DIM_2); + z_coord = clamp(z_coord, 0, (int)SRC_DIM_2 - 1); + z_coord = clamp(z_coord, 0, (int)SRC_DIM_2 - 1); float d50 = *(__global float *)(src_addr + valid_y0.s0 * (int)src_stride_y + z_coord * src_stride_z); float d51 = *(__global float *)(src_addr + valid_y0.s1 * (int)src_stride_y + z_coord * src_stride_z); @@ -998,7 +998,7 @@ __kernel void winograd_input_transform_4x4_5x5_stepz1_nhwc( #if defined(WINOGRAD_INPUT_TRANSFORM_HORIZONTAL) // Clamp coordinates. This clamp is valid for all rows int8 y_coord = (int8)(y * OUTPUT_TILE_W) + (int8)(0, 1, 2, 3, 4, 5, 6, 7) - (int8)PAD_LEFT; - y_coord = clamp(y_coord, -1, SRC_DIM_1); + y_coord = clamp(y_coord, (int8) - 1, (int8)SRC_DIM_1); // Row0 // We can skip the border clamping along the z dimension as we cannot read out-of-bound in case of 5x1 kernels @@ -1025,14 +1025,14 @@ __kernel void winograd_input_transform_4x4_5x5_stepz1_nhwc( #elif defined(WINOGRAD_INPUT_TRANSFORM_VERTICAL) // defined(WINOGRAD_INPUT_TRANSFORM_HORIZONTAL) // We can skip the border clamping along the y dimension as we cannot read out-of-bound in case of 1x5 kernels - int y_coord = y * OUTPUT_TILE_W; + int y_coord = y * (int)OUTPUT_TILE_W; // Row0 // We can skip the border clamping along the z dimension as we cannot read out-of-bound in case of 5x1 kernels int8 z_coord = (int8)(z * OUTPUT_TILE_H) + (int8)(0, 1, 2, 3, 4, 5, 6, 7) - (int8)PAD_TOP; - int8 valid_y = select((int8)y_coord, (int8) - 1, z_coord < (int8)0); // If z < 0, set y to -1 - valid_y = select(valid_y, SRC_DIM_1, z_coord >= (int8)SRC_DIM_2); // If z >= SRC_DIM_2, set y to SRC_DIM_2 - z_coord = clamp(z_coord, 0, SRC_DIM_2 - 1); // Clamp z coordinate + int8 valid_y = select((int8)y_coord, (int8) - 1, z_coord < (int8)0); // If z < 0, set y to -1 + valid_y = select(valid_y, (int8)SRC_DIM_1, z_coord >= (int8)SRC_DIM_2); // If z >= SRC_DIM_2, set y to SRC_DIM_2 + z_coord = clamp(z_coord, (int8)0, (int8)SRC_DIM_2 - 1); // Clamp z coordinate // Load the input tile float8 in_row0; @@ -1057,13 +1057,13 @@ __kernel void winograd_input_transform_4x4_5x5_stepz1_nhwc( // Clamp coordinates. This clamp is valid for all rows int8 y_coord = (int8)(y * OUTPUT_TILE_W) + (int8)(0, 1, 2, 3, 4, 5, 6, 7) - (int8)PAD_LEFT; - y_coord = clamp(y_coord, -1, SRC_DIM_1); + y_coord = clamp(y_coord, (int8) - 1, (int8)SRC_DIM_1); // Row0 - int z_coord = (z * OUTPUT_TILE_H) - PAD_TOP + 0; - int8 valid_y = select(y_coord, -1, (int8)z_coord < 0); // If z < 0, set y to -1 - valid_y = select(valid_y, SRC_DIM_1, (int8)z_coord >= SRC_DIM_2); // If z >= SRC_DIM_2, set y to SRC_DIM_2 - z_coord = clamp(z_coord, 0, SRC_DIM_2 - 1); // Clamp z coordinate + int z_coord = (z * (int)OUTPUT_TILE_H) - (int)PAD_TOP + 0; + int8 valid_y = select(y_coord, (int8) - 1, (int8)z_coord < 0); // If z < 0, set y to -1 + valid_y = select(valid_y, (int8)SRC_DIM_1, (int8)z_coord >= (int)SRC_DIM_2); // If z >= SRC_DIM_2, set y to SRC_DIM_2 + z_coord = clamp(z_coord, 0, (int)SRC_DIM_2 - 1); // Clamp z coordinate // Load the input tile in_row0.s0 = *(__global float *)(src_addr + valid_y.s0 * (int)src_stride_y + z_coord * src_stride_z); @@ -1076,10 +1076,10 @@ __kernel void winograd_input_transform_4x4_5x5_stepz1_nhwc( in_row0.s7 = *(__global float *)(src_addr + valid_y.s7 * (int)src_stride_y + z_coord * src_stride_z); // Row1 - z_coord = (z * OUTPUT_TILE_H) - PAD_TOP + 1; - valid_y = select(y_coord, -1, (int8)z_coord < 0); - valid_y = select(valid_y, SRC_DIM_1, (int8)z_coord >= SRC_DIM_2); - z_coord = clamp(z_coord, 0, SRC_DIM_2 - 1); + z_coord = (z * (int)OUTPUT_TILE_H) - (int)PAD_TOP + 1; + valid_y = select(y_coord, (int8) - 1, (int8)z_coord < 0); + valid_y = select(valid_y, (int8)SRC_DIM_1, (int8)z_coord >= (int)SRC_DIM_2); + z_coord = clamp(z_coord, 0, (int)SRC_DIM_2 - 1); in_row1.s0 = *(__global float *)(src_addr + valid_y.s0 * (int)src_stride_y + z_coord * src_stride_z); in_row1.s1 = *(__global float *)(src_addr + valid_y.s1 * (int)src_stride_y + z_coord * src_stride_z); @@ -1091,10 +1091,10 @@ __kernel void winograd_input_transform_4x4_5x5_stepz1_nhwc( in_row1.s7 = *(__global float *)(src_addr + valid_y.s7 * (int)src_stride_y + z_coord * src_stride_z); // Row2 - z_coord = (z * OUTPUT_TILE_H) - PAD_TOP + 2; - valid_y = select(y_coord, -1, (int8)z_coord < 0); - valid_y = select(valid_y, SRC_DIM_1, (int8)z_coord >= SRC_DIM_2); - z_coord = clamp(z_coord, 0, SRC_DIM_2 - 1); + z_coord = (z * (int)OUTPUT_TILE_H) - (int)PAD_TOP + 2; + valid_y = select(y_coord, (int8) - 1, (int8)z_coord < 0); + valid_y = select(valid_y, (int8)SRC_DIM_1, (int8)z_coord >= (int)SRC_DIM_2); + z_coord = clamp(z_coord, 0, (int)SRC_DIM_2 - 1); in_row2.s0 = *(__global float *)(src_addr + valid_y.s0 * (int)src_stride_y + z_coord * src_stride_z); in_row2.s1 = *(__global float *)(src_addr + valid_y.s1 * (int)src_stride_y + z_coord * src_stride_z); @@ -1106,10 +1106,10 @@ __kernel void winograd_input_transform_4x4_5x5_stepz1_nhwc( in_row2.s7 = *(__global float *)(src_addr + valid_y.s7 * (int)src_stride_y + z_coord * src_stride_z); // Row3 - z_coord = (z * OUTPUT_TILE_H) - PAD_TOP + 3; - valid_y = select(y_coord, -1, (int8)z_coord < 0); - valid_y = select(valid_y, SRC_DIM_1, (int8)z_coord >= SRC_DIM_2); - z_coord = clamp(z_coord, 0, SRC_DIM_2 - 1); + z_coord = (z * (int)OUTPUT_TILE_H) - (int)PAD_TOP + 3; + valid_y = select(y_coord, (int8) - 1, (int8)z_coord < 0); + valid_y = select(valid_y, (int8)SRC_DIM_1, (int8)z_coord >= (int)SRC_DIM_2); + z_coord = clamp(z_coord, 0, (int)SRC_DIM_2 - 1); in_row3.s0 = *(__global float *)(src_addr + valid_y.s0 * (int)src_stride_y + z_coord * src_stride_z); in_row3.s1 = *(__global float *)(src_addr + valid_y.s1 * (int)src_stride_y + z_coord * src_stride_z); @@ -1121,10 +1121,10 @@ __kernel void winograd_input_transform_4x4_5x5_stepz1_nhwc( in_row3.s7 = *(__global float *)(src_addr + valid_y.s7 * (int)src_stride_y + z_coord * src_stride_z); // Row4 - z_coord = (z * OUTPUT_TILE_H) - PAD_TOP + 4; - valid_y = select(y_coord, -1, (int8)z_coord < 0); - valid_y = select(valid_y, SRC_DIM_1, (int8)z_coord >= SRC_DIM_2); - z_coord = clamp(z_coord, 0, SRC_DIM_2 - 1); + z_coord = (z * (int)OUTPUT_TILE_H) - (int)PAD_TOP + 4; + valid_y = select(y_coord, (int8) - 1, (int8)z_coord < 0); + valid_y = select(valid_y, (int8)SRC_DIM_1, (int8)z_coord >= (int)SRC_DIM_2); + z_coord = clamp(z_coord, 0, (int)SRC_DIM_2 - 1); in_row4.s0 = *(__global float *)(src_addr + valid_y.s0 * (int)src_stride_y + z_coord * src_stride_z); in_row4.s1 = *(__global float *)(src_addr + valid_y.s1 * (int)src_stride_y + z_coord * src_stride_z); @@ -1136,10 +1136,10 @@ __kernel void winograd_input_transform_4x4_5x5_stepz1_nhwc( in_row4.s7 = *(__global float *)(src_addr + valid_y.s7 * (int)src_stride_y + z_coord * src_stride_z); // Row5 - z_coord = (z * OUTPUT_TILE_H) - PAD_TOP + 5; - valid_y = select(y_coord, -1, (int8)z_coord < 0); - valid_y = select(valid_y, SRC_DIM_1, (int8)z_coord >= SRC_DIM_2); - z_coord = clamp(z_coord, 0, SRC_DIM_2 - 1); + z_coord = (z * (int)OUTPUT_TILE_H) - (int)PAD_TOP + 5; + valid_y = select(y_coord, (int8) - 1, (int8)z_coord < 0); + valid_y = select(valid_y, (int8)SRC_DIM_1, (int8)z_coord >= (int)SRC_DIM_2); + z_coord = clamp(z_coord, 0, (int)SRC_DIM_2 - 1); in_row5.s0 = *(__global float *)(src_addr + valid_y.s0 * (int)src_stride_y + z_coord * src_stride_z); in_row5.s1 = *(__global float *)(src_addr + valid_y.s1 * (int)src_stride_y + z_coord * src_stride_z); @@ -1151,10 +1151,10 @@ __kernel void winograd_input_transform_4x4_5x5_stepz1_nhwc( in_row5.s7 = *(__global float *)(src_addr + valid_y.s7 * (int)src_stride_y + z_coord * src_stride_z); // Row6 - z_coord = (z * OUTPUT_TILE_H) - PAD_TOP + 6; - valid_y = select(y_coord, -1, (int8)z_coord < 0); - valid_y = select(valid_y, SRC_DIM_1, (int8)z_coord >= SRC_DIM_2); - z_coord = clamp(z_coord, 0, SRC_DIM_2 - 1); + z_coord = (z * (int)OUTPUT_TILE_H) - (int)PAD_TOP + 6; + valid_y = select(y_coord, (int8) - 1, (int8)z_coord < 0); + valid_y = select(valid_y, (int8)SRC_DIM_1, (int8)z_coord >= (int)SRC_DIM_2); + z_coord = clamp(z_coord, 0, (int)SRC_DIM_2 - 1); in_row6.s0 = *(__global float *)(src_addr + valid_y.s0 * (int)src_stride_y + z_coord * src_stride_z); in_row6.s1 = *(__global float *)(src_addr + valid_y.s1 * (int)src_stride_y + z_coord * src_stride_z); @@ -1166,10 +1166,10 @@ __kernel void winograd_input_transform_4x4_5x5_stepz1_nhwc( in_row6.s7 = *(__global float *)(src_addr + valid_y.s7 * (int)src_stride_y + z_coord * src_stride_z); // Row7 - z_coord = (z * OUTPUT_TILE_H) - PAD_TOP + 7; - valid_y = select(y_coord, -1, (int8)z_coord < 0); - valid_y = select(valid_y, SRC_DIM_1, (int8)z_coord >= SRC_DIM_2); - z_coord = clamp(z_coord, 0, SRC_DIM_2 - 1); + z_coord = (z * (int)OUTPUT_TILE_H) - (int)PAD_TOP + 7; + valid_y = select(y_coord, (int8) - 1, (int8)z_coord < 0); + valid_y = select(valid_y, (int8)SRC_DIM_1, (int8)z_coord >= (int)SRC_DIM_2); + z_coord = clamp(z_coord, 0, (int)SRC_DIM_2 - 1); in_row7.s0 = *(__global float *)(src_addr + valid_y.s0 * (int)src_stride_y + z_coord * src_stride_z); in_row7.s1 = *(__global float *)(src_addr + valid_y.s1 * (int)src_stride_y + z_coord * src_stride_z); -- cgit v1.2.1