diff options
author | Gian Marco Iodice <gianmarco.iodice@arm.com> | 2021-04-13 15:53:20 +0100 |
---|---|---|
committer | Gian Marco Iodice <gianmarco.iodice@arm.com> | 2021-04-13 16:02:42 +0000 |
commit | 598e3a8f32c68129958b9f6a40c684842f708f8a (patch) | |
tree | 9affb9fcf1bc2b58450596d37a1f30c227180e52 /src/core/CL/cl_kernels/winograd_output_transform.cl | |
parent | f4edddb8968c6e424333a066cf6fbbf9c1426f13 (diff) | |
download | ComputeLibrary-598e3a8f32c68129958b9f6a40c684842f708f8a.tar.gz |
Fix TILE initialization in direct convolution and winograd transforms
- The array initializer for the TILE object cannot always be utilized and so we
do require to manually initialize the TILE with the LOOP_UNROLLING macro
- Resolves COMPMID-4371
Change-Id: I2598354b9fae84c5e3bd11219fffdcdc297215e1
Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5417
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels/winograd_output_transform.cl')
-rw-r--r-- | src/core/CL/cl_kernels/winograd_output_transform.cl | 74 |
1 files changed, 55 insertions, 19 deletions
diff --git a/src/core/CL/cl_kernels/winograd_output_transform.cl b/src/core/CL/cl_kernels/winograd_output_transform.cl index f257825967..837e43419a 100644 --- a/src/core/CL/cl_kernels/winograd_output_transform.cl +++ b/src/core/CL/cl_kernels/winograd_output_transform.cl @@ -243,6 +243,12 @@ __kernel void winograd_output_transform_2x2_7x7_nhwc( src_indirect_y[i].v += bout * (int)(_ISRC_HEIGHT * 8); } + // Initialize the input tile + LOOP_UNROLLING(int, i, 0, 8, 1) + { + in[i].v = 0; + } + // Load the values across the 8 channels to compose the 8x1 tile T_LOAD_INDIRECT(DATA_TYPE, 8, N0, BUFFER, src, cout, src_stride_y, src_indirect_y, in); @@ -294,6 +300,12 @@ __kernel void winograd_output_transform_2x2_7x7_nhwc( src_indirect_y[i].v += bout * (int)(_ISRC_HEIGHT * 64); } + // Initialize the input tile + LOOP_UNROLLING(int, i, 0, 64, 1) + { + in[i].v = 0; + } + // Load the values across the 64 channels to compose the 8x8 tile T_LOAD_INDIRECT(DATA_TYPE, 64, N0, BUFFER, src, cout, src_stride_y, src_indirect_y, in); @@ -615,9 +627,9 @@ __kernel void winograd_output_transform_4x4_3x3_nhwc( #if defined(WINOGRAD_OUTPUT_TRANSFORM_HORIZONTAL) || defined(WINOGRAD_OUTPUT_TRANSFORM_VERTICAL) - TILE(DATA_TYPE, 6, N0, in) = { { { 0 } } }; - TILE(DATA_TYPE, 4, N0, out) = { { { 0 } } }; - TILE(uint, 6, 1, src_indirect_y) = { { { 0 } } }; + TILE(DATA_TYPE, 6, N0, in); + TILE(DATA_TYPE, 4, N0, out); + TILE(uint, 6, 1, src_indirect_y); LOOP_UNROLLING(int, i, 0, 6, 1) { @@ -625,6 +637,12 @@ __kernel void winograd_output_transform_4x4_3x3_nhwc( src_indirect_y[i].v += bout * (int)(SRC_HEIGHT * 6); } + // Initialize the input tile + LOOP_UNROLLING(int, i, 0, 6, 1) + { + in[i].v = 0; + } + // Load the values across the 36 channels to compose the 6x6 or 6x1 tile T_LOAD_INDIRECT(DATA_TYPE, 6, N0, BUFFER, src, cout, src_stride_y, src_indirect_y, in); @@ -648,7 +666,7 @@ __kernel void winograd_output_transform_4x4_3x3_nhwc( T_ACTIVATION(DATA_TYPE, 4, N0, ACTIVATION_TYPE, A_VAL, B_VAL, out, out); - TILE(uint, 4, 1, dst_indirect_y) = { { { 0 } } }; + TILE(uint, 4, 1, dst_indirect_y); // Calculate the destination indirect Y #if defined(WINOGRAD_OUTPUT_TRANSFORM_VERTICAL) @@ -673,9 +691,9 @@ __kernel void winograd_output_transform_4x4_3x3_nhwc( #else // defined(WINOGRAD_OUTPUT_TRANSFORM_HORIZONTAL) || defined(WINOGRAD_OUTPUT_TRANSFORM_VERTICAL) // Calculate the indirect Y for the source tensor - TILE(DATA_TYPE, 36, N0, in) = { { { 0 } } }; - TILE(DATA_TYPE, 4, N0, tmp) = { { { 0 } } }; - TILE(uint, 36, 1, src_indirect_y) = { { { 0 } } }; + TILE(DATA_TYPE, 36, N0, in); + TILE(DATA_TYPE, 4, N0, tmp); + TILE(uint, 36, 1, src_indirect_y); LOOP_UNROLLING(int, i, 0, 36, 1) { @@ -683,6 +701,12 @@ __kernel void winograd_output_transform_4x4_3x3_nhwc( src_indirect_y[i].v += bout * (int)(SRC_HEIGHT * 36); } + // Initialize the input tile + LOOP_UNROLLING(int, i, 0, 36, 1) + { + in[i].v = 0; + } + // Load the values across the 36 channels to compose the 6x6 or 6x1 tile T_LOAD_INDIRECT(DATA_TYPE, 36, N0, BUFFER, src, cout, src_stride_y, src_indirect_y, in); @@ -700,7 +724,7 @@ __kernel void winograd_output_transform_4x4_3x3_nhwc( } // Compute the output tile - TILE(DATA_TYPE, 16, N0, out) = { { { 0 } } }; + TILE(DATA_TYPE, 16, N0, out); LOOP_UNROLLING(int, i, 0, 4, 1) { @@ -729,7 +753,7 @@ __kernel void winograd_output_transform_4x4_3x3_nhwc( T_ACTIVATION(DATA_TYPE, 16, N0, ACTIVATION_TYPE, A_VAL, B_VAL, out, out); - TILE(uint, 16, 1, dst_indirect_y) = { { { 0 } } }; + TILE(uint, 16, 1, dst_indirect_y); // Calculate the destination indirect Y LOOP_UNROLLING(int, yk, 0, 4, 1) @@ -1043,10 +1067,10 @@ __kernel void winograd_output_transform_4x4_5x5_nhwc( const int bout = GET_SPATIAL_IDX(2, 1, 0); // BATCH SIZE IDX #if defined(WINOGRAD_OUTPUT_TRANSFORM_HORIZONTAL) || defined(WINOGRAD_OUTPUT_TRANSFORM_VERTICAL) - TILE(DATA_TYPE, 8, N0, in) = { { { 0 } } }; - TILE(DATA_TYPE, 4, N0, out) = { { { 0 } } }; - TILE(DATA_TYPE, 4, N0, tmp) = { { { 0 } } }; - TILE(uint, 8, 1, src_indirect_y) = { { { 0 } } }; + TILE(DATA_TYPE, 8, N0, in); + TILE(DATA_TYPE, 4, N0, out); + TILE(DATA_TYPE, 4, N0, tmp); + TILE(uint, 8, 1, src_indirect_y); LOOP_UNROLLING(int, i, 0, 8, 1) { @@ -1054,6 +1078,12 @@ __kernel void winograd_output_transform_4x4_5x5_nhwc( src_indirect_y[i].v += bout * (int)(SRC_HEIGHT * 8); } + // Initialize the input tile + LOOP_UNROLLING(int, i, 0, 8, 1) + { + in[i].v = 0; + } + // "in" contains 1x8 or 8x1 tile here T_LOAD_INDIRECT(DATA_TYPE, 8, N0, BUFFER, src, cout, src_stride_y, src_indirect_y, in); @@ -1081,7 +1111,7 @@ __kernel void winograd_output_transform_4x4_5x5_nhwc( T_ACTIVATION(DATA_TYPE, 4, N0, ACTIVATION_TYPE, A_VAL, B_VAL, out, out); - TILE(uint, 4, 1, dst_indirect_y) = { { { 0 } } }; + TILE(uint, 4, 1, dst_indirect_y); // Calculate the destination indirect Y #if defined(WINOGRAD_OUTPUT_TRANSFORM_VERTICAL) @@ -1105,9 +1135,9 @@ __kernel void winograd_output_transform_4x4_5x5_nhwc( #else // defined(WINOGRAD_OUTPUT_TRANSFORM_HORIZONTAL) || defined(WINOGRAD_OUTPUT_TRANSFORM_VERTICAL) // Calculate the indirect Y for the source tensor - TILE(DATA_TYPE, 64, N0, in) = { { { 0 } } }; - TILE(DATA_TYPE, 6, N0, tmp) = { { { 0 } } }; - TILE(uint, 64, 1, src_indirect_y) = { { { 0 } } }; + TILE(DATA_TYPE, 64, N0, in); + TILE(DATA_TYPE, 6, N0, tmp); + TILE(uint, 64, 1, src_indirect_y); LOOP_UNROLLING(int, i, 0, 64, 1) { @@ -1115,6 +1145,12 @@ __kernel void winograd_output_transform_4x4_5x5_nhwc( src_indirect_y[i].v += bout * (int)(SRC_HEIGHT * 64); } + // Initialize the input tile + LOOP_UNROLLING(int, i, 0, 64, 1) + { + in[i].v = 0; + } + // "in" here is 8x8 tile T_LOAD_INDIRECT(DATA_TYPE, 64, N0, BUFFER, src, cout, src_stride_y, src_indirect_y, in); @@ -1138,7 +1174,7 @@ __kernel void winograd_output_transform_4x4_5x5_nhwc( } // Compute the output tile - TILE(DATA_TYPE, 16, N0, out) = { { { 0 } } }; + TILE(DATA_TYPE, 16, N0, out); // in * A, with in = A^T * in as above LOOP_UNROLLING(int, i, 0, 4, 1) @@ -1173,7 +1209,7 @@ __kernel void winograd_output_transform_4x4_5x5_nhwc( T_ACTIVATION(DATA_TYPE, 16, N0, ACTIVATION_TYPE, A_VAL, B_VAL, out, out); - TILE(uint, 16, 1, dst_indirect_y) = { { { 0 } } }; + TILE(uint, 16, 1, dst_indirect_y); // Calculate the destination indirect Y LOOP_UNROLLING(int, yk, 0, 4, 1) |