about | summary | refs | log | tree | commit | diff
path: root/src/core/CL/cl_kernels/winograd_output_transform.cl
diff options
context:
space:
mode:
author Giorgio Arena <giorgio.arena@arm.com> 2018-07-18 11:45:30 +0100
committer Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:54:54 +0000
commit d02eb45b7431d6e7fe9ef32e16475c2f9917e62a (patch)
tree 3e9483d3069347e2d40be2a858116b24fa100d77 /src/core/CL/cl_kernels/winograd_output_transform.cl
parent d9cb05779cefabb090dce680ba807f3e8db169e6 (diff)
download ComputeLibrary-d02eb45b7431d6e7fe9ef32e16475c2f9917e62a.tar.gz
COMPMID-1411 (Nightly) CLWinograd 5x5 mismatches
Change-Id: I1ed50c2593338c9204d4051b3bb7e90158c94681
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/140398
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels/winograd_output_transform.cl')
-rw-r--r-- src/core/CL/cl_kernels/winograd_output_transform.cl 52
1 files changed, 26 insertions, 26 deletions
diff --git a/src/core/CL/cl_kernels/winograd_output_transform.cl b/src/core/CL/cl_kernels/winograd_output_transform.cl
index 7065da1cc5..2228f80e51 100644
--- a/src/core/CL/cl_kernels/winograd_output_transform.cl
+++ b/src/core/CL/cl_kernels/winograd_output_transform.cl
@@ -543,13 +543,13 @@ __kernel void winograd_output_transform_4x4_3x3_nhwc(
*((__global float *)(dst_ptr + offset.s3)) = out03;
#elif defined(WINOGRAD_OUTPUT_TRANSFORM_HORIZONTAL)
// Store the 4x1 output tile
- int offset = dst_offset_first_element_in_bytes + x_out * sizeof(float) + y_out * dst_stride_y + z_out * dst_stride_z;
- int4 mult_y = min(dst_size - offset, 1);
+ int offset = dst_offset_first_element_in_bytes + x_out * sizeof(float) + y_out * dst_stride_y + z_out * dst_stride_z;
+ int mult_y = min(dst_size - offset, 1);
- *((__global float *)(dst_ptr + mult_y.s0 * 0 * dst_stride_y + offset)) = out00;
- *((__global float *)(dst_ptr + mult_y.s0 * 1 * dst_stride_y + offset)) = out01;
- *((__global float *)(dst_ptr + mult_y.s0 * 2 * dst_stride_y + offset)) = out02;
- *((__global float *)(dst_ptr + mult_y.s0 * 3 * dst_stride_y + offset)) = out03;
+ *((__global float *)(dst_ptr + mult_y * 0 * dst_stride_y + offset)) = out00;
+ *((__global float *)(dst_ptr + mult_y * 1 * dst_stride_y + offset)) = out01;
+ *((__global float *)(dst_ptr + mult_y * 2 * dst_stride_y + offset)) = out02;
+ *((__global float *)(dst_ptr + mult_y * 3 * dst_stride_y + offset)) = out03;
#else // defined(WINOGRAD_OUTPUT_TRANSFORM_HORIZONTAL)
// Get output address
int4 offset = (int4)(dst_offset_first_element_in_bytes + x_out * sizeof(float) + y_out * dst_stride_y + z_out * dst_stride_z);
@@ -681,14 +681,14 @@ __kernel void winograd_output_transform_4x4_5x5_nchw(
#endif // defined(WINOGRAD_OUTPUT_TRANSFORM_VERTICAL)
#else // defined(WINOGRAD_OUTPUT_TRANSFORM_HORIZONTAL) || defined(WINOGRAD_OUTPUT_TRANSFORM_VERTICAL)
- float d10 = *((__global float *)(src_addr + 8 * src_stride_z));
- float d11 = *((__global float *)(src_addr + 9 * src_stride_z));
- float d12 = *((__global float *)(src_addr + 10 * src_stride_z));
- float d13 = *((__global float *)(src_addr + 11 * src_stride_z));
- float d14 = *((__global float *)(src_addr + 12 * src_stride_z));
- float d15 = *((__global float *)(src_addr + 13 * src_stride_z));
- float d16 = *((__global float *)(src_addr + 14 * src_stride_z));
- float d17 = *((__global float *)(src_addr + 15 * src_stride_z));
+ float d10 = *((__global float *)(src_addr + 8 * src_stride_z));
+ float d11 = *((__global float *)(src_addr + 9 * src_stride_z));
+ float d12 = *((__global float *)(src_addr + 10 * src_stride_z));
+ float d13 = *((__global float *)(src_addr + 11 * src_stride_z));
+ float d14 = *((__global float *)(src_addr + 12 * src_stride_z));
+ float d15 = *((__global float *)(src_addr + 13 * src_stride_z));
+ float d16 = *((__global float *)(src_addr + 14 * src_stride_z));
+ float d17 = *((__global float *)(src_addr + 15 * src_stride_z));
float d20 = *((__global float *)(src_addr + 16 * src_stride_z));
float d21 = *((__global float *)(src_addr + 17 * src_stride_z));
@@ -998,18 +998,18 @@ __kernel void winograd_output_transform_4x4_5x5_nhwc(
*(__global float *)(dst_ptr + mult_y.s0 * 1 * dst_stride_y + offset.s0) = out_col1.s0;
*(__global float *)(dst_ptr + mult_y.s0 * 2 * dst_stride_y + offset.s0) = out_col2.s0;
*(__global float *)(dst_ptr + mult_y.s0 * 3 * dst_stride_y + offset.s0) = out_col3.s0;
- *(__global float *)(dst_ptr + mult_y.s0 * 0 * dst_stride_y + offset.s1) = out_col0.s1;
- *(__global float *)(dst_ptr + mult_y.s0 * 1 * dst_stride_y + offset.s1) = out_col1.s1;
- *(__global float *)(dst_ptr + mult_y.s0 * 2 * dst_stride_y + offset.s1) = out_col2.s1;
- *(__global float *)(dst_ptr + mult_y.s0 * 3 * dst_stride_y + offset.s1) = out_col3.s1;
- *(__global float *)(dst_ptr + mult_y.s0 * 0 * dst_stride_y + offset.s2) = out_col0.s2;
- *(__global float *)(dst_ptr + mult_y.s0 * 1 * dst_stride_y + offset.s2) = out_col1.s2;
- *(__global float *)(dst_ptr + mult_y.s0 * 2 * dst_stride_y + offset.s2) = out_col2.s2;
- *(__global float *)(dst_ptr + mult_y.s0 * 3 * dst_stride_y + offset.s2) = out_col3.s2;
- *(__global float *)(dst_ptr + mult_y.s0 * 0 * dst_stride_y + offset.s3) = out_col0.s3;
- *(__global float *)(dst_ptr + mult_y.s0 * 1 * dst_stride_y + offset.s3) = out_col1.s3;
- *(__global float *)(dst_ptr + mult_y.s0 * 2 * dst_stride_y + offset.s3) = out_col2.s3;
- *(__global float *)(dst_ptr + mult_y.s0 * 3 * dst_stride_y + offset.s3) = out_col3.s3;
+ *(__global float *)(dst_ptr + mult_y.s1 * 0 * dst_stride_y + offset.s1) = out_col0.s1;
+ *(__global float *)(dst_ptr + mult_y.s1 * 1 * dst_stride_y + offset.s1) = out_col1.s1;
+ *(__global float *)(dst_ptr + mult_y.s1 * 2 * dst_stride_y + offset.s1) = out_col2.s1;
+ *(__global float *)(dst_ptr + mult_y.s1 * 3 * dst_stride_y + offset.s1) = out_col3.s1;
+ *(__global float *)(dst_ptr + mult_y.s2 * 0 * dst_stride_y + offset.s2) = out_col0.s2;
+ *(__global float *)(dst_ptr + mult_y.s2 * 1 * dst_stride_y + offset.s2) = out_col1.s2;
+ *(__global float *)(dst_ptr + mult_y.s2 * 2 * dst_stride_y + offset.s2) = out_col2.s2;
+ *(__global float *)(dst_ptr + mult_y.s2 * 3 * dst_stride_y + offset.s2) = out_col3.s2;
+ *(__global float *)(dst_ptr + mult_y.s3 * 0 * dst_stride_y + offset.s3) = out_col0.s3;
+ *(__global float *)(dst_ptr + mult_y.s3 * 1 * dst_stride_y + offset.s3) = out_col1.s3;
+ *(__global float *)(dst_ptr + mult_y.s3 * 2 * dst_stride_y + offset.s3) = out_col2.s3;
+ *(__global float *)(dst_ptr + mult_y.s3 * 3 * dst_stride_y + offset.s3) = out_col3.s3;
#endif // defined(WINOGRAD_OUTPUT_TRANSFORM_HORIZONTAL) || defined(WINOGRAD_OUTPUT_TRANSFORM_VERTICAL)
}