aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/cl_kernels/direct_convolution_quantized.cl
diff options
context:
space:
mode:
authorGiorgio Arena <giorgio.arena@arm.com>2020-03-02 09:49:29 +0000
committerGiorgio Arena <giorgio.arena@arm.com>2020-03-02 13:09:58 +0000
commit3c4bf0c4eab5ead756c472f17ddf008b882cc905 (patch)
treef08887347422cf6284d20a7b280c9bc8718b009d /src/core/CL/cl_kernels/direct_convolution_quantized.cl
parent2dc7e407141a1b213a31b9fa78a958d6652d4889 (diff)
downloadComputeLibrary-3c4bf0c4eab5ead756c472f17ddf008b882cc905.tar.gz
COMPMID-3234 CLDirectConvolutionLayer QASYMM8 NHWC mismatches
Change-Id: Ic29d20d77fe0a77c28a635132a69a2609a3dcc1a Signed-off-by: Giorgio Arena <giorgio.arena@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/2815 Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels/direct_convolution_quantized.cl')
-rw-r--r--src/core/CL/cl_kernels/direct_convolution_quantized.cl24
1 files changed, 12 insertions, 12 deletions
diff --git a/src/core/CL/cl_kernels/direct_convolution_quantized.cl b/src/core/CL/cl_kernels/direct_convolution_quantized.cl
index 3324e9caeb..e48c26e702 100644
--- a/src/core/CL/cl_kernels/direct_convolution_quantized.cl
+++ b/src/core/CL/cl_kernels/direct_convolution_quantized.cl
@@ -321,18 +321,18 @@ __kernel void direct_convolution_quantized(
int8 values0 = 0;
+ const int id0 = get_global_id(0);
const int y_coord = (get_global_id(2) * STRIDE_Y) - PAD_TOP;
__global DATA_TYPE *weights_addr = (__global DATA_TYPE *)tensor3D_offset(&weights, 0, 0, 0);
- __global DATA_TYPE *src_addr = (__global DATA_TYPE *)offset(&src, 0, 0) - src_stride_x * get_global_id(0) + y_coord * (int)src_stride_z;
+ __global DATA_TYPE *src_addr = (__global DATA_TYPE *)offset(&src, 0, 0) - src_stride_x * id0 + y_coord * (int)src_stride_z;
- const int kernel_index = get_global_id(2);
- weights_addr += kernel_index * weights_stride_w;
+ weights_addr += id0 * weights_stride_w;
for(volatile int d = 0; d < WEIGHTS_DEPTH; ++d)
{
#if KERNEL_SIZE == 5
-#if(PAD_TOP == 1)
+#if(PAD_TOP == 1) || (PAD_BOTTM == 1)
if(y_coord < 0) // special case Z = -1 doesn't exists
{
CONVOLUTION1x5(values0, (__global DATA_TYPE *)(src_addr + 1 * src_stride_z), (__global DATA_TYPE *)(weights_addr + 1 * weights_stride_z));
@@ -355,7 +355,7 @@ __kernel void direct_convolution_quantized(
CONVOLUTION1x5(values0, (__global DATA_TYPE *)(src_addr + 3 * src_stride_z), (__global DATA_TYPE *)(weights_addr + 3 * weights_stride_z));
CONVOLUTION1x5(values0, (__global DATA_TYPE *)(src_addr + 4 * src_stride_z), (__global DATA_TYPE *)(weights_addr + 4 * weights_stride_z));
}
-#elif(PAD_TOP == 2)
+#elif(PAD_TOP == 2) || (PAD_BOTTM == 2)
if(y_coord < -1)
{
CONVOLUTION1x5(values0, (__global DATA_TYPE *)(src_addr + 2 * src_stride_z), (__global DATA_TYPE *)(weights_addr + 2 * weights_stride_z));
@@ -390,22 +390,22 @@ __kernel void direct_convolution_quantized(
CONVOLUTION1x5(values0, (__global DATA_TYPE *)(src_addr + 3 * src_stride_z), (__global DATA_TYPE *)(weights_addr + 3 * weights_stride_z));
CONVOLUTION1x5(values0, (__global DATA_TYPE *)(src_addr + 4 * src_stride_z), (__global DATA_TYPE *)(weights_addr + 4 * weights_stride_z));
}
-#else /* PAD_TOP == 2 */
+#else /* PAD_TOP == 2 || || PAD_BOTTM == 2 */
CONVOLUTION1x5(values0, (__global DATA_TYPE *)(src_addr + 0 * src_stride_z), (__global DATA_TYPE *)(weights_addr + 0 * weights_stride_z));
CONVOLUTION1x5(values0, (__global DATA_TYPE *)(src_addr + 1 * src_stride_z), (__global DATA_TYPE *)(weights_addr + 1 * weights_stride_z));
CONVOLUTION1x5(values0, (__global DATA_TYPE *)(src_addr + 2 * src_stride_z), (__global DATA_TYPE *)(weights_addr + 2 * weights_stride_z));
CONVOLUTION1x5(values0, (__global DATA_TYPE *)(src_addr + 3 * src_stride_z), (__global DATA_TYPE *)(weights_addr + 3 * weights_stride_z));
CONVOLUTION1x5(values0, (__global DATA_TYPE *)(src_addr + 4 * src_stride_z), (__global DATA_TYPE *)(weights_addr + 4 * weights_stride_z));
-#endif /* PAD_TOP == 1 */
+#endif /* PAD_TOP == 1 || || PAD_BOTTM == 1 */
#elif KERNEL_SIZE == 3
-#if PAD_TOP > 0
+#if(PAD_TOP > 0) || (PAD_BOTTOM > 0)
if(y_coord < 0) // special case Z = -1 doesn't exists
{
//skip first row and load the two next ones
CONVOLUTION1x3(values0, (__global DATA_TYPE *)(src_addr + 1 * src_stride_z), (__global DATA_TYPE *)(weights_addr + 1 * weights_stride_z));
CONVOLUTION1x3(values0, (__global DATA_TYPE *)(src_addr + 2 * src_stride_z), (__global DATA_TYPE *)(weights_addr + 2 * weights_stride_z));
}
- else if(y_coord == (SRC_HEIGHT - PAD_TOP - 1))
+ else if(y_coord == (SRC_HEIGHT - PAD_BOTTOM - 1))
{
// special case when computing the last row of the output we must read the last three rows from the input buffer (including padding) but the
// Z axis has no padding at all.
@@ -418,11 +418,11 @@ __kernel void direct_convolution_quantized(
CONVOLUTION1x3(values0, (__global DATA_TYPE *)(src_addr + 1 * src_stride_z), (__global DATA_TYPE *)(weights_addr + 1 * weights_stride_z));
CONVOLUTION1x3(values0, (__global DATA_TYPE *)(src_addr + 2 * src_stride_z), (__global DATA_TYPE *)(weights_addr + 2 * weights_stride_z));
}
-#else // PAD_TOP > 0
+#else // PAD_TOP > 0 || PAD_BOTTOM > 0
CONVOLUTION1x3(values0, (__global DATA_TYPE *)(src_addr + 0 * src_stride_z), (__global DATA_TYPE *)(weights_addr + 0 * weights_stride_z));
CONVOLUTION1x3(values0, (__global DATA_TYPE *)(src_addr + 1 * src_stride_z), (__global DATA_TYPE *)(weights_addr + 1 * weights_stride_z));
CONVOLUTION1x3(values0, (__global DATA_TYPE *)(src_addr + 2 * src_stride_z), (__global DATA_TYPE *)(weights_addr + 2 * weights_stride_z));
-#endif // PAD_TOP > 0
+#endif // PAD_TOP > 0 || PAD_BOTTOM > 0
#elif KERNEL_SIZE == 1
int weight = convert_int(*(__global DATA_TYPE *)weights_addr);
int8 input_value = convert_int8(INPUT_VALUE((__global DATA_TYPE *)src_addr, src_stride_y));
@@ -435,7 +435,7 @@ __kernel void direct_convolution_quantized(
#ifdef HAS_BIAS
Vector biases = CONVERT_TO_VECTOR_STRUCT_NO_STEP(biases);
- __global int *bias_addr = ((__global int *)(vector_offset(&biases, get_global_id(0))));
+ __global int *bias_addr = ((__global int *)(vector_offset(&biases, id0)));
values0 += (int8)(*bias_addr);
#endif /* defined(HAS_BIAS) */