From 51146c5006290541f029c534ed6a07cb8f579b21 Mon Sep 17 00:00:00 2001 From: Michalis Spyrou Date: Fri, 12 Jul 2019 14:42:29 +0100 Subject: COMPMID-2468: (Nightly) Bug in CL QSYMM16 Change-Id: I08001e878520485d7281e5fcc60ea81686992961 Signed-off-by: Michalis Spyrou Reviewed-on: https://review.mlplatform.org/c/1534 Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins Reviewed-by: Michele Di Giorgio --- src/core/CL/cl_kernels/gemmlowp.cl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/core/CL/cl_kernels/gemmlowp.cl b/src/core/CL/cl_kernels/gemmlowp.cl index 4b869554c5..d6494fe380 100644 --- a/src/core/CL/cl_kernels/gemmlowp.cl +++ b/src/core/CL/cl_kernels/gemmlowp.cl @@ -2863,7 +2863,7 @@ __kernel void gemmlowp_output_stage_quantize_down_fixedpoint(TENSOR3D_DECLARATIO #if defined(RESULT_FIXEDPOINT_MULTIPLIER) && defined(RESULT_SHIFT) -/** This OpenCL kernel is used to quantize down the int32 accumulator values of GEMMLowp to QASYMM16 +/** This OpenCL kernel is used to quantize down the int32 accumulator values of GEMMLowp to QSYMM16 * * This kernel takes a final int32 accumulator value (the output of @ref CLGEMMLowpMatrixMultiplyKernel), and processes it to obtain the final QSYMM16 value. * The following computations will be performed by the kernel: @@ -2913,15 +2913,15 @@ __kernel void gemmlowp_output_stage_quantize_down_fixedpoint_qsymm16(TENSOR3D_DE int y = get_global_id(1); int z = get_global_id(2); - __global short *src_addr = src_ptr + src_offset_first_element_in_bytes + x * sizeof(int) + y * src_stride_y + z * src_stride_z; + __global uchar *src_addr = src_ptr + src_offset_first_element_in_bytes + x * sizeof(int) + y * src_stride_y + z * src_stride_z; - __global short *dst_addr = dst_ptr + dst_offset_first_element_in_bytes + x * 2 + y * dst_stride_y + z * dst_stride_z; + __global uchar *dst_addr = dst_ptr + dst_offset_first_element_in_bytes + x * 2 + y * dst_stride_y + z * dst_stride_z; int4 input_values = vload4(0, (__global int *)src_addr); #if defined(ADD_BIAS) // Add bias - __global short *bias_addr = biases_ptr + biases_offset_first_element_in_bytes + x * sizeof(int); + __global uchar *bias_addr = biases_ptr + biases_offset_first_element_in_bytes + x * sizeof(int); int4 biases_values = vload4(0, (__global int *)bias_addr); input_values += (int4)biases_values; @@ -2940,7 +2940,7 @@ __kernel void gemmlowp_output_stage_quantize_down_fixedpoint_qsymm16(TENSOR3D_DE #endif // defined(MAX_BOUND) // Store the result - vstore4(res, 0, dst_addr); + vstore4(res, 0, (__global short *)dst_addr); } #endif // defined(RESULT_FIXEDPOINT_MULTIPLIER) && defined(RESULT_SHIFT) -- cgit v1.2.1