aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Michalis Spyrou <michalis.spyrou@arm.com> 2019-07-12 14:42:29 +0100
committer: Michele Di Giorgio <michele.digiorgio@arm.com> 2019-07-12 16:14:26 +0000
commit: 51146c5006290541f029c534ed6a07cb8f579b21 (patch)
tree: 05504a102321f17f347a659b25ad7ba7ed0ac93d
parent: 4c7585178385241f87288b7903d760d4b4822c6e (diff)
download: ComputeLibrary-51146c5006290541f029c534ed6a07cb8f579b21.tar.gz
COMPMID-2468: (Nightly) Bug in CL QSYMM16
Change-Id: I08001e878520485d7281e5fcc60ea81686992961
Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com>
Reviewed-on: https://review.mlplatform.org/c/1534
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
-rw-r--r-- src/core/CL/cl_kernels/gemmlowp.cl 10
1 files changed, 5 insertions, 5 deletions
diff --git a/src/core/CL/cl_kernels/gemmlowp.cl b/src/core/CL/cl_kernels/gemmlowp.cl
index 4b869554c5..d6494fe380 100644
--- a/src/core/CL/cl_kernels/gemmlowp.cl
+++ b/src/core/CL/cl_kernels/gemmlowp.cl
@@ -2863,7 +2863,7 @@ __kernel void gemmlowp_output_stage_quantize_down_fixedpoint(TENSOR3D_DECLARATIO
#if defined(RESULT_FIXEDPOINT_MULTIPLIER) && defined(RESULT_SHIFT)
-/** This OpenCL kernel is used to quantize down the int32 accumulator values of GEMMLowp to QASYMM16
+/** This OpenCL kernel is used to quantize down the int32 accumulator values of GEMMLowp to QSYMM16
*
* This kernel takes a final int32 accumulator value (the output of @ref CLGEMMLowpMatrixMultiplyKernel), and processes it to obtain the final QSYMM16 value.
* The following computations will be performed by the kernel:
@@ -2913,15 +2913,15 @@ __kernel void gemmlowp_output_stage_quantize_down_fixedpoint_qsymm16(TENSOR3D_DE
int y = get_global_id(1);
int z = get_global_id(2);
- __global short *src_addr = src_ptr + src_offset_first_element_in_bytes + x * sizeof(int) + y * src_stride_y + z * src_stride_z;
+ __global uchar *src_addr = src_ptr + src_offset_first_element_in_bytes + x * sizeof(int) + y * src_stride_y + z * src_stride_z;
- __global short *dst_addr = dst_ptr + dst_offset_first_element_in_bytes + x * 2 + y * dst_stride_y + z * dst_stride_z;
+ __global uchar *dst_addr = dst_ptr + dst_offset_first_element_in_bytes + x * 2 + y * dst_stride_y + z * dst_stride_z;
int4 input_values = vload4(0, (__global int *)src_addr);
#if defined(ADD_BIAS)
// Add bias
- __global short *bias_addr = biases_ptr + biases_offset_first_element_in_bytes + x * sizeof(int);
+ __global uchar *bias_addr = biases_ptr + biases_offset_first_element_in_bytes + x * sizeof(int);
int4 biases_values = vload4(0, (__global int *)bias_addr);
input_values += (int4)biases_values;
@@ -2940,7 +2940,7 @@ __kernel void gemmlowp_output_stage_quantize_down_fixedpoint_qsymm16(TENSOR3D_DE
#endif // defined(MAX_BOUND)
// Store the result
- vstore4(res, 0, dst_addr);
+ vstore4(res, 0, (__global short *)dst_addr);
}
#endif // defined(RESULT_FIXEDPOINT_MULTIPLIER) && defined(RESULT_SHIFT)