aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/cl_kernels/gemmlowp.cl
diff options
context:
space:
mode:
authorMichele Di Giorgio <michele.digiorgio@arm.com>2020-10-27 12:44:17 +0000
committerMichele Di Giorgio <michele.digiorgio@arm.com>2020-10-29 17:05:56 +0000
commit27d92fd5da6ad16c9e3b38d82402a86cf7b208aa (patch)
tree61438af5d104a55bbf4a90735ad430f99c73e45c /src/core/CL/cl_kernels/gemmlowp.cl
parent3673839cde43cc82c186a196c7e1ce3155457b0e (diff)
downloadComputeLibrary-27d92fd5da6ad16c9e3b38d82402a86cf7b208aa.tar.gz
COMPMID-3928: Fix output conversion in gemmlowp_mm_native
This patch solves the following issues that arose from nightly tests: - The accumulated result of gemmlowp_mm_native can be either uint or int and in order to be stored in memory we need to convert it to int. - The RHS matrix still needs padding on the X dimension. Hence, revert few changes to add the necessary padding elements. - Replace zero padding validation tests with assertion in the configure method of the kernel. Change-Id: Ib6614a91bd0e98f2b850f52eef14d4fbf55517c8 Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4259 Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels/gemmlowp.cl')
-rw-r--r--src/core/CL/cl_kernels/gemmlowp.cl5
1 files changed, 3 insertions, 2 deletions
diff --git a/src/core/CL/cl_kernels/gemmlowp.cl b/src/core/CL/cl_kernels/gemmlowp.cl
index 059c2e14df..bde7dd016f 100644
--- a/src/core/CL/cl_kernels/gemmlowp.cl
+++ b/src/core/CL/cl_kernels/gemmlowp.cl
@@ -1100,8 +1100,9 @@ __kernel void gemmlowp_mm_native(IMAGE_DECLARATION(lhs),
const bool cond_y = y == 0;
const bool cond_x = ((x + 1) * N0 >= N);
- // Store output block
- STORE_BLOCK_BOUNDARY_AWARE(M0, N0, int, c, dst_addr, dst_stride_y, zout, PARTIAL_STORE_M0, PARTIAL_STORE_N0, cond_y, cond_x);
+ // Convert and store output block
+ REPEAT_VAR_INIT_CONVERT(M0, VEC_DATA_TYPE(int, N0), c, res); // resN = CONVERT(cN, VEC_DATA_TYPE(int, N0));
+ STORE_BLOCK_BOUNDARY_AWARE(M0, N0, int, res, dst_addr, dst_stride_y, zout, PARTIAL_STORE_M0, PARTIAL_STORE_N0, cond_y, cond_x);
}
#endif // defined(M0) && defined(N0) && defined(K0) && defined(K) && defined(PARTIAL_STORE_M0) && defined(PARTIAL_STORE_N0)