diff options
author | Georgios Pinitas <georgios.pinitas@arm.com> | 2018-09-21 16:33:15 +0100 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:55:19 +0000 |
commit | 932491f44d51940d82514417a82e43cb11b06bd4 (patch) | |
tree | cbd5a422dcc0650c0163c7db93792c42e40e3886 /src/core/CL/cl_kernels/gemmlowp.cl | |
parent | eb027e933758b1e749f0f6bd2817ee8979ef903c (diff) | |
download | ComputeLibrary-932491f44d51940d82514417a82e43cb11b06bd4.tar.gz |
COMPMID-1519: Add support for 3D input/output in CLGEMMLowpOutputStage
Change-Id: I637add70310d2da4d82b236a6352af9d33be17a1
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/149706
Reviewed-by: Isabella Gottardi <isabella.gottardi@arm.com>
Reviewed-by: Michele DiGiorgio <michele.digiorgio@arm.com>
Tested-by: bsgcomp <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels/gemmlowp.cl')
-rw-r--r-- | src/core/CL/cl_kernels/gemmlowp.cl | 12 |
1 files changed, 12 insertions, 0 deletions
```diff
diff --git a/src/core/CL/cl_kernels/gemmlowp.cl b/src/core/CL/cl_kernels/gemmlowp.cl
index e52f1ea486..e8124e7aa8 100644
--- a/src/core/CL/cl_kernels/gemmlowp.cl
+++ b/src/core/CL/cl_kernels/gemmlowp.cl
@@ -2222,17 +2222,29 @@ __kernel void gemmlowp_output_stage_quantize_down(TENSOR3D_DECLARATION(src),
  * @param[in] dst_step_y dst_gx_stride_y * number of elements along Y processed per workitem(in bytes)
  * @param[in] dst_stride_z Stride of the source tensor in Z dimension (in bytes)
  * @param[in] dst_step_z src_stride_z * number of elements along Z processed per workitem(in bytes)
+ * @param[in] dst_stride_w Stride of the source tensor in W dimension (in bytes)
+ * @param[in] dst_step_w src_stride_w * number of elements along W processed per workitem(in bytes)
  * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor
  */
 __kernel void gemmlowp_output_stage_quantize_down_fixedpoint(TENSOR3D_DECLARATION(src),
 #if defined(ADD_BIAS)
                                                              VECTOR_DECLARATION(biases),
 #endif // defined(ADD_BIAS)
+#if defined(DST_HEIGHT)
+                                                             TENSOR4D_DECLARATION(dst))
+#else // defined(DST_HEIGHT)
                                                              TENSOR3D_DECLARATION(dst))
+#endif // defined(DST_HEIGHT)
 {
     // Compute source and destination addresses
     Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src);
+#if defined(DST_HEIGHT)
+    Tensor4D dst = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(dst, 1);
+    dst.ptr += get_global_id(0) * dst_step_x + (get_global_id(1) % DST_HEIGHT) * dst_step_y + (get_global_id(1) / DST_HEIGHT) * dst_step_z + get_global_id(2) * dst_step_w;
+#else // defined(DST_HEIGHT)
     Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst);
+#endif // defined(DST_HEIGHT)
+
 #if defined(ADD_BIAS)
     Vector biases = CONVERT_TO_VECTOR_STRUCT(biases);
 #endif // defined(ADD_BIAS)
```