diff options
author | Gian Marco Iodice <gianmarco.iodice@arm.com> | 2019-03-15 10:13:05 +0000 |
---|---|---|
committer | Gian Marco Iodice <gianmarco.iodice@arm.com> | 2019-03-20 11:21:46 +0000 |
commit | b0c5037d94ba7073ccabb0ebaff54db320f184c4 (patch) | |
tree | 126f2332df60b6eff1e630b2585b2bd407501a20 /src/core/CL/ICLKernel.cpp | |
parent | 5ed7b5bc98feb848874730c9bb9c30759e58d453 (diff) | |
download | ComputeLibrary-b0c5037d94ba7073ccabb0ebaff54db320f184c4.tar.gz |
COMPMID-2043: Add support for "dummy threads" in CLGEMMReshaped
Change-Id: I89403b97503fbb99f6a32f5d62b8c535ab26a7be
Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-on: https://review.mlplatform.org/c/877
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/ICLKernel.cpp')
-rw-r--r-- | src/core/CL/ICLKernel.cpp | 11 |
1 files changed, 9 insertions, 2 deletions
```diff
diff --git a/src/core/CL/ICLKernel.cpp b/src/core/CL/ICLKernel.cpp
index 995fcb481b..2d28a496c9 100644
--- a/src/core/CL/ICLKernel.cpp
+++ b/src/core/CL/ICLKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -36,7 +36,7 @@
 
 using namespace arm_compute;
 
-void arm_compute::enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Window &window, const cl::NDRange &lws_hint)
+void arm_compute::enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Window &window, const cl::NDRange &lws_hint, bool use_dummy_work_items)
 {
     if(kernel.kernel()() == nullptr)
     {
@@ -58,6 +58,13 @@ void arm_compute::enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Wind
         return;
     }
 
+    // Use dummy work-items
+    if(use_dummy_work_items)
+    {
+        gws.get()[0] = get_next_power_two(gws[0]);
+        gws.get()[1] = get_next_power_two(gws[1]);
+    }
+
     cl::NDRange valid_lws;
     if(lws_hint[0] * lws_hint[1] * lws_hint[2] > kernel.get_max_workgroup_size())
     {
```