From 275f99cb09606191c5589952d57175be655de74a Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Fri, 23 Aug 2019 12:44:11 +0100 Subject: COMPMID-2451: Use kernel lws_hint() on enqueue Avoid querying device's default lws on kernel enqueue as this is already cached in the kernel during configuration. Change-Id: Ia26ecb712caeb8f042356815e0cfd23522764d27 Signed-off-by: Georgios Pinitas Reviewed-on: https://review.mlplatform.org/c/1803 Reviewed-by: Michele Di Giorgio Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins --- src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp | 4 ++-- src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp | 2 +- src/core/CL/kernels/CLBitwiseAndKernel.cpp | 4 ++-- src/core/CL/kernels/CLBitwiseOrKernel.cpp | 4 ++-- src/core/CL/kernels/CLBitwiseXorKernel.cpp | 4 ++-- src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp | 2 +- src/core/CL/kernels/CLChannelCombineKernel.cpp | 2 +- src/core/CL/kernels/CLChannelExtractKernel.cpp | 4 ++-- src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp | 4 ++-- src/core/CL/kernels/CLCopyKernel.cpp | 4 ++-- src/core/CL/kernels/CLCropKernel.cpp | 2 +- src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp | 4 ++-- src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp | 2 +- src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp | 2 +- .../CLDepthwiseConvolutionLayerReshapeWeightsGenericKernel.cpp | 2 +- .../CLDepthwiseConvolutionLayerReshapeWeightsKernel.cpp | 2 +- src/core/CL/kernels/CLDepthwiseVectorToTensorKernel.cpp | 2 +- src/core/CL/kernels/CLDequantizationLayerKernel.cpp | 2 +- src/core/CL/kernels/CLElementWiseUnaryLayerKernel.cpp | 2 +- src/core/CL/kernels/CLFillBorderKernel.cpp | 2 +- src/core/CL/kernels/CLFloorKernel.cpp | 4 ++-- src/core/CL/kernels/CLFuseBatchNormalizationKernel.cpp | 2 +- ...GEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp | 2 +- ...GEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp | 4 ++-- .../CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel.cpp | 4 ++-- .../kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp | 4 ++-- src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp | 2 +- src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.cpp | 2 +- src/core/CL/kernels/CLGatherKernel.cpp | 2 +- src/core/CL/kernels/CLGenerateProposalsLayerKernel.cpp | 2 +- src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp | 2 +- src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp | 6 +++--- src/core/CL/kernels/CLLKTrackerKernel.cpp | 10 +++++----- src/core/CL/kernels/CLMeanStdDevKernel.cpp | 4 ++-- src/core/CL/kernels/CLMemsetKernel.cpp | 2 +- src/core/CL/kernels/CLMinMaxLayerKernel.cpp | 2 +- src/core/CL/kernels/CLMinMaxLocationKernel.cpp | 6 +++--- src/core/CL/kernels/CLPadLayerKernel.cpp | 2 +- src/core/CL/kernels/CLPermuteKernel.cpp | 2 +- src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp | 4 ++-- src/core/CL/kernels/CLPriorBoxLayerKernel.cpp | 4 ++-- src/core/CL/kernels/CLQuantizationLayerKernel.cpp | 2 +- src/core/CL/kernels/CLROIAlignLayerKernel.cpp | 2 +- src/core/CL/kernels/CLROIPoolingLayerKernel.cpp | 2 +- src/core/CL/kernels/CLReductionOperationKernel.cpp | 8 ++++---- src/core/CL/kernels/CLRemapKernel.cpp | 4 ++-- src/core/CL/kernels/CLReshapeLayerKernel.cpp | 4 ++-- src/core/CL/kernels/CLScharr3x3Kernel.cpp | 2 +- src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp | 2 +- src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp | 2 +- src/core/CL/kernels/CLStackLayerKernel.cpp | 2 +- src/core/CL/kernels/CLTileKernel.cpp | 4 ++-- src/core/CL/kernels/CLUpsampleLayerKernel.cpp | 2 +- src/core/CL/kernels/CLWeightsReshapeKernel.cpp | 2 +- src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp | 2 +- src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp | 4 ++-- 56 files changed, 86 insertions(+), 86 deletions(-) (limited to 'src') diff --git a/src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp b/src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp index 0c1206adfb..557046e831 100644 --- a/src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp +++ b/src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2018 ARM Limited. + * Copyright (c) 2016-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -96,7 +96,7 @@ void CLAbsoluteDifferenceKernel::run(const Window &window, cl::CommandQueue &que add_2D_tensor_argument(idx, _input1, slice); add_2D_tensor_argument(idx, _input2, slice); add_2D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice); + enqueue(queue, *this, slice, lws_hint()); } while(window.slide_window_slice_2D(slice)); } diff --git a/src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp b/src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp index aa06d3af8c..075a7b1b3e 100644 --- a/src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp +++ b/src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp @@ -173,7 +173,7 @@ void CLBatchToSpaceLayerKernel::run(const Window &window, cl::CommandQueue &queu add_1D_tensor_argument(idx, _block_shape, vector_slice); } add_4D_tensor_argument(idx, _output, slice_out); - enqueue(queue, *this, slice_in); + enqueue(queue, *this, slice_in, lws_hint()); ++batch_id; } diff --git a/src/core/CL/kernels/CLBitwiseAndKernel.cpp b/src/core/CL/kernels/CLBitwiseAndKernel.cpp index dd301cd02e..2d05f2e2bd 100644 --- a/src/core/CL/kernels/CLBitwiseAndKernel.cpp +++ b/src/core/CL/kernels/CLBitwiseAndKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2018 ARM Limited. + * Copyright (c) 2016-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -82,7 +82,7 @@ void CLBitwiseAndKernel::run(const Window &window, cl::CommandQueue &queue) add_2D_tensor_argument(idx, _input1, slice); add_2D_tensor_argument(idx, _input2, slice); add_2D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice); + enqueue(queue, *this, slice, lws_hint()); } while(window.slide_window_slice_2D(slice)); } diff --git a/src/core/CL/kernels/CLBitwiseOrKernel.cpp b/src/core/CL/kernels/CLBitwiseOrKernel.cpp index aa84618258..b3efab8b1f 100644 --- a/src/core/CL/kernels/CLBitwiseOrKernel.cpp +++ b/src/core/CL/kernels/CLBitwiseOrKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2018 ARM Limited. + * Copyright (c) 2016-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -83,7 +83,7 @@ void CLBitwiseOrKernel::run(const Window &window, cl::CommandQueue &queue) add_2D_tensor_argument(idx, _input1, slice); add_2D_tensor_argument(idx, _input2, slice); add_2D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice); + enqueue(queue, *this, slice, lws_hint()); } while(window.slide_window_slice_2D(slice)); } diff --git a/src/core/CL/kernels/CLBitwiseXorKernel.cpp b/src/core/CL/kernels/CLBitwiseXorKernel.cpp index ad1f923253..d8ac486d0f 100644 --- a/src/core/CL/kernels/CLBitwiseXorKernel.cpp +++ b/src/core/CL/kernels/CLBitwiseXorKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2018 ARM Limited. + * Copyright (c) 2016-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -83,7 +83,7 @@ void CLBitwiseXorKernel::run(const Window &window, cl::CommandQueue &queue) add_2D_tensor_argument(idx, _input1, slice); add_2D_tensor_argument(idx, _input2, slice); add_2D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice); + enqueue(queue, *this, slice, lws_hint()); } while(window.slide_window_slice_2D(slice)); } diff --git a/src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp b/src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp index 7c30a942db..045bd02d73 100644 --- a/src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp +++ b/src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp @@ -127,6 +127,6 @@ void CLBoundingBoxTransformKernel::run(const Window &window, cl::CommandQueue &q add_2D_tensor_argument(idx, _deltas, slice); // Note that we don't need to loop over the slices, as we are sure that we are dealing with all 2D tensors - enqueue(queue, *this, slice); + enqueue(queue, *this, slice, lws_hint()); } } // namespace arm_compute diff --git a/src/core/CL/kernels/CLChannelCombineKernel.cpp b/src/core/CL/kernels/CLChannelCombineKernel.cpp index 126614a599..d029efe110 100644 --- a/src/core/CL/kernels/CLChannelCombineKernel.cpp +++ b/src/core/CL/kernels/CLChannelCombineKernel.cpp @@ -281,7 +281,7 @@ void CLChannelCombineKernel::run(const Window &window, cl::CommandQueue &queue) _kernel.setArg(idx++, slice.y().end()); } - enqueue(queue, *this, slice); + enqueue(queue, *this, slice, lws_hint()); } while(window.slide_window_slice_2D(slice)); } diff --git a/src/core/CL/kernels/CLChannelExtractKernel.cpp b/src/core/CL/kernels/CLChannelExtractKernel.cpp index 8bddba837a..d2a0f984da 100644 --- a/src/core/CL/kernels/CLChannelExtractKernel.cpp +++ b/src/core/CL/kernels/CLChannelExtractKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2018 ARM Limited. + * Copyright (c) 2016-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -181,7 +181,7 @@ void CLChannelExtractKernel::run(const Window &window, cl::CommandQueue &queue) unsigned int idx = 0; add_2D_tensor_argument(idx, _input, slice); add_2D_tensor_argument(idx, _output, win_sub); - enqueue(queue, *this, slice); + enqueue(queue, *this, slice, lws_hint()); } while(window.slide_window_slice_2D(slice)); } diff --git a/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp b/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp index ace3fd5840..81856769b2 100644 --- a/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp +++ b/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -107,6 +107,6 @@ void CLConvertFullyConnectedWeightsKernel::run(const Window &window, cl::Command unsigned int idx = 0; add_2D_tensor_argument(idx, _input, window); add_2D_tensor_argument(idx, _output, window); - enqueue(queue, *this, window); + enqueue(queue, *this, window, lws_hint()); } } // namespace arm_compute diff --git a/src/core/CL/kernels/CLCopyKernel.cpp b/src/core/CL/kernels/CLCopyKernel.cpp index c87768aeb7..c49251ba45 100644 --- a/src/core/CL/kernels/CLCopyKernel.cpp +++ b/src/core/CL/kernels/CLCopyKernel.cpp @@ -259,7 +259,7 @@ void CLCopyKernel::run(const Window &window, cl::CommandQueue &queue) unsigned int idx = 0; add_3D_tensor_argument(idx, _input, slice); add_3D_tensor_argument(idx, _output, out_slice); - enqueue(queue, *this, slice); + enqueue(queue, *this, slice, lws_hint()); } while(window.slide_window_slice_3D(slice) && _output_window.slide_window_slice_3D(out_slice)); } @@ -272,7 +272,7 @@ void CLCopyKernel::run(const Window &window, cl::CommandQueue &queue) unsigned int idx = 0; add_3D_tensor_argument(idx, _input, slice); add_3D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice); + enqueue(queue, *this, slice, lws_hint()); } while(collapsed.slide_window_slice_3D(slice)); } diff --git a/src/core/CL/kernels/CLCropKernel.cpp b/src/core/CL/kernels/CLCropKernel.cpp index f8a2456d4a..90e054b047 100644 --- a/src/core/CL/kernels/CLCropKernel.cpp +++ b/src/core/CL/kernels/CLCropKernel.cpp @@ -127,6 +127,6 @@ void CLCropKernel::run(const Window &window, cl::CommandQueue &queue) add_3D_tensor_argument(idx, _output, window); add_argument(idx, _start.x); add_argument(idx, _start.y); - enqueue(queue, *this, window); + enqueue(queue, *this, window, lws_hint()); } } // namespace arm_compute diff --git a/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp index 295fb5c997..50f654680c 100644 --- a/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp +++ b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp @@ -129,7 +129,7 @@ void CLDeconvolutionLayerUpsampleKernel::run(const Window &window, cl::CommandQu unsigned int idx = 0; add_3D_tensor_argument(idx, _input, slice_in); add_3D_tensor_argument(idx, _output, slice_out); - enqueue(queue, *this, slice_out); + enqueue(queue, *this, slice_out, lws_hint()); } while(collapsed.slide_window_slice_3D(slice_in) && collapsed.slide_window_slice_3D(slice_out)); break; @@ -148,7 +148,7 @@ void CLDeconvolutionLayerUpsampleKernel::run(const Window &window, cl::CommandQu unsigned int idx = 0; add_3D_tensor_argument(idx, _input, slice_in); add_3D_tensor_argument(idx, _output, slice_out); - enqueue(queue, *this, slice_out); + enqueue(queue, *this, slice_out, lws_hint()); } while(window.slide_window_slice_3D(slice_in) && window.slide_window_slice_3D(slice_out)); break; diff --git a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp index 2d8de755b3..c985811fbc 100644 --- a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp +++ b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp @@ -147,7 +147,7 @@ void CLDepthConcatenateLayerKernel::run(const Window &window, cl::CommandQueue & unsigned int idx = 0; add_3D_tensor_argument(idx, _input, slice); add_3D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice); + enqueue(queue, *this, slice, lws_hint()); } while(window.slide_window_slice_3D(slice)); } diff --git a/src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp b/src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp index f2d65e813c..49a5590231 100644 --- a/src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp +++ b/src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp @@ -119,7 +119,7 @@ void CLDepthToSpaceLayerKernel::run(const Window &window, cl::CommandQueue &queu add_3D_tensor_argument(idx, _input, slice_in); add_argument(idx, batch_id); add_4D_tensor_argument(idx, _output, slice_out); - enqueue(queue, *this, slice_in); + enqueue(queue, *this, slice_in, lws_hint()); ++batch_id; } diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsGenericKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsGenericKernel.cpp index b73ccf5ddc..582c600c61 100644 --- a/src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsGenericKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsGenericKernel.cpp @@ -141,7 +141,7 @@ void CLDepthwiseConvolutionLayerReshapeWeightsGenericKernel::run(const Window &w unsigned int idx = 0; add_3D_tensor_argument(idx, _input, slice); add_2D_tensor_argument(idx, _output, slice_out); - enqueue(queue, *this, slice); + enqueue(queue, *this, slice, lws_hint()); } while(window.slide_window_slice_3D(slice) && window.slide_window_slice_2D(slice_out)); } diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.cpp index 6b6438abef..1fd6312295 100644 --- a/src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.cpp @@ -121,6 +121,6 @@ void CLDepthwiseConvolutionLayerReshapeWeightsKernel::run(const Window &window, unsigned int idx = 0; add_3D_tensor_argument(idx, _input, window); add_2D_tensor_argument(idx, _output, window); - enqueue(queue, *this, window); + enqueue(queue, *this, window, lws_hint()); } } // namespace arm_compute diff --git a/src/core/CL/kernels/CLDepthwiseVectorToTensorKernel.cpp b/src/core/CL/kernels/CLDepthwiseVectorToTensorKernel.cpp index 2dad72944e..0f029fda74 100644 --- a/src/core/CL/kernels/CLDepthwiseVectorToTensorKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseVectorToTensorKernel.cpp @@ -119,7 +119,7 @@ void CLDepthwiseVectorToTensorKernel::run(const Window &window, cl::CommandQueue unsigned int idx = 0; add_1D_tensor_argument(idx, _input, slice); add_3D_tensor_argument(idx, _output, slice_out); - enqueue(queue, *this, slice); + enqueue(queue, *this, slice, lws_hint()); } while(window.slide_window_slice_1D(slice) && window.slide_window_slice_3D(slice_out)); } diff --git a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp index 12d36cdb9f..10a2878be7 100644 --- a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp +++ b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp @@ -131,7 +131,7 @@ void CLDequantizationLayerKernel::run(const Window &window, cl::CommandQueue &qu unsigned int idx = 0; add_3D_tensor_argument(idx, _input, slice); add_3D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice); + enqueue(queue, *this, slice, lws_hint()); } while(window_collapsed.slide_window_slice_3D(slice)); } diff --git a/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.cpp b/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.cpp index 03edcef781..c4ab50411a 100644 --- a/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.cpp +++ b/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.cpp @@ -128,7 +128,7 @@ void CLElementWiseUnaryLayerKernel::run(const Window &window, cl::CommandQueue & unsigned int idx = 0; add_1D_tensor_argument(idx, _input, collapsed); add_1D_tensor_argument(idx, _output, collapsed); - enqueue(queue, *this, collapsed); + enqueue(queue, *this, collapsed, lws_hint()); } while(window.slide_window_slice_1D(collapsed)); } diff --git a/src/core/CL/kernels/CLFillBorderKernel.cpp b/src/core/CL/kernels/CLFillBorderKernel.cpp index c63b3be9af..475a52b3db 100644 --- a/src/core/CL/kernels/CLFillBorderKernel.cpp +++ b/src/core/CL/kernels/CLFillBorderKernel.cpp @@ -180,7 +180,7 @@ void CLFillBorderKernel::run(const Window &window, cl::CommandQueue &queue) { unsigned int idx = 0; add_3D_tensor_argument(idx, _tensor, slice); - enqueue(queue, *this, slice, cl::NullRange); + enqueue(queue, *this, slice, lws_hint()); } while(collapsed.slide_window_slice_3D(slice)); } diff --git a/src/core/CL/kernels/CLFloorKernel.cpp b/src/core/CL/kernels/CLFloorKernel.cpp index 831173d076..7c39d87512 100644 --- a/src/core/CL/kernels/CLFloorKernel.cpp +++ b/src/core/CL/kernels/CLFloorKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -124,7 +124,7 @@ void CLFloorKernel::run(const Window &window, cl::CommandQueue &queue) unsigned int idx = 0; add_3D_tensor_argument(idx, _input, slice); add_3D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice); + enqueue(queue, *this, slice, lws_hint()); } while(collapsed.slide_window_slice_3D(slice)); } diff --git a/src/core/CL/kernels/CLFuseBatchNormalizationKernel.cpp b/src/core/CL/kernels/CLFuseBatchNormalizationKernel.cpp index bf827bf2c2..423e583059 100644 --- a/src/core/CL/kernels/CLFuseBatchNormalizationKernel.cpp +++ b/src/core/CL/kernels/CLFuseBatchNormalizationKernel.cpp @@ -209,6 +209,6 @@ void CLFuseBatchNormalizationKernel::run(const arm_compute::Window &window, cl:: { add_1D_tensor_argument(idx, _bn_gamma, slice_1d); } - enqueue(queue, *this, slice_3d); + enqueue(queue, *this, slice_3d, lws_hint()); } } // namespace arm_compute diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp index 557e82dc50..8720123366 100644 --- a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp @@ -174,7 +174,7 @@ void CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel::run(const Window unsigned int idx = 0; add_3D_tensor_argument(idx, _input, slice); add_3D_tensor_argument(idx1, _output, slice); - enqueue(queue, *this, slice); + enqueue(queue, *this, slice, lws_hint()); } while(collapsed.slide_window_slice_3D(slice)); } diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp index b7eff0f8ec..7c066381ce 100644 --- a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -176,7 +176,7 @@ void CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel::run(const Window unsigned int idx = 0; add_3D_tensor_argument(idx, _input, slice); add_3D_tensor_argument(idx1, _output, slice); - enqueue(queue, *this, slice); + enqueue(queue, *this, slice, lws_hint()); } while(collapsed.slide_window_slice_3D(slice)); } diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel.cpp b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel.cpp index b7730d5060..2967a73866 100644 --- a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -173,7 +173,7 @@ void CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel::run(const Window &win unsigned int idx = 0; add_3D_tensor_argument(idx, _input, slice); add_3D_tensor_argument(idx1, _output, slice); - enqueue(queue, *this, slice); + enqueue(queue, *this, slice, lws_hint()); } while(collapsed.slide_window_slice_3D(slice)); } diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp index 621bd2b54b..8175f60275 100644 --- a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -170,7 +170,7 @@ void CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel::run(const Window &window, cl unsigned int idx = 0; add_3D_tensor_argument(idx, _input, slice); add_3D_tensor_argument(idx1, _output, slice); - enqueue(queue, *this, slice); + enqueue(queue, *this, slice, lws_hint()); } while(collapsed.slide_window_slice_3D(slice)); } diff --git a/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp b/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp index 0c0b0ec817..3a59b43823 100644 --- a/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp @@ -214,7 +214,7 @@ void CLGEMMLowpMatrixBReductionKernel::run(const Window &window, cl::CommandQueu unsigned int idx = 0; add_3D_tensor_argument(idx, _input, slice_in); add_2D_tensor_argument(idx, _output, slice_out); - enqueue(queue, *this, slice_out); + enqueue(queue, *this, slice_out, lws_hint()); } while(collapsed.slide_window_slice_2D(slice_out)); } diff --git a/src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.cpp b/src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.cpp index 5b9e68d2bd..2ca4132b15 100644 --- a/src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.cpp +++ b/src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.cpp @@ -166,7 +166,7 @@ void CLGEMMReshapeRHSMatrixKernel::run(const Window &window, cl::CommandQueue &q unsigned int idx = 0; add_3D_tensor_argument(idx, _input, slice); add_3D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice); + enqueue(queue, *this, slice, lws_hint()); } while(window.slide_window_slice_3D(slice)); } \ No newline at end of file diff --git a/src/core/CL/kernels/CLGatherKernel.cpp b/src/core/CL/kernels/CLGatherKernel.cpp index 412821b8bd..4e91cf2cc9 100644 --- a/src/core/CL/kernels/CLGatherKernel.cpp +++ b/src/core/CL/kernels/CLGatherKernel.cpp @@ -131,6 +131,6 @@ void CLGatherKernel::run(const Window &window, cl::CommandQueue &queue) add_4D_tensor_argument(idx, _input, window_collapsed); add_1D_tensor_argument(idx, _indices, window_collapsed); add_4D_tensor_argument(idx, _output, window_collapsed); - enqueue(queue, *this, window_collapsed); + enqueue(queue, *this, window_collapsed, lws_hint()); } } // namespace arm_compute diff --git a/src/core/CL/kernels/CLGenerateProposalsLayerKernel.cpp b/src/core/CL/kernels/CLGenerateProposalsLayerKernel.cpp index ab95ddca54..79e364caf7 100644 --- a/src/core/CL/kernels/CLGenerateProposalsLayerKernel.cpp +++ b/src/core/CL/kernels/CLGenerateProposalsLayerKernel.cpp @@ -124,6 +124,6 @@ void CLComputeAllAnchorsKernel::run(const Window &window, cl::CommandQueue &queu // Note that we don't need to loop over the slices, as we are launching exactly // as many threads as all the anchors generated - enqueue(queue, *this, collapsed); + enqueue(queue, *this, collapsed, lws_hint()); } } // namespace arm_compute diff --git a/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp index 70af541ef1..ea292c0b7b 100644 --- a/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp +++ b/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp @@ -162,5 +162,5 @@ void CLHeightConcatenateLayerKernel::run(const Window &window, cl::CommandQueue unsigned int idx = 0; add_4D_tensor_argument(idx, _input, window); add_4D_tensor_argument(idx, _output, window); - enqueue(queue, *this, window); + enqueue(queue, *this, window, lws_hint()); } diff --git a/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp b/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp index 8816138e2e..2ef977bebf 100644 --- a/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp +++ b/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp @@ -178,7 +178,7 @@ void CLL2NormalizeLayerKernel::run(const Window &window, cl::CommandQueue &queue add_2D_tensor_argument(idx, _input, in_slice); add_2D_tensor_argument(idx, _sum, sum_slice); add_2D_tensor_argument(idx, _output, in_slice); - enqueue(queue, *this, in_slice); + enqueue(queue, *this, in_slice, lws_hint()); } while(window.slide_window_slice_2D(in_slice) && window.slide_window_slice_2D(sum_slice)); } @@ -194,7 +194,7 @@ void CLL2NormalizeLayerKernel::run(const Window &window, cl::CommandQueue &queue add_2D_tensor_argument(idx, _input, in_slice); add_2D_tensor_argument(idx, _sum, sum_slice); add_2D_tensor_argument(idx, _output, in_slice); - enqueue(queue, *this, in_slice); + enqueue(queue, *this, in_slice, lws_hint()); } while(window.slide_window_slice_2D(in_slice) && window.slide_window_slice_2D(sum_slice)); } @@ -210,7 +210,7 @@ void CLL2NormalizeLayerKernel::run(const Window &window, cl::CommandQueue &queue add_3D_tensor_argument(idx, _input, in_slice); add_3D_tensor_argument(idx, _sum, sum_slice); add_3D_tensor_argument(idx, _output, in_slice); - enqueue(queue, *this, in_slice); + enqueue(queue, *this, in_slice, lws_hint()); } while(window.slide_window_slice_3D(in_slice) && window.slide_window_slice_3D(sum_slice)); } diff --git a/src/core/CL/kernels/CLLKTrackerKernel.cpp b/src/core/CL/kernels/CLLKTrackerKernel.cpp index 40ed630c89..68a210c115 100644 --- a/src/core/CL/kernels/CLLKTrackerKernel.cpp +++ b/src/core/CL/kernels/CLLKTrackerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -83,7 +83,7 @@ void CLLKTrackerInitKernel::run(const Window &window, cl::CommandQueue &queue) ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - enqueue(queue, *this, window); + enqueue(queue, *this, window, lws_hint()); } void CLLKTrackerFinalizeKernel::configure(ICLLKInternalKeypointArray *new_points_internal, ICLKeyPointArray *new_points) @@ -112,7 +112,7 @@ void CLLKTrackerFinalizeKernel::run(const Window &window, cl::CommandQueue &queu ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - enqueue(queue, *this, window); + enqueue(queue, *this, window, lws_hint()); } CLLKTrackerStage0Kernel::CLLKTrackerStage0Kernel() @@ -202,7 +202,7 @@ void CLLKTrackerStage0Kernel::run(const Window &window, cl::CommandQueue &queue) add_2D_tensor_argument(idx, _old_scharr_gx, window); add_2D_tensor_argument(idx, _old_scharr_gy, window); - enqueue(queue, *this, window); + enqueue(queue, *this, window, lws_hint()); } CLLKTrackerStage1Kernel::CLLKTrackerStage1Kernel() @@ -284,5 +284,5 @@ void CLLKTrackerStage1Kernel::run(const Window &window, cl::CommandQueue &queue) unsigned int idx = 0; add_2D_tensor_argument(idx, _new_input, window); - enqueue(queue, *this, window); + enqueue(queue, *this, window, lws_hint()); } diff --git a/src/core/CL/kernels/CLMeanStdDevKernel.cpp b/src/core/CL/kernels/CLMeanStdDevKernel.cpp index 0cde9c5fe6..7bfd6d6e53 100644 --- a/src/core/CL/kernels/CLMeanStdDevKernel.cpp +++ b/src/core/CL/kernels/CLMeanStdDevKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2018 ARM Limited. + * Copyright (c) 2016-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -132,7 +132,7 @@ void CLMeanStdDevKernel::run(const Window &window, cl::CommandQueue &queue) // Set slice step equal to height to force gws[1] to 1, // as each thread calculates the sum across all rows and columns equal to the number of elements processed by each work-item slice.set_dimension_step(Window::DimY, _input->info()->dimension(1)); - enqueue(queue, *this, slice); + enqueue(queue, *this, slice, lws_hint()); } while(window.slide_window_slice_2D(slice)); diff --git a/src/core/CL/kernels/CLMemsetKernel.cpp b/src/core/CL/kernels/CLMemsetKernel.cpp index 80caf9406e..142642097f 100644 --- a/src/core/CL/kernels/CLMemsetKernel.cpp +++ b/src/core/CL/kernels/CLMemsetKernel.cpp @@ -103,7 +103,7 @@ void CLMemsetKernel::run(const Window &window, cl::CommandQueue &queue) { unsigned int idx = 0; add_3D_tensor_argument(idx, _tensor, slice); - enqueue(queue, *this, slice); + enqueue(queue, *this, slice, lws_hint()); } while(collapsed.slide_window_slice_3D(slice)); } diff --git a/src/core/CL/kernels/CLMinMaxLayerKernel.cpp b/src/core/CL/kernels/CLMinMaxLayerKernel.cpp index 92b5f8d505..05912743b3 100644 --- a/src/core/CL/kernels/CLMinMaxLayerKernel.cpp +++ b/src/core/CL/kernels/CLMinMaxLayerKernel.cpp @@ -157,7 +157,7 @@ void CLMinMaxLayerKernel::run(const Window &window, cl::CommandQueue &queue) // Set inputs add_3D_tensor_argument(idx, _input, slice); add_1D_tensor_argument(idx, _output, output_slice); - enqueue(queue, *this, slice); + enqueue(queue, *this, slice, lws_hint()); } while(window_collapsed.slide_window_slice_3D(slice)); } diff --git a/src/core/CL/kernels/CLMinMaxLocationKernel.cpp b/src/core/CL/kernels/CLMinMaxLocationKernel.cpp index 0c7f3bc070..e865b45e73 100644 --- a/src/core/CL/kernels/CLMinMaxLocationKernel.cpp +++ b/src/core/CL/kernels/CLMinMaxLocationKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -134,7 +134,7 @@ void CLMinMaxKernel::run(const Window &window, cl::CommandQueue &queue) { unsigned int idx = 0; add_2D_tensor_argument(idx, _input, slice); - enqueue(queue, *this, slice); + enqueue(queue, *this, slice, lws_hint()); } while(window.slide_window_slice_2D(slice)); @@ -226,7 +226,7 @@ void CLMinMaxLocationKernel::run(const Window &window, cl::CommandQueue &queue) { unsigned int idx = 0; add_2D_tensor_argument(idx, _input, slice); - enqueue(queue, *this, slice); + enqueue(queue, *this, slice, lws_hint()); } while(window.slide_window_slice_2D(slice)); } diff --git a/src/core/CL/kernels/CLPadLayerKernel.cpp b/src/core/CL/kernels/CLPadLayerKernel.cpp index d35164d309..5270e31bbe 100644 --- a/src/core/CL/kernels/CLPadLayerKernel.cpp +++ b/src/core/CL/kernels/CLPadLayerKernel.cpp @@ -136,7 +136,7 @@ void CLPadLayerKernel::run(const Window &window, cl::CommandQueue &queue) add_3D_tensor_argument(idx, _input, slice_in); add_3D_tensor_argument(idx, _output, slice_out); - enqueue(queue, *this, slice_out); + enqueue(queue, *this, slice_out, lws_hint()); } while(window.slide_window_slice_3D(slice_out) && win_in.slide_window_slice_3D(slice_in)); } diff --git a/src/core/CL/kernels/CLPermuteKernel.cpp b/src/core/CL/kernels/CLPermuteKernel.cpp index a5fc1a705f..9cb72b3c04 100644 --- a/src/core/CL/kernels/CLPermuteKernel.cpp +++ b/src/core/CL/kernels/CLPermuteKernel.cpp @@ -141,7 +141,7 @@ void CLPermuteKernel::run(const Window &window, cl::CommandQueue &queue) unsigned int idx = 0; add_4D_tensor_argument(idx, _input, slice_in); add_4D_tensor_argument(idx, _output, slice_out); - enqueue(queue, *this, slice_in); + enqueue(queue, *this, slice_in, lws_hint()); } while(window.slide_window_slice_4D(slice_in) && window.slide_window_slice_4D(slice_out)); } diff --git a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp index 76403d18d4..50cdc9c7f4 100644 --- a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp +++ b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp @@ -274,7 +274,7 @@ void CLPixelWiseMultiplicationKernel::run(const Window &window, cl::CommandQueue add_3D_tensor_argument(idx, _input1, slice_input1); add_3D_tensor_argument(idx, _input2, slice_input2); add_3D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice); + enqueue(queue, *this, slice, lws_hint()); ARM_COMPUTE_UNUSED(collapsed.slide_window_slice_3D(slice_input1)); ARM_COMPUTE_UNUSED(collapsed.slide_window_slice_3D(slice_input2)); @@ -409,7 +409,7 @@ void CLComplexPixelWiseMultiplicationKernel::run(const Window &window, cl::Comma add_3D_tensor_argument(idx, _input1, slice_input1); add_3D_tensor_argument(idx, _input2, slice_input2); add_3D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice); + enqueue(queue, *this, slice, lws_hint()); ARM_COMPUTE_UNUSED(collapsed.slide_window_slice_3D(slice_input1)); ARM_COMPUTE_UNUSED(collapsed.slide_window_slice_3D(slice_input2)); diff --git a/src/core/CL/kernels/CLPriorBoxLayerKernel.cpp b/src/core/CL/kernels/CLPriorBoxLayerKernel.cpp index c76d839551..12ea19394b 100644 --- a/src/core/CL/kernels/CLPriorBoxLayerKernel.cpp +++ b/src/core/CL/kernels/CLPriorBoxLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -214,6 +214,6 @@ void CLPriorBoxLayerKernel::run(const Window &window, cl::CommandQueue &queue) unsigned int idx = 0; add_2D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice); + enqueue(queue, *this, slice, lws_hint()); } } // namespace arm_compute diff --git a/src/core/CL/kernels/CLQuantizationLayerKernel.cpp b/src/core/CL/kernels/CLQuantizationLayerKernel.cpp index 22d4e3345f..493255f1cd 100644 --- a/src/core/CL/kernels/CLQuantizationLayerKernel.cpp +++ b/src/core/CL/kernels/CLQuantizationLayerKernel.cpp @@ -126,7 +126,7 @@ void CLQuantizationLayerKernel::run(const Window &window, cl::CommandQueue &queu unsigned int idx = 0; add_3D_tensor_argument(idx, _input, slice); add_3D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice); + enqueue(queue, *this, slice, lws_hint()); } while(window_collapsed.slide_window_slice_3D(slice)); } diff --git a/src/core/CL/kernels/CLROIAlignLayerKernel.cpp b/src/core/CL/kernels/CLROIAlignLayerKernel.cpp index 47dc62c4f4..50729f2421 100644 --- a/src/core/CL/kernels/CLROIAlignLayerKernel.cpp +++ b/src/core/CL/kernels/CLROIAlignLayerKernel.cpp @@ -149,6 +149,6 @@ void CLROIAlignLayerKernel::run(const Window &window, cl::CommandQueue &queue) add_argument(idx, _input->info()->strides_in_bytes()[3]); add_argument(idx, _output->info()->strides_in_bytes()[3]); - enqueue(queue, *this, slice); + enqueue(queue, *this, slice, lws_hint()); } } // namespace arm_compute diff --git a/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp b/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp index c32ec1bb2b..9858a4de36 100644 --- a/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp +++ b/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp @@ -143,6 +143,6 @@ void CLROIPoolingLayerKernel::run(const Window &window, cl::CommandQueue &queue) add_argument(idx, _input->info()->strides_in_bytes()[3]); add_argument(idx, _output->info()->strides_in_bytes()[3]); - enqueue(queue, *this, slice); + enqueue(queue, *this, slice, lws_hint()); } } // namespace arm_compute diff --git a/src/core/CL/kernels/CLReductionOperationKernel.cpp b/src/core/CL/kernels/CLReductionOperationKernel.cpp index 9db8ae6cde..ee33a72792 100644 --- a/src/core/CL/kernels/CLReductionOperationKernel.cpp +++ b/src/core/CL/kernels/CLReductionOperationKernel.cpp @@ -285,7 +285,7 @@ void CLReductionOperationKernel::run(const Window &window, cl::CommandQueue &que unsigned int idx = 0; add_1D_tensor_argument(idx, _input, in_slice); add_1D_tensor_argument(idx, _output, out_slice); - enqueue(queue, *this, in_slice); + enqueue(queue, *this, in_slice, lws_hint()); } while(window_in.slide_window_slice_1D(in_slice) && window.slide_window_slice_1D(out_slice)); } @@ -331,7 +331,7 @@ void CLReductionOperationKernel::run(const Window &window, cl::CommandQueue &que unsigned int idx = 0; add_2D_tensor_argument(idx, _input, in_slice); add_2D_tensor_argument(idx, _output, out_slice); - enqueue(queue, *this, in_slice); + enqueue(queue, *this, in_slice, lws_hint()); } while(window_in.slide_window_slice_2D(in_slice) && window.slide_window_slice_2D(out_slice)); } @@ -349,7 +349,7 @@ void CLReductionOperationKernel::run(const Window &window, cl::CommandQueue &que unsigned int idx = 0; add_3D_tensor_argument(idx, _input, in_slice); add_3D_tensor_argument(idx, _output, out_slice); - enqueue(queue, *this, in_slice); + enqueue(queue, *this, in_slice, lws_hint()); } while(window_in.slide_window_slice_3D(in_slice) && window.slide_window_slice_3D(out_slice)); } @@ -367,7 +367,7 @@ void CLReductionOperationKernel::run(const Window &window, cl::CommandQueue &que unsigned int idx = 0; add_4D_tensor_argument(idx, _input, in_slice); add_4D_tensor_argument(idx, _output, out_slice); - enqueue(queue, *this, in_slice); + enqueue(queue, *this, in_slice, lws_hint()); } while(window_in.slide_window_slice_4D(in_slice) && window.slide_window_slice_4D(out_slice)); } diff --git a/src/core/CL/kernels/CLRemapKernel.cpp b/src/core/CL/kernels/CLRemapKernel.cpp index 33c5f2d402..12161fcd70 100644 --- a/src/core/CL/kernels/CLRemapKernel.cpp +++ b/src/core/CL/kernels/CLRemapKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -105,7 +105,7 @@ void CLRemapKernel::run(const Window &window, cl::CommandQueue &queue) add_2D_tensor_argument(idx, _output, slice); add_2D_tensor_argument(idx, _map_x, slice); add_2D_tensor_argument(idx, _map_y, slice); - enqueue(queue, *this, slice); + enqueue(queue, *this, slice, lws_hint()); } while(window.slide_window_slice_2D(slice)); } diff --git a/src/core/CL/kernels/CLReshapeLayerKernel.cpp b/src/core/CL/kernels/CLReshapeLayerKernel.cpp index aa1339dd1e..040e442845 100644 --- a/src/core/CL/kernels/CLReshapeLayerKernel.cpp +++ b/src/core/CL/kernels/CLReshapeLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -122,6 +122,6 @@ void CLReshapeLayerKernel::run(const Window &window, cl::CommandQueue &queue) unsigned int idx = 0; add_3D_tensor_argument(idx, _input, window_collapsed); add_3D_tensor_argument(idx, _output, window_collapsed); - enqueue(queue, *this, slice); + enqueue(queue, *this, slice, lws_hint()); } /** [CLReshapeLayerKernel Kernel] **/ diff --git a/src/core/CL/kernels/CLScharr3x3Kernel.cpp b/src/core/CL/kernels/CLScharr3x3Kernel.cpp index 1c1fedca15..94b0d38c52 100644 --- a/src/core/CL/kernels/CLScharr3x3Kernel.cpp +++ b/src/core/CL/kernels/CLScharr3x3Kernel.cpp @@ -118,7 +118,7 @@ void CLScharr3x3Kernel::run(const Window &window, cl::CommandQueue &queue) add_2D_tensor_argument_if((_run_scharr_x), idx, _output_x, slice); add_2D_tensor_argument_if((_run_scharr_y), idx, _output_y, slice); - enqueue(queue, *this, slice); + enqueue(queue, *this, slice, lws_hint()); } while(window.slide_window_slice_2D(slice)); } diff --git a/src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp b/src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp index d2f676b00c..a3441cd5bf 100644 --- a/src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp +++ b/src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp @@ -201,7 +201,7 @@ void CLSpaceToBatchLayerKernel::run(const Window &window, cl::CommandQueue &queu add_argument(idx, batch_id); add_3D_tensor_argument(idx, _output, slice_out); - enqueue(queue, *this, slice_out); + enqueue(queue, *this, slice_out, lws_hint()); ++batch_id; } while(window.slide_window_slice_3D(slice_out)); diff --git a/src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp b/src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp index 57d8305e47..20454b32a8 100644 --- a/src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp +++ b/src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp @@ -120,7 +120,7 @@ void CLSpaceToDepthLayerKernel::run(const Window &window, cl::CommandQueue &queu add_4D_tensor_argument(idx, _input, slice_in); add_argument(idx, batch_id); add_3D_tensor_argument(idx, _output, slice_out); - enqueue(queue, *this, slice_out); + enqueue(queue, *this, slice_out, lws_hint()); ++batch_id; } diff --git a/src/core/CL/kernels/CLStackLayerKernel.cpp b/src/core/CL/kernels/CLStackLayerKernel.cpp index ccbe1fc016..d5bbae6d4d 100644 --- a/src/core/CL/kernels/CLStackLayerKernel.cpp +++ b/src/core/CL/kernels/CLStackLayerKernel.cpp @@ -132,5 +132,5 @@ void CLStackLayerKernel::run(const Window &window, cl::CommandQueue &queue) unsigned int idx = 0; add_4D_tensor_argument(idx, _input, slice_in); add_4D_tensor_argument(idx, _output, slice_out); - enqueue(queue, *this, slice_in); + enqueue(queue, *this, slice_in, lws_hint()); } diff --git a/src/core/CL/kernels/CLTileKernel.cpp b/src/core/CL/kernels/CLTileKernel.cpp index 7559e7ae72..bcd1bdc5f9 100644 --- a/src/core/CL/kernels/CLTileKernel.cpp +++ b/src/core/CL/kernels/CLTileKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -145,7 +145,7 @@ void CLTileKernel::run(const Window &window, cl::CommandQueue &queue) unsigned int idx = 0; add_4D_tensor_argument(idx, _input, slice); add_4D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice); + enqueue(queue, *this, slice, lws_hint()); } while(collapsed.slide_window_slice_4D(slice)); } diff --git a/src/core/CL/kernels/CLUpsampleLayerKernel.cpp b/src/core/CL/kernels/CLUpsampleLayerKernel.cpp index ce5ed86332..331b02d2e8 100644 --- a/src/core/CL/kernels/CLUpsampleLayerKernel.cpp +++ b/src/core/CL/kernels/CLUpsampleLayerKernel.cpp @@ -160,7 +160,7 @@ void CLUpsampleLayerKernel::run(const Window &window, cl::CommandQueue &queue) unsigned int idx = 0; add_3D_tensor_argument(idx, _input, slice_in); add_3D_tensor_argument(idx, _output, slice_out); - enqueue(queue, *this, slice_out); + enqueue(queue, *this, slice_out, lws_hint()); } while(collapsed_window.slide_window_slice_3D(slice_out) && collapsed_window.slide_window_slice_3D(slice_in)); } diff --git a/src/core/CL/kernels/CLWeightsReshapeKernel.cpp b/src/core/CL/kernels/CLWeightsReshapeKernel.cpp index 590af4d184..9330b3b8a1 100644 --- a/src/core/CL/kernels/CLWeightsReshapeKernel.cpp +++ b/src/core/CL/kernels/CLWeightsReshapeKernel.cpp @@ -156,7 +156,7 @@ void CLWeightsReshapeKernel::run(const Window &window, cl::CommandQueue &queue) } // Run kernel - enqueue(queue, *this, in_slice); + enqueue(queue, *this, in_slice, lws_hint()); } while(window.slide_window_slice_4D(in_slice) && out_window.slide_window_slice_2D(out_slice)); } diff --git a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp index 88cdde7935..92ffc9fa90 100644 --- a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp +++ b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp @@ -137,6 +137,6 @@ void CLWidthConcatenateLayerKernel::run(const Window &window, cl::CommandQueue & unsigned int idx = 0; add_4D_tensor_argument(idx, _input, window); add_4D_tensor_argument(idx, _output, window); - enqueue(queue, *this, window); + enqueue(queue, *this, window, lws_hint()); } } // namespace arm_compute diff --git a/src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp b/src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp index 55cc465292..5a508e4b3a 100644 --- a/src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp +++ b/src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -150,5 +150,5 @@ void CLWinogradFilterTransformKernel::run(const Window &window, cl::CommandQueue unsigned int idx = 0; add_4D_tensor_argument(idx, _input, window); add_3D_tensor_argument(idx, _output, window_out); - enqueue(queue, *this, window); + enqueue(queue, *this, window, lws_hint()); } -- cgit v1.2.1