From b6eb35371d222c6b7f61210d97ebd7dd9e197458 Mon Sep 17 00:00:00 2001 From: Anthony Barbier Date: Wed, 8 Aug 2018 13:20:04 +0100 Subject: COMPMID-1478: Stop relying on static default OpenCL objects in cl2.hpp This causes problems when ACL is used as a shared library on Android. Fixes some problems related to creation / destruction order between the Graph's CL backend and core / runtime Change-Id: I716d63fd42f4586df1ffbb6fa97e4db06d3a781b Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/143228 Tested-by: Jenkins Reviewed-by: Michele DiGiorgio Reviewed-by: Gian Marco Iodice --- src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp | 4 ++-- src/core/CL/kernels/CLActivationLayerKernel.cpp | 4 ++-- src/core/CL/kernels/CLArithmeticAdditionKernel.cpp | 2 +- src/core/CL/kernels/CLArithmeticDivisionKernel.cpp | 2 +- src/core/CL/kernels/CLArithmeticSubtractionKernel.cpp | 2 +- src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp | 4 ++-- src/core/CL/kernels/CLBitwiseAndKernel.cpp | 4 ++-- src/core/CL/kernels/CLBitwiseOrKernel.cpp | 4 ++-- src/core/CL/kernels/CLBitwiseXorKernel.cpp | 4 ++-- src/core/CL/kernels/CLBox3x3Kernel.cpp | 4 ++-- src/core/CL/kernels/CLCannyEdgeKernel.cpp | 8 ++++---- src/core/CL/kernels/CLChannelCombineKernel.cpp | 4 ++-- src/core/CL/kernels/CLChannelExtractKernel.cpp | 4 ++-- src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp | 2 +- src/core/CL/kernels/CLCol2ImKernel.cpp | 4 ++-- src/core/CL/kernels/CLColorConvertKernel.cpp | 8 ++++---- src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp | 4 ++-- src/core/CL/kernels/CLConvolutionKernel.cpp | 8 ++++---- src/core/CL/kernels/CLCopyKernel.cpp | 2 +- src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp | 2 +- src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp | 2 +- .../CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp | 4 ++-- .../CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp | 4 ++-- src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp | 4 ++-- 
src/core/CL/kernels/CLDepthwiseVectorToTensorKernel.cpp | 2 +- src/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.cpp | 2 +- src/core/CL/kernels/CLDequantizationLayerKernel.cpp | 2 +- src/core/CL/kernels/CLDerivativeKernel.cpp | 4 ++-- src/core/CL/kernels/CLDilateKernel.cpp | 4 ++-- src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp | 4 ++-- src/core/CL/kernels/CLDirectConvolutionOutputStageKernel.cpp | 4 ++-- src/core/CL/kernels/CLErodeKernel.cpp | 4 ++-- src/core/CL/kernels/CLFastCornersKernel.cpp | 4 ++-- src/core/CL/kernels/CLFillBorderKernel.cpp | 2 +- src/core/CL/kernels/CLFlattenLayerKernel.cpp | 6 +++--- src/core/CL/kernels/CLFloorKernel.cpp | 2 +- src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp | 4 ++-- src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp | 4 ++-- src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp | 2 +- ...MMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp | 6 +++--- .../CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp | 4 ++-- src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp | 6 +++--- src/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.cpp | 4 ++-- src/core/CL/kernels/CLGEMMMatrixAdditionKernel.cpp | 2 +- src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp | 6 +++--- src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp | 4 ++-- src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp | 4 ++-- src/core/CL/kernels/CLGaussian3x3Kernel.cpp | 4 ++-- src/core/CL/kernels/CLGaussianPyramidKernel.cpp | 4 ++-- src/core/CL/kernels/CLHOGDescriptorKernel.cpp | 4 ++-- src/core/CL/kernels/CLHOGDetectorKernel.cpp | 2 +- src/core/CL/kernels/CLHarrisCornersKernel.cpp | 4 ++-- src/core/CL/kernels/CLHistogramKernel.cpp | 6 +++--- src/core/CL/kernels/CLIm2ColKernel.cpp | 6 +++--- src/core/CL/kernels/CLIntegralImageKernel.cpp | 6 +++--- src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp | 2 +- src/core/CL/kernels/CLLKTrackerKernel.cpp | 8 ++++---- .../CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp | 9 +++++---- 
src/core/CL/kernels/CLMagnitudePhaseKernel.cpp | 4 ++-- src/core/CL/kernels/CLMeanStdDevKernel.cpp | 2 +- src/core/CL/kernels/CLMedian3x3Kernel.cpp | 4 ++-- src/core/CL/kernels/CLMinMaxLayerKernel.cpp | 2 +- src/core/CL/kernels/CLMinMaxLocationKernel.cpp | 6 +++--- src/core/CL/kernels/CLNonLinearFilterKernel.cpp | 4 ++-- src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp | 4 ++-- src/core/CL/kernels/CLNormalizationLayerKernel.cpp | 2 +- src/core/CL/kernels/CLPermuteKernel.cpp | 2 +- src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp | 2 +- src/core/CL/kernels/CLPoolingLayerKernel.cpp | 6 +++--- src/core/CL/kernels/CLQuantizationLayerKernel.cpp | 2 +- src/core/CL/kernels/CLROIPoolingLayerKernel.cpp | 2 +- src/core/CL/kernels/CLReductionOperationKernel.cpp | 10 +++++----- src/core/CL/kernels/CLRemapKernel.cpp | 4 ++-- src/core/CL/kernels/CLReshapeLayerKernel.cpp | 2 +- src/core/CL/kernels/CLScaleKernel.cpp | 6 +++--- src/core/CL/kernels/CLScharr3x3Kernel.cpp | 4 ++-- src/core/CL/kernels/CLSobel3x3Kernel.cpp | 4 ++-- src/core/CL/kernels/CLSobel5x5Kernel.cpp | 6 +++--- src/core/CL/kernels/CLSobel7x7Kernel.cpp | 6 +++--- src/core/CL/kernels/CLSoftmaxLayerKernel.cpp | 12 ++++++------ src/core/CL/kernels/CLTransposeKernel.cpp | 7 +++---- src/core/CL/kernels/CLWarpAffineKernel.cpp | 2 +- src/core/CL/kernels/CLWarpPerspectiveKernel.cpp | 2 +- src/core/CL/kernels/CLWeightsReshapeKernel.cpp | 2 +- src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp | 2 +- src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp | 2 +- src/core/CL/kernels/CLWinogradInputTransformKernel.cpp | 6 ++---- src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp | 4 ++-- 88 files changed, 178 insertions(+), 180 deletions(-) (limited to 'src/core/CL/kernels') diff --git a/src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp b/src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp index 685b8e234e..0c1206adfb 100644 --- a/src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp +++ 
b/src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -81,7 +81,7 @@ void CLAbsoluteDifferenceKernel::configure(const ICLTensor *input1, const ICLTen output_access.set_valid_region(win, valid_region); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLAbsoluteDifferenceKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLActivationLayerKernel.cpp b/src/core/CL/kernels/CLActivationLayerKernel.cpp index d8bd2f7ee1..a15e99b8d4 100644 --- a/src/core/CL/kernels/CLActivationLayerKernel.cpp +++ b/src/core/CL/kernels/CLActivationLayerKernel.cpp @@ -179,7 +179,7 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act // Configure kernel window auto win_config = validate_and_configure_window(input->info(), (_run_in_place) ? nullptr : output->info()); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); // Set config_id for enabling LWS tuning _config_id = "activation_layer_"; @@ -215,7 +215,7 @@ void CLActivationLayerKernel::run(const Window &window, cl::CommandQueue &queue) { add_3D_tensor_argument(idx, _output, slice); } - enqueue(queue, *this, slice, _lws_hint); + enqueue(queue, *this, slice, lws_hint()); } while(collapsed.slide_window_slice_3D(slice)); } diff --git a/src/core/CL/kernels/CLArithmeticAdditionKernel.cpp b/src/core/CL/kernels/CLArithmeticAdditionKernel.cpp index 6d6cb6f98c..2372d458cf 100644 --- a/src/core/CL/kernels/CLArithmeticAdditionKernel.cpp +++ b/src/core/CL/kernels/CLArithmeticAdditionKernel.cpp @@ -154,7 +154,7 @@ void CLArithmeticAdditionKernel::configure(const ICLTensor *input1, const ICLTen // Create kernel _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts)); - 
ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); } Status CLArithmeticAdditionKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy) diff --git a/src/core/CL/kernels/CLArithmeticDivisionKernel.cpp b/src/core/CL/kernels/CLArithmeticDivisionKernel.cpp index 9bd0da15a3..e995ba1a41 100644 --- a/src/core/CL/kernels/CLArithmeticDivisionKernel.cpp +++ b/src/core/CL/kernels/CLArithmeticDivisionKernel.cpp @@ -121,7 +121,7 @@ void CLArithmeticDivisionKernel::configure(const ICLTensor *input1, const ICLTen // Create kernel _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("arithmetic_div", build_opts)); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); } Status CLArithmeticDivisionKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output) diff --git a/src/core/CL/kernels/CLArithmeticSubtractionKernel.cpp b/src/core/CL/kernels/CLArithmeticSubtractionKernel.cpp index aeee6022a7..299ac553e9 100644 --- a/src/core/CL/kernels/CLArithmeticSubtractionKernel.cpp +++ b/src/core/CL/kernels/CLArithmeticSubtractionKernel.cpp @@ -127,7 +127,7 @@ void CLArithmeticSubtractionKernel::configure(const ICLTensor *input1, const ICL // Configure kernel window auto win_config = validate_and_configure_window(input1->info(), input2->info(), output->info()); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); } Status CLArithmeticSubtractionKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy) diff --git a/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp b/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp index 4c93fb28bf..d4a72076c1 100644 --- a/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp +++ 
b/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp @@ -189,7 +189,7 @@ void CLBatchNormalizationLayerKernel::configure(ICLTensor *input, ICLTensor *out (beta != nullptr) ? beta->info() : nullptr, (gamma != nullptr) ? gamma->info() : nullptr); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); _config_id = "batch_normalization_layer_"; _config_id += string_from_data_layout(input->info()->data_layout()); @@ -252,7 +252,7 @@ void CLBatchNormalizationLayerKernel::run(const Window &window, cl::CommandQueue { add_3D_tensor_argument(idx, _output, slice); } - enqueue(queue, *this, slice, _lws_hint); + enqueue(queue, *this, slice, lws_hint()); } while(window.slide_window_slice_3D(slice)); } diff --git a/src/core/CL/kernels/CLBitwiseAndKernel.cpp b/src/core/CL/kernels/CLBitwiseAndKernel.cpp index 5ea4a86da5..dd301cd02e 100644 --- a/src/core/CL/kernels/CLBitwiseAndKernel.cpp +++ b/src/core/CL/kernels/CLBitwiseAndKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -66,7 +66,7 @@ void CLBitwiseAndKernel::configure(const ICLTensor *input1, const ICLTensor *inp output_access.set_valid_region(win, valid_region); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLBitwiseAndKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLBitwiseOrKernel.cpp b/src/core/CL/kernels/CLBitwiseOrKernel.cpp index 2eeef0a993..aa84618258 100644 --- a/src/core/CL/kernels/CLBitwiseOrKernel.cpp +++ b/src/core/CL/kernels/CLBitwiseOrKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -67,7 +67,7 @@ void CLBitwiseOrKernel::configure(const ICLTensor *input1, const ICLTensor *inpu output_access.set_valid_region(win, valid_region); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLBitwiseOrKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLBitwiseXorKernel.cpp b/src/core/CL/kernels/CLBitwiseXorKernel.cpp index c19a78e1c4..ad1f923253 100644 --- a/src/core/CL/kernels/CLBitwiseXorKernel.cpp +++ b/src/core/CL/kernels/CLBitwiseXorKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -67,7 +67,7 @@ void CLBitwiseXorKernel::configure(const ICLTensor *input1, const ICLTensor *inp output_access.set_valid_region(win, valid_region); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLBitwiseXorKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLBox3x3Kernel.cpp b/src/core/CL/kernels/CLBox3x3Kernel.cpp index 0299f6233c..b81697f778 100644 --- a/src/core/CL/kernels/CLBox3x3Kernel.cpp +++ b/src/core/CL/kernels/CLBox3x3Kernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -73,5 +73,5 @@ void CLBox3x3Kernel::configure(const ICLTensor *input, ICLTensor *output, bool b output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } diff --git a/src/core/CL/kernels/CLCannyEdgeKernel.cpp b/src/core/CL/kernels/CLCannyEdgeKernel.cpp index 5d06d34631..94e5e230f9 100644 --- a/src/core/CL/kernels/CLCannyEdgeKernel.cpp +++ b/src/core/CL/kernels/CLCannyEdgeKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -77,7 +77,7 @@ void CLGradientKernel::configure(const ICLTensor *gx, const ICLTensor *gy, ICLTe mag_access.set_valid_region(win, _gx->info()->valid_region()); phase_access.set_valid_region(win, _gx->info()->valid_region()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLGradientKernel::run(const Window &window, cl::CommandQueue &queue) @@ -145,7 +145,7 @@ void CLEdgeNonMaxSuppressionKernel::configure(const ICLTensor *magnitude, const output_access.set_valid_region(win, _magnitude->info()->valid_region(), border_undefined, border_size()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLEdgeNonMaxSuppressionKernel::run(const Window &window, cl::CommandQueue &queue) @@ -230,7 +230,7 @@ void CLEdgeTraceKernel::configure(const ICLTensor *input, ICLTensor *output, int l1_stack_access.set_valid_region(win, _input->info()->valid_region()); l1_stack_counter_access.set_valid_region(win, _input->info()->valid_region()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLEdgeTraceKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLChannelCombineKernel.cpp b/src/core/CL/kernels/CLChannelCombineKernel.cpp index 6e55e666ee..c7b1da41dc 100644 --- a/src/core/CL/kernels/CLChannelCombineKernel.cpp +++ b/src/core/CL/kernels/CLChannelCombineKernel.cpp @@ -128,7 +128,7 @@ void CLChannelCombineKernel::configure(const ICLTensor *plane0, const ICLTensor } output_access.set_valid_region(win, ValidRegion(valid_region.anchor, output->info()->tensor_shape())); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLChannelCombineKernel::configure(const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output) @@ -232,7 +232,7 @@ void CLChannelCombineKernel::configure(const ICLImage *plane0, const ICLImage *p output_plane1_access.set_valid_region(win, 
ValidRegion(output_plane1_region.anchor, output->plane(1)->info()->tensor_shape())); output_plane2_access.set_valid_region(win, ValidRegion(plane2->info()->valid_region().anchor, output->plane(2)->info()->tensor_shape())); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLChannelCombineKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLChannelExtractKernel.cpp b/src/core/CL/kernels/CLChannelExtractKernel.cpp index 65843b8d5d..8bddba837a 100644 --- a/src/core/CL/kernels/CLChannelExtractKernel.cpp +++ b/src/core/CL/kernels/CLChannelExtractKernel.cpp @@ -101,7 +101,7 @@ void CLChannelExtractKernel::configure(const ICLTensor *input, Channel channel, ValidRegion input_valid_region = input->info()->valid_region(); output_access.set_valid_region(win, ValidRegion(input_valid_region.anchor, output->info()->tensor_shape())); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLChannelExtractKernel::configure(const ICLMultiImage *input, Channel channel, ICLImage *output) @@ -162,7 +162,7 @@ void CLChannelExtractKernel::configure(const ICLMultiImage *input, Channel chann output_access.set_valid_region(win, input_plane->info()->valid_region()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLChannelExtractKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp b/src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp index 5f0f0aebf8..be4d68770d 100644 --- a/src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp +++ b/src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp @@ -125,7 +125,7 @@ void CLChannelShuffleLayerKernel::configure(const ICLTensor *input, ICLTensor *o // Configure kernel window auto win_config = validate_and_configure_window(input->info(), output->info()); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + 
ICLKernel::configure_internal(win_config.second); } Status CLChannelShuffleLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int num_groups) diff --git a/src/core/CL/kernels/CLCol2ImKernel.cpp b/src/core/CL/kernels/CLCol2ImKernel.cpp index 6274c9082a..6fd3be7f6a 100644 --- a/src/core/CL/kernels/CLCol2ImKernel.cpp +++ b/src/core/CL/kernels/CLCol2ImKernel.cpp @@ -111,7 +111,7 @@ void CLCol2ImKernel::configure(const ICLTensor *input, ICLTensor *output, std::p // Configure kernel window auto win_config = validate_and_configure_window(input->info(), output->info(), _convolved_dims); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); // Set config_id for enabling LWS tuning _config_id = "col2im_"; @@ -156,7 +156,7 @@ void CLCol2ImKernel::run(const Window &window, cl::CommandQueue &queue) unsigned int idx = 0; add_2D_tensor_argument(idx, _input, slice); add_3D_tensor_argument(idx, _output, slice_out); - enqueue(queue, *this, slice, _lws_hint); + enqueue(queue, *this, slice, lws_hint()); } while(collapsed_window.slide_window_slice_2D(slice) && out_window.slide_window_slice_3D(slice_out)); } diff --git a/src/core/CL/kernels/CLColorConvertKernel.cpp b/src/core/CL/kernels/CLColorConvertKernel.cpp index 2b894989e1..e79019eab9 100644 --- a/src/core/CL/kernels/CLColorConvertKernel.cpp +++ b/src/core/CL/kernels/CLColorConvertKernel.cpp @@ -120,7 +120,7 @@ void CLColorConvertKernel::configure(const ICLTensor *input, ICLTensor *output) output_access.set_valid_region(win, input->info()->valid_region()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLColorConvertKernel::configure(const ICLMultiImage *input, ICLImage *output) @@ -189,7 +189,7 @@ void CLColorConvertKernel::configure(const ICLMultiImage *input, ICLImage *outpu input->plane(2)->info()->valid_region()); output_access.set_valid_region(win, 
ValidRegion(intersect_region.anchor, output->info()->tensor_shape())); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLColorConvertKernel::configure(const ICLImage *input, ICLMultiImage *output) @@ -285,7 +285,7 @@ void CLColorConvertKernel::configure(const ICLImage *input, ICLMultiImage *outpu output_plane1_access.set_valid_region(win, ValidRegion(input_region.anchor, output->plane(1)->info()->tensor_shape())); output_plane2_access.set_valid_region(win, ValidRegion(input_region.anchor, output->plane(2)->info()->tensor_shape())); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLColorConvertKernel::configure(const ICLMultiImage *input, ICLMultiImage *output) @@ -369,7 +369,7 @@ void CLColorConvertKernel::configure(const ICLMultiImage *input, ICLMultiImage * output_plane1_access.set_valid_region(win, ValidRegion(intersect_region.anchor, output->plane(1)->info()->tensor_shape())); output_plane2_access.set_valid_region(win, ValidRegion(intersect_region.anchor, output->plane(2)->info()->tensor_shape())); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLColorConvertKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp b/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp index 69ab590540..ace3fd5840 100644 --- a/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp +++ b/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp @@ -73,7 +73,7 @@ void CLConvertFullyConnectedWeightsKernel::configure(const ICLTensor *input, ICL // Configure kernel window Window win = calculate_max_window(*input->info(), Steps()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } Status CLConvertFullyConnectedWeightsKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const TensorShape &original_input_shape, @@ -109,4 +109,4 @@ void CLConvertFullyConnectedWeightsKernel::run(const 
Window &window, cl::Command add_2D_tensor_argument(idx, _output, window); enqueue(queue, *this, window); } -} // namespace arm_compute \ No newline at end of file +} // namespace arm_compute diff --git a/src/core/CL/kernels/CLConvolutionKernel.cpp b/src/core/CL/kernels/CLConvolutionKernel.cpp index 2b08c8dfba..e6777938a2 100644 --- a/src/core/CL/kernels/CLConvolutionKernel.cpp +++ b/src/core/CL/kernels/CLConvolutionKernel.cpp @@ -105,7 +105,7 @@ void CLConvolutionKernel<matrix_size>::configure(const ICLTensor *input, ICLTens output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } /****************************************************************************************\ @@ -167,7 +167,7 @@ void CLSeparableConvolutionHorKernel<matrix_size>::configure(const ICLTensor *in output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } template <unsigned int matrix_size> @@ -226,7 +226,7 @@ void CLSeparableConvolutionVertKernel<matrix_size>::configure(const ICLTensor *i output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } /****************************************************************************************\ @@ -298,7 +298,7 @@ void CLConvolutionRectangleKernel::configure(const ICLTensor *input, ICLTensor * output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLConvolutionRectangleKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLCopyKernel.cpp b/src/core/CL/kernels/CLCopyKernel.cpp index 1fc8b5bfbe..2da67d2666 100644 --- a/src/core/CL/kernels/CLCopyKernel.cpp +++ b/src/core/CL/kernels/CLCopyKernel.cpp @@ -95,7 +95,7 @@ void 
CLCopyKernel::configure(const ICLTensor *input, ICLTensor *output) // Configure kernel window auto win_config = validate_and_configure_window(input->info(), output->info()); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); } Status CLCopyKernel::validate(const arm_compute::ITensorInfo *input, const arm_compute::ITensorInfo *output) diff --git a/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp index 1feac7d815..c6a0031f4a 100644 --- a/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp +++ b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp @@ -85,7 +85,7 @@ void CLDeconvolutionLayerUpsampleKernel::configure(const ICLTensor *input, ICLTe AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape())); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLDeconvolutionLayerUpsampleKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp index 4055d1c7ab..40023948b1 100644 --- a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp +++ b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp @@ -125,7 +125,7 @@ void CLDepthConcatenateLayerKernel::configure(const ICLTensor *input, unsigned i auto win_config = validate_and_configure_window(input->info(), depth_offset, output->info()); ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config)); - ICLKernel::configure(std::get<1>(win_config)); + ICLKernel::configure_internal(std::get<1>(win_config)); } Status CLDepthConcatenateLayerKernel::validate(const arm_compute::ITensorInfo *input, diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp 
b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp index e091e5c2cb..a40aa2856c 100644 --- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp @@ -280,7 +280,7 @@ void CLDepthwiseConvolutionLayer3x3NCHWKernel::configure(const ICLTensor *input, auto win_config = validate_and_configure_window(input->info(), weights->info(), output->info(), conv_info, depth_multiplier, gpu_target, kernel_name); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); @@ -345,7 +345,7 @@ void CLDepthwiseConvolutionLayer3x3NCHWKernel::run(const Window &window, cl::Com add_3D_tensor_argument(idx, _output, slice_out); add_3D_tensor_argument(idx, _weights, slice_weights); - enqueue(queue, *this, slice_out, _lws_hint); + enqueue(queue, *this, slice_out, lws_hint()); } while(window.slide_window_slice_3D(slice_out) && win_in.slide_window_slice_3D(slice_in)); } diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp index 610bfb51dd..63c350d9a5 100644 --- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp @@ -244,7 +244,7 @@ void CLDepthwiseConvolutionLayer3x3NHWCKernel::configure(const ICLTensor *input, // Configure kernel window auto win_config = validate_and_configure_window(input->info(), weights->info(), biases != nullptr ? 
biases->info() : nullptr, output->info(), conv_info); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); // Set config_id for enabling LWS tuning _config_id = kernel_name; @@ -314,7 +314,7 @@ void CLDepthwiseConvolutionLayer3x3NHWCKernel::run(const Window &window, cl::Com add_3D_tensor_argument(idx, _output, slice_out); add_3D_tensor_argument(idx, _weights, slice_out); - enqueue(queue, *this, slice_out, _lws_hint); + enqueue(queue, *this, slice_out, lws_hint()); } while(window.slide_window_slice_3D(slice_out) && win_in.slide_window_slice_3D(slice_in)); } diff --git a/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp b/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp index cab943629a..d5c333a2c1 100644 --- a/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp @@ -101,7 +101,7 @@ void CLDepthwiseIm2ColKernel::configure(const ICLTensor *input, ICLTensor *outpu // CLDepthwiseIm2ColKernel doesn't need padding so update_window_and_padding() can be skipped output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape())); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } Status CLDepthwiseIm2ColKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, unsigned int depth_multiplier) @@ -135,7 +135,7 @@ void CLDepthwiseIm2ColKernel::run(const Window &window, cl::CommandQueue &queue) unsigned int idx = 0; add_3D_tensor_argument(idx, _input, slice_in); add_3D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice, _lws_hint); + enqueue(queue, *this, slice, lws_hint()); } while(window.slide_window_slice_3D(slice) && window.slide_window_slice_3D(slice_in)); } diff --git a/src/core/CL/kernels/CLDepthwiseVectorToTensorKernel.cpp b/src/core/CL/kernels/CLDepthwiseVectorToTensorKernel.cpp index 
67b2cc9f55..cdc27e8ab1 100644 --- a/src/core/CL/kernels/CLDepthwiseVectorToTensorKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseVectorToTensorKernel.cpp @@ -87,7 +87,7 @@ void CLDepthwiseVectorToTensorKernel::configure(const ICLTensor *input, ICLTenso // The CLDepthwisevectorToTensorKernel doesn't need padding so update_window_and_padding() can be skipped output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape())); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } Status CLDepthwiseVectorToTensorKernel::validate(const ITensorInfo *input, const ITensorInfo *output, size_t conv_w, size_t conv_h) diff --git a/src/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.cpp b/src/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.cpp index c28be3fccf..683dda8d67 100644 --- a/src/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.cpp @@ -95,7 +95,7 @@ void CLDepthwiseWeightsReshapeKernel::configure(const ICLTensor *input, ICLTenso // The CLDepthwiseWeightsReshapeKernel doesn't need padding so update_window_and_padding() can be skipped output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape())); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } Status CLDepthwiseWeightsReshapeKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *biases) diff --git a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp index fba721f50b..d4c1bec5f4 100644 --- a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp +++ b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp @@ -96,7 +96,7 @@ void CLDequantizationLayerKernel::configure(const ICLTensor *input, ICLTensor *o ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config)); - ICLKernel::configure(std::get<1>(win_config)); + ICLKernel::configure_internal(std::get<1>(win_config)); } Status 
CLDequantizationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *min_max) diff --git a/src/core/CL/kernels/CLDerivativeKernel.cpp b/src/core/CL/kernels/CLDerivativeKernel.cpp index 5bfe75140b..af7df14359 100644 --- a/src/core/CL/kernels/CLDerivativeKernel.cpp +++ b/src/core/CL/kernels/CLDerivativeKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -117,7 +117,7 @@ void CLDerivativeKernel::configure(const ICLTensor *input, ICLTensor *output_x, output_x_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); output_y_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLDerivativeKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLDilateKernel.cpp b/src/core/CL/kernels/CLDilateKernel.cpp index 3abd747011..89853d7b19 100644 --- a/src/core/CL/kernels/CLDilateKernel.cpp +++ b/src/core/CL/kernels/CLDilateKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -61,5 +61,5 @@ void CLDilateKernel::configure(const ICLTensor *input, ICLTensor *output, bool b output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } diff --git a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp index 754f0d8f23..6de97d40af 100644 --- a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp +++ b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp @@ -442,7 +442,7 @@ void CLDirectConvolutionLayerKernel::configure(const ICLTensor *input, const ICL // Configure kernel window auto win_config = validate_and_configure_window(input->info(), weights->info(), output->info(), conv_info, gpu_target); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); // Set static kernel arguments if(is_data_type_quantized_asymmetric(data_type)) @@ -532,7 +532,7 @@ void CLDirectConvolutionLayerKernel::run(const Window &window, cl::CommandQueue unsigned int idx = 0; add_3D_tensor_argument(idx, _input, slice_in); add_3D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice, _lws_hint); + enqueue(queue, *this, slice, lws_hint()); } while(window.slide_window_slice_3D(slice) && win_in.slide_window_slice_3D(slice_in)); } diff --git a/src/core/CL/kernels/CLDirectConvolutionOutputStageKernel.cpp b/src/core/CL/kernels/CLDirectConvolutionOutputStageKernel.cpp index 4e2352cf6e..5f4dacb269 100644 --- a/src/core/CL/kernels/CLDirectConvolutionOutputStageKernel.cpp +++ b/src/core/CL/kernels/CLDirectConvolutionOutputStageKernel.cpp @@ -168,7 +168,7 @@ void CLDirectConvolutionLayerOutputStageKernel::configure(ICLTensor *input, cons // Configure kernel window auto win_config = validate_and_configure_window(input->info(), (bias == nullptr) ? 
nullptr : bias->info(), (output == nullptr) ? nullptr : output->info()); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); } Status CLDirectConvolutionLayerOutputStageKernel::validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output) @@ -202,7 +202,7 @@ void CLDirectConvolutionLayerOutputStageKernel::run(const Window &window, cl::Co unsigned int idx = 0; add_3D_tensor_argument(idx, _input, slice); add_3D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice, _lws_hint); + enqueue(queue, *this, slice, lws_hint()); } while(window.slide_window_slice_3D(slice)); } diff --git a/src/core/CL/kernels/CLErodeKernel.cpp b/src/core/CL/kernels/CLErodeKernel.cpp index a7aa88fc5c..e56b71a75e 100644 --- a/src/core/CL/kernels/CLErodeKernel.cpp +++ b/src/core/CL/kernels/CLErodeKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -61,5 +61,5 @@ void CLErodeKernel::configure(const ICLTensor *input, ICLTensor *output, bool bo output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } diff --git a/src/core/CL/kernels/CLFastCornersKernel.cpp b/src/core/CL/kernels/CLFastCornersKernel.cpp index 616e41b5fc..782ab7a7c0 100644 --- a/src/core/CL/kernels/CLFastCornersKernel.cpp +++ b/src/core/CL/kernels/CLFastCornersKernel.cpp @@ -87,7 +87,7 @@ void CLFastCornersKernel::configure(const ICLImage *input, ICLImage *output, flo output_access.set_valid_region(win, input->info()->valid_region(), border_mode == BorderMode::UNDEFINED, border_size()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLFastCornersKernel::run(const Window &window, cl::CommandQueue &queue) @@ -148,7 +148,7 @@ void CLCopyToArrayKernel::configure(const ICLImage *input, bool update_number, I Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); update_window_and_padding(win, AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration)); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLCopyToArrayKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLFillBorderKernel.cpp b/src/core/CL/kernels/CLFillBorderKernel.cpp index 3b1edaf46c..baf6bb6024 100644 --- a/src/core/CL/kernels/CLFillBorderKernel.cpp +++ b/src/core/CL/kernels/CLFillBorderKernel.cpp @@ -154,7 +154,7 @@ void CLFillBorderKernel::configure(ICLTensor *tensor, BorderSize border_size, Bo win.set(Window::DimX, Window::Dimension(0, total_valid_width + valid_height)); win.set(Window::DimY, Window::Dimension(0, 1, 1)); win.use_tensor_dimensions(tensor->info()->tensor_shape(), Window::DimZ); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void 
CLFillBorderKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLFlattenLayerKernel.cpp b/src/core/CL/kernels/CLFlattenLayerKernel.cpp index 0b5feffcc9..17189143ef 100644 --- a/src/core/CL/kernels/CLFlattenLayerKernel.cpp +++ b/src/core/CL/kernels/CLFlattenLayerKernel.cpp @@ -101,7 +101,7 @@ void CLFlattenLayerKernel::configure(const ICLTensor *input, ICLTensor *output) // Configure kernel window auto win_config = validate_and_configure_window(input->info(), output->info()); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); // Set config_id for enabling LWS tuning _config_id = "flatten"; @@ -144,8 +144,8 @@ void CLFlattenLayerKernel::run(const Window &window, cl::CommandQueue &queue) unsigned int idx = 0; add_3D_tensor_argument(idx, _input, in_slice); add_1D_tensor_argument(idx, _output, out_slice); - enqueue(queue, *this, in_slice, _lws_hint); + enqueue(queue, *this, in_slice, lws_hint()); } while(window.slide_window_slice_3D(in_slice) && out_window.slide_window_slice_1D(out_slice)); } -} // namespace arm_compute \ No newline at end of file +} // namespace arm_compute diff --git a/src/core/CL/kernels/CLFloorKernel.cpp b/src/core/CL/kernels/CLFloorKernel.cpp index f6b0e829a0..20e3a3a66f 100644 --- a/src/core/CL/kernels/CLFloorKernel.cpp +++ b/src/core/CL/kernels/CLFloorKernel.cpp @@ -69,7 +69,7 @@ void CLFloorKernel::configure(const ICLTensor *input, ICLTensor *output) update_window_and_padding(win, input_access, output_access); output_access.set_valid_region(win, input->info()->valid_region()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLFloorKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp b/src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp index 6ea1160c69..ae54e77972 100644 --- a/src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp 
+++ b/src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp @@ -157,7 +157,7 @@ void CLGEMMInterleave4x4Kernel::configure(const ICLTensor *input, ICLTensor *out // Configure kernel window auto win_config = validate_and_configure_window(input->info(), output->info(), mult_interleave4x4_height, reinterpret_input_as_3d); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); // Set config_id for enabling LWS tuning _config_id = "interleave4x4_"; @@ -210,7 +210,7 @@ void CLGEMMInterleave4x4Kernel::run(const Window &window, cl::CommandQueue &queu unsigned int idx = 0; add_3D_tensor_argument(idx, _input, slice); add_3D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice, _lws_hint); + enqueue(queue, *this, slice, lws_hint()); } while(window.slide_window_slice_3D(slice)); } diff --git a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp index 8a4a1b5820..9adf95fa33 100644 --- a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp @@ -188,7 +188,7 @@ void CLGEMMLowpMatrixMultiplyKernel::configure(const ICLTensor *input0, const IC // Configure kernel window auto win_config = validate_and_configure_window(input0->info(), input1->info(), output->info(), is_interleaved_transposed, num_elements_processed); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); const bool is_dot8_supported = dot8_supported(CLKernelLibrary::get().get_device()); @@ -273,7 +273,7 @@ void CLGEMMLowpMatrixMultiplyKernel::run(const Window &window, cl::CommandQueue add_2D_tensor_argument(idx, _input0, slice); add_2D_tensor_argument(idx, _input1, slice_b); add_2D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice, _lws_hint); + enqueue(queue, *this, slice, lws_hint()); } 
while(window.slide_window_slice_2D(slice)); } diff --git a/src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp b/src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp index 221a1566b9..aa954abde1 100644 --- a/src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp @@ -159,7 +159,7 @@ void CLGEMMLowpOffsetContributionKernel::configure(ICLTensor *mm_result, const I vector_sum_row != nullptr ? vector_sum_row->info() : nullptr, a_offset, b_offset); // NOLINT ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); // Set config_id for enabling LWS tuning _config_id = "gemmlowp_offset_contribution_"; diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp index ff2fc646aa..875e26d6cb 100644 --- a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -146,7 +146,7 @@ void CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel::configure(const // Configure kernel window auto win_config = validate_and_configure_window(input->info(), (bias != nullptr) ? 
bias->info() : nullptr, output->info()); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); } void CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel::run(const Window &window, cl::CommandQueue &queue) @@ -174,4 +174,4 @@ void CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel::run(const Window enqueue(queue, *this, slice); } while(collapsed.slide_window_slice_3D(slice)); -} \ No newline at end of file +} diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp index 151a6588d5..57891131c7 100644 --- a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -145,7 +145,7 @@ void CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel::configure(const ICLTensor *i // Configure kernel window auto win_config = validate_and_configure_window(input->info(), (bias != nullptr) ? bias->info() : nullptr, output->info()); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); } void CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp b/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp index 6951512167..cd26cd1597 100644 --- a/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -121,7 +121,7 @@ void CLGEMMLowpMatrixAReductionKernel::configure(const ICLTensor *mtx_a, ICLTens // Configure kernel window auto win_config = validate_and_configure_window_matrix_a_reduction(_input->info(), _output->info()); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); } Status CLGEMMLowpMatrixAReductionKernel::validate(const ITensorInfo *mtx_a, const ITensorInfo *vector_sum_row) @@ -175,7 +175,7 @@ void CLGEMMLowpMatrixBReductionKernel::configure(const ICLTensor *mtx_b, ICLTens // Configure kernel window auto win_config = validate_and_configure_window_matrix_b_reduction(_input->info(), _output->info()); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); } Status CLGEMMLowpMatrixBReductionKernel::validate(const ITensorInfo *mtx_b, const ITensorInfo *vector_sum_col) diff --git a/src/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.cpp index ebe4013bf0..2f1f1bf865 100644 --- a/src/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.cpp +++ b/src/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.cpp @@ -88,7 +88,7 @@ void CLGEMMMatrixAccumulateBiasesKernel::configure(ICLTensor *accum, const ICLTe // Configure kernel window auto win_config = validate_and_configure_window(accum->info(), biases->info(), gpu_target, vector_size); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); // Add build options CLBuildOptions build_opts; @@ -126,7 +126,7 @@ void CLGEMMMatrixAccumulateBiasesKernel::run(const Window &window, cl::CommandQu add_2D_tensor_argument(idx, _accum, accum_slice); add_1D_tensor_argument(idx, _biases, biases_slice); - enqueue(queue, *this, accum_slice, _lws_hint); + enqueue(queue, *this, 
accum_slice, lws_hint()); } while(window.slide_window_slice_2D(accum_slice)); } diff --git a/src/core/CL/kernels/CLGEMMMatrixAdditionKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixAdditionKernel.cpp index bcc3a01296..0c65bb40c0 100644 --- a/src/core/CL/kernels/CLGEMMMatrixAdditionKernel.cpp +++ b/src/core/CL/kernels/CLGEMMMatrixAdditionKernel.cpp @@ -98,7 +98,7 @@ void CLGEMMMatrixAdditionKernel::configure(const ICLTensor *input, ICLTensor *ou // Configure kernel window auto win_config = validate_and_configure_window(input->info(), output->info()); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); } Status CLGEMMMatrixAdditionKernel::validate(const ITensorInfo *input, const ITensorInfo *output, float beta) diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp index 79e2f8b11a..8530ed2fd3 100644 --- a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp +++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp @@ -253,7 +253,7 @@ void CLGEMMMatrixMultiplyKernel::configure(const ICLTensor *input0, const ICLTen // Configure kernel window auto win_config = validate_and_configure_window(input0->info(), input1->info(), output->info(), is_interleaved_transposed, reshape_info, gpu_target, num_elements_processed); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); // Create build options CLBuildOptions build_opts; @@ -316,7 +316,7 @@ void CLGEMMMatrixMultiplyKernel::configure(const ICLTensor *input0, const ICLTen // The work-group size equal to the Bifrost quad size has been proved to be optimal for these kernels // via exhaustive autotuning over a range of representative layer configurations. 
- _lws_hint = cl::NDRange(4); + set_lws_hint(cl::NDRange(4)); } else // (MIDGARD and F32) or (F16) { @@ -416,7 +416,7 @@ void CLGEMMMatrixMultiplyKernel::run(const Window &window, cl::CommandQueue &que _kernel.setArg(idx++, static_cast(_input0->info()->strides_in_bytes()[2])); _kernel.setArg(idx++, static_cast(_input1->info()->strides_in_bytes()[2])); _kernel.setArg(idx++, static_cast(_output->info()->strides_in_bytes()[2])); - enqueue(queue, *this, slice, _lws_hint); + enqueue(queue, *this, slice, lws_hint()); } while(window.slide_window_slice_3D(slice)); } diff --git a/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp index 43a6cf25db..11a4292270 100644 --- a/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp +++ b/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp @@ -121,7 +121,7 @@ void CLGEMMMatrixVectorMultiplyKernel::configure(const ICLTensor *input0, const auto win_config = validate_and_configure_window(input0->info(), input1->info(), output->info()); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); } Status CLGEMMMatrixVectorMultiplyKernel::validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output) @@ -165,7 +165,7 @@ void CLGEMMMatrixVectorMultiplyKernel::run(const Window &window, cl::CommandQueu unsigned int idx_2 = num_arguments_per_3D_tensor() + num_arguments_per_2D_tensor(); add_3D_tensor_argument(idx_0, _input0, slice_in); add_1D_tensor_argument(idx_2, _output, slice_out); - enqueue(queue, *this, slice_in, _lws_hint); + enqueue(queue, *this, slice_in, lws_hint()); } while(window.slide_window_slice_3D(slice_in) && window.slide_window_slice_3D(slice_out)); } diff --git a/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp b/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp index 7e44fa7118..5b299052d4 100644 --- 
a/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp +++ b/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp @@ -107,7 +107,7 @@ void CLGEMMTranspose1xWKernel::configure(const ICLTensor *input, ICLTensor *outp unsigned int num_elems_processed_per_iteration = 1; auto win_config = validate_and_configure_window(input->info(), output->info(), num_elems_processed_per_iteration, mult_transpose1xW_width); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); // Create build options CLBuildOptions build_opts; @@ -157,7 +157,7 @@ void CLGEMMTranspose1xWKernel::run(const Window &window, cl::CommandQueue &queue unsigned int idx = 0; add_3D_tensor_argument(idx, _input, in_slice); add_3D_tensor_argument(idx, _output, out_slice); - enqueue(queue, *this, in_slice, _lws_hint); + enqueue(queue, *this, in_slice, lws_hint()); } while(window.slide_window_slice_3D(in_slice) && out_window.slide_window_slice_3D(out_slice)); } diff --git a/src/core/CL/kernels/CLGaussian3x3Kernel.cpp b/src/core/CL/kernels/CLGaussian3x3Kernel.cpp index e5bc3f9656..7e8f3139f2 100644 --- a/src/core/CL/kernels/CLGaussian3x3Kernel.cpp +++ b/src/core/CL/kernels/CLGaussian3x3Kernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -72,5 +72,5 @@ void CLGaussian3x3Kernel::configure(const ICLTensor *input, ICLTensor *output, b output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } diff --git a/src/core/CL/kernels/CLGaussianPyramidKernel.cpp b/src/core/CL/kernels/CLGaussianPyramidKernel.cpp index a4fda364e3..6b729c8585 100644 --- a/src/core/CL/kernels/CLGaussianPyramidKernel.cpp +++ b/src/core/CL/kernels/CLGaussianPyramidKernel.cpp @@ -95,7 +95,7 @@ void CLGaussianPyramidHorKernel::configure(const ICLTensor *input, ICLTensor *ou output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape())); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLGaussianPyramidHorKernel::run(const Window &window, cl::CommandQueue &queue) @@ -177,7 +177,7 @@ void CLGaussianPyramidVertKernel::configure(const ICLTensor *input, ICLTensor *o output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape())); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLGaussianPyramidVertKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLHOGDescriptorKernel.cpp b/src/core/CL/kernels/CLHOGDescriptorKernel.cpp index a15aab1f37..26c3b81175 100644 --- a/src/core/CL/kernels/CLHOGDescriptorKernel.cpp +++ b/src/core/CL/kernels/CLHOGDescriptorKernel.cpp @@ -91,7 +91,7 @@ void CLHOGOrientationBinningKernel::configure(const ICLTensor *input_magnitude, output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape())); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLHOGOrientationBinningKernel::run(const Window &window, cl::CommandQueue &queue) @@ -174,7 +174,7 @@ void CLHOGBlockNormalizationKernel::configure(const ICLTensor *input, ICLTensor output_access.set_valid_region(win, 
ValidRegion(Coordinates(), output->info()->tensor_shape())); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLHOGBlockNormalizationKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLHOGDetectorKernel.cpp b/src/core/CL/kernels/CLHOGDetectorKernel.cpp index caca49846f..12bbbaf9f2 100644 --- a/src/core/CL/kernels/CLHOGDetectorKernel.cpp +++ b/src/core/CL/kernels/CLHOGDetectorKernel.cpp @@ -110,7 +110,7 @@ void CLHOGDetectorKernel::configure(const ICLTensor *input, const ICLHOG *hog, I update_window_and_padding(win, AccessWindowRectangle(input->info(), 0, 0, num_elems_read_per_iteration, num_rows_read_per_iteration)); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLHOGDetectorKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLHarrisCornersKernel.cpp b/src/core/CL/kernels/CLHarrisCornersKernel.cpp index 1f757fe34c..5320b6bebc 100644 --- a/src/core/CL/kernels/CLHarrisCornersKernel.cpp +++ b/src/core/CL/kernels/CLHarrisCornersKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -106,7 +106,7 @@ void CLHarrisScoreKernel::configure(const ICLImage *input1, const ICLImage *inpu ValidRegion valid_region = intersect_valid_regions(input1->info()->valid_region(), input2->info()->valid_region()); output_access.set_valid_region(win, valid_region, border_undefined, border_size()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLHarrisScoreKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLHistogramKernel.cpp b/src/core/CL/kernels/CLHistogramKernel.cpp index 7b715abb36..b56ad8d38d 100644 --- a/src/core/CL/kernels/CLHistogramKernel.cpp +++ b/src/core/CL/kernels/CLHistogramKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. 
+ * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -107,7 +107,7 @@ void CLHistogramKernel::configure(const ICLImage *input, ICLDistribution1D *outp update_window_and_padding(win, AccessWindowHorizontal(input->info(), 0, pixels_per_item)); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLHistogramKernel::run(const Window &window, cl::CommandQueue &queue) @@ -198,7 +198,7 @@ void CLHistogramBorderKernel::configure(const ICLImage *input, ICLDistribution1D win.set(0, Window::Dimension(start_position, _input->info()->dimension(0))); win.set(1, Window::Dimension(0, _input->info()->dimension(1))); update_window_and_padding(win, AccessWindowHorizontal(input->info(), 0, 1)); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLHistogramBorderKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLIm2ColKernel.cpp b/src/core/CL/kernels/CLIm2ColKernel.cpp index 39654e2190..42bb96c16f 100644 --- a/src/core/CL/kernels/CLIm2ColKernel.cpp +++ b/src/core/CL/kernels/CLIm2ColKernel.cpp @@ -308,7 +308,7 @@ void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const auto win_config = validate_and_configure_window(input->info(), output->info(), kernel_dims, conv_info, has_bias, dilation, im2col_config.num_elems_processed_per_iteration, im2col_config.is_padding_required_nchw); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); // Set config_id for enabling LWS tuning _config_id = im2col_config.kernel_name; @@ -386,7 +386,7 @@ void CLIm2ColKernel::run(const Window &window, cl::CommandQueue &queue) add_2D_tensor_argument(idx, _output, slice_out); _kernel.setArg(idx++, static_cast(_input->info()->strides_in_bytes()[3])); _kernel.setArg(idx++, static_cast(_output->info()->strides_in_bytes()[2])); - enqueue(queue, *this, slice, _lws_hint); + enqueue(queue, *this, 
slice, lws_hint()); } while(window_collapsed.slide_window_slice_3D(slice) && window_output.slide_window_slice_2D(slice_out) && window_collapsed.slide_window_slice_3D(slice_in)); -} \ No newline at end of file +} diff --git a/src/core/CL/kernels/CLIntegralImageKernel.cpp b/src/core/CL/kernels/CLIntegralImageKernel.cpp index 69ede457df..6fb39ff0a2 100644 --- a/src/core/CL/kernels/CLIntegralImageKernel.cpp +++ b/src/core/CL/kernels/CLIntegralImageKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -60,7 +60,7 @@ void CLIntegralImageHorKernel::configure(const ICLTensor *input, ICLTensor *outp output_access.set_valid_region(win, input->info()->valid_region()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } CLIntegralImageVertKernel::CLIntegralImageVertKernel() @@ -89,7 +89,7 @@ void CLIntegralImageVertKernel::configure(ICLTensor *in_out) in_out_access.set_valid_region(win, in_out->info()->valid_region()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLIntegralImageVertKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp b/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp index 39d9f958d3..54ed51eda2 100644 --- a/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp +++ b/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp @@ -120,7 +120,7 @@ void CLL2NormalizeLayerKernel::configure(const ICLTensor *input, const ICLTensor auto win_config = validate_and_configure_window(_input->info(), _output->info()); ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config)); - ICLKernel::configure(std::get<1>(win_config)); + ICLKernel::configure_internal(std::get<1>(win_config)); } Status CLL2NormalizeLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, unsigned int axis, float epsilon) diff --git a/src/core/CL/kernels/CLLKTrackerKernel.cpp 
b/src/core/CL/kernels/CLLKTrackerKernel.cpp index 078d18e61c..40ed630c89 100644 --- a/src/core/CL/kernels/CLLKTrackerKernel.cpp +++ b/src/core/CL/kernels/CLLKTrackerKernel.cpp @@ -75,7 +75,7 @@ void CLLKTrackerInitKernel::configure(const ICLKeyPointArray *old_points, const Window window; window.set(Window::DimX, Window::Dimension(0, old_points->num_values(), 1)); window.set(Window::DimY, Window::Dimension(0, 1, 1)); - ICLKernel::configure(window); + ICLKernel::configure_internal(window); } void CLLKTrackerInitKernel::run(const Window &window, cl::CommandQueue &queue) @@ -104,7 +104,7 @@ void CLLKTrackerFinalizeKernel::configure(ICLLKInternalKeypointArray *new_points Window window; window.set(Window::DimX, Window::Dimension(0, new_points_internal->num_values(), 1)); window.set(Window::DimY, Window::Dimension(0, 1, 1)); - ICLKernel::configure(window); + ICLKernel::configure_internal(window); } void CLLKTrackerFinalizeKernel::run(const Window &window, cl::CommandQueue &queue) @@ -156,7 +156,7 @@ void CLLKTrackerStage0Kernel::configure(const ICLTensor *old_input, const ICLTen AccessWindowStatic(old_scharr_gy->info(), valid_region.start(0), valid_region.start(1), valid_region.end(0), valid_region.end(1))); - ICLKernel::configure(window); + ICLKernel::configure_internal(window); // Initialize required variables const int level0 = (level == 0) ? 1 : 0; @@ -232,7 +232,7 @@ void CLLKTrackerStage1Kernel::configure(const ICLTensor *new_input, ICLLKInterna AccessWindowStatic(new_input->info(), valid_region.start(0), valid_region.start(1), valid_region.end(0), valid_region.end(1))); - ICLKernel::configure(window); + ICLKernel::configure_internal(window); // Initialize required variables const int level0 = (level == 0) ? 
1 : 0; diff --git a/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp b/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp index 1a7d95cc2c..ad2f3a4892 100644 --- a/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp +++ b/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp @@ -90,13 +90,14 @@ void CLLocallyConnectedMatrixMultiplyKernel::configure(const ICLTensor *input0, _input1 = input1; _output = output; + cl::NDRange lws_hint; if(output->info()->dimension(1) == 196) { - _lws_hint = cl::NDRange(1, 7); + lws_hint = cl::NDRange(1, 7); } else { - _lws_hint = cl::NDRange(8, 8); + lws_hint = cl::NDRange(8, 8); } std::ostringstream mm_arguments; @@ -114,7 +115,7 @@ void CLLocallyConnectedMatrixMultiplyKernel::configure(const ICLTensor *input0, ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config)); - ICLKernel::configure(std::get<1>(win_config)); + ICLKernel::configure_internal(std::get<1>(win_config), lws_hint); } Status CLLocallyConnectedMatrixMultiplyKernel::validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output) @@ -142,7 +143,7 @@ void CLLocallyConnectedMatrixMultiplyKernel::run(const Window &window, cl::Comma add_2D_tensor_argument(idx, _input0, slice); add_3D_tensor_argument(idx, _input1, slice_matrix_b); add_2D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice, _lws_hint); + enqueue(queue, *this, slice, lws_hint()); } while(window.slide_window_slice_2D(slice)); } diff --git a/src/core/CL/kernels/CLMagnitudePhaseKernel.cpp b/src/core/CL/kernels/CLMagnitudePhaseKernel.cpp index c504189169..0b34c59a03 100644 --- a/src/core/CL/kernels/CLMagnitudePhaseKernel.cpp +++ b/src/core/CL/kernels/CLMagnitudePhaseKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -137,7 +137,7 @@ void CLMagnitudePhaseKernel::configure(const ICLTensor *gx, const ICLTensor *gy, output_magnitude_access.set_valid_region(win, valid_region); output_phase_access.set_valid_region(win, valid_region); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLMagnitudePhaseKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLMeanStdDevKernel.cpp b/src/core/CL/kernels/CLMeanStdDevKernel.cpp index bd31131fe5..0cde9c5fe6 100644 --- a/src/core/CL/kernels/CLMeanStdDevKernel.cpp +++ b/src/core/CL/kernels/CLMeanStdDevKernel.cpp @@ -106,7 +106,7 @@ void CLMeanStdDevKernel::configure(const ICLImage *input, float *mean, cl::Buffe AccessWindowRectangle input_access(input->info(), 0, 0, num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y); update_window_and_padding(win, input_access); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLMeanStdDevKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLMedian3x3Kernel.cpp b/src/core/CL/kernels/CLMedian3x3Kernel.cpp index 3b9fb1fe88..b93179d5f4 100644 --- a/src/core/CL/kernels/CLMedian3x3Kernel.cpp +++ b/src/core/CL/kernels/CLMedian3x3Kernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -62,5 +62,5 @@ void CLMedian3x3Kernel::configure(const ICLTensor *input, ICLTensor *output, boo output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } diff --git a/src/core/CL/kernels/CLMinMaxLayerKernel.cpp b/src/core/CL/kernels/CLMinMaxLayerKernel.cpp index 9493ddc878..fa7b678e86 100644 --- a/src/core/CL/kernels/CLMinMaxLayerKernel.cpp +++ b/src/core/CL/kernels/CLMinMaxLayerKernel.cpp @@ -105,7 +105,7 @@ void CLMinMaxLayerKernel::configure(const ICLTensor *input, ICLTensor *output) ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config)); - ICLKernel::configure(std::get<1>(win_config)); + ICLKernel::configure_internal(std::get<1>(win_config)); } Status CLMinMaxLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output) diff --git a/src/core/CL/kernels/CLMinMaxLocationKernel.cpp b/src/core/CL/kernels/CLMinMaxLocationKernel.cpp index 5636592347..0c7f3bc070 100644 --- a/src/core/CL/kernels/CLMinMaxLocationKernel.cpp +++ b/src/core/CL/kernels/CLMinMaxLocationKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -118,7 +118,7 @@ void CLMinMaxKernel::configure(const ICLImage *input, cl::Buffer *min_max) // Configure kernel window Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); update_window_and_padding(win, AccessWindowHorizontal(input->info(), 0, ceil_to_multiple(num_elems_processed_per_iteration, 16))); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLMinMaxKernel::run(const Window &window, cl::CommandQueue &queue) @@ -209,7 +209,7 @@ void CLMinMaxLocationKernel::configure(const ICLImage *input, cl::Buffer *min_ma constexpr unsigned int num_elems_processed_per_iteration = 1; Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); update_window_and_padding(win, AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration)); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLMinMaxLocationKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLNonLinearFilterKernel.cpp b/src/core/CL/kernels/CLNonLinearFilterKernel.cpp index 6afa5822ba..5e419743d0 100644 --- a/src/core/CL/kernels/CLNonLinearFilterKernel.cpp +++ b/src/core/CL/kernels/CLNonLinearFilterKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -94,5 +94,5 @@ void CLNonLinearFilterKernel::configure(const ICLTensor *input, ICLTensor *outpu output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } diff --git a/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp b/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp index 6a96b0effd..4e41f0df42 100644 --- a/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp +++ b/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -68,5 +68,5 @@ void CLNonMaximaSuppression3x3Kernel::configure(const ICLTensor *input, ICLTenso output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } diff --git a/src/core/CL/kernels/CLNormalizationLayerKernel.cpp b/src/core/CL/kernels/CLNormalizationLayerKernel.cpp index edc9e9d58c..8a7b7aed22 100644 --- a/src/core/CL/kernels/CLNormalizationLayerKernel.cpp +++ b/src/core/CL/kernels/CLNormalizationLayerKernel.cpp @@ -135,7 +135,7 @@ void CLNormalizationLayerKernel::configure(const ICLTensor *input, ICLTensor *ou // Configure kernel window auto win_config = validate_and_configure_window(input->info(), output->info(), norm_info); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); // Set config_id for enabling LWS tuning _config_id = "normalization_layer_"; diff --git a/src/core/CL/kernels/CLPermuteKernel.cpp b/src/core/CL/kernels/CLPermuteKernel.cpp index 7c0c95be1c..c6f0f4bc55 100644 --- a/src/core/CL/kernels/CLPermuteKernel.cpp +++ b/src/core/CL/kernels/CLPermuteKernel.cpp @@ -120,7 +120,7 @@ void CLPermuteKernel::configure(const ICLTensor 
*input, ICLTensor *output, const coord.set_num_dimensions(output->info()->num_dimensions()); output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape())); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } Status CLPermuteKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const PermutationVector &perm) diff --git a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp index 4ea093fe04..4ca2ef8aa3 100644 --- a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp +++ b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp @@ -197,7 +197,7 @@ void CLPixelWiseMultiplicationKernel::configure(const ICLTensor *input1, const I _kernel.setArg(idx++, scale); } - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); } Status CLPixelWiseMultiplicationKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float scale, diff --git a/src/core/CL/kernels/CLPoolingLayerKernel.cpp b/src/core/CL/kernels/CLPoolingLayerKernel.cpp index d5ea092c78..df13068239 100644 --- a/src/core/CL/kernels/CLPoolingLayerKernel.cpp +++ b/src/core/CL/kernels/CLPoolingLayerKernel.cpp @@ -269,7 +269,7 @@ void CLPoolingLayerKernel::configure(const ICLTensor *input, ICLTensor *output, auto win_config = validate_and_configure_window(input->info(), output->info(), pool_info); ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config)); - ICLKernel::configure(std::get<1>(win_config)); + ICLKernel::configure_internal(std::get<1>(win_config)); if(data_layout == DataLayout::NCHW) { @@ -336,7 +336,7 @@ void CLPoolingLayerKernel::run(const Window &window, cl::CommandQueue &queue) unsigned int idx = 0; add_3D_tensor_argument(idx, _input, in_slice); add_3D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice, _lws_hint); + enqueue(queue, *this, slice, lws_hint()); } 
while(window_collapsed.slide_window_slice_3D(slice)); break; @@ -355,7 +355,7 @@ void CLPoolingLayerKernel::run(const Window &window, cl::CommandQueue &queue) unsigned int idx = 0; add_3D_tensor_argument(idx, _input, in_slice); add_3D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice, _lws_hint); + enqueue(queue, *this, slice, lws_hint()); } while(window.slide_window_slice_3D(slice) && window.slide_window_slice_3D(in_slice)); break; diff --git a/src/core/CL/kernels/CLQuantizationLayerKernel.cpp b/src/core/CL/kernels/CLQuantizationLayerKernel.cpp index af751f4832..9028b0f604 100644 --- a/src/core/CL/kernels/CLQuantizationLayerKernel.cpp +++ b/src/core/CL/kernels/CLQuantizationLayerKernel.cpp @@ -96,7 +96,7 @@ void CLQuantizationLayerKernel::configure(const ICLTensor *input, ICLTensor *out ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config)); - ICLKernel::configure(std::get<1>(win_config)); + ICLKernel::configure_internal(std::get<1>(win_config)); } Status CLQuantizationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *min_max) diff --git a/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp b/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp index 4048e927f5..23676942a6 100644 --- a/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp +++ b/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp @@ -101,7 +101,7 @@ void CLROIPoolingLayerKernel::configure(const ICLTensor *input, const ICLROIArra update_window_and_padding(window, input_access, output_access); output_access.set_valid_region(window, ValidRegion(Coordinates(), output->info()->tensor_shape())); - ICLKernel::configure(window); + ICLKernel::configure_internal(window); } void CLROIPoolingLayerKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLReductionOperationKernel.cpp b/src/core/CL/kernels/CLReductionOperationKernel.cpp index 09861630ac..bf36ae2c0f 100644 --- a/src/core/CL/kernels/CLReductionOperationKernel.cpp +++ 
b/src/core/CL/kernels/CLReductionOperationKernel.cpp @@ -114,8 +114,8 @@ void CLReductionOperationKernel::configure(const ICLTensor *input, ICLTensor *ou // Set the number of WG based on the input size. If input width is < 128 // we can use fewer threads than 8. - _lws_hint = cl::NDRange(std::min(8U, num_of_threads)); - _border_size = BorderSize(0, border_width, 0, 0); + cl::NDRange lws_hint = cl::NDRange(std::min(8U, num_of_threads)); + _border_size = BorderSize(0, border_width, 0, 0); // Set build options std::set<std::string> build_opts; @@ -142,7 +142,7 @@ void CLReductionOperationKernel::configure(const ICLTensor *input, ICLTensor *ou ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config)); - ICLKernel::configure(std::get<1>(win_config)); + ICLKernel::configure_internal(std::get<1>(win_config), lws_hint); } Status CLReductionOperationKernel::validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op) @@ -171,7 +171,7 @@ void CLReductionOperationKernel::run(const Window &window, cl::CommandQueue &que in_slice.set(Window::DimX, Window::Dimension(in_slice.x().start(), in_slice.x().end() + border_width, in_slice.x().step())); // Set local sums buffer - unsigned int local_sum_size = _lws_hint[0] * _input->info()->element_size(); + unsigned int local_sum_size = lws_hint()[0] * _input->info()->element_size(); _kernel.setArg(num_arguments_per_2D_tensor() * 2, local_sum_size, nullptr); do @@ -179,7 +179,7 @@ void CLReductionOperationKernel::run(const Window &window, cl::CommandQueue &que unsigned int idx = 0; add_2D_tensor_argument(idx, _input, in_slice); add_2D_tensor_argument(idx, _output, out_slice); - enqueue(queue, *this, in_slice, _lws_hint); + enqueue(queue, *this, in_slice, lws_hint()); } while(window.slide_window_slice_2D(in_slice) && window.slide_window_slice_2D(out_slice)); } diff --git a/src/core/CL/kernels/CLRemapKernel.cpp b/src/core/CL/kernels/CLRemapKernel.cpp index b46bb30c59..33c5f2d402 100644 ---
a/src/core/CL/kernels/CLRemapKernel.cpp +++ b/src/core/CL/kernels/CLRemapKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -83,7 +83,7 @@ void CLRemapKernel::configure(const ICLTensor *input, const ICLTensor *map_x, co output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape())); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); // Set static arguments unsigned int idx = 4 * num_arguments_per_2D_tensor(); //Skip the input and output parameters diff --git a/src/core/CL/kernels/CLReshapeLayerKernel.cpp b/src/core/CL/kernels/CLReshapeLayerKernel.cpp index ce9d7fff67..c7efa9a82d 100644 --- a/src/core/CL/kernels/CLReshapeLayerKernel.cpp +++ b/src/core/CL/kernels/CLReshapeLayerKernel.cpp @@ -92,7 +92,7 @@ void CLReshapeLayerKernel::configure(const ICLTensor *input, ICLTensor *output) output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape())); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLReshapeLayerKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLScaleKernel.cpp b/src/core/CL/kernels/CLScaleKernel.cpp index b1655d5cc1..b2cd4b7adf 100644 --- a/src/core/CL/kernels/CLScaleKernel.cpp +++ b/src/core/CL/kernels/CLScaleKernel.cpp @@ -181,7 +181,7 @@ void CLScaleKernel::configure(const ICLTensor *input, ICLTensor *output, Interpo // Configure kernel window auto win_config = validate_and_configure_window(input->info(), output->info(), policy, border_mode, sampling_policy, border); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); // Create kernel CLBuildOptions build_opts; @@ -223,7 +223,7 @@ void CLScaleKernel::run(const Window &window, cl::CommandQueue &queue) unsigned int idx = 0; add_2D_tensor_argument(idx, _input, slice); 
add_2D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice, _lws_hint); + enqueue(queue, *this, slice, lws_hint()); } while(window.slide_window_slice_2D(slice)); break; @@ -237,7 +237,7 @@ void CLScaleKernel::run(const Window &window, cl::CommandQueue &queue) unsigned int idx = 0; add_3D_tensor_argument(idx, _input, slice); add_3D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice, _lws_hint); + enqueue(queue, *this, slice, lws_hint()); } while(window.slide_window_slice_3D(slice)); break; diff --git a/src/core/CL/kernels/CLScharr3x3Kernel.cpp b/src/core/CL/kernels/CLScharr3x3Kernel.cpp index 913ef592d4..5182390822 100644 --- a/src/core/CL/kernels/CLScharr3x3Kernel.cpp +++ b/src/core/CL/kernels/CLScharr3x3Kernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -102,7 +102,7 @@ void CLScharr3x3Kernel::configure(const ICLTensor *input, ICLTensor *output_x, I output_x_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); output_y_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLScharr3x3Kernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLSobel3x3Kernel.cpp b/src/core/CL/kernels/CLSobel3x3Kernel.cpp index 436aaa498a..b4bfe28216 100644 --- a/src/core/CL/kernels/CLSobel3x3Kernel.cpp +++ b/src/core/CL/kernels/CLSobel3x3Kernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -102,7 +102,7 @@ void CLSobel3x3Kernel::configure(const ICLTensor *input, ICLTensor *output_x, IC output_x_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); output_y_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLSobel3x3Kernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLSobel5x5Kernel.cpp b/src/core/CL/kernels/CLSobel5x5Kernel.cpp index 4c0316f19e..46aa074d61 100644 --- a/src/core/CL/kernels/CLSobel5x5Kernel.cpp +++ b/src/core/CL/kernels/CLSobel5x5Kernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -102,7 +102,7 @@ void CLSobel5x5HorKernel::configure(const ICLTensor *input, ICLTensor *output_x, output_x_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); output_y_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLSobel5x5HorKernel::run(const Window &window, cl::CommandQueue &queue) @@ -201,7 +201,7 @@ void CLSobel5x5VertKernel::configure(const ICLTensor *input_x, const ICLTensor * output_x_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); output_y_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLSobel5x5VertKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLSobel7x7Kernel.cpp b/src/core/CL/kernels/CLSobel7x7Kernel.cpp index a477953cfb..0c94e88acf 100644 --- a/src/core/CL/kernels/CLSobel7x7Kernel.cpp +++ b/src/core/CL/kernels/CLSobel7x7Kernel.cpp @@ -1,5 +1,5 @@ 
/* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -105,7 +105,7 @@ void CLSobel7x7HorKernel::configure(const ICLTensor *input, ICLTensor *output_x, output_x_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); output_y_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLSobel7x7HorKernel::run(const Window &window, cl::CommandQueue &queue) @@ -204,7 +204,7 @@ void CLSobel7x7VertKernel::configure(const ICLTensor *input_x, const ICLTensor * output_x_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); output_y_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLSobel7x7VertKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp b/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp index b9ebdc9583..403256baae 100644 --- a/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp +++ b/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp @@ -242,7 +242,7 @@ void CLLogits1DMaxShiftExpSumKernel::configure(const ICLTensor *input, ICLTensor build_opts.add_option_if(is_data_type_float(dt) && (beta != 1.0f), "-DBETA=" + float_to_string_with_full_precision(beta)); build_opts.add_options_if(is_data_type_quantized_asymmetric(dt), prepare_quantized_softmax_build_options(input->info()->quantization_info().scale, beta).options()); - _lws_hint = cl::NullRange; + cl::NDRange lws_hint(cl::NullRange); std::string kernel_name = is_data_type_quantized_asymmetric(dt) ? 
std::string("softmax_layer_max_shift_exp_sum_quantized_serial") : std::string("softmax_layer_max_shift_exp_sum_serial"); ParallelReductionInfo parallel_reduction_info = is_parallel_reduction(reduction_dim_size); @@ -264,7 +264,7 @@ void CLLogits1DMaxShiftExpSumKernel::configure(const ICLTensor *input, ICLTensor build_opts.add_option_if((multiple_grid_size != 0) || ((reduction_dim_size % vector_size) != 0), "-DNON_MULTIPLE_OF_GRID_SIZE"); // Setting _lws_hint in this way can also communicate grid_size to CLLogits1DMaxShiftExpSumKernel::run(). // A single workgroup performs reduction in dimension 0 in the parallel case, hence lws[0]==gws[0]. - _lws_hint = cl::NDRange(_grid_size); + lws_hint = cl::NDRange(_grid_size); } // Create kernel. @@ -277,7 +277,7 @@ void CLLogits1DMaxShiftExpSumKernel::configure(const ICLTensor *input, ICLTensor // Configure window auto win_config = validate_and_configure_window_1DMaxShiftExpSum(input->info(), max->info(), output->info(), sum->info()); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second, lws_hint); } Status CLLogits1DMaxShiftExpSumKernel::validate(const ITensorInfo *input, const ITensorInfo *max, const ITensorInfo *output, const ITensorInfo *sum) @@ -322,7 +322,7 @@ void CLLogits1DMaxShiftExpSumKernel::run(const Window &window, cl::CommandQueue add_3D_tensor_argument(idx, _max, slice); add_3D_tensor_argument(idx, _output, slice); add_3D_tensor_argument(idx, _sum, slice); - enqueue(queue, *this, slice, _lws_hint); + enqueue(queue, *this, slice, lws_hint()); } while(window_collapsed.slide_window_slice_3D(slice)); } @@ -365,7 +365,7 @@ void CLLogits1DNormKernel::configure(const ICLTensor *input, const ICLTensor *su // Configure window auto win_config = validate_and_configure_window_1DNorm(input->info(), output->info(), sum->info()); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + 
ICLKernel::configure_internal(win_config.second); } Status CLLogits1DNormKernel::validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output) @@ -394,7 +394,7 @@ void CLLogits1DNormKernel::run(const Window &window, cl::CommandQueue &queue) add_3D_tensor_argument(idx, _input, slice); add_3D_tensor_argument(idx, _sum, sum_slice); add_3D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice, _lws_hint); + enqueue(queue, *this, slice, lws_hint()); } while(window_collapsed.slide_window_slice_3D(slice)); } diff --git a/src/core/CL/kernels/CLTransposeKernel.cpp b/src/core/CL/kernels/CLTransposeKernel.cpp index 3d584345d7..695bdf7f40 100644 --- a/src/core/CL/kernels/CLTransposeKernel.cpp +++ b/src/core/CL/kernels/CLTransposeKernel.cpp @@ -117,9 +117,8 @@ void CLTransposeKernel::configure(const ICLTensor *input, ICLTensor *output) ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info())); - _input = input; - _output = output; - _lws_hint = cl::NDRange(2, 8); + _input = input; + _output = output; std::set<std::string> build_opts; std::ostringstream data_type_in_bytes; @@ -131,5 +130,5 @@ void CLTransposeKernel::configure(const ICLTensor *input, ICLTensor *output) // Configure kernel window auto win_config = validate_and_configure_window(input->info(), output->info()); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second, cl::NDRange(2, 8)); } diff --git a/src/core/CL/kernels/CLWarpAffineKernel.cpp b/src/core/CL/kernels/CLWarpAffineKernel.cpp index e0e09400af..1fae2b1974 100644 --- a/src/core/CL/kernels/CLWarpAffineKernel.cpp +++ b/src/core/CL/kernels/CLWarpAffineKernel.cpp @@ -98,5 +98,5 @@ void CLWarpAffineKernel::configure(const ICLTensor *input, ICLTensor *output, co output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape())); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } diff --git
a/src/core/CL/kernels/CLWarpPerspectiveKernel.cpp b/src/core/CL/kernels/CLWarpPerspectiveKernel.cpp index d6fcb09658..e537aec058 100644 --- a/src/core/CL/kernels/CLWarpPerspectiveKernel.cpp +++ b/src/core/CL/kernels/CLWarpPerspectiveKernel.cpp @@ -95,5 +95,5 @@ void CLWarpPerspectiveKernel::configure(const ICLTensor *input, ICLTensor *outpu output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape())); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } diff --git a/src/core/CL/kernels/CLWeightsReshapeKernel.cpp b/src/core/CL/kernels/CLWeightsReshapeKernel.cpp index 58ecd9ccb3..5ef0f5b152 100644 --- a/src/core/CL/kernels/CLWeightsReshapeKernel.cpp +++ b/src/core/CL/kernels/CLWeightsReshapeKernel.cpp @@ -115,7 +115,7 @@ void CLWeightsReshapeKernel::configure(const ICLTensor *input, const ICLTensor * Window win = calculate_max_window(*input->info(), Steps()); // The CLWeightsReshapeKernel doesn't need padding so update_window_and_padding() can be skipped output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape())); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } Status CLWeightsReshapeKernel::validate(const ITensorInfo *input, const ITensorInfo *biases, const ITensorInfo *output, const unsigned int num_groups) diff --git a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp index e2ca05a72a..e5ab8d2304 100644 --- a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp +++ b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp @@ -111,7 +111,7 @@ void CLWidthConcatenateLayerKernel::configure(const ICLTensor *input, unsigned i auto win_config = validate_and_configure_window(input->info(), width_offset, output->info()); ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config)); - ICLKernel::configure(std::get<1>(win_config)); + ICLKernel::configure_internal(std::get<1>(win_config)); } void 
CLWidthConcatenateLayerKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp b/src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp index e6c713e5e7..818638c89e 100644 --- a/src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp +++ b/src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp @@ -125,7 +125,7 @@ void CLWinogradFilterTransformKernel::configure(const ICLTensor *input, ICLTenso // Configure kernel window auto win_config = validate_and_configure_window(input->info(), output->info()); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); } Status CLWinogradFilterTransformKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info) diff --git a/src/core/CL/kernels/CLWinogradInputTransformKernel.cpp b/src/core/CL/kernels/CLWinogradInputTransformKernel.cpp index fcfd9e30a1..2309fbfb26 100644 --- a/src/core/CL/kernels/CLWinogradInputTransformKernel.cpp +++ b/src/core/CL/kernels/CLWinogradInputTransformKernel.cpp @@ -180,8 +180,6 @@ void CLWinogradInputTransformKernel::configure(const ICLTensor *input, ICLTensor _step_z = (_input->info()->dimension(2) % 2) != 0 ? 
1 : 2; } - _lws_hint = cl::NDRange(1, 1, 8); - // Append stepz and data layout kernel_name += "_stepz"; kernel_name += support::cpp11::to_string(_step_z); @@ -192,7 +190,7 @@ void CLWinogradInputTransformKernel::configure(const ICLTensor *input, ICLTensor // Create window and update padding auto win_config = validate_and_configure_window(input->info(), output->info(), winograd_info); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second, cl::NDRange(1, 1, 8)); _config_id = kernel_name; _config_id += support::cpp11::to_string(input->info()->dimension(0)); @@ -239,7 +237,7 @@ void CLWinogradInputTransformKernel::run(const Window &window, cl::CommandQueue add_3D_tensor_argument(idx, _input, slice); add_3D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice, _lws_hint); + enqueue(queue, *this, slice, lws_hint()); } while(window.slide_window_slice_3D(slice)); } diff --git a/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp b/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp index 40d5f6588f..fa42596604 100644 --- a/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp +++ b/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp @@ -172,7 +172,7 @@ void CLWinogradOutputTransformKernel::configure(const ICLTensor *input, const IC // Configure kernel window auto win_config = validate_and_configure_window(input->info(), (bias != nullptr ? 
bias->info() : nullptr), output->info(), winograd_info.output_tile_size); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); // Set config_id for enabling LWS tuning _config_id = kernel_name; @@ -231,7 +231,7 @@ void CLWinogradOutputTransformKernel::run(const Window &window, cl::CommandQueue unsigned int idx = 0; add_3D_tensor_argument(idx, _input, slice); add_3D_tensor_argument(idx, _output, slice_out); - enqueue(queue, *this, slice, _lws_hint); + enqueue(queue, *this, slice, lws_hint()); } while(window.slide_window_slice_3D(slice) && window.slide_window_slice_3D(slice_out)); } -- cgit v1.2.1