aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/kernels
diff options
context:
space:
mode:
authorAnthony Barbier <anthony.barbier@arm.com>2018-08-08 13:20:04 +0100
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:54:54 +0000
commitb6eb35371d222c6b7f61210d97ebd7dd9e197458 (patch)
treeaf89729ad68d665916c37abb5fd49e512fa40614 /src/core/CL/kernels
parent1d1f32ce7ef6acea4afd4cf6a929436640b72ccd (diff)
downloadComputeLibrary-b6eb35371d222c6b7f61210d97ebd7dd9e197458.tar.gz
COMPMID-1478: Stop relying on static default OpenCL objects in cl2.hpp
This causes problems when ACL is used as a shared library on Android. Fixes some problems related to creation / destruction order between the Graph's CL backend and core / runtime Change-Id: I716d63fd42f4586df1ffbb6fa97e4db06d3a781b Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/143228 Tested-by: Jenkins <bsgcomp@arm.com> Reviewed-by: Michele DiGiorgio <michele.digiorgio@arm.com> Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Diffstat (limited to 'src/core/CL/kernels')
-rw-r--r--src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLActivationLayerKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLArithmeticAdditionKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLArithmeticDivisionKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLArithmeticSubtractionKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLBitwiseAndKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLBitwiseOrKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLBitwiseXorKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLBox3x3Kernel.cpp4
-rw-r--r--src/core/CL/kernels/CLCannyEdgeKernel.cpp8
-rw-r--r--src/core/CL/kernels/CLChannelCombineKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLChannelExtractKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLCol2ImKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLColorConvertKernel.cpp8
-rw-r--r--src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLConvolutionKernel.cpp8
-rw-r--r--src/core/CL/kernels/CLCopyKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLDepthwiseVectorToTensorKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLDequantizationLayerKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLDerivativeKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLDilateKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLDirectConvolutionOutputStageKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLErodeKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLFastCornersKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLFillBorderKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLFlattenLayerKernel.cpp6
-rw-r--r--src/core/CL/kernels/CLFloorKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp4
-rw-r--r--src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp6
-rw-r--r--src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp6
-rw-r--r--src/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLGEMMMatrixAdditionKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp6
-rw-r--r--src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLGaussian3x3Kernel.cpp4
-rw-r--r--src/core/CL/kernels/CLGaussianPyramidKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLHOGDescriptorKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLHOGDetectorKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLHarrisCornersKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLHistogramKernel.cpp6
-rw-r--r--src/core/CL/kernels/CLIm2ColKernel.cpp6
-rw-r--r--src/core/CL/kernels/CLIntegralImageKernel.cpp6
-rw-r--r--src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLLKTrackerKernel.cpp8
-rw-r--r--src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp9
-rw-r--r--src/core/CL/kernels/CLMagnitudePhaseKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLMeanStdDevKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLMedian3x3Kernel.cpp4
-rw-r--r--src/core/CL/kernels/CLMinMaxLayerKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLMinMaxLocationKernel.cpp6
-rw-r--r--src/core/CL/kernels/CLNonLinearFilterKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp4
-rw-r--r--src/core/CL/kernels/CLNormalizationLayerKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLPermuteKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLPoolingLayerKernel.cpp6
-rw-r--r--src/core/CL/kernels/CLQuantizationLayerKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLROIPoolingLayerKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLReductionOperationKernel.cpp10
-rw-r--r--src/core/CL/kernels/CLRemapKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLReshapeLayerKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLScaleKernel.cpp6
-rw-r--r--src/core/CL/kernels/CLScharr3x3Kernel.cpp4
-rw-r--r--src/core/CL/kernels/CLSobel3x3Kernel.cpp4
-rw-r--r--src/core/CL/kernels/CLSobel5x5Kernel.cpp6
-rw-r--r--src/core/CL/kernels/CLSobel7x7Kernel.cpp6
-rw-r--r--src/core/CL/kernels/CLSoftmaxLayerKernel.cpp12
-rw-r--r--src/core/CL/kernels/CLTransposeKernel.cpp7
-rw-r--r--src/core/CL/kernels/CLWarpAffineKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLWarpPerspectiveKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLWeightsReshapeKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp2
-rw-r--r--src/core/CL/kernels/CLWinogradInputTransformKernel.cpp6
-rw-r--r--src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp4
88 files changed, 178 insertions, 180 deletions
diff --git a/src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp b/src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp
index 685b8e234e..0c1206adfb 100644
--- a/src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp
+++ b/src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -81,7 +81,7 @@ void CLAbsoluteDifferenceKernel::configure(const ICLTensor *input1, const ICLTen
output_access.set_valid_region(win, valid_region);
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
void CLAbsoluteDifferenceKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLActivationLayerKernel.cpp b/src/core/CL/kernels/CLActivationLayerKernel.cpp
index d8bd2f7ee1..a15e99b8d4 100644
--- a/src/core/CL/kernels/CLActivationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLActivationLayerKernel.cpp
@@ -179,7 +179,7 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act
// Configure kernel window
auto win_config = validate_and_configure_window(input->info(), (_run_in_place) ? nullptr : output->info());
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
- ICLKernel::configure(win_config.second);
+ ICLKernel::configure_internal(win_config.second);
// Set config_id for enabling LWS tuning
_config_id = "activation_layer_";
@@ -215,7 +215,7 @@ void CLActivationLayerKernel::run(const Window &window, cl::CommandQueue &queue)
{
add_3D_tensor_argument(idx, _output, slice);
}
- enqueue(queue, *this, slice, _lws_hint);
+ enqueue(queue, *this, slice, lws_hint());
}
while(collapsed.slide_window_slice_3D(slice));
}
diff --git a/src/core/CL/kernels/CLArithmeticAdditionKernel.cpp b/src/core/CL/kernels/CLArithmeticAdditionKernel.cpp
index 6d6cb6f98c..2372d458cf 100644
--- a/src/core/CL/kernels/CLArithmeticAdditionKernel.cpp
+++ b/src/core/CL/kernels/CLArithmeticAdditionKernel.cpp
@@ -154,7 +154,7 @@ void CLArithmeticAdditionKernel::configure(const ICLTensor *input1, const ICLTen
// Create kernel
_kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts));
- ICLKernel::configure(win_config.second);
+ ICLKernel::configure_internal(win_config.second);
}
Status CLArithmeticAdditionKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy)
diff --git a/src/core/CL/kernels/CLArithmeticDivisionKernel.cpp b/src/core/CL/kernels/CLArithmeticDivisionKernel.cpp
index 9bd0da15a3..e995ba1a41 100644
--- a/src/core/CL/kernels/CLArithmeticDivisionKernel.cpp
+++ b/src/core/CL/kernels/CLArithmeticDivisionKernel.cpp
@@ -121,7 +121,7 @@ void CLArithmeticDivisionKernel::configure(const ICLTensor *input1, const ICLTen
// Create kernel
_kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("arithmetic_div", build_opts));
- ICLKernel::configure(win_config.second);
+ ICLKernel::configure_internal(win_config.second);
}
Status CLArithmeticDivisionKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output)
diff --git a/src/core/CL/kernels/CLArithmeticSubtractionKernel.cpp b/src/core/CL/kernels/CLArithmeticSubtractionKernel.cpp
index aeee6022a7..299ac553e9 100644
--- a/src/core/CL/kernels/CLArithmeticSubtractionKernel.cpp
+++ b/src/core/CL/kernels/CLArithmeticSubtractionKernel.cpp
@@ -127,7 +127,7 @@ void CLArithmeticSubtractionKernel::configure(const ICLTensor *input1, const ICL
// Configure kernel window
auto win_config = validate_and_configure_window(input1->info(), input2->info(), output->info());
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
- ICLKernel::configure(win_config.second);
+ ICLKernel::configure_internal(win_config.second);
}
Status CLArithmeticSubtractionKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy)
diff --git a/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp b/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp
index 4c93fb28bf..d4a72076c1 100644
--- a/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp
@@ -189,7 +189,7 @@ void CLBatchNormalizationLayerKernel::configure(ICLTensor *input, ICLTensor *out
(beta != nullptr) ? beta->info() : nullptr,
(gamma != nullptr) ? gamma->info() : nullptr);
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
- ICLKernel::configure(win_config.second);
+ ICLKernel::configure_internal(win_config.second);
_config_id = "batch_normalization_layer_";
_config_id += string_from_data_layout(input->info()->data_layout());
@@ -252,7 +252,7 @@ void CLBatchNormalizationLayerKernel::run(const Window &window, cl::CommandQueue
{
add_3D_tensor_argument(idx, _output, slice);
}
- enqueue(queue, *this, slice, _lws_hint);
+ enqueue(queue, *this, slice, lws_hint());
}
while(window.slide_window_slice_3D(slice));
}
diff --git a/src/core/CL/kernels/CLBitwiseAndKernel.cpp b/src/core/CL/kernels/CLBitwiseAndKernel.cpp
index 5ea4a86da5..dd301cd02e 100644
--- a/src/core/CL/kernels/CLBitwiseAndKernel.cpp
+++ b/src/core/CL/kernels/CLBitwiseAndKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -66,7 +66,7 @@ void CLBitwiseAndKernel::configure(const ICLTensor *input1, const ICLTensor *inp
output_access.set_valid_region(win, valid_region);
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
void CLBitwiseAndKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLBitwiseOrKernel.cpp b/src/core/CL/kernels/CLBitwiseOrKernel.cpp
index 2eeef0a993..aa84618258 100644
--- a/src/core/CL/kernels/CLBitwiseOrKernel.cpp
+++ b/src/core/CL/kernels/CLBitwiseOrKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -67,7 +67,7 @@ void CLBitwiseOrKernel::configure(const ICLTensor *input1, const ICLTensor *inpu
output_access.set_valid_region(win, valid_region);
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
void CLBitwiseOrKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLBitwiseXorKernel.cpp b/src/core/CL/kernels/CLBitwiseXorKernel.cpp
index c19a78e1c4..ad1f923253 100644
--- a/src/core/CL/kernels/CLBitwiseXorKernel.cpp
+++ b/src/core/CL/kernels/CLBitwiseXorKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -67,7 +67,7 @@ void CLBitwiseXorKernel::configure(const ICLTensor *input1, const ICLTensor *inp
output_access.set_valid_region(win, valid_region);
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
void CLBitwiseXorKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLBox3x3Kernel.cpp b/src/core/CL/kernels/CLBox3x3Kernel.cpp
index 0299f6233c..b81697f778 100644
--- a/src/core/CL/kernels/CLBox3x3Kernel.cpp
+++ b/src/core/CL/kernels/CLBox3x3Kernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -73,5 +73,5 @@ void CLBox3x3Kernel::configure(const ICLTensor *input, ICLTensor *output, bool b
output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
diff --git a/src/core/CL/kernels/CLCannyEdgeKernel.cpp b/src/core/CL/kernels/CLCannyEdgeKernel.cpp
index 5d06d34631..94e5e230f9 100644
--- a/src/core/CL/kernels/CLCannyEdgeKernel.cpp
+++ b/src/core/CL/kernels/CLCannyEdgeKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -77,7 +77,7 @@ void CLGradientKernel::configure(const ICLTensor *gx, const ICLTensor *gy, ICLTe
mag_access.set_valid_region(win, _gx->info()->valid_region());
phase_access.set_valid_region(win, _gx->info()->valid_region());
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
void CLGradientKernel::run(const Window &window, cl::CommandQueue &queue)
@@ -145,7 +145,7 @@ void CLEdgeNonMaxSuppressionKernel::configure(const ICLTensor *magnitude, const
output_access.set_valid_region(win, _magnitude->info()->valid_region(), border_undefined, border_size());
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
void CLEdgeNonMaxSuppressionKernel::run(const Window &window, cl::CommandQueue &queue)
@@ -230,7 +230,7 @@ void CLEdgeTraceKernel::configure(const ICLTensor *input, ICLTensor *output, int
l1_stack_access.set_valid_region(win, _input->info()->valid_region());
l1_stack_counter_access.set_valid_region(win, _input->info()->valid_region());
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
void CLEdgeTraceKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLChannelCombineKernel.cpp b/src/core/CL/kernels/CLChannelCombineKernel.cpp
index 6e55e666ee..c7b1da41dc 100644
--- a/src/core/CL/kernels/CLChannelCombineKernel.cpp
+++ b/src/core/CL/kernels/CLChannelCombineKernel.cpp
@@ -128,7 +128,7 @@ void CLChannelCombineKernel::configure(const ICLTensor *plane0, const ICLTensor
}
output_access.set_valid_region(win, ValidRegion(valid_region.anchor, output->info()->tensor_shape()));
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
void CLChannelCombineKernel::configure(const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output)
@@ -232,7 +232,7 @@ void CLChannelCombineKernel::configure(const ICLImage *plane0, const ICLImage *p
output_plane1_access.set_valid_region(win, ValidRegion(output_plane1_region.anchor, output->plane(1)->info()->tensor_shape()));
output_plane2_access.set_valid_region(win, ValidRegion(plane2->info()->valid_region().anchor, output->plane(2)->info()->tensor_shape()));
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
void CLChannelCombineKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLChannelExtractKernel.cpp b/src/core/CL/kernels/CLChannelExtractKernel.cpp
index 65843b8d5d..8bddba837a 100644
--- a/src/core/CL/kernels/CLChannelExtractKernel.cpp
+++ b/src/core/CL/kernels/CLChannelExtractKernel.cpp
@@ -101,7 +101,7 @@ void CLChannelExtractKernel::configure(const ICLTensor *input, Channel channel,
ValidRegion input_valid_region = input->info()->valid_region();
output_access.set_valid_region(win, ValidRegion(input_valid_region.anchor, output->info()->tensor_shape()));
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
void CLChannelExtractKernel::configure(const ICLMultiImage *input, Channel channel, ICLImage *output)
@@ -162,7 +162,7 @@ void CLChannelExtractKernel::configure(const ICLMultiImage *input, Channel chann
output_access.set_valid_region(win, input_plane->info()->valid_region());
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
void CLChannelExtractKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp b/src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp
index 5f0f0aebf8..be4d68770d 100644
--- a/src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp
+++ b/src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp
@@ -125,7 +125,7 @@ void CLChannelShuffleLayerKernel::configure(const ICLTensor *input, ICLTensor *o
// Configure kernel window
auto win_config = validate_and_configure_window(input->info(), output->info());
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
- ICLKernel::configure(win_config.second);
+ ICLKernel::configure_internal(win_config.second);
}
Status CLChannelShuffleLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int num_groups)
diff --git a/src/core/CL/kernels/CLCol2ImKernel.cpp b/src/core/CL/kernels/CLCol2ImKernel.cpp
index 6274c9082a..6fd3be7f6a 100644
--- a/src/core/CL/kernels/CLCol2ImKernel.cpp
+++ b/src/core/CL/kernels/CLCol2ImKernel.cpp
@@ -111,7 +111,7 @@ void CLCol2ImKernel::configure(const ICLTensor *input, ICLTensor *output, std::p
// Configure kernel window
auto win_config = validate_and_configure_window(input->info(), output->info(), _convolved_dims);
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
- ICLKernel::configure(win_config.second);
+ ICLKernel::configure_internal(win_config.second);
// Set config_id for enabling LWS tuning
_config_id = "col2im_";
@@ -156,7 +156,7 @@ void CLCol2ImKernel::run(const Window &window, cl::CommandQueue &queue)
unsigned int idx = 0;
add_2D_tensor_argument(idx, _input, slice);
add_3D_tensor_argument(idx, _output, slice_out);
- enqueue(queue, *this, slice, _lws_hint);
+ enqueue(queue, *this, slice, lws_hint());
}
while(collapsed_window.slide_window_slice_2D(slice) && out_window.slide_window_slice_3D(slice_out));
}
diff --git a/src/core/CL/kernels/CLColorConvertKernel.cpp b/src/core/CL/kernels/CLColorConvertKernel.cpp
index 2b894989e1..e79019eab9 100644
--- a/src/core/CL/kernels/CLColorConvertKernel.cpp
+++ b/src/core/CL/kernels/CLColorConvertKernel.cpp
@@ -120,7 +120,7 @@ void CLColorConvertKernel::configure(const ICLTensor *input, ICLTensor *output)
output_access.set_valid_region(win, input->info()->valid_region());
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
void CLColorConvertKernel::configure(const ICLMultiImage *input, ICLImage *output)
@@ -189,7 +189,7 @@ void CLColorConvertKernel::configure(const ICLMultiImage *input, ICLImage *outpu
input->plane(2)->info()->valid_region());
output_access.set_valid_region(win, ValidRegion(intersect_region.anchor, output->info()->tensor_shape()));
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
void CLColorConvertKernel::configure(const ICLImage *input, ICLMultiImage *output)
@@ -285,7 +285,7 @@ void CLColorConvertKernel::configure(const ICLImage *input, ICLMultiImage *outpu
output_plane1_access.set_valid_region(win, ValidRegion(input_region.anchor, output->plane(1)->info()->tensor_shape()));
output_plane2_access.set_valid_region(win, ValidRegion(input_region.anchor, output->plane(2)->info()->tensor_shape()));
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
void CLColorConvertKernel::configure(const ICLMultiImage *input, ICLMultiImage *output)
@@ -369,7 +369,7 @@ void CLColorConvertKernel::configure(const ICLMultiImage *input, ICLMultiImage *
output_plane1_access.set_valid_region(win, ValidRegion(intersect_region.anchor, output->plane(1)->info()->tensor_shape()));
output_plane2_access.set_valid_region(win, ValidRegion(intersect_region.anchor, output->plane(2)->info()->tensor_shape()));
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
void CLColorConvertKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp b/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp
index 69ab590540..ace3fd5840 100644
--- a/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp
+++ b/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp
@@ -73,7 +73,7 @@ void CLConvertFullyConnectedWeightsKernel::configure(const ICLTensor *input, ICL
// Configure kernel window
Window win = calculate_max_window(*input->info(), Steps());
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
Status CLConvertFullyConnectedWeightsKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const TensorShape &original_input_shape,
@@ -109,4 +109,4 @@ void CLConvertFullyConnectedWeightsKernel::run(const Window &window, cl::Command
add_2D_tensor_argument(idx, _output, window);
enqueue(queue, *this, window);
}
-} // namespace arm_compute \ No newline at end of file
+} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLConvolutionKernel.cpp b/src/core/CL/kernels/CLConvolutionKernel.cpp
index 2b08c8dfba..e6777938a2 100644
--- a/src/core/CL/kernels/CLConvolutionKernel.cpp
+++ b/src/core/CL/kernels/CLConvolutionKernel.cpp
@@ -105,7 +105,7 @@ void CLConvolutionKernel<matrix_size>::configure(const ICLTensor *input, ICLTens
output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
/****************************************************************************************\
@@ -167,7 +167,7 @@ void CLSeparableConvolutionHorKernel<matrix_size>::configure(const ICLTensor *in
output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
template <unsigned int matrix_size>
@@ -226,7 +226,7 @@ void CLSeparableConvolutionVertKernel<matrix_size>::configure(const ICLTensor *i
output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
/****************************************************************************************\
@@ -298,7 +298,7 @@ void CLConvolutionRectangleKernel::configure(const ICLTensor *input, ICLTensor *
output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
void CLConvolutionRectangleKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLCopyKernel.cpp b/src/core/CL/kernels/CLCopyKernel.cpp
index 1fc8b5bfbe..2da67d2666 100644
--- a/src/core/CL/kernels/CLCopyKernel.cpp
+++ b/src/core/CL/kernels/CLCopyKernel.cpp
@@ -95,7 +95,7 @@ void CLCopyKernel::configure(const ICLTensor *input, ICLTensor *output)
// Configure kernel window
auto win_config = validate_and_configure_window(input->info(), output->info());
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
- ICLKernel::configure(win_config.second);
+ ICLKernel::configure_internal(win_config.second);
}
Status CLCopyKernel::validate(const arm_compute::ITensorInfo *input, const arm_compute::ITensorInfo *output)
diff --git a/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp
index 1feac7d815..c6a0031f4a 100644
--- a/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp
+++ b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp
@@ -85,7 +85,7 @@ void CLDeconvolutionLayerUpsampleKernel::configure(const ICLTensor *input, ICLTe
AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
void CLDeconvolutionLayerUpsampleKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp
index 4055d1c7ab..40023948b1 100644
--- a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp
@@ -125,7 +125,7 @@ void CLDepthConcatenateLayerKernel::configure(const ICLTensor *input, unsigned i
auto win_config = validate_and_configure_window(input->info(), depth_offset, output->info());
ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config));
- ICLKernel::configure(std::get<1>(win_config));
+ ICLKernel::configure_internal(std::get<1>(win_config));
}
Status CLDepthConcatenateLayerKernel::validate(const arm_compute::ITensorInfo *input,
diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp
index e091e5c2cb..a40aa2856c 100644
--- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp
@@ -280,7 +280,7 @@ void CLDepthwiseConvolutionLayer3x3NCHWKernel::configure(const ICLTensor *input,
auto win_config = validate_and_configure_window(input->info(), weights->info(), output->info(), conv_info, depth_multiplier, gpu_target, kernel_name);
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
- ICLKernel::configure(win_config.second);
+ ICLKernel::configure_internal(win_config.second);
_kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
@@ -345,7 +345,7 @@ void CLDepthwiseConvolutionLayer3x3NCHWKernel::run(const Window &window, cl::Com
add_3D_tensor_argument(idx, _output, slice_out);
add_3D_tensor_argument(idx, _weights, slice_weights);
- enqueue(queue, *this, slice_out, _lws_hint);
+ enqueue(queue, *this, slice_out, lws_hint());
}
while(window.slide_window_slice_3D(slice_out) && win_in.slide_window_slice_3D(slice_in));
}
diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp
index 610bfb51dd..63c350d9a5 100644
--- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp
@@ -244,7 +244,7 @@ void CLDepthwiseConvolutionLayer3x3NHWCKernel::configure(const ICLTensor *input,
// Configure kernel window
auto win_config = validate_and_configure_window(input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(), conv_info);
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
- ICLKernel::configure(win_config.second);
+ ICLKernel::configure_internal(win_config.second);
// Set config_id for enabling LWS tuning
_config_id = kernel_name;
@@ -314,7 +314,7 @@ void CLDepthwiseConvolutionLayer3x3NHWCKernel::run(const Window &window, cl::Com
add_3D_tensor_argument(idx, _output, slice_out);
add_3D_tensor_argument(idx, _weights, slice_out);
- enqueue(queue, *this, slice_out, _lws_hint);
+ enqueue(queue, *this, slice_out, lws_hint());
}
while(window.slide_window_slice_3D(slice_out) && win_in.slide_window_slice_3D(slice_in));
}
diff --git a/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp b/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp
index cab943629a..d5c333a2c1 100644
--- a/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp
@@ -101,7 +101,7 @@ void CLDepthwiseIm2ColKernel::configure(const ICLTensor *input, ICLTensor *outpu
// CLDepthwiseIm2ColKernel doesn't need padding so update_window_and_padding() can be skipped
output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape()));
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
Status CLDepthwiseIm2ColKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, unsigned int depth_multiplier)
@@ -135,7 +135,7 @@ void CLDepthwiseIm2ColKernel::run(const Window &window, cl::CommandQueue &queue)
unsigned int idx = 0;
add_3D_tensor_argument(idx, _input, slice_in);
add_3D_tensor_argument(idx, _output, slice);
- enqueue(queue, *this, slice, _lws_hint);
+ enqueue(queue, *this, slice, lws_hint());
}
while(window.slide_window_slice_3D(slice) && window.slide_window_slice_3D(slice_in));
}
diff --git a/src/core/CL/kernels/CLDepthwiseVectorToTensorKernel.cpp b/src/core/CL/kernels/CLDepthwiseVectorToTensorKernel.cpp
index 67b2cc9f55..cdc27e8ab1 100644
--- a/src/core/CL/kernels/CLDepthwiseVectorToTensorKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseVectorToTensorKernel.cpp
@@ -87,7 +87,7 @@ void CLDepthwiseVectorToTensorKernel::configure(const ICLTensor *input, ICLTenso
// The CLDepthwisevectorToTensorKernel doesn't need padding so update_window_and_padding() can be skipped
output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape()));
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
Status CLDepthwiseVectorToTensorKernel::validate(const ITensorInfo *input, const ITensorInfo *output, size_t conv_w, size_t conv_h)
diff --git a/src/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.cpp b/src/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.cpp
index c28be3fccf..683dda8d67 100644
--- a/src/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.cpp
@@ -95,7 +95,7 @@ void CLDepthwiseWeightsReshapeKernel::configure(const ICLTensor *input, ICLTenso
// The CLDepthwiseWeightsReshapeKernel doesn't need padding so update_window_and_padding() can be skipped
output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape()));
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
Status CLDepthwiseWeightsReshapeKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *biases)
diff --git a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp
index fba721f50b..d4c1bec5f4 100644
--- a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp
@@ -96,7 +96,7 @@ void CLDequantizationLayerKernel::configure(const ICLTensor *input, ICLTensor *o
ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config));
- ICLKernel::configure(std::get<1>(win_config));
+ ICLKernel::configure_internal(std::get<1>(win_config));
}
Status CLDequantizationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *min_max)
diff --git a/src/core/CL/kernels/CLDerivativeKernel.cpp b/src/core/CL/kernels/CLDerivativeKernel.cpp
index 5bfe75140b..af7df14359 100644
--- a/src/core/CL/kernels/CLDerivativeKernel.cpp
+++ b/src/core/CL/kernels/CLDerivativeKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -117,7 +117,7 @@ void CLDerivativeKernel::configure(const ICLTensor *input, ICLTensor *output_x,
output_x_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
output_y_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
void CLDerivativeKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLDilateKernel.cpp b/src/core/CL/kernels/CLDilateKernel.cpp
index 3abd747011..89853d7b19 100644
--- a/src/core/CL/kernels/CLDilateKernel.cpp
+++ b/src/core/CL/kernels/CLDilateKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -61,5 +61,5 @@ void CLDilateKernel::configure(const ICLTensor *input, ICLTensor *output, bool b
output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
diff --git a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp
index 754f0d8f23..6de97d40af 100644
--- a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp
@@ -442,7 +442,7 @@ void CLDirectConvolutionLayerKernel::configure(const ICLTensor *input, const ICL
// Configure kernel window
auto win_config = validate_and_configure_window(input->info(), weights->info(), output->info(), conv_info, gpu_target);
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
- ICLKernel::configure(win_config.second);
+ ICLKernel::configure_internal(win_config.second);
// Set static kernel arguments
if(is_data_type_quantized_asymmetric(data_type))
@@ -532,7 +532,7 @@ void CLDirectConvolutionLayerKernel::run(const Window &window, cl::CommandQueue
unsigned int idx = 0;
add_3D_tensor_argument(idx, _input, slice_in);
add_3D_tensor_argument(idx, _output, slice);
- enqueue(queue, *this, slice, _lws_hint);
+ enqueue(queue, *this, slice, lws_hint());
}
while(window.slide_window_slice_3D(slice) && win_in.slide_window_slice_3D(slice_in));
}
diff --git a/src/core/CL/kernels/CLDirectConvolutionOutputStageKernel.cpp b/src/core/CL/kernels/CLDirectConvolutionOutputStageKernel.cpp
index 4e2352cf6e..5f4dacb269 100644
--- a/src/core/CL/kernels/CLDirectConvolutionOutputStageKernel.cpp
+++ b/src/core/CL/kernels/CLDirectConvolutionOutputStageKernel.cpp
@@ -168,7 +168,7 @@ void CLDirectConvolutionLayerOutputStageKernel::configure(ICLTensor *input, cons
// Configure kernel window
auto win_config = validate_and_configure_window(input->info(), (bias == nullptr) ? nullptr : bias->info(), (output == nullptr) ? nullptr : output->info());
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
- ICLKernel::configure(win_config.second);
+ ICLKernel::configure_internal(win_config.second);
}
Status CLDirectConvolutionLayerOutputStageKernel::validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output)
@@ -202,7 +202,7 @@ void CLDirectConvolutionLayerOutputStageKernel::run(const Window &window, cl::Co
unsigned int idx = 0;
add_3D_tensor_argument(idx, _input, slice);
add_3D_tensor_argument(idx, _output, slice);
- enqueue(queue, *this, slice, _lws_hint);
+ enqueue(queue, *this, slice, lws_hint());
}
while(window.slide_window_slice_3D(slice));
}
diff --git a/src/core/CL/kernels/CLErodeKernel.cpp b/src/core/CL/kernels/CLErodeKernel.cpp
index a7aa88fc5c..e56b71a75e 100644
--- a/src/core/CL/kernels/CLErodeKernel.cpp
+++ b/src/core/CL/kernels/CLErodeKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -61,5 +61,5 @@ void CLErodeKernel::configure(const ICLTensor *input, ICLTensor *output, bool bo
output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
diff --git a/src/core/CL/kernels/CLFastCornersKernel.cpp b/src/core/CL/kernels/CLFastCornersKernel.cpp
index 616e41b5fc..782ab7a7c0 100644
--- a/src/core/CL/kernels/CLFastCornersKernel.cpp
+++ b/src/core/CL/kernels/CLFastCornersKernel.cpp
@@ -87,7 +87,7 @@ void CLFastCornersKernel::configure(const ICLImage *input, ICLImage *output, flo
output_access.set_valid_region(win, input->info()->valid_region(), border_mode == BorderMode::UNDEFINED, border_size());
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
void CLFastCornersKernel::run(const Window &window, cl::CommandQueue &queue)
@@ -148,7 +148,7 @@ void CLCopyToArrayKernel::configure(const ICLImage *input, bool update_number, I
Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
update_window_and_padding(win,
AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration));
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
void CLCopyToArrayKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLFillBorderKernel.cpp b/src/core/CL/kernels/CLFillBorderKernel.cpp
index 3b1edaf46c..baf6bb6024 100644
--- a/src/core/CL/kernels/CLFillBorderKernel.cpp
+++ b/src/core/CL/kernels/CLFillBorderKernel.cpp
@@ -154,7 +154,7 @@ void CLFillBorderKernel::configure(ICLTensor *tensor, BorderSize border_size, Bo
win.set(Window::DimX, Window::Dimension(0, total_valid_width + valid_height));
win.set(Window::DimY, Window::Dimension(0, 1, 1));
win.use_tensor_dimensions(tensor->info()->tensor_shape(), Window::DimZ);
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
void CLFillBorderKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLFlattenLayerKernel.cpp b/src/core/CL/kernels/CLFlattenLayerKernel.cpp
index 0b5feffcc9..17189143ef 100644
--- a/src/core/CL/kernels/CLFlattenLayerKernel.cpp
+++ b/src/core/CL/kernels/CLFlattenLayerKernel.cpp
@@ -101,7 +101,7 @@ void CLFlattenLayerKernel::configure(const ICLTensor *input, ICLTensor *output)
// Configure kernel window
auto win_config = validate_and_configure_window(input->info(), output->info());
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
- ICLKernel::configure(win_config.second);
+ ICLKernel::configure_internal(win_config.second);
// Set config_id for enabling LWS tuning
_config_id = "flatten";
@@ -144,8 +144,8 @@ void CLFlattenLayerKernel::run(const Window &window, cl::CommandQueue &queue)
unsigned int idx = 0;
add_3D_tensor_argument(idx, _input, in_slice);
add_1D_tensor_argument(idx, _output, out_slice);
- enqueue(queue, *this, in_slice, _lws_hint);
+ enqueue(queue, *this, in_slice, lws_hint());
}
while(window.slide_window_slice_3D(in_slice) && out_window.slide_window_slice_1D(out_slice));
}
-} // namespace arm_compute \ No newline at end of file
+} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLFloorKernel.cpp b/src/core/CL/kernels/CLFloorKernel.cpp
index f6b0e829a0..20e3a3a66f 100644
--- a/src/core/CL/kernels/CLFloorKernel.cpp
+++ b/src/core/CL/kernels/CLFloorKernel.cpp
@@ -69,7 +69,7 @@ void CLFloorKernel::configure(const ICLTensor *input, ICLTensor *output)
update_window_and_padding(win, input_access, output_access);
output_access.set_valid_region(win, input->info()->valid_region());
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
void CLFloorKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp b/src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp
index 6ea1160c69..ae54e77972 100644
--- a/src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp
+++ b/src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp
@@ -157,7 +157,7 @@ void CLGEMMInterleave4x4Kernel::configure(const ICLTensor *input, ICLTensor *out
// Configure kernel window
auto win_config = validate_and_configure_window(input->info(), output->info(), mult_interleave4x4_height, reinterpret_input_as_3d);
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
- ICLKernel::configure(win_config.second);
+ ICLKernel::configure_internal(win_config.second);
// Set config_id for enabling LWS tuning
_config_id = "interleave4x4_";
@@ -210,7 +210,7 @@ void CLGEMMInterleave4x4Kernel::run(const Window &window, cl::CommandQueue &queu
unsigned int idx = 0;
add_3D_tensor_argument(idx, _input, slice);
add_3D_tensor_argument(idx, _output, slice);
- enqueue(queue, *this, slice, _lws_hint);
+ enqueue(queue, *this, slice, lws_hint());
}
while(window.slide_window_slice_3D(slice));
}
diff --git a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp
index 8a4a1b5820..9adf95fa33 100644
--- a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp
@@ -188,7 +188,7 @@ void CLGEMMLowpMatrixMultiplyKernel::configure(const ICLTensor *input0, const IC
// Configure kernel window
auto win_config = validate_and_configure_window(input0->info(), input1->info(), output->info(), is_interleaved_transposed, num_elements_processed);
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
- ICLKernel::configure(win_config.second);
+ ICLKernel::configure_internal(win_config.second);
const bool is_dot8_supported = dot8_supported(CLKernelLibrary::get().get_device());
@@ -273,7 +273,7 @@ void CLGEMMLowpMatrixMultiplyKernel::run(const Window &window, cl::CommandQueue
add_2D_tensor_argument(idx, _input0, slice);
add_2D_tensor_argument(idx, _input1, slice_b);
add_2D_tensor_argument(idx, _output, slice);
- enqueue(queue, *this, slice, _lws_hint);
+ enqueue(queue, *this, slice, lws_hint());
}
while(window.slide_window_slice_2D(slice));
}
diff --git a/src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp b/src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp
index 221a1566b9..aa954abde1 100644
--- a/src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp
@@ -159,7 +159,7 @@ void CLGEMMLowpOffsetContributionKernel::configure(ICLTensor *mm_result, const I
vector_sum_row != nullptr ? vector_sum_row->info() : nullptr,
a_offset, b_offset); // NOLINT
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
- ICLKernel::configure(win_config.second);
+ ICLKernel::configure_internal(win_config.second);
// Set config_id for enabling LWS tuning
_config_id = "gemmlowp_offset_contribution_";
diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp
index ff2fc646aa..875e26d6cb 100644
--- a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -146,7 +146,7 @@ void CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel::configure(const
// Configure kernel window
auto win_config = validate_and_configure_window(input->info(), (bias != nullptr) ? bias->info() : nullptr, output->info());
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
- ICLKernel::configure(win_config.second);
+ ICLKernel::configure_internal(win_config.second);
}
void CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel::run(const Window &window, cl::CommandQueue &queue)
@@ -174,4 +174,4 @@ void CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel::run(const Window
enqueue(queue, *this, slice);
}
while(collapsed.slide_window_slice_3D(slice));
-} \ No newline at end of file
+}
diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp
index 151a6588d5..57891131c7 100644
--- a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -145,7 +145,7 @@ void CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel::configure(const ICLTensor *i
// Configure kernel window
auto win_config = validate_and_configure_window(input->info(), (bias != nullptr) ? bias->info() : nullptr, output->info());
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
- ICLKernel::configure(win_config.second);
+ ICLKernel::configure_internal(win_config.second);
}
void CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp b/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp
index 6951512167..cd26cd1597 100644
--- a/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -121,7 +121,7 @@ void CLGEMMLowpMatrixAReductionKernel::configure(const ICLTensor *mtx_a, ICLTens
// Configure kernel window
auto win_config = validate_and_configure_window_matrix_a_reduction(_input->info(), _output->info());
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
- ICLKernel::configure(win_config.second);
+ ICLKernel::configure_internal(win_config.second);
}
Status CLGEMMLowpMatrixAReductionKernel::validate(const ITensorInfo *mtx_a, const ITensorInfo *vector_sum_row)
@@ -175,7 +175,7 @@ void CLGEMMLowpMatrixBReductionKernel::configure(const ICLTensor *mtx_b, ICLTens
// Configure kernel window
auto win_config = validate_and_configure_window_matrix_b_reduction(_input->info(), _output->info());
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
- ICLKernel::configure(win_config.second);
+ ICLKernel::configure_internal(win_config.second);
}
Status CLGEMMLowpMatrixBReductionKernel::validate(const ITensorInfo *mtx_b, const ITensorInfo *vector_sum_col)
diff --git a/src/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.cpp
index ebe4013bf0..2f1f1bf865 100644
--- a/src/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.cpp
@@ -88,7 +88,7 @@ void CLGEMMMatrixAccumulateBiasesKernel::configure(ICLTensor *accum, const ICLTe
// Configure kernel window
auto win_config = validate_and_configure_window(accum->info(), biases->info(), gpu_target, vector_size);
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
- ICLKernel::configure(win_config.second);
+ ICLKernel::configure_internal(win_config.second);
// Add build options
CLBuildOptions build_opts;
@@ -126,7 +126,7 @@ void CLGEMMMatrixAccumulateBiasesKernel::run(const Window &window, cl::CommandQu
add_2D_tensor_argument(idx, _accum, accum_slice);
add_1D_tensor_argument(idx, _biases, biases_slice);
- enqueue(queue, *this, accum_slice, _lws_hint);
+ enqueue(queue, *this, accum_slice, lws_hint());
}
while(window.slide_window_slice_2D(accum_slice));
}
diff --git a/src/core/CL/kernels/CLGEMMMatrixAdditionKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixAdditionKernel.cpp
index bcc3a01296..0c65bb40c0 100644
--- a/src/core/CL/kernels/CLGEMMMatrixAdditionKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixAdditionKernel.cpp
@@ -98,7 +98,7 @@ void CLGEMMMatrixAdditionKernel::configure(const ICLTensor *input, ICLTensor *ou
// Configure kernel window
auto win_config = validate_and_configure_window(input->info(), output->info());
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
- ICLKernel::configure(win_config.second);
+ ICLKernel::configure_internal(win_config.second);
}
Status CLGEMMMatrixAdditionKernel::validate(const ITensorInfo *input, const ITensorInfo *output, float beta)
diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp
index 79e2f8b11a..8530ed2fd3 100644
--- a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp
@@ -253,7 +253,7 @@ void CLGEMMMatrixMultiplyKernel::configure(const ICLTensor *input0, const ICLTen
// Configure kernel window
auto win_config = validate_and_configure_window(input0->info(), input1->info(), output->info(), is_interleaved_transposed, reshape_info, gpu_target, num_elements_processed);
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
- ICLKernel::configure(win_config.second);
+ ICLKernel::configure_internal(win_config.second);
// Create build options
CLBuildOptions build_opts;
@@ -316,7 +316,7 @@ void CLGEMMMatrixMultiplyKernel::configure(const ICLTensor *input0, const ICLTen
// The work-group size equal to the Bifrost quad size has been proved to be optimal for these kernels
// via exhaustive autotuning over a range of representative layer configurations.
- _lws_hint = cl::NDRange(4);
+ set_lws_hint(cl::NDRange(4));
}
else // (MIDGARD and F32) or (F16)
{
@@ -416,7 +416,7 @@ void CLGEMMMatrixMultiplyKernel::run(const Window &window, cl::CommandQueue &que
_kernel.setArg<cl_uint>(idx++, static_cast<unsigned int>(_input0->info()->strides_in_bytes()[2]));
_kernel.setArg<cl_uint>(idx++, static_cast<unsigned int>(_input1->info()->strides_in_bytes()[2]));
_kernel.setArg<cl_uint>(idx++, static_cast<unsigned int>(_output->info()->strides_in_bytes()[2]));
- enqueue(queue, *this, slice, _lws_hint);
+ enqueue(queue, *this, slice, lws_hint());
}
while(window.slide_window_slice_3D(slice));
}
diff --git a/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp
index 43a6cf25db..11a4292270 100644
--- a/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp
@@ -121,7 +121,7 @@ void CLGEMMMatrixVectorMultiplyKernel::configure(const ICLTensor *input0, const
auto win_config = validate_and_configure_window(input0->info(), input1->info(), output->info());
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
- ICLKernel::configure(win_config.second);
+ ICLKernel::configure_internal(win_config.second);
}
Status CLGEMMMatrixVectorMultiplyKernel::validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output)
@@ -165,7 +165,7 @@ void CLGEMMMatrixVectorMultiplyKernel::run(const Window &window, cl::CommandQueu
unsigned int idx_2 = num_arguments_per_3D_tensor() + num_arguments_per_2D_tensor();
add_3D_tensor_argument(idx_0, _input0, slice_in);
add_1D_tensor_argument(idx_2, _output, slice_out);
- enqueue(queue, *this, slice_in, _lws_hint);
+ enqueue(queue, *this, slice_in, lws_hint());
}
while(window.slide_window_slice_3D(slice_in) && window.slide_window_slice_3D(slice_out));
}
diff --git a/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp b/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp
index 7e44fa7118..5b299052d4 100644
--- a/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp
@@ -107,7 +107,7 @@ void CLGEMMTranspose1xWKernel::configure(const ICLTensor *input, ICLTensor *outp
unsigned int num_elems_processed_per_iteration = 1;
auto win_config = validate_and_configure_window(input->info(), output->info(), num_elems_processed_per_iteration, mult_transpose1xW_width);
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
- ICLKernel::configure(win_config.second);
+ ICLKernel::configure_internal(win_config.second);
// Create build options
CLBuildOptions build_opts;
@@ -157,7 +157,7 @@ void CLGEMMTranspose1xWKernel::run(const Window &window, cl::CommandQueue &queue
unsigned int idx = 0;
add_3D_tensor_argument(idx, _input, in_slice);
add_3D_tensor_argument(idx, _output, out_slice);
- enqueue(queue, *this, in_slice, _lws_hint);
+ enqueue(queue, *this, in_slice, lws_hint());
}
while(window.slide_window_slice_3D(in_slice) && out_window.slide_window_slice_3D(out_slice));
}
diff --git a/src/core/CL/kernels/CLGaussian3x3Kernel.cpp b/src/core/CL/kernels/CLGaussian3x3Kernel.cpp
index e5bc3f9656..7e8f3139f2 100644
--- a/src/core/CL/kernels/CLGaussian3x3Kernel.cpp
+++ b/src/core/CL/kernels/CLGaussian3x3Kernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -72,5 +72,5 @@ void CLGaussian3x3Kernel::configure(const ICLTensor *input, ICLTensor *output, b
output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
diff --git a/src/core/CL/kernels/CLGaussianPyramidKernel.cpp b/src/core/CL/kernels/CLGaussianPyramidKernel.cpp
index a4fda364e3..6b729c8585 100644
--- a/src/core/CL/kernels/CLGaussianPyramidKernel.cpp
+++ b/src/core/CL/kernels/CLGaussianPyramidKernel.cpp
@@ -95,7 +95,7 @@ void CLGaussianPyramidHorKernel::configure(const ICLTensor *input, ICLTensor *ou
output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape()));
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
void CLGaussianPyramidHorKernel::run(const Window &window, cl::CommandQueue &queue)
@@ -177,7 +177,7 @@ void CLGaussianPyramidVertKernel::configure(const ICLTensor *input, ICLTensor *o
output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape()));
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
void CLGaussianPyramidVertKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLHOGDescriptorKernel.cpp b/src/core/CL/kernels/CLHOGDescriptorKernel.cpp
index a15aab1f37..26c3b81175 100644
--- a/src/core/CL/kernels/CLHOGDescriptorKernel.cpp
+++ b/src/core/CL/kernels/CLHOGDescriptorKernel.cpp
@@ -91,7 +91,7 @@ void CLHOGOrientationBinningKernel::configure(const ICLTensor *input_magnitude,
output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape()));
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
void CLHOGOrientationBinningKernel::run(const Window &window, cl::CommandQueue &queue)
@@ -174,7 +174,7 @@ void CLHOGBlockNormalizationKernel::configure(const ICLTensor *input, ICLTensor
output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
void CLHOGBlockNormalizationKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLHOGDetectorKernel.cpp b/src/core/CL/kernels/CLHOGDetectorKernel.cpp
index caca49846f..12bbbaf9f2 100644
--- a/src/core/CL/kernels/CLHOGDetectorKernel.cpp
+++ b/src/core/CL/kernels/CLHOGDetectorKernel.cpp
@@ -110,7 +110,7 @@ void CLHOGDetectorKernel::configure(const ICLTensor *input, const ICLHOG *hog, I
update_window_and_padding(win, AccessWindowRectangle(input->info(), 0, 0, num_elems_read_per_iteration, num_rows_read_per_iteration));
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
void CLHOGDetectorKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLHarrisCornersKernel.cpp b/src/core/CL/kernels/CLHarrisCornersKernel.cpp
index 1f757fe34c..5320b6bebc 100644
--- a/src/core/CL/kernels/CLHarrisCornersKernel.cpp
+++ b/src/core/CL/kernels/CLHarrisCornersKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -106,7 +106,7 @@ void CLHarrisScoreKernel::configure(const ICLImage *input1, const ICLImage *inpu
ValidRegion valid_region = intersect_valid_regions(input1->info()->valid_region(), input2->info()->valid_region());
output_access.set_valid_region(win, valid_region, border_undefined, border_size());
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
void CLHarrisScoreKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLHistogramKernel.cpp b/src/core/CL/kernels/CLHistogramKernel.cpp
index 7b715abb36..b56ad8d38d 100644
--- a/src/core/CL/kernels/CLHistogramKernel.cpp
+++ b/src/core/CL/kernels/CLHistogramKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -107,7 +107,7 @@ void CLHistogramKernel::configure(const ICLImage *input, ICLDistribution1D *outp
update_window_and_padding(win, AccessWindowHorizontal(input->info(), 0, pixels_per_item));
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
void CLHistogramKernel::run(const Window &window, cl::CommandQueue &queue)
@@ -198,7 +198,7 @@ void CLHistogramBorderKernel::configure(const ICLImage *input, ICLDistribution1D
win.set(0, Window::Dimension(start_position, _input->info()->dimension(0)));
win.set(1, Window::Dimension(0, _input->info()->dimension(1)));
update_window_and_padding(win, AccessWindowHorizontal(input->info(), 0, 1));
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
void CLHistogramBorderKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLIm2ColKernel.cpp b/src/core/CL/kernels/CLIm2ColKernel.cpp
index 39654e2190..42bb96c16f 100644
--- a/src/core/CL/kernels/CLIm2ColKernel.cpp
+++ b/src/core/CL/kernels/CLIm2ColKernel.cpp
@@ -308,7 +308,7 @@ void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const
auto win_config = validate_and_configure_window(input->info(), output->info(), kernel_dims, conv_info, has_bias, dilation, im2col_config.num_elems_processed_per_iteration,
im2col_config.is_padding_required_nchw);
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
- ICLKernel::configure(win_config.second);
+ ICLKernel::configure_internal(win_config.second);
// Set config_id for enabling LWS tuning
_config_id = im2col_config.kernel_name;
@@ -386,7 +386,7 @@ void CLIm2ColKernel::run(const Window &window, cl::CommandQueue &queue)
add_2D_tensor_argument(idx, _output, slice_out);
_kernel.setArg<cl_uint>(idx++, static_cast<unsigned int>(_input->info()->strides_in_bytes()[3]));
_kernel.setArg<cl_uint>(idx++, static_cast<unsigned int>(_output->info()->strides_in_bytes()[2]));
- enqueue(queue, *this, slice, _lws_hint);
+ enqueue(queue, *this, slice, lws_hint());
}
while(window_collapsed.slide_window_slice_3D(slice) && window_output.slide_window_slice_2D(slice_out) && window_collapsed.slide_window_slice_3D(slice_in));
-} \ No newline at end of file
+}
diff --git a/src/core/CL/kernels/CLIntegralImageKernel.cpp b/src/core/CL/kernels/CLIntegralImageKernel.cpp
index 69ede457df..6fb39ff0a2 100644
--- a/src/core/CL/kernels/CLIntegralImageKernel.cpp
+++ b/src/core/CL/kernels/CLIntegralImageKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -60,7 +60,7 @@ void CLIntegralImageHorKernel::configure(const ICLTensor *input, ICLTensor *outp
output_access.set_valid_region(win, input->info()->valid_region());
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
CLIntegralImageVertKernel::CLIntegralImageVertKernel()
@@ -89,7 +89,7 @@ void CLIntegralImageVertKernel::configure(ICLTensor *in_out)
in_out_access.set_valid_region(win, in_out->info()->valid_region());
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
void CLIntegralImageVertKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp b/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp
index 39d9f958d3..54ed51eda2 100644
--- a/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp
+++ b/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp
@@ -120,7 +120,7 @@ void CLL2NormalizeLayerKernel::configure(const ICLTensor *input, const ICLTensor
auto win_config = validate_and_configure_window(_input->info(), _output->info());
ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config));
- ICLKernel::configure(std::get<1>(win_config));
+ ICLKernel::configure_internal(std::get<1>(win_config));
}
Status CLL2NormalizeLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, unsigned int axis, float epsilon)
diff --git a/src/core/CL/kernels/CLLKTrackerKernel.cpp b/src/core/CL/kernels/CLLKTrackerKernel.cpp
index 078d18e61c..40ed630c89 100644
--- a/src/core/CL/kernels/CLLKTrackerKernel.cpp
+++ b/src/core/CL/kernels/CLLKTrackerKernel.cpp
@@ -75,7 +75,7 @@ void CLLKTrackerInitKernel::configure(const ICLKeyPointArray *old_points, const
Window window;
window.set(Window::DimX, Window::Dimension(0, old_points->num_values(), 1));
window.set(Window::DimY, Window::Dimension(0, 1, 1));
- ICLKernel::configure(window);
+ ICLKernel::configure_internal(window);
}
void CLLKTrackerInitKernel::run(const Window &window, cl::CommandQueue &queue)
@@ -104,7 +104,7 @@ void CLLKTrackerFinalizeKernel::configure(ICLLKInternalKeypointArray *new_points
Window window;
window.set(Window::DimX, Window::Dimension(0, new_points_internal->num_values(), 1));
window.set(Window::DimY, Window::Dimension(0, 1, 1));
- ICLKernel::configure(window);
+ ICLKernel::configure_internal(window);
}
void CLLKTrackerFinalizeKernel::run(const Window &window, cl::CommandQueue &queue)
@@ -156,7 +156,7 @@ void CLLKTrackerStage0Kernel::configure(const ICLTensor *old_input, const ICLTen
AccessWindowStatic(old_scharr_gy->info(), valid_region.start(0), valid_region.start(1),
valid_region.end(0), valid_region.end(1)));
- ICLKernel::configure(window);
+ ICLKernel::configure_internal(window);
// Initialize required variables
const int level0 = (level == 0) ? 1 : 0;
@@ -232,7 +232,7 @@ void CLLKTrackerStage1Kernel::configure(const ICLTensor *new_input, ICLLKInterna
AccessWindowStatic(new_input->info(), valid_region.start(0), valid_region.start(1),
valid_region.end(0), valid_region.end(1)));
- ICLKernel::configure(window);
+ ICLKernel::configure_internal(window);
// Initialize required variables
const int level0 = (level == 0) ? 1 : 0;
diff --git a/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp b/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp
index 1a7d95cc2c..ad2f3a4892 100644
--- a/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp
+++ b/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp
@@ -90,13 +90,14 @@ void CLLocallyConnectedMatrixMultiplyKernel::configure(const ICLTensor *input0,
_input1 = input1;
_output = output;
+ cl::NDRange lws_hint;
if(output->info()->dimension(1) == 196)
{
- _lws_hint = cl::NDRange(1, 7);
+ lws_hint = cl::NDRange(1, 7);
}
else
{
- _lws_hint = cl::NDRange(8, 8);
+ lws_hint = cl::NDRange(8, 8);
}
std::ostringstream mm_arguments;
@@ -114,7 +115,7 @@ void CLLocallyConnectedMatrixMultiplyKernel::configure(const ICLTensor *input0,
ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config));
- ICLKernel::configure(std::get<1>(win_config));
+ ICLKernel::configure_internal(std::get<1>(win_config), lws_hint);
}
Status CLLocallyConnectedMatrixMultiplyKernel::validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output)
@@ -142,7 +143,7 @@ void CLLocallyConnectedMatrixMultiplyKernel::run(const Window &window, cl::Comma
add_2D_tensor_argument(idx, _input0, slice);
add_3D_tensor_argument(idx, _input1, slice_matrix_b);
add_2D_tensor_argument(idx, _output, slice);
- enqueue(queue, *this, slice, _lws_hint);
+ enqueue(queue, *this, slice, lws_hint());
}
while(window.slide_window_slice_2D(slice));
}
diff --git a/src/core/CL/kernels/CLMagnitudePhaseKernel.cpp b/src/core/CL/kernels/CLMagnitudePhaseKernel.cpp
index c504189169..0b34c59a03 100644
--- a/src/core/CL/kernels/CLMagnitudePhaseKernel.cpp
+++ b/src/core/CL/kernels/CLMagnitudePhaseKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -137,7 +137,7 @@ void CLMagnitudePhaseKernel::configure(const ICLTensor *gx, const ICLTensor *gy,
output_magnitude_access.set_valid_region(win, valid_region);
output_phase_access.set_valid_region(win, valid_region);
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
void CLMagnitudePhaseKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLMeanStdDevKernel.cpp b/src/core/CL/kernels/CLMeanStdDevKernel.cpp
index bd31131fe5..0cde9c5fe6 100644
--- a/src/core/CL/kernels/CLMeanStdDevKernel.cpp
+++ b/src/core/CL/kernels/CLMeanStdDevKernel.cpp
@@ -106,7 +106,7 @@ void CLMeanStdDevKernel::configure(const ICLImage *input, float *mean, cl::Buffe
AccessWindowRectangle input_access(input->info(), 0, 0, num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y);
update_window_and_padding(win, input_access);
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
void CLMeanStdDevKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLMedian3x3Kernel.cpp b/src/core/CL/kernels/CLMedian3x3Kernel.cpp
index 3b9fb1fe88..b93179d5f4 100644
--- a/src/core/CL/kernels/CLMedian3x3Kernel.cpp
+++ b/src/core/CL/kernels/CLMedian3x3Kernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -62,5 +62,5 @@ void CLMedian3x3Kernel::configure(const ICLTensor *input, ICLTensor *output, boo
output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
diff --git a/src/core/CL/kernels/CLMinMaxLayerKernel.cpp b/src/core/CL/kernels/CLMinMaxLayerKernel.cpp
index 9493ddc878..fa7b678e86 100644
--- a/src/core/CL/kernels/CLMinMaxLayerKernel.cpp
+++ b/src/core/CL/kernels/CLMinMaxLayerKernel.cpp
@@ -105,7 +105,7 @@ void CLMinMaxLayerKernel::configure(const ICLTensor *input, ICLTensor *output)
ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config));
- ICLKernel::configure(std::get<1>(win_config));
+ ICLKernel::configure_internal(std::get<1>(win_config));
}
Status CLMinMaxLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output)
diff --git a/src/core/CL/kernels/CLMinMaxLocationKernel.cpp b/src/core/CL/kernels/CLMinMaxLocationKernel.cpp
index 5636592347..0c7f3bc070 100644
--- a/src/core/CL/kernels/CLMinMaxLocationKernel.cpp
+++ b/src/core/CL/kernels/CLMinMaxLocationKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -118,7 +118,7 @@ void CLMinMaxKernel::configure(const ICLImage *input, cl::Buffer *min_max)
// Configure kernel window
Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
update_window_and_padding(win, AccessWindowHorizontal(input->info(), 0, ceil_to_multiple(num_elems_processed_per_iteration, 16)));
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
void CLMinMaxKernel::run(const Window &window, cl::CommandQueue &queue)
@@ -209,7 +209,7 @@ void CLMinMaxLocationKernel::configure(const ICLImage *input, cl::Buffer *min_ma
constexpr unsigned int num_elems_processed_per_iteration = 1;
Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
update_window_and_padding(win, AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration));
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
void CLMinMaxLocationKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLNonLinearFilterKernel.cpp b/src/core/CL/kernels/CLNonLinearFilterKernel.cpp
index 6afa5822ba..5e419743d0 100644
--- a/src/core/CL/kernels/CLNonLinearFilterKernel.cpp
+++ b/src/core/CL/kernels/CLNonLinearFilterKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -94,5 +94,5 @@ void CLNonLinearFilterKernel::configure(const ICLTensor *input, ICLTensor *outpu
output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
diff --git a/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp b/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp
index 6a96b0effd..4e41f0df42 100644
--- a/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp
+++ b/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -68,5 +68,5 @@ void CLNonMaximaSuppression3x3Kernel::configure(const ICLTensor *input, ICLTenso
output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
diff --git a/src/core/CL/kernels/CLNormalizationLayerKernel.cpp b/src/core/CL/kernels/CLNormalizationLayerKernel.cpp
index edc9e9d58c..8a7b7aed22 100644
--- a/src/core/CL/kernels/CLNormalizationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLNormalizationLayerKernel.cpp
@@ -135,7 +135,7 @@ void CLNormalizationLayerKernel::configure(const ICLTensor *input, ICLTensor *ou
// Configure kernel window
auto win_config = validate_and_configure_window(input->info(), output->info(), norm_info);
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
- ICLKernel::configure(win_config.second);
+ ICLKernel::configure_internal(win_config.second);
// Set config_id for enabling LWS tuning
_config_id = "normalization_layer_";
diff --git a/src/core/CL/kernels/CLPermuteKernel.cpp b/src/core/CL/kernels/CLPermuteKernel.cpp
index 7c0c95be1c..c6f0f4bc55 100644
--- a/src/core/CL/kernels/CLPermuteKernel.cpp
+++ b/src/core/CL/kernels/CLPermuteKernel.cpp
@@ -120,7 +120,7 @@ void CLPermuteKernel::configure(const ICLTensor *input, ICLTensor *output, const
coord.set_num_dimensions(output->info()->num_dimensions());
output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape()));
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
Status CLPermuteKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const PermutationVector &perm)
diff --git a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp
index 4ea093fe04..4ca2ef8aa3 100644
--- a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp
+++ b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp
@@ -197,7 +197,7 @@ void CLPixelWiseMultiplicationKernel::configure(const ICLTensor *input1, const I
_kernel.setArg(idx++, scale);
}
- ICLKernel::configure(win_config.second);
+ ICLKernel::configure_internal(win_config.second);
}
Status CLPixelWiseMultiplicationKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float scale,
diff --git a/src/core/CL/kernels/CLPoolingLayerKernel.cpp b/src/core/CL/kernels/CLPoolingLayerKernel.cpp
index d5ea092c78..df13068239 100644
--- a/src/core/CL/kernels/CLPoolingLayerKernel.cpp
+++ b/src/core/CL/kernels/CLPoolingLayerKernel.cpp
@@ -269,7 +269,7 @@ void CLPoolingLayerKernel::configure(const ICLTensor *input, ICLTensor *output,
auto win_config = validate_and_configure_window(input->info(), output->info(), pool_info);
ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config));
- ICLKernel::configure(std::get<1>(win_config));
+ ICLKernel::configure_internal(std::get<1>(win_config));
if(data_layout == DataLayout::NCHW)
{
@@ -336,7 +336,7 @@ void CLPoolingLayerKernel::run(const Window &window, cl::CommandQueue &queue)
unsigned int idx = 0;
add_3D_tensor_argument(idx, _input, in_slice);
add_3D_tensor_argument(idx, _output, slice);
- enqueue(queue, *this, slice, _lws_hint);
+ enqueue(queue, *this, slice, lws_hint());
}
while(window_collapsed.slide_window_slice_3D(slice));
break;
@@ -355,7 +355,7 @@ void CLPoolingLayerKernel::run(const Window &window, cl::CommandQueue &queue)
unsigned int idx = 0;
add_3D_tensor_argument(idx, _input, in_slice);
add_3D_tensor_argument(idx, _output, slice);
- enqueue(queue, *this, slice, _lws_hint);
+ enqueue(queue, *this, slice, lws_hint());
}
while(window.slide_window_slice_3D(slice) && window.slide_window_slice_3D(in_slice));
break;
diff --git a/src/core/CL/kernels/CLQuantizationLayerKernel.cpp b/src/core/CL/kernels/CLQuantizationLayerKernel.cpp
index af751f4832..9028b0f604 100644
--- a/src/core/CL/kernels/CLQuantizationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLQuantizationLayerKernel.cpp
@@ -96,7 +96,7 @@ void CLQuantizationLayerKernel::configure(const ICLTensor *input, ICLTensor *out
ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config));
- ICLKernel::configure(std::get<1>(win_config));
+ ICLKernel::configure_internal(std::get<1>(win_config));
}
Status CLQuantizationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *min_max)
diff --git a/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp b/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp
index 4048e927f5..23676942a6 100644
--- a/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp
+++ b/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp
@@ -101,7 +101,7 @@ void CLROIPoolingLayerKernel::configure(const ICLTensor *input, const ICLROIArra
update_window_and_padding(window, input_access, output_access);
output_access.set_valid_region(window, ValidRegion(Coordinates(), output->info()->tensor_shape()));
- ICLKernel::configure(window);
+ ICLKernel::configure_internal(window);
}
void CLROIPoolingLayerKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLReductionOperationKernel.cpp b/src/core/CL/kernels/CLReductionOperationKernel.cpp
index 09861630ac..bf36ae2c0f 100644
--- a/src/core/CL/kernels/CLReductionOperationKernel.cpp
+++ b/src/core/CL/kernels/CLReductionOperationKernel.cpp
@@ -114,8 +114,8 @@ void CLReductionOperationKernel::configure(const ICLTensor *input, ICLTensor *ou
// Set the number of WG based on the input size. If input width is < 128
// we can use fewer threads than 8.
- _lws_hint = cl::NDRange(std::min(8U, num_of_threads));
- _border_size = BorderSize(0, border_width, 0, 0);
+ cl::NDRange lws_hint = cl::NDRange(std::min(8U, num_of_threads));
+ _border_size = BorderSize(0, border_width, 0, 0);
// Set build options
std::set<std::string> build_opts;
@@ -142,7 +142,7 @@ void CLReductionOperationKernel::configure(const ICLTensor *input, ICLTensor *ou
ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config));
- ICLKernel::configure(std::get<1>(win_config));
+ ICLKernel::configure_internal(std::get<1>(win_config), lws_hint);
}
Status CLReductionOperationKernel::validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op)
@@ -171,7 +171,7 @@ void CLReductionOperationKernel::run(const Window &window, cl::CommandQueue &que
in_slice.set(Window::DimX, Window::Dimension(in_slice.x().start(), in_slice.x().end() + border_width, in_slice.x().step()));
// Set local sums buffer
- unsigned int local_sum_size = _lws_hint[0] * _input->info()->element_size();
+ unsigned int local_sum_size = lws_hint()[0] * _input->info()->element_size();
_kernel.setArg(num_arguments_per_2D_tensor() * 2, local_sum_size, nullptr);
do
@@ -179,7 +179,7 @@ void CLReductionOperationKernel::run(const Window &window, cl::CommandQueue &que
unsigned int idx = 0;
add_2D_tensor_argument(idx, _input, in_slice);
add_2D_tensor_argument(idx, _output, out_slice);
- enqueue(queue, *this, in_slice, _lws_hint);
+ enqueue(queue, *this, in_slice, lws_hint());
}
while(window.slide_window_slice_2D(in_slice) && window.slide_window_slice_2D(out_slice));
}
diff --git a/src/core/CL/kernels/CLRemapKernel.cpp b/src/core/CL/kernels/CLRemapKernel.cpp
index b46bb30c59..33c5f2d402 100644
--- a/src/core/CL/kernels/CLRemapKernel.cpp
+++ b/src/core/CL/kernels/CLRemapKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -83,7 +83,7 @@ void CLRemapKernel::configure(const ICLTensor *input, const ICLTensor *map_x, co
output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
// Set static arguments
unsigned int idx = 4 * num_arguments_per_2D_tensor(); //Skip the input and output parameters
diff --git a/src/core/CL/kernels/CLReshapeLayerKernel.cpp b/src/core/CL/kernels/CLReshapeLayerKernel.cpp
index ce9d7fff67..c7efa9a82d 100644
--- a/src/core/CL/kernels/CLReshapeLayerKernel.cpp
+++ b/src/core/CL/kernels/CLReshapeLayerKernel.cpp
@@ -92,7 +92,7 @@ void CLReshapeLayerKernel::configure(const ICLTensor *input, ICLTensor *output)
output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
void CLReshapeLayerKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLScaleKernel.cpp b/src/core/CL/kernels/CLScaleKernel.cpp
index b1655d5cc1..b2cd4b7adf 100644
--- a/src/core/CL/kernels/CLScaleKernel.cpp
+++ b/src/core/CL/kernels/CLScaleKernel.cpp
@@ -181,7 +181,7 @@ void CLScaleKernel::configure(const ICLTensor *input, ICLTensor *output, Interpo
// Configure kernel window
auto win_config = validate_and_configure_window(input->info(), output->info(), policy, border_mode, sampling_policy, border);
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
- ICLKernel::configure(win_config.second);
+ ICLKernel::configure_internal(win_config.second);
// Create kernel
CLBuildOptions build_opts;
@@ -223,7 +223,7 @@ void CLScaleKernel::run(const Window &window, cl::CommandQueue &queue)
unsigned int idx = 0;
add_2D_tensor_argument(idx, _input, slice);
add_2D_tensor_argument(idx, _output, slice);
- enqueue(queue, *this, slice, _lws_hint);
+ enqueue(queue, *this, slice, lws_hint());
}
while(window.slide_window_slice_2D(slice));
break;
@@ -237,7 +237,7 @@ void CLScaleKernel::run(const Window &window, cl::CommandQueue &queue)
unsigned int idx = 0;
add_3D_tensor_argument(idx, _input, slice);
add_3D_tensor_argument(idx, _output, slice);
- enqueue(queue, *this, slice, _lws_hint);
+ enqueue(queue, *this, slice, lws_hint());
}
while(window.slide_window_slice_3D(slice));
break;
diff --git a/src/core/CL/kernels/CLScharr3x3Kernel.cpp b/src/core/CL/kernels/CLScharr3x3Kernel.cpp
index 913ef592d4..5182390822 100644
--- a/src/core/CL/kernels/CLScharr3x3Kernel.cpp
+++ b/src/core/CL/kernels/CLScharr3x3Kernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -102,7 +102,7 @@ void CLScharr3x3Kernel::configure(const ICLTensor *input, ICLTensor *output_x, I
output_x_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
output_y_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
void CLScharr3x3Kernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLSobel3x3Kernel.cpp b/src/core/CL/kernels/CLSobel3x3Kernel.cpp
index 436aaa498a..b4bfe28216 100644
--- a/src/core/CL/kernels/CLSobel3x3Kernel.cpp
+++ b/src/core/CL/kernels/CLSobel3x3Kernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -102,7 +102,7 @@ void CLSobel3x3Kernel::configure(const ICLTensor *input, ICLTensor *output_x, IC
output_x_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
output_y_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
void CLSobel3x3Kernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLSobel5x5Kernel.cpp b/src/core/CL/kernels/CLSobel5x5Kernel.cpp
index 4c0316f19e..46aa074d61 100644
--- a/src/core/CL/kernels/CLSobel5x5Kernel.cpp
+++ b/src/core/CL/kernels/CLSobel5x5Kernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -102,7 +102,7 @@ void CLSobel5x5HorKernel::configure(const ICLTensor *input, ICLTensor *output_x,
output_x_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
output_y_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
void CLSobel5x5HorKernel::run(const Window &window, cl::CommandQueue &queue)
@@ -201,7 +201,7 @@ void CLSobel5x5VertKernel::configure(const ICLTensor *input_x, const ICLTensor *
output_x_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
output_y_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
void CLSobel5x5VertKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLSobel7x7Kernel.cpp b/src/core/CL/kernels/CLSobel7x7Kernel.cpp
index a477953cfb..0c94e88acf 100644
--- a/src/core/CL/kernels/CLSobel7x7Kernel.cpp
+++ b/src/core/CL/kernels/CLSobel7x7Kernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -105,7 +105,7 @@ void CLSobel7x7HorKernel::configure(const ICLTensor *input, ICLTensor *output_x,
output_x_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
output_y_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
void CLSobel7x7HorKernel::run(const Window &window, cl::CommandQueue &queue)
@@ -204,7 +204,7 @@ void CLSobel7x7VertKernel::configure(const ICLTensor *input_x, const ICLTensor *
output_x_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
output_y_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
void CLSobel7x7VertKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp b/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp
index b9ebdc9583..403256baae 100644
--- a/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp
+++ b/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp
@@ -242,7 +242,7 @@ void CLLogits1DMaxShiftExpSumKernel::configure(const ICLTensor *input, ICLTensor
build_opts.add_option_if(is_data_type_float(dt) && (beta != 1.0f), "-DBETA=" + float_to_string_with_full_precision(beta));
build_opts.add_options_if(is_data_type_quantized_asymmetric(dt), prepare_quantized_softmax_build_options(input->info()->quantization_info().scale, beta).options());
- _lws_hint = cl::NullRange;
+ cl::NDRange lws_hint(cl::NullRange);
std::string kernel_name = is_data_type_quantized_asymmetric(dt) ? std::string("softmax_layer_max_shift_exp_sum_quantized_serial") :
std::string("softmax_layer_max_shift_exp_sum_serial");
ParallelReductionInfo parallel_reduction_info = is_parallel_reduction(reduction_dim_size);
@@ -264,7 +264,7 @@ void CLLogits1DMaxShiftExpSumKernel::configure(const ICLTensor *input, ICLTensor
build_opts.add_option_if((multiple_grid_size != 0) || ((reduction_dim_size % vector_size) != 0), "-DNON_MULTIPLE_OF_GRID_SIZE");
// Setting _lws_hint in this way can also communicate grid_size to CLLogits1DMaxShiftExpSumKernel::run().
// A single workgroup performs reduction in dimension 0 in the parallel case, hence lws[0]==gws[0].
- _lws_hint = cl::NDRange(_grid_size);
+ lws_hint = cl::NDRange(_grid_size);
}
// Create kernel.
@@ -277,7 +277,7 @@ void CLLogits1DMaxShiftExpSumKernel::configure(const ICLTensor *input, ICLTensor
// Configure window
auto win_config = validate_and_configure_window_1DMaxShiftExpSum(input->info(), max->info(), output->info(), sum->info());
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
- ICLKernel::configure(win_config.second);
+ ICLKernel::configure_internal(win_config.second, lws_hint);
}
Status CLLogits1DMaxShiftExpSumKernel::validate(const ITensorInfo *input, const ITensorInfo *max, const ITensorInfo *output, const ITensorInfo *sum)
@@ -322,7 +322,7 @@ void CLLogits1DMaxShiftExpSumKernel::run(const Window &window, cl::CommandQueue
add_3D_tensor_argument(idx, _max, slice);
add_3D_tensor_argument(idx, _output, slice);
add_3D_tensor_argument(idx, _sum, slice);
- enqueue(queue, *this, slice, _lws_hint);
+ enqueue(queue, *this, slice, lws_hint());
}
while(window_collapsed.slide_window_slice_3D(slice));
}
@@ -365,7 +365,7 @@ void CLLogits1DNormKernel::configure(const ICLTensor *input, const ICLTensor *su
// Configure window
auto win_config = validate_and_configure_window_1DNorm(input->info(), output->info(), sum->info());
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
- ICLKernel::configure(win_config.second);
+ ICLKernel::configure_internal(win_config.second);
}
Status CLLogits1DNormKernel::validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output)
@@ -394,7 +394,7 @@ void CLLogits1DNormKernel::run(const Window &window, cl::CommandQueue &queue)
add_3D_tensor_argument(idx, _input, slice);
add_3D_tensor_argument(idx, _sum, sum_slice);
add_3D_tensor_argument(idx, _output, slice);
- enqueue(queue, *this, slice, _lws_hint);
+ enqueue(queue, *this, slice, lws_hint());
}
while(window_collapsed.slide_window_slice_3D(slice));
}
diff --git a/src/core/CL/kernels/CLTransposeKernel.cpp b/src/core/CL/kernels/CLTransposeKernel.cpp
index 3d584345d7..695bdf7f40 100644
--- a/src/core/CL/kernels/CLTransposeKernel.cpp
+++ b/src/core/CL/kernels/CLTransposeKernel.cpp
@@ -117,9 +117,8 @@ void CLTransposeKernel::configure(const ICLTensor *input, ICLTensor *output)
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info()));
- _input = input;
- _output = output;
- _lws_hint = cl::NDRange(2, 8);
+ _input = input;
+ _output = output;
std::set<std::string> build_opts;
std::ostringstream data_type_in_bytes;
@@ -131,5 +130,5 @@ void CLTransposeKernel::configure(const ICLTensor *input, ICLTensor *output)
// Configure kernel window
auto win_config = validate_and_configure_window(input->info(), output->info());
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
- ICLKernel::configure(win_config.second);
+ ICLKernel::configure_internal(win_config.second, cl::NDRange(2, 8));
}
diff --git a/src/core/CL/kernels/CLWarpAffineKernel.cpp b/src/core/CL/kernels/CLWarpAffineKernel.cpp
index e0e09400af..1fae2b1974 100644
--- a/src/core/CL/kernels/CLWarpAffineKernel.cpp
+++ b/src/core/CL/kernels/CLWarpAffineKernel.cpp
@@ -98,5 +98,5 @@ void CLWarpAffineKernel::configure(const ICLTensor *input, ICLTensor *output, co
output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
diff --git a/src/core/CL/kernels/CLWarpPerspectiveKernel.cpp b/src/core/CL/kernels/CLWarpPerspectiveKernel.cpp
index d6fcb09658..e537aec058 100644
--- a/src/core/CL/kernels/CLWarpPerspectiveKernel.cpp
+++ b/src/core/CL/kernels/CLWarpPerspectiveKernel.cpp
@@ -95,5 +95,5 @@ void CLWarpPerspectiveKernel::configure(const ICLTensor *input, ICLTensor *outpu
output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
diff --git a/src/core/CL/kernels/CLWeightsReshapeKernel.cpp b/src/core/CL/kernels/CLWeightsReshapeKernel.cpp
index 58ecd9ccb3..5ef0f5b152 100644
--- a/src/core/CL/kernels/CLWeightsReshapeKernel.cpp
+++ b/src/core/CL/kernels/CLWeightsReshapeKernel.cpp
@@ -115,7 +115,7 @@ void CLWeightsReshapeKernel::configure(const ICLTensor *input, const ICLTensor *
Window win = calculate_max_window(*input->info(), Steps());
// The CLWeightsReshapeKernel doesn't need padding so update_window_and_padding() can be skipped
output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape()));
- ICLKernel::configure(win);
+ ICLKernel::configure_internal(win);
}
Status CLWeightsReshapeKernel::validate(const ITensorInfo *input, const ITensorInfo *biases, const ITensorInfo *output, const unsigned int num_groups)
diff --git a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp
index e2ca05a72a..e5ab8d2304 100644
--- a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp
+++ b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp
@@ -111,7 +111,7 @@ void CLWidthConcatenateLayerKernel::configure(const ICLTensor *input, unsigned i
auto win_config = validate_and_configure_window(input->info(), width_offset, output->info());
ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config));
- ICLKernel::configure(std::get<1>(win_config));
+ ICLKernel::configure_internal(std::get<1>(win_config));
}
void CLWidthConcatenateLayerKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp b/src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp
index e6c713e5e7..818638c89e 100644
--- a/src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp
+++ b/src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp
@@ -125,7 +125,7 @@ void CLWinogradFilterTransformKernel::configure(const ICLTensor *input, ICLTenso
// Configure kernel window
auto win_config = validate_and_configure_window(input->info(), output->info());
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
- ICLKernel::configure(win_config.second);
+ ICLKernel::configure_internal(win_config.second);
}
Status CLWinogradFilterTransformKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info)
diff --git a/src/core/CL/kernels/CLWinogradInputTransformKernel.cpp b/src/core/CL/kernels/CLWinogradInputTransformKernel.cpp
index fcfd9e30a1..2309fbfb26 100644
--- a/src/core/CL/kernels/CLWinogradInputTransformKernel.cpp
+++ b/src/core/CL/kernels/CLWinogradInputTransformKernel.cpp
@@ -180,8 +180,6 @@ void CLWinogradInputTransformKernel::configure(const ICLTensor *input, ICLTensor
_step_z = (_input->info()->dimension(2) % 2) != 0 ? 1 : 2;
}
- _lws_hint = cl::NDRange(1, 1, 8);
-
// Append stepz and data layout
kernel_name += "_stepz";
kernel_name += support::cpp11::to_string(_step_z);
@@ -192,7 +190,7 @@ void CLWinogradInputTransformKernel::configure(const ICLTensor *input, ICLTensor
// Create window and update padding
auto win_config = validate_and_configure_window(input->info(), output->info(), winograd_info);
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
- ICLKernel::configure(win_config.second);
+ ICLKernel::configure_internal(win_config.second, cl::NDRange(1, 1, 8));
_config_id = kernel_name;
_config_id += support::cpp11::to_string(input->info()->dimension(0));
@@ -239,7 +237,7 @@ void CLWinogradInputTransformKernel::run(const Window &window, cl::CommandQueue
add_3D_tensor_argument(idx, _input, slice);
add_3D_tensor_argument(idx, _output, slice);
- enqueue(queue, *this, slice, _lws_hint);
+ enqueue(queue, *this, slice, lws_hint());
}
while(window.slide_window_slice_3D(slice));
}
diff --git a/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp b/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp
index 40d5f6588f..fa42596604 100644
--- a/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp
+++ b/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp
@@ -172,7 +172,7 @@ void CLWinogradOutputTransformKernel::configure(const ICLTensor *input, const IC
// Configure kernel window
auto win_config = validate_and_configure_window(input->info(), (bias != nullptr ? bias->info() : nullptr), output->info(), winograd_info.output_tile_size);
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
- ICLKernel::configure(win_config.second);
+ ICLKernel::configure_internal(win_config.second);
// Set config_id for enabling LWS tuning
_config_id = kernel_name;
@@ -231,7 +231,7 @@ void CLWinogradOutputTransformKernel::run(const Window &window, cl::CommandQueue
unsigned int idx = 0;
add_3D_tensor_argument(idx, _input, slice);
add_3D_tensor_argument(idx, _output, slice_out);
- enqueue(queue, *this, slice, _lws_hint);
+ enqueue(queue, *this, slice, lws_hint());
}
while(window.slide_window_slice_3D(slice) && window.slide_window_slice_3D(slice_out));
}