From b6eb35371d222c6b7f61210d97ebd7dd9e197458 Mon Sep 17 00:00:00 2001
From: Anthony Barbier <anthony.barbier@arm.com>
Date: Wed, 8 Aug 2018 13:20:04 +0100
Subject: COMPMID-1478: Stop relying on static default OpenCL objects in
 cl2.hpp

This causes problems when ACL is used as a shared library on Android.
Fixes some problems related to creation / destruction order between the Graph's CL backend and core / runtime

Change-Id: I716d63fd42f4586df1ffbb6fa97e4db06d3a781b
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/143228
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele DiGiorgio <michele.digiorgio@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
---
 src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp           |  4 ++--
 src/core/CL/kernels/CLActivationLayerKernel.cpp              |  4 ++--
 src/core/CL/kernels/CLArithmeticAdditionKernel.cpp           |  2 +-
 src/core/CL/kernels/CLArithmeticDivisionKernel.cpp           |  2 +-
 src/core/CL/kernels/CLArithmeticSubtractionKernel.cpp        |  2 +-
 src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp      |  4 ++--
 src/core/CL/kernels/CLBitwiseAndKernel.cpp                   |  4 ++--
 src/core/CL/kernels/CLBitwiseOrKernel.cpp                    |  4 ++--
 src/core/CL/kernels/CLBitwiseXorKernel.cpp                   |  4 ++--
 src/core/CL/kernels/CLBox3x3Kernel.cpp                       |  4 ++--
 src/core/CL/kernels/CLCannyEdgeKernel.cpp                    |  8 ++++----
 src/core/CL/kernels/CLChannelCombineKernel.cpp               |  4 ++--
 src/core/CL/kernels/CLChannelExtractKernel.cpp               |  4 ++--
 src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp          |  2 +-
 src/core/CL/kernels/CLCol2ImKernel.cpp                       |  4 ++--
 src/core/CL/kernels/CLColorConvertKernel.cpp                 |  8 ++++----
 src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp |  4 ++--
 src/core/CL/kernels/CLConvolutionKernel.cpp                  |  8 ++++----
 src/core/CL/kernels/CLCopyKernel.cpp                         |  2 +-
 src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp   |  2 +-
 src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp        |  2 +-
 .../CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp  |  4 ++--
 .../CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp  |  4 ++--
 src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp              |  4 ++--
 src/core/CL/kernels/CLDepthwiseVectorToTensorKernel.cpp      |  2 +-
 src/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.cpp      |  2 +-
 src/core/CL/kernels/CLDequantizationLayerKernel.cpp          |  2 +-
 src/core/CL/kernels/CLDerivativeKernel.cpp                   |  4 ++--
 src/core/CL/kernels/CLDilateKernel.cpp                       |  4 ++--
 src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp       |  4 ++--
 src/core/CL/kernels/CLDirectConvolutionOutputStageKernel.cpp |  4 ++--
 src/core/CL/kernels/CLErodeKernel.cpp                        |  4 ++--
 src/core/CL/kernels/CLFastCornersKernel.cpp                  |  4 ++--
 src/core/CL/kernels/CLFillBorderKernel.cpp                   |  2 +-
 src/core/CL/kernels/CLFlattenLayerKernel.cpp                 |  6 +++---
 src/core/CL/kernels/CLFloorKernel.cpp                        |  2 +-
 src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp            |  4 ++--
 src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp       |  4 ++--
 src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp   |  2 +-
 ...MMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp |  6 +++---
 .../CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp        |  4 ++--
 src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp            |  6 +++---
 src/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.cpp   |  4 ++--
 src/core/CL/kernels/CLGEMMMatrixAdditionKernel.cpp           |  2 +-
 src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp           |  6 +++---
 src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp     |  4 ++--
 src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp             |  4 ++--
 src/core/CL/kernels/CLGaussian3x3Kernel.cpp                  |  4 ++--
 src/core/CL/kernels/CLGaussianPyramidKernel.cpp              |  4 ++--
 src/core/CL/kernels/CLHOGDescriptorKernel.cpp                |  4 ++--
 src/core/CL/kernels/CLHOGDetectorKernel.cpp                  |  2 +-
 src/core/CL/kernels/CLHarrisCornersKernel.cpp                |  4 ++--
 src/core/CL/kernels/CLHistogramKernel.cpp                    |  6 +++---
 src/core/CL/kernels/CLIm2ColKernel.cpp                       |  6 +++---
 src/core/CL/kernels/CLIntegralImageKernel.cpp                |  6 +++---
 src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp             |  2 +-
 src/core/CL/kernels/CLLKTrackerKernel.cpp                    |  8 ++++----
 .../CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp    |  9 +++++----
 src/core/CL/kernels/CLMagnitudePhaseKernel.cpp               |  4 ++--
 src/core/CL/kernels/CLMeanStdDevKernel.cpp                   |  2 +-
 src/core/CL/kernels/CLMedian3x3Kernel.cpp                    |  4 ++--
 src/core/CL/kernels/CLMinMaxLayerKernel.cpp                  |  2 +-
 src/core/CL/kernels/CLMinMaxLocationKernel.cpp               |  6 +++---
 src/core/CL/kernels/CLNonLinearFilterKernel.cpp              |  4 ++--
 src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp      |  4 ++--
 src/core/CL/kernels/CLNormalizationLayerKernel.cpp           |  2 +-
 src/core/CL/kernels/CLPermuteKernel.cpp                      |  2 +-
 src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp      |  2 +-
 src/core/CL/kernels/CLPoolingLayerKernel.cpp                 |  6 +++---
 src/core/CL/kernels/CLQuantizationLayerKernel.cpp            |  2 +-
 src/core/CL/kernels/CLROIPoolingLayerKernel.cpp              |  2 +-
 src/core/CL/kernels/CLReductionOperationKernel.cpp           | 10 +++++-----
 src/core/CL/kernels/CLRemapKernel.cpp                        |  4 ++--
 src/core/CL/kernels/CLReshapeLayerKernel.cpp                 |  2 +-
 src/core/CL/kernels/CLScaleKernel.cpp                        |  6 +++---
 src/core/CL/kernels/CLScharr3x3Kernel.cpp                    |  4 ++--
 src/core/CL/kernels/CLSobel3x3Kernel.cpp                     |  4 ++--
 src/core/CL/kernels/CLSobel5x5Kernel.cpp                     |  6 +++---
 src/core/CL/kernels/CLSobel7x7Kernel.cpp                     |  6 +++---
 src/core/CL/kernels/CLSoftmaxLayerKernel.cpp                 | 12 ++++++------
 src/core/CL/kernels/CLTransposeKernel.cpp                    |  7 +++----
 src/core/CL/kernels/CLWarpAffineKernel.cpp                   |  2 +-
 src/core/CL/kernels/CLWarpPerspectiveKernel.cpp              |  2 +-
 src/core/CL/kernels/CLWeightsReshapeKernel.cpp               |  2 +-
 src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp        |  2 +-
 src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp      |  2 +-
 src/core/CL/kernels/CLWinogradInputTransformKernel.cpp       |  6 ++----
 src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp      |  4 ++--
 88 files changed, 178 insertions(+), 180 deletions(-)

(limited to 'src/core/CL/kernels')

diff --git a/src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp b/src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp
index 685b8e234e..0c1206adfb 100644
--- a/src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp
+++ b/src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -81,7 +81,7 @@ void CLAbsoluteDifferenceKernel::configure(const ICLTensor *input1, const ICLTen
 
     output_access.set_valid_region(win, valid_region);
 
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
 
 void CLAbsoluteDifferenceKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLActivationLayerKernel.cpp b/src/core/CL/kernels/CLActivationLayerKernel.cpp
index d8bd2f7ee1..a15e99b8d4 100644
--- a/src/core/CL/kernels/CLActivationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLActivationLayerKernel.cpp
@@ -179,7 +179,7 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act
     // Configure kernel window
     auto win_config = validate_and_configure_window(input->info(), (_run_in_place) ? nullptr : output->info());
     ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
-    ICLKernel::configure(win_config.second);
+    ICLKernel::configure_internal(win_config.second);
 
     // Set config_id for enabling LWS tuning
     _config_id = "activation_layer_";
@@ -215,7 +215,7 @@ void CLActivationLayerKernel::run(const Window &window, cl::CommandQueue &queue)
         {
             add_3D_tensor_argument(idx, _output, slice);
         }
-        enqueue(queue, *this, slice, _lws_hint);
+        enqueue(queue, *this, slice, lws_hint());
     }
     while(collapsed.slide_window_slice_3D(slice));
 }
diff --git a/src/core/CL/kernels/CLArithmeticAdditionKernel.cpp b/src/core/CL/kernels/CLArithmeticAdditionKernel.cpp
index 6d6cb6f98c..2372d458cf 100644
--- a/src/core/CL/kernels/CLArithmeticAdditionKernel.cpp
+++ b/src/core/CL/kernels/CLArithmeticAdditionKernel.cpp
@@ -154,7 +154,7 @@ void CLArithmeticAdditionKernel::configure(const ICLTensor *input1, const ICLTen
     // Create kernel
     _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts));
 
-    ICLKernel::configure(win_config.second);
+    ICLKernel::configure_internal(win_config.second);
 }
 
 Status CLArithmeticAdditionKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy)
diff --git a/src/core/CL/kernels/CLArithmeticDivisionKernel.cpp b/src/core/CL/kernels/CLArithmeticDivisionKernel.cpp
index 9bd0da15a3..e995ba1a41 100644
--- a/src/core/CL/kernels/CLArithmeticDivisionKernel.cpp
+++ b/src/core/CL/kernels/CLArithmeticDivisionKernel.cpp
@@ -121,7 +121,7 @@ void CLArithmeticDivisionKernel::configure(const ICLTensor *input1, const ICLTen
     // Create kernel
     _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("arithmetic_div", build_opts));
 
-    ICLKernel::configure(win_config.second);
+    ICLKernel::configure_internal(win_config.second);
 }
 
 Status CLArithmeticDivisionKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output)
diff --git a/src/core/CL/kernels/CLArithmeticSubtractionKernel.cpp b/src/core/CL/kernels/CLArithmeticSubtractionKernel.cpp
index aeee6022a7..299ac553e9 100644
--- a/src/core/CL/kernels/CLArithmeticSubtractionKernel.cpp
+++ b/src/core/CL/kernels/CLArithmeticSubtractionKernel.cpp
@@ -127,7 +127,7 @@ void CLArithmeticSubtractionKernel::configure(const ICLTensor *input1, const ICL
     // Configure kernel window
     auto win_config = validate_and_configure_window(input1->info(), input2->info(), output->info());
     ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
-    ICLKernel::configure(win_config.second);
+    ICLKernel::configure_internal(win_config.second);
 }
 
 Status CLArithmeticSubtractionKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy)
diff --git a/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp b/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp
index 4c93fb28bf..d4a72076c1 100644
--- a/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp
@@ -189,7 +189,7 @@ void CLBatchNormalizationLayerKernel::configure(ICLTensor *input, ICLTensor *out
                                                     (beta != nullptr) ? beta->info() : nullptr,
                                                     (gamma != nullptr) ? gamma->info() : nullptr);
     ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
-    ICLKernel::configure(win_config.second);
+    ICLKernel::configure_internal(win_config.second);
 
     _config_id = "batch_normalization_layer_";
     _config_id += string_from_data_layout(input->info()->data_layout());
@@ -252,7 +252,7 @@ void CLBatchNormalizationLayerKernel::run(const Window &window, cl::CommandQueue
         {
             add_3D_tensor_argument(idx, _output, slice);
         }
-        enqueue(queue, *this, slice, _lws_hint);
+        enqueue(queue, *this, slice, lws_hint());
     }
     while(window.slide_window_slice_3D(slice));
 }
diff --git a/src/core/CL/kernels/CLBitwiseAndKernel.cpp b/src/core/CL/kernels/CLBitwiseAndKernel.cpp
index 5ea4a86da5..dd301cd02e 100644
--- a/src/core/CL/kernels/CLBitwiseAndKernel.cpp
+++ b/src/core/CL/kernels/CLBitwiseAndKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -66,7 +66,7 @@ void CLBitwiseAndKernel::configure(const ICLTensor *input1, const ICLTensor *inp
 
     output_access.set_valid_region(win, valid_region);
 
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
 
 void CLBitwiseAndKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLBitwiseOrKernel.cpp b/src/core/CL/kernels/CLBitwiseOrKernel.cpp
index 2eeef0a993..aa84618258 100644
--- a/src/core/CL/kernels/CLBitwiseOrKernel.cpp
+++ b/src/core/CL/kernels/CLBitwiseOrKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -67,7 +67,7 @@ void CLBitwiseOrKernel::configure(const ICLTensor *input1, const ICLTensor *inpu
 
     output_access.set_valid_region(win, valid_region);
 
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
 
 void CLBitwiseOrKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLBitwiseXorKernel.cpp b/src/core/CL/kernels/CLBitwiseXorKernel.cpp
index c19a78e1c4..ad1f923253 100644
--- a/src/core/CL/kernels/CLBitwiseXorKernel.cpp
+++ b/src/core/CL/kernels/CLBitwiseXorKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -67,7 +67,7 @@ void CLBitwiseXorKernel::configure(const ICLTensor *input1, const ICLTensor *inp
 
     output_access.set_valid_region(win, valid_region);
 
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
 
 void CLBitwiseXorKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLBox3x3Kernel.cpp b/src/core/CL/kernels/CLBox3x3Kernel.cpp
index 0299f6233c..b81697f778 100644
--- a/src/core/CL/kernels/CLBox3x3Kernel.cpp
+++ b/src/core/CL/kernels/CLBox3x3Kernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -73,5 +73,5 @@ void CLBox3x3Kernel::configure(const ICLTensor *input, ICLTensor *output, bool b
 
     output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
 
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
diff --git a/src/core/CL/kernels/CLCannyEdgeKernel.cpp b/src/core/CL/kernels/CLCannyEdgeKernel.cpp
index 5d06d34631..94e5e230f9 100644
--- a/src/core/CL/kernels/CLCannyEdgeKernel.cpp
+++ b/src/core/CL/kernels/CLCannyEdgeKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -77,7 +77,7 @@ void CLGradientKernel::configure(const ICLTensor *gx, const ICLTensor *gy, ICLTe
     mag_access.set_valid_region(win, _gx->info()->valid_region());
     phase_access.set_valid_region(win, _gx->info()->valid_region());
 
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
 
 void CLGradientKernel::run(const Window &window, cl::CommandQueue &queue)
@@ -145,7 +145,7 @@ void CLEdgeNonMaxSuppressionKernel::configure(const ICLTensor *magnitude, const
 
     output_access.set_valid_region(win, _magnitude->info()->valid_region(), border_undefined, border_size());
 
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
 
 void CLEdgeNonMaxSuppressionKernel::run(const Window &window, cl::CommandQueue &queue)
@@ -230,7 +230,7 @@ void CLEdgeTraceKernel::configure(const ICLTensor *input, ICLTensor *output, int
     l1_stack_access.set_valid_region(win, _input->info()->valid_region());
     l1_stack_counter_access.set_valid_region(win, _input->info()->valid_region());
 
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
 
 void CLEdgeTraceKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLChannelCombineKernel.cpp b/src/core/CL/kernels/CLChannelCombineKernel.cpp
index 6e55e666ee..c7b1da41dc 100644
--- a/src/core/CL/kernels/CLChannelCombineKernel.cpp
+++ b/src/core/CL/kernels/CLChannelCombineKernel.cpp
@@ -128,7 +128,7 @@ void CLChannelCombineKernel::configure(const ICLTensor *plane0, const ICLTensor
     }
     output_access.set_valid_region(win, ValidRegion(valid_region.anchor, output->info()->tensor_shape()));
 
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
 
 void CLChannelCombineKernel::configure(const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output)
@@ -232,7 +232,7 @@ void CLChannelCombineKernel::configure(const ICLImage *plane0, const ICLImage *p
     output_plane1_access.set_valid_region(win, ValidRegion(output_plane1_region.anchor, output->plane(1)->info()->tensor_shape()));
     output_plane2_access.set_valid_region(win, ValidRegion(plane2->info()->valid_region().anchor, output->plane(2)->info()->tensor_shape()));
 
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
 
 void CLChannelCombineKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLChannelExtractKernel.cpp b/src/core/CL/kernels/CLChannelExtractKernel.cpp
index 65843b8d5d..8bddba837a 100644
--- a/src/core/CL/kernels/CLChannelExtractKernel.cpp
+++ b/src/core/CL/kernels/CLChannelExtractKernel.cpp
@@ -101,7 +101,7 @@ void CLChannelExtractKernel::configure(const ICLTensor *input, Channel channel,
     ValidRegion input_valid_region = input->info()->valid_region();
     output_access.set_valid_region(win, ValidRegion(input_valid_region.anchor, output->info()->tensor_shape()));
 
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
 
 void CLChannelExtractKernel::configure(const ICLMultiImage *input, Channel channel, ICLImage *output)
@@ -162,7 +162,7 @@ void CLChannelExtractKernel::configure(const ICLMultiImage *input, Channel chann
 
     output_access.set_valid_region(win, input_plane->info()->valid_region());
 
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
 
 void CLChannelExtractKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp b/src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp
index 5f0f0aebf8..be4d68770d 100644
--- a/src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp
+++ b/src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp
@@ -125,7 +125,7 @@ void CLChannelShuffleLayerKernel::configure(const ICLTensor *input, ICLTensor *o
     // Configure kernel window
     auto win_config = validate_and_configure_window(input->info(), output->info());
     ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
-    ICLKernel::configure(win_config.second);
+    ICLKernel::configure_internal(win_config.second);
 }
 
 Status CLChannelShuffleLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int num_groups)
diff --git a/src/core/CL/kernels/CLCol2ImKernel.cpp b/src/core/CL/kernels/CLCol2ImKernel.cpp
index 6274c9082a..6fd3be7f6a 100644
--- a/src/core/CL/kernels/CLCol2ImKernel.cpp
+++ b/src/core/CL/kernels/CLCol2ImKernel.cpp
@@ -111,7 +111,7 @@ void CLCol2ImKernel::configure(const ICLTensor *input, ICLTensor *output, std::p
     // Configure kernel window
     auto win_config = validate_and_configure_window(input->info(), output->info(), _convolved_dims);
     ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
-    ICLKernel::configure(win_config.second);
+    ICLKernel::configure_internal(win_config.second);
 
     // Set config_id for enabling LWS tuning
     _config_id = "col2im_";
@@ -156,7 +156,7 @@ void CLCol2ImKernel::run(const Window &window, cl::CommandQueue &queue)
         unsigned int idx = 0;
         add_2D_tensor_argument(idx, _input, slice);
         add_3D_tensor_argument(idx, _output, slice_out);
-        enqueue(queue, *this, slice, _lws_hint);
+        enqueue(queue, *this, slice, lws_hint());
     }
     while(collapsed_window.slide_window_slice_2D(slice) && out_window.slide_window_slice_3D(slice_out));
 }
diff --git a/src/core/CL/kernels/CLColorConvertKernel.cpp b/src/core/CL/kernels/CLColorConvertKernel.cpp
index 2b894989e1..e79019eab9 100644
--- a/src/core/CL/kernels/CLColorConvertKernel.cpp
+++ b/src/core/CL/kernels/CLColorConvertKernel.cpp
@@ -120,7 +120,7 @@ void CLColorConvertKernel::configure(const ICLTensor *input, ICLTensor *output)
 
     output_access.set_valid_region(win, input->info()->valid_region());
 
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
 
 void CLColorConvertKernel::configure(const ICLMultiImage *input, ICLImage *output)
@@ -189,7 +189,7 @@ void CLColorConvertKernel::configure(const ICLMultiImage *input, ICLImage *outpu
                                                            input->plane(2)->info()->valid_region());
     output_access.set_valid_region(win, ValidRegion(intersect_region.anchor, output->info()->tensor_shape()));
 
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
 
 void CLColorConvertKernel::configure(const ICLImage *input, ICLMultiImage *output)
@@ -285,7 +285,7 @@ void CLColorConvertKernel::configure(const ICLImage *input, ICLMultiImage *outpu
     output_plane1_access.set_valid_region(win, ValidRegion(input_region.anchor, output->plane(1)->info()->tensor_shape()));
     output_plane2_access.set_valid_region(win, ValidRegion(input_region.anchor, output->plane(2)->info()->tensor_shape()));
 
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
 
 void CLColorConvertKernel::configure(const ICLMultiImage *input, ICLMultiImage *output)
@@ -369,7 +369,7 @@ void CLColorConvertKernel::configure(const ICLMultiImage *input, ICLMultiImage *
     output_plane1_access.set_valid_region(win, ValidRegion(intersect_region.anchor, output->plane(1)->info()->tensor_shape()));
     output_plane2_access.set_valid_region(win, ValidRegion(intersect_region.anchor, output->plane(2)->info()->tensor_shape()));
 
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
 
 void CLColorConvertKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp b/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp
index 69ab590540..ace3fd5840 100644
--- a/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp
+++ b/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp
@@ -73,7 +73,7 @@ void CLConvertFullyConnectedWeightsKernel::configure(const ICLTensor *input, ICL
 
     // Configure kernel window
     Window win = calculate_max_window(*input->info(), Steps());
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
 
 Status CLConvertFullyConnectedWeightsKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const TensorShape &original_input_shape,
@@ -109,4 +109,4 @@ void CLConvertFullyConnectedWeightsKernel::run(const Window &window, cl::Command
     add_2D_tensor_argument(idx, _output, window);
     enqueue(queue, *this, window);
 }
-} // namespace arm_compute
\ No newline at end of file
+} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLConvolutionKernel.cpp b/src/core/CL/kernels/CLConvolutionKernel.cpp
index 2b08c8dfba..e6777938a2 100644
--- a/src/core/CL/kernels/CLConvolutionKernel.cpp
+++ b/src/core/CL/kernels/CLConvolutionKernel.cpp
@@ -105,7 +105,7 @@ void CLConvolutionKernel<matrix_size>::configure(const ICLTensor *input, ICLTens
 
     output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
 
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
 
 /****************************************************************************************\
@@ -167,7 +167,7 @@ void CLSeparableConvolutionHorKernel<matrix_size>::configure(const ICLTensor *in
 
     output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
 
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
 
 template <unsigned int matrix_size>
@@ -226,7 +226,7 @@ void CLSeparableConvolutionVertKernel<matrix_size>::configure(const ICLTensor *i
 
     output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
 
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
 
 /****************************************************************************************\
@@ -298,7 +298,7 @@ void CLConvolutionRectangleKernel::configure(const ICLTensor *input, ICLTensor *
 
     output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
 
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
 
 void CLConvolutionRectangleKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLCopyKernel.cpp b/src/core/CL/kernels/CLCopyKernel.cpp
index 1fc8b5bfbe..2da67d2666 100644
--- a/src/core/CL/kernels/CLCopyKernel.cpp
+++ b/src/core/CL/kernels/CLCopyKernel.cpp
@@ -95,7 +95,7 @@ void CLCopyKernel::configure(const ICLTensor *input, ICLTensor *output)
     // Configure kernel window
     auto win_config = validate_and_configure_window(input->info(), output->info());
     ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
-    ICLKernel::configure(win_config.second);
+    ICLKernel::configure_internal(win_config.second);
 }
 
 Status CLCopyKernel::validate(const arm_compute::ITensorInfo *input, const arm_compute::ITensorInfo *output)
diff --git a/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp
index 1feac7d815..c6a0031f4a 100644
--- a/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp
+++ b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp
@@ -85,7 +85,7 @@ void CLDeconvolutionLayerUpsampleKernel::configure(const ICLTensor *input, ICLTe
     AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
     output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
 
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
 
 void CLDeconvolutionLayerUpsampleKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp
index 4055d1c7ab..40023948b1 100644
--- a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp
@@ -125,7 +125,7 @@ void CLDepthConcatenateLayerKernel::configure(const ICLTensor *input, unsigned i
     auto win_config = validate_and_configure_window(input->info(), depth_offset, output->info());
     ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config));
 
-    ICLKernel::configure(std::get<1>(win_config));
+    ICLKernel::configure_internal(std::get<1>(win_config));
 }
 
 Status CLDepthConcatenateLayerKernel::validate(const arm_compute::ITensorInfo *input,
diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp
index e091e5c2cb..a40aa2856c 100644
--- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp
@@ -280,7 +280,7 @@ void CLDepthwiseConvolutionLayer3x3NCHWKernel::configure(const ICLTensor *input,
 
     auto win_config = validate_and_configure_window(input->info(), weights->info(), output->info(), conv_info, depth_multiplier, gpu_target, kernel_name);
     ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
-    ICLKernel::configure(win_config.second);
+    ICLKernel::configure_internal(win_config.second);
 
     _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
 
@@ -345,7 +345,7 @@ void CLDepthwiseConvolutionLayer3x3NCHWKernel::run(const Window &window, cl::Com
         add_3D_tensor_argument(idx, _output, slice_out);
         add_3D_tensor_argument(idx, _weights, slice_weights);
 
-        enqueue(queue, *this, slice_out, _lws_hint);
+        enqueue(queue, *this, slice_out, lws_hint());
     }
     while(window.slide_window_slice_3D(slice_out) && win_in.slide_window_slice_3D(slice_in));
 }
diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp
index 610bfb51dd..63c350d9a5 100644
--- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp
@@ -244,7 +244,7 @@ void CLDepthwiseConvolutionLayer3x3NHWCKernel::configure(const ICLTensor *input,
     // Configure kernel window
     auto win_config = validate_and_configure_window(input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(), conv_info);
     ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
-    ICLKernel::configure(win_config.second);
+    ICLKernel::configure_internal(win_config.second);
 
     // Set config_id for enabling LWS tuning
     _config_id = kernel_name;
@@ -314,7 +314,7 @@ void CLDepthwiseConvolutionLayer3x3NHWCKernel::run(const Window &window, cl::Com
         add_3D_tensor_argument(idx, _output, slice_out);
         add_3D_tensor_argument(idx, _weights, slice_out);
 
-        enqueue(queue, *this, slice_out, _lws_hint);
+        enqueue(queue, *this, slice_out, lws_hint());
     }
     while(window.slide_window_slice_3D(slice_out) && win_in.slide_window_slice_3D(slice_in));
 }
diff --git a/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp b/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp
index cab943629a..d5c333a2c1 100644
--- a/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp
@@ -101,7 +101,7 @@ void CLDepthwiseIm2ColKernel::configure(const ICLTensor *input, ICLTensor *outpu
     // CLDepthwiseIm2ColKernel doesn't need padding so update_window_and_padding() can be skipped
     output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape()));
 
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
 
 Status CLDepthwiseIm2ColKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, unsigned int depth_multiplier)
@@ -135,7 +135,7 @@ void CLDepthwiseIm2ColKernel::run(const Window &window, cl::CommandQueue &queue)
         unsigned int idx = 0;
         add_3D_tensor_argument(idx, _input, slice_in);
         add_3D_tensor_argument(idx, _output, slice);
-        enqueue(queue, *this, slice, _lws_hint);
+        enqueue(queue, *this, slice, lws_hint());
     }
     while(window.slide_window_slice_3D(slice) && window.slide_window_slice_3D(slice_in));
 }
diff --git a/src/core/CL/kernels/CLDepthwiseVectorToTensorKernel.cpp b/src/core/CL/kernels/CLDepthwiseVectorToTensorKernel.cpp
index 67b2cc9f55..cdc27e8ab1 100644
--- a/src/core/CL/kernels/CLDepthwiseVectorToTensorKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseVectorToTensorKernel.cpp
@@ -87,7 +87,7 @@ void CLDepthwiseVectorToTensorKernel::configure(const ICLTensor *input, ICLTenso
     // The CLDepthwisevectorToTensorKernel doesn't need padding so update_window_and_padding() can be skipped
     output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape()));
 
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
 
 Status CLDepthwiseVectorToTensorKernel::validate(const ITensorInfo *input, const ITensorInfo *output, size_t conv_w, size_t conv_h)
diff --git a/src/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.cpp b/src/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.cpp
index c28be3fccf..683dda8d67 100644
--- a/src/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.cpp
@@ -95,7 +95,7 @@ void CLDepthwiseWeightsReshapeKernel::configure(const ICLTensor *input, ICLTenso
     // The CLDepthwiseWeightsReshapeKernel doesn't need padding so update_window_and_padding() can be skipped
     output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape()));
 
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
 
 Status CLDepthwiseWeightsReshapeKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *biases)
diff --git a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp
index fba721f50b..d4c1bec5f4 100644
--- a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp
@@ -96,7 +96,7 @@ void CLDequantizationLayerKernel::configure(const ICLTensor *input, ICLTensor *o
 
     ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config));
 
-    ICLKernel::configure(std::get<1>(win_config));
+    ICLKernel::configure_internal(std::get<1>(win_config));
 }
 
 Status CLDequantizationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *min_max)
diff --git a/src/core/CL/kernels/CLDerivativeKernel.cpp b/src/core/CL/kernels/CLDerivativeKernel.cpp
index 5bfe75140b..af7df14359 100644
--- a/src/core/CL/kernels/CLDerivativeKernel.cpp
+++ b/src/core/CL/kernels/CLDerivativeKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -117,7 +117,7 @@ void CLDerivativeKernel::configure(const ICLTensor *input, ICLTensor *output_x,
     output_x_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
     output_y_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
 
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
 
 void CLDerivativeKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLDilateKernel.cpp b/src/core/CL/kernels/CLDilateKernel.cpp
index 3abd747011..89853d7b19 100644
--- a/src/core/CL/kernels/CLDilateKernel.cpp
+++ b/src/core/CL/kernels/CLDilateKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -61,5 +61,5 @@ void CLDilateKernel::configure(const ICLTensor *input, ICLTensor *output, bool b
 
     output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
 
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
diff --git a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp
index 754f0d8f23..6de97d40af 100644
--- a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp
@@ -442,7 +442,7 @@ void CLDirectConvolutionLayerKernel::configure(const ICLTensor *input, const ICL
     // Configure kernel window
     auto win_config = validate_and_configure_window(input->info(), weights->info(), output->info(), conv_info, gpu_target);
     ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
-    ICLKernel::configure(win_config.second);
+    ICLKernel::configure_internal(win_config.second);
 
     // Set static kernel arguments
     if(is_data_type_quantized_asymmetric(data_type))
@@ -532,7 +532,7 @@ void CLDirectConvolutionLayerKernel::run(const Window &window, cl::CommandQueue
         unsigned int idx = 0;
         add_3D_tensor_argument(idx, _input, slice_in);
         add_3D_tensor_argument(idx, _output, slice);
-        enqueue(queue, *this, slice, _lws_hint);
+        enqueue(queue, *this, slice, lws_hint());
     }
     while(window.slide_window_slice_3D(slice) && win_in.slide_window_slice_3D(slice_in));
 }
diff --git a/src/core/CL/kernels/CLDirectConvolutionOutputStageKernel.cpp b/src/core/CL/kernels/CLDirectConvolutionOutputStageKernel.cpp
index 4e2352cf6e..5f4dacb269 100644
--- a/src/core/CL/kernels/CLDirectConvolutionOutputStageKernel.cpp
+++ b/src/core/CL/kernels/CLDirectConvolutionOutputStageKernel.cpp
@@ -168,7 +168,7 @@ void CLDirectConvolutionLayerOutputStageKernel::configure(ICLTensor *input, cons
     // Configure kernel window
     auto win_config = validate_and_configure_window(input->info(), (bias == nullptr) ? nullptr : bias->info(), (output == nullptr) ? nullptr : output->info());
     ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
-    ICLKernel::configure(win_config.second);
+    ICLKernel::configure_internal(win_config.second);
 }
 
 Status CLDirectConvolutionLayerOutputStageKernel::validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output)
@@ -202,7 +202,7 @@ void CLDirectConvolutionLayerOutputStageKernel::run(const Window &window, cl::Co
         unsigned int idx = 0;
         add_3D_tensor_argument(idx, _input, slice);
         add_3D_tensor_argument(idx, _output, slice);
-        enqueue(queue, *this, slice, _lws_hint);
+        enqueue(queue, *this, slice, lws_hint());
     }
     while(window.slide_window_slice_3D(slice));
 }
diff --git a/src/core/CL/kernels/CLErodeKernel.cpp b/src/core/CL/kernels/CLErodeKernel.cpp
index a7aa88fc5c..e56b71a75e 100644
--- a/src/core/CL/kernels/CLErodeKernel.cpp
+++ b/src/core/CL/kernels/CLErodeKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -61,5 +61,5 @@ void CLErodeKernel::configure(const ICLTensor *input, ICLTensor *output, bool bo
 
     output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
 
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
diff --git a/src/core/CL/kernels/CLFastCornersKernel.cpp b/src/core/CL/kernels/CLFastCornersKernel.cpp
index 616e41b5fc..782ab7a7c0 100644
--- a/src/core/CL/kernels/CLFastCornersKernel.cpp
+++ b/src/core/CL/kernels/CLFastCornersKernel.cpp
@@ -87,7 +87,7 @@ void CLFastCornersKernel::configure(const ICLImage *input, ICLImage *output, flo
 
     output_access.set_valid_region(win, input->info()->valid_region(), border_mode == BorderMode::UNDEFINED, border_size());
 
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
 
 void CLFastCornersKernel::run(const Window &window, cl::CommandQueue &queue)
@@ -148,7 +148,7 @@ void CLCopyToArrayKernel::configure(const ICLImage *input, bool update_number, I
     Window                 win                               = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
     update_window_and_padding(win,
                               AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration));
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
 
 void CLCopyToArrayKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLFillBorderKernel.cpp b/src/core/CL/kernels/CLFillBorderKernel.cpp
index 3b1edaf46c..baf6bb6024 100644
--- a/src/core/CL/kernels/CLFillBorderKernel.cpp
+++ b/src/core/CL/kernels/CLFillBorderKernel.cpp
@@ -154,7 +154,7 @@ void CLFillBorderKernel::configure(ICLTensor *tensor, BorderSize border_size, Bo
     win.set(Window::DimX, Window::Dimension(0, total_valid_width + valid_height));
     win.set(Window::DimY, Window::Dimension(0, 1, 1));
     win.use_tensor_dimensions(tensor->info()->tensor_shape(), Window::DimZ);
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
 
 void CLFillBorderKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLFlattenLayerKernel.cpp b/src/core/CL/kernels/CLFlattenLayerKernel.cpp
index 0b5feffcc9..17189143ef 100644
--- a/src/core/CL/kernels/CLFlattenLayerKernel.cpp
+++ b/src/core/CL/kernels/CLFlattenLayerKernel.cpp
@@ -101,7 +101,7 @@ void CLFlattenLayerKernel::configure(const ICLTensor *input, ICLTensor *output)
     // Configure kernel window
     auto win_config = validate_and_configure_window(input->info(), output->info());
     ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
-    ICLKernel::configure(win_config.second);
+    ICLKernel::configure_internal(win_config.second);
 
     // Set config_id for enabling LWS tuning
     _config_id = "flatten";
@@ -144,8 +144,8 @@ void CLFlattenLayerKernel::run(const Window &window, cl::CommandQueue &queue)
         unsigned int idx = 0;
         add_3D_tensor_argument(idx, _input, in_slice);
         add_1D_tensor_argument(idx, _output, out_slice);
-        enqueue(queue, *this, in_slice, _lws_hint);
+        enqueue(queue, *this, in_slice, lws_hint());
     }
     while(window.slide_window_slice_3D(in_slice) && out_window.slide_window_slice_1D(out_slice));
 }
-} // namespace arm_compute
\ No newline at end of file
+} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLFloorKernel.cpp b/src/core/CL/kernels/CLFloorKernel.cpp
index f6b0e829a0..20e3a3a66f 100644
--- a/src/core/CL/kernels/CLFloorKernel.cpp
+++ b/src/core/CL/kernels/CLFloorKernel.cpp
@@ -69,7 +69,7 @@ void CLFloorKernel::configure(const ICLTensor *input, ICLTensor *output)
     update_window_and_padding(win, input_access, output_access);
     output_access.set_valid_region(win, input->info()->valid_region());
 
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
 
 void CLFloorKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp b/src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp
index 6ea1160c69..ae54e77972 100644
--- a/src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp
+++ b/src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp
@@ -157,7 +157,7 @@ void CLGEMMInterleave4x4Kernel::configure(const ICLTensor *input, ICLTensor *out
     // Configure kernel window
     auto win_config = validate_and_configure_window(input->info(), output->info(), mult_interleave4x4_height, reinterpret_input_as_3d);
     ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
-    ICLKernel::configure(win_config.second);
+    ICLKernel::configure_internal(win_config.second);
 
     // Set config_id for enabling LWS tuning
     _config_id = "interleave4x4_";
@@ -210,7 +210,7 @@ void CLGEMMInterleave4x4Kernel::run(const Window &window, cl::CommandQueue &queu
         unsigned int idx = 0;
         add_3D_tensor_argument(idx, _input, slice);
         add_3D_tensor_argument(idx, _output, slice);
-        enqueue(queue, *this, slice, _lws_hint);
+        enqueue(queue, *this, slice, lws_hint());
     }
     while(window.slide_window_slice_3D(slice));
 }
diff --git a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp
index 8a4a1b5820..9adf95fa33 100644
--- a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp
@@ -188,7 +188,7 @@ void CLGEMMLowpMatrixMultiplyKernel::configure(const ICLTensor *input0, const IC
     // Configure kernel window
     auto win_config = validate_and_configure_window(input0->info(), input1->info(), output->info(), is_interleaved_transposed, num_elements_processed);
     ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
-    ICLKernel::configure(win_config.second);
+    ICLKernel::configure_internal(win_config.second);
 
     const bool is_dot8_supported = dot8_supported(CLKernelLibrary::get().get_device());
 
@@ -273,7 +273,7 @@ void CLGEMMLowpMatrixMultiplyKernel::run(const Window &window, cl::CommandQueue
         add_2D_tensor_argument(idx, _input0, slice);
         add_2D_tensor_argument(idx, _input1, slice_b);
         add_2D_tensor_argument(idx, _output, slice);
-        enqueue(queue, *this, slice, _lws_hint);
+        enqueue(queue, *this, slice, lws_hint());
     }
     while(window.slide_window_slice_2D(slice));
 }
diff --git a/src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp b/src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp
index 221a1566b9..aa954abde1 100644
--- a/src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp
@@ -159,7 +159,7 @@ void CLGEMMLowpOffsetContributionKernel::configure(ICLTensor *mm_result, const I
                                                     vector_sum_row != nullptr ? vector_sum_row->info() : nullptr,
                                                     a_offset, b_offset); // NOLINT
     ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
-    ICLKernel::configure(win_config.second);
+    ICLKernel::configure_internal(win_config.second);
 
     // Set config_id for enabling LWS tuning
     _config_id = "gemmlowp_offset_contribution_";
diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp
index ff2fc646aa..875e26d6cb 100644
--- a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -146,7 +146,7 @@ void CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel::configure(const
     // Configure kernel window
     auto win_config = validate_and_configure_window(input->info(), (bias != nullptr) ? bias->info() : nullptr, output->info());
     ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
-    ICLKernel::configure(win_config.second);
+    ICLKernel::configure_internal(win_config.second);
 }
 
 void CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel::run(const Window &window, cl::CommandQueue &queue)
@@ -174,4 +174,4 @@ void CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel::run(const Window
         enqueue(queue, *this, slice);
     }
     while(collapsed.slide_window_slice_3D(slice));
-}
\ No newline at end of file
+}
diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp
index 151a6588d5..57891131c7 100644
--- a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -145,7 +145,7 @@ void CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel::configure(const ICLTensor *i
     // Configure kernel window
     auto win_config = validate_and_configure_window(input->info(), (bias != nullptr) ? bias->info() : nullptr, output->info());
     ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
-    ICLKernel::configure(win_config.second);
+    ICLKernel::configure_internal(win_config.second);
 }
 
 void CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp b/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp
index 6951512167..cd26cd1597 100644
--- a/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -121,7 +121,7 @@ void CLGEMMLowpMatrixAReductionKernel::configure(const ICLTensor *mtx_a, ICLTens
     // Configure kernel window
     auto win_config = validate_and_configure_window_matrix_a_reduction(_input->info(), _output->info());
     ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
-    ICLKernel::configure(win_config.second);
+    ICLKernel::configure_internal(win_config.second);
 }
 
 Status CLGEMMLowpMatrixAReductionKernel::validate(const ITensorInfo *mtx_a, const ITensorInfo *vector_sum_row)
@@ -175,7 +175,7 @@ void CLGEMMLowpMatrixBReductionKernel::configure(const ICLTensor *mtx_b, ICLTens
     // Configure kernel window
     auto win_config = validate_and_configure_window_matrix_b_reduction(_input->info(), _output->info());
     ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
-    ICLKernel::configure(win_config.second);
+    ICLKernel::configure_internal(win_config.second);
 }
 
 Status CLGEMMLowpMatrixBReductionKernel::validate(const ITensorInfo *mtx_b, const ITensorInfo *vector_sum_col)
diff --git a/src/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.cpp
index ebe4013bf0..2f1f1bf865 100644
--- a/src/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.cpp
@@ -88,7 +88,7 @@ void CLGEMMMatrixAccumulateBiasesKernel::configure(ICLTensor *accum, const ICLTe
     // Configure kernel window
     auto win_config = validate_and_configure_window(accum->info(), biases->info(), gpu_target, vector_size);
     ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
-    ICLKernel::configure(win_config.second);
+    ICLKernel::configure_internal(win_config.second);
 
     // Add build options
     CLBuildOptions build_opts;
@@ -126,7 +126,7 @@ void CLGEMMMatrixAccumulateBiasesKernel::run(const Window &window, cl::CommandQu
         add_2D_tensor_argument(idx, _accum, accum_slice);
         add_1D_tensor_argument(idx, _biases, biases_slice);
 
-        enqueue(queue, *this, accum_slice, _lws_hint);
+        enqueue(queue, *this, accum_slice, lws_hint());
     }
     while(window.slide_window_slice_2D(accum_slice));
 }
diff --git a/src/core/CL/kernels/CLGEMMMatrixAdditionKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixAdditionKernel.cpp
index bcc3a01296..0c65bb40c0 100644
--- a/src/core/CL/kernels/CLGEMMMatrixAdditionKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixAdditionKernel.cpp
@@ -98,7 +98,7 @@ void CLGEMMMatrixAdditionKernel::configure(const ICLTensor *input, ICLTensor *ou
     // Configure kernel window
     auto win_config = validate_and_configure_window(input->info(), output->info());
     ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
-    ICLKernel::configure(win_config.second);
+    ICLKernel::configure_internal(win_config.second);
 }
 
 Status CLGEMMMatrixAdditionKernel::validate(const ITensorInfo *input, const ITensorInfo *output, float beta)
diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp
index 79e2f8b11a..8530ed2fd3 100644
--- a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp
@@ -253,7 +253,7 @@ void CLGEMMMatrixMultiplyKernel::configure(const ICLTensor *input0, const ICLTen
     // Configure kernel window
     auto win_config = validate_and_configure_window(input0->info(), input1->info(), output->info(), is_interleaved_transposed, reshape_info, gpu_target, num_elements_processed);
     ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
-    ICLKernel::configure(win_config.second);
+    ICLKernel::configure_internal(win_config.second);
 
     // Create build options
     CLBuildOptions build_opts;
@@ -316,7 +316,7 @@ void CLGEMMMatrixMultiplyKernel::configure(const ICLTensor *input0, const ICLTen
 
             // The work-group size equal to the Bifrost quad size has been proved to be optimal for these kernels
             // via exhaustive autotuning over a range of representative layer configurations.
-            _lws_hint = cl::NDRange(4);
+            set_lws_hint(cl::NDRange(4));
         }
         else // (MIDGARD and F32) or (F16)
         {
@@ -416,7 +416,7 @@ void CLGEMMMatrixMultiplyKernel::run(const Window &window, cl::CommandQueue &que
         _kernel.setArg<cl_uint>(idx++, static_cast<unsigned int>(_input0->info()->strides_in_bytes()[2]));
         _kernel.setArg<cl_uint>(idx++, static_cast<unsigned int>(_input1->info()->strides_in_bytes()[2]));
         _kernel.setArg<cl_uint>(idx++, static_cast<unsigned int>(_output->info()->strides_in_bytes()[2]));
-        enqueue(queue, *this, slice, _lws_hint);
+        enqueue(queue, *this, slice, lws_hint());
     }
     while(window.slide_window_slice_3D(slice));
 }
diff --git a/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp
index 43a6cf25db..11a4292270 100644
--- a/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp
@@ -121,7 +121,7 @@ void CLGEMMMatrixVectorMultiplyKernel::configure(const ICLTensor *input0, const
 
     auto win_config = validate_and_configure_window(input0->info(), input1->info(), output->info());
     ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
-    ICLKernel::configure(win_config.second);
+    ICLKernel::configure_internal(win_config.second);
 }
 
 Status CLGEMMMatrixVectorMultiplyKernel::validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output)
@@ -165,7 +165,7 @@ void CLGEMMMatrixVectorMultiplyKernel::run(const Window &window, cl::CommandQueu
         unsigned int idx_2 = num_arguments_per_3D_tensor() + num_arguments_per_2D_tensor();
         add_3D_tensor_argument(idx_0, _input0, slice_in);
         add_1D_tensor_argument(idx_2, _output, slice_out);
-        enqueue(queue, *this, slice_in, _lws_hint);
+        enqueue(queue, *this, slice_in, lws_hint());
     }
     while(window.slide_window_slice_3D(slice_in) && window.slide_window_slice_3D(slice_out));
 }
diff --git a/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp b/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp
index 7e44fa7118..5b299052d4 100644
--- a/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp
@@ -107,7 +107,7 @@ void CLGEMMTranspose1xWKernel::configure(const ICLTensor *input, ICLTensor *outp
     unsigned int num_elems_processed_per_iteration = 1;
     auto         win_config                        = validate_and_configure_window(input->info(), output->info(), num_elems_processed_per_iteration, mult_transpose1xW_width);
     ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
-    ICLKernel::configure(win_config.second);
+    ICLKernel::configure_internal(win_config.second);
 
     // Create build options
     CLBuildOptions build_opts;
@@ -157,7 +157,7 @@ void CLGEMMTranspose1xWKernel::run(const Window &window, cl::CommandQueue &queue
         unsigned int idx = 0;
         add_3D_tensor_argument(idx, _input, in_slice);
         add_3D_tensor_argument(idx, _output, out_slice);
-        enqueue(queue, *this, in_slice, _lws_hint);
+        enqueue(queue, *this, in_slice, lws_hint());
     }
     while(window.slide_window_slice_3D(in_slice) && out_window.slide_window_slice_3D(out_slice));
 }
diff --git a/src/core/CL/kernels/CLGaussian3x3Kernel.cpp b/src/core/CL/kernels/CLGaussian3x3Kernel.cpp
index e5bc3f9656..7e8f3139f2 100644
--- a/src/core/CL/kernels/CLGaussian3x3Kernel.cpp
+++ b/src/core/CL/kernels/CLGaussian3x3Kernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -72,5 +72,5 @@ void CLGaussian3x3Kernel::configure(const ICLTensor *input, ICLTensor *output, b
 
     output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
 
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
diff --git a/src/core/CL/kernels/CLGaussianPyramidKernel.cpp b/src/core/CL/kernels/CLGaussianPyramidKernel.cpp
index a4fda364e3..6b729c8585 100644
--- a/src/core/CL/kernels/CLGaussianPyramidKernel.cpp
+++ b/src/core/CL/kernels/CLGaussianPyramidKernel.cpp
@@ -95,7 +95,7 @@ void CLGaussianPyramidHorKernel::configure(const ICLTensor *input, ICLTensor *ou
 
     output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape()));
 
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
 
 void CLGaussianPyramidHorKernel::run(const Window &window, cl::CommandQueue &queue)
@@ -177,7 +177,7 @@ void CLGaussianPyramidVertKernel::configure(const ICLTensor *input, ICLTensor *o
 
     output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape()));
 
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
 
 void CLGaussianPyramidVertKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLHOGDescriptorKernel.cpp b/src/core/CL/kernels/CLHOGDescriptorKernel.cpp
index a15aab1f37..26c3b81175 100644
--- a/src/core/CL/kernels/CLHOGDescriptorKernel.cpp
+++ b/src/core/CL/kernels/CLHOGDescriptorKernel.cpp
@@ -91,7 +91,7 @@ void CLHOGOrientationBinningKernel::configure(const ICLTensor *input_magnitude,
 
     output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape()));
 
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
 
 void CLHOGOrientationBinningKernel::run(const Window &window, cl::CommandQueue &queue)
@@ -174,7 +174,7 @@ void CLHOGBlockNormalizationKernel::configure(const ICLTensor *input, ICLTensor
 
     output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
 
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
 
 void CLHOGBlockNormalizationKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLHOGDetectorKernel.cpp b/src/core/CL/kernels/CLHOGDetectorKernel.cpp
index caca49846f..12bbbaf9f2 100644
--- a/src/core/CL/kernels/CLHOGDetectorKernel.cpp
+++ b/src/core/CL/kernels/CLHOGDetectorKernel.cpp
@@ -110,7 +110,7 @@ void CLHOGDetectorKernel::configure(const ICLTensor *input, const ICLHOG *hog, I
 
     update_window_and_padding(win, AccessWindowRectangle(input->info(), 0, 0, num_elems_read_per_iteration, num_rows_read_per_iteration));
 
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
 
 void CLHOGDetectorKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLHarrisCornersKernel.cpp b/src/core/CL/kernels/CLHarrisCornersKernel.cpp
index 1f757fe34c..5320b6bebc 100644
--- a/src/core/CL/kernels/CLHarrisCornersKernel.cpp
+++ b/src/core/CL/kernels/CLHarrisCornersKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -106,7 +106,7 @@ void CLHarrisScoreKernel::configure(const ICLImage *input1, const ICLImage *inpu
     ValidRegion valid_region = intersect_valid_regions(input1->info()->valid_region(), input2->info()->valid_region());
     output_access.set_valid_region(win, valid_region, border_undefined, border_size());
 
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
 
 void CLHarrisScoreKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLHistogramKernel.cpp b/src/core/CL/kernels/CLHistogramKernel.cpp
index 7b715abb36..b56ad8d38d 100644
--- a/src/core/CL/kernels/CLHistogramKernel.cpp
+++ b/src/core/CL/kernels/CLHistogramKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -107,7 +107,7 @@ void CLHistogramKernel::configure(const ICLImage *input, ICLDistribution1D *outp
 
     update_window_and_padding(win, AccessWindowHorizontal(input->info(), 0, pixels_per_item));
 
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
 
 void CLHistogramKernel::run(const Window &window, cl::CommandQueue &queue)
@@ -198,7 +198,7 @@ void CLHistogramBorderKernel::configure(const ICLImage *input, ICLDistribution1D
     win.set(0, Window::Dimension(start_position, _input->info()->dimension(0)));
     win.set(1, Window::Dimension(0, _input->info()->dimension(1)));
     update_window_and_padding(win, AccessWindowHorizontal(input->info(), 0, 1));
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
 
 void CLHistogramBorderKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLIm2ColKernel.cpp b/src/core/CL/kernels/CLIm2ColKernel.cpp
index 39654e2190..42bb96c16f 100644
--- a/src/core/CL/kernels/CLIm2ColKernel.cpp
+++ b/src/core/CL/kernels/CLIm2ColKernel.cpp
@@ -308,7 +308,7 @@ void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const
     auto win_config = validate_and_configure_window(input->info(), output->info(), kernel_dims, conv_info, has_bias, dilation, im2col_config.num_elems_processed_per_iteration,
                                                     im2col_config.is_padding_required_nchw);
     ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
-    ICLKernel::configure(win_config.second);
+    ICLKernel::configure_internal(win_config.second);
 
     // Set config_id for enabling LWS tuning
     _config_id = im2col_config.kernel_name;
@@ -386,7 +386,7 @@ void CLIm2ColKernel::run(const Window &window, cl::CommandQueue &queue)
         add_2D_tensor_argument(idx, _output, slice_out);
         _kernel.setArg<cl_uint>(idx++, static_cast<unsigned int>(_input->info()->strides_in_bytes()[3]));
         _kernel.setArg<cl_uint>(idx++, static_cast<unsigned int>(_output->info()->strides_in_bytes()[2]));
-        enqueue(queue, *this, slice, _lws_hint);
+        enqueue(queue, *this, slice, lws_hint());
     }
     while(window_collapsed.slide_window_slice_3D(slice) && window_output.slide_window_slice_2D(slice_out) && window_collapsed.slide_window_slice_3D(slice_in));
-}
\ No newline at end of file
+}
diff --git a/src/core/CL/kernels/CLIntegralImageKernel.cpp b/src/core/CL/kernels/CLIntegralImageKernel.cpp
index 69ede457df..6fb39ff0a2 100644
--- a/src/core/CL/kernels/CLIntegralImageKernel.cpp
+++ b/src/core/CL/kernels/CLIntegralImageKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -60,7 +60,7 @@ void CLIntegralImageHorKernel::configure(const ICLTensor *input, ICLTensor *outp
 
     output_access.set_valid_region(win, input->info()->valid_region());
 
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
 
 CLIntegralImageVertKernel::CLIntegralImageVertKernel()
@@ -89,7 +89,7 @@ void CLIntegralImageVertKernel::configure(ICLTensor *in_out)
 
     in_out_access.set_valid_region(win, in_out->info()->valid_region());
 
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
 
 void CLIntegralImageVertKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp b/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp
index 39d9f958d3..54ed51eda2 100644
--- a/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp
+++ b/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp
@@ -120,7 +120,7 @@ void CLL2NormalizeLayerKernel::configure(const ICLTensor *input, const ICLTensor
     auto win_config = validate_and_configure_window(_input->info(), _output->info());
     ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config));
 
-    ICLKernel::configure(std::get<1>(win_config));
+    ICLKernel::configure_internal(std::get<1>(win_config));
 }
 
 Status CLL2NormalizeLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, unsigned int axis, float epsilon)
diff --git a/src/core/CL/kernels/CLLKTrackerKernel.cpp b/src/core/CL/kernels/CLLKTrackerKernel.cpp
index 078d18e61c..40ed630c89 100644
--- a/src/core/CL/kernels/CLLKTrackerKernel.cpp
+++ b/src/core/CL/kernels/CLLKTrackerKernel.cpp
@@ -75,7 +75,7 @@ void CLLKTrackerInitKernel::configure(const ICLKeyPointArray *old_points, const
     Window window;
     window.set(Window::DimX, Window::Dimension(0, old_points->num_values(), 1));
     window.set(Window::DimY, Window::Dimension(0, 1, 1));
-    ICLKernel::configure(window);
+    ICLKernel::configure_internal(window);
 }
 
 void CLLKTrackerInitKernel::run(const Window &window, cl::CommandQueue &queue)
@@ -104,7 +104,7 @@ void CLLKTrackerFinalizeKernel::configure(ICLLKInternalKeypointArray *new_points
     Window window;
     window.set(Window::DimX, Window::Dimension(0, new_points_internal->num_values(), 1));
     window.set(Window::DimY, Window::Dimension(0, 1, 1));
-    ICLKernel::configure(window);
+    ICLKernel::configure_internal(window);
 }
 
 void CLLKTrackerFinalizeKernel::run(const Window &window, cl::CommandQueue &queue)
@@ -156,7 +156,7 @@ void CLLKTrackerStage0Kernel::configure(const ICLTensor *old_input, const ICLTen
                               AccessWindowStatic(old_scharr_gy->info(), valid_region.start(0), valid_region.start(1),
                                                  valid_region.end(0), valid_region.end(1)));
 
-    ICLKernel::configure(window);
+    ICLKernel::configure_internal(window);
 
     // Initialize required variables
     const int       level0              = (level == 0) ? 1 : 0;
@@ -232,7 +232,7 @@ void CLLKTrackerStage1Kernel::configure(const ICLTensor *new_input, ICLLKInterna
                               AccessWindowStatic(new_input->info(), valid_region.start(0), valid_region.start(1),
                                                  valid_region.end(0), valid_region.end(1)));
 
-    ICLKernel::configure(window);
+    ICLKernel::configure_internal(window);
 
     // Initialize required variables
     const int       level0              = (level == 0) ? 1 : 0;
diff --git a/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp b/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp
index 1a7d95cc2c..ad2f3a4892 100644
--- a/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp
+++ b/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp
@@ -90,13 +90,14 @@ void CLLocallyConnectedMatrixMultiplyKernel::configure(const ICLTensor *input0,
     _input1 = input1;
     _output = output;
 
+    cl::NDRange lws_hint;
     if(output->info()->dimension(1) == 196)
     {
-        _lws_hint = cl::NDRange(1, 7);
+        lws_hint = cl::NDRange(1, 7);
     }
     else
     {
-        _lws_hint = cl::NDRange(8, 8);
+        lws_hint = cl::NDRange(8, 8);
     }
 
     std::ostringstream    mm_arguments;
@@ -114,7 +115,7 @@ void CLLocallyConnectedMatrixMultiplyKernel::configure(const ICLTensor *input0,
 
     ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config));
 
-    ICLKernel::configure(std::get<1>(win_config));
+    ICLKernel::configure_internal(std::get<1>(win_config), lws_hint);
 }
 
 Status CLLocallyConnectedMatrixMultiplyKernel::validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output)
@@ -142,7 +143,7 @@ void CLLocallyConnectedMatrixMultiplyKernel::run(const Window &window, cl::Comma
         add_2D_tensor_argument(idx, _input0, slice);
         add_3D_tensor_argument(idx, _input1, slice_matrix_b);
         add_2D_tensor_argument(idx, _output, slice);
-        enqueue(queue, *this, slice, _lws_hint);
+        enqueue(queue, *this, slice, lws_hint());
     }
     while(window.slide_window_slice_2D(slice));
 }
diff --git a/src/core/CL/kernels/CLMagnitudePhaseKernel.cpp b/src/core/CL/kernels/CLMagnitudePhaseKernel.cpp
index c504189169..0b34c59a03 100644
--- a/src/core/CL/kernels/CLMagnitudePhaseKernel.cpp
+++ b/src/core/CL/kernels/CLMagnitudePhaseKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -137,7 +137,7 @@ void CLMagnitudePhaseKernel::configure(const ICLTensor *gx, const ICLTensor *gy,
     output_magnitude_access.set_valid_region(win, valid_region);
     output_phase_access.set_valid_region(win, valid_region);
 
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
 
 void CLMagnitudePhaseKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLMeanStdDevKernel.cpp b/src/core/CL/kernels/CLMeanStdDevKernel.cpp
index bd31131fe5..0cde9c5fe6 100644
--- a/src/core/CL/kernels/CLMeanStdDevKernel.cpp
+++ b/src/core/CL/kernels/CLMeanStdDevKernel.cpp
@@ -106,7 +106,7 @@ void CLMeanStdDevKernel::configure(const ICLImage *input, float *mean, cl::Buffe
     AccessWindowRectangle input_access(input->info(), 0, 0, num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y);
     update_window_and_padding(win, input_access);
 
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
 
 void CLMeanStdDevKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLMedian3x3Kernel.cpp b/src/core/CL/kernels/CLMedian3x3Kernel.cpp
index 3b9fb1fe88..b93179d5f4 100644
--- a/src/core/CL/kernels/CLMedian3x3Kernel.cpp
+++ b/src/core/CL/kernels/CLMedian3x3Kernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -62,5 +62,5 @@ void CLMedian3x3Kernel::configure(const ICLTensor *input, ICLTensor *output, boo
 
     output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
 
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
diff --git a/src/core/CL/kernels/CLMinMaxLayerKernel.cpp b/src/core/CL/kernels/CLMinMaxLayerKernel.cpp
index 9493ddc878..fa7b678e86 100644
--- a/src/core/CL/kernels/CLMinMaxLayerKernel.cpp
+++ b/src/core/CL/kernels/CLMinMaxLayerKernel.cpp
@@ -105,7 +105,7 @@ void CLMinMaxLayerKernel::configure(const ICLTensor *input, ICLTensor *output)
 
     ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config));
 
-    ICLKernel::configure(std::get<1>(win_config));
+    ICLKernel::configure_internal(std::get<1>(win_config));
 }
 
 Status CLMinMaxLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output)
diff --git a/src/core/CL/kernels/CLMinMaxLocationKernel.cpp b/src/core/CL/kernels/CLMinMaxLocationKernel.cpp
index 5636592347..0c7f3bc070 100644
--- a/src/core/CL/kernels/CLMinMaxLocationKernel.cpp
+++ b/src/core/CL/kernels/CLMinMaxLocationKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -118,7 +118,7 @@ void CLMinMaxKernel::configure(const ICLImage *input, cl::Buffer *min_max)
     // Configure kernel window
     Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
     update_window_and_padding(win, AccessWindowHorizontal(input->info(), 0, ceil_to_multiple(num_elems_processed_per_iteration, 16)));
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
 
 void CLMinMaxKernel::run(const Window &window, cl::CommandQueue &queue)
@@ -209,7 +209,7 @@ void CLMinMaxLocationKernel::configure(const ICLImage *input, cl::Buffer *min_ma
     constexpr unsigned int num_elems_processed_per_iteration = 1;
     Window                 win                               = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
     update_window_and_padding(win, AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration));
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
 
 void CLMinMaxLocationKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLNonLinearFilterKernel.cpp b/src/core/CL/kernels/CLNonLinearFilterKernel.cpp
index 6afa5822ba..5e419743d0 100644
--- a/src/core/CL/kernels/CLNonLinearFilterKernel.cpp
+++ b/src/core/CL/kernels/CLNonLinearFilterKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -94,5 +94,5 @@ void CLNonLinearFilterKernel::configure(const ICLTensor *input, ICLTensor *outpu
 
     output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
 
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
diff --git a/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp b/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp
index 6a96b0effd..4e41f0df42 100644
--- a/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp
+++ b/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -68,5 +68,5 @@ void CLNonMaximaSuppression3x3Kernel::configure(const ICLTensor *input, ICLTenso
 
     output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
 
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
diff --git a/src/core/CL/kernels/CLNormalizationLayerKernel.cpp b/src/core/CL/kernels/CLNormalizationLayerKernel.cpp
index edc9e9d58c..8a7b7aed22 100644
--- a/src/core/CL/kernels/CLNormalizationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLNormalizationLayerKernel.cpp
@@ -135,7 +135,7 @@ void CLNormalizationLayerKernel::configure(const ICLTensor *input, ICLTensor *ou
     // Configure kernel window
     auto win_config = validate_and_configure_window(input->info(), output->info(), norm_info);
     ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
-    ICLKernel::configure(win_config.second);
+    ICLKernel::configure_internal(win_config.second);
 
     // Set config_id for enabling LWS tuning
     _config_id = "normalization_layer_";
diff --git a/src/core/CL/kernels/CLPermuteKernel.cpp b/src/core/CL/kernels/CLPermuteKernel.cpp
index 7c0c95be1c..c6f0f4bc55 100644
--- a/src/core/CL/kernels/CLPermuteKernel.cpp
+++ b/src/core/CL/kernels/CLPermuteKernel.cpp
@@ -120,7 +120,7 @@ void CLPermuteKernel::configure(const ICLTensor *input, ICLTensor *output, const
     coord.set_num_dimensions(output->info()->num_dimensions());
     output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape()));
 
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
 
 Status CLPermuteKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const PermutationVector &perm)
diff --git a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp
index 4ea093fe04..4ca2ef8aa3 100644
--- a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp
+++ b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp
@@ -197,7 +197,7 @@ void CLPixelWiseMultiplicationKernel::configure(const ICLTensor *input1, const I
         _kernel.setArg(idx++, scale);
     }
 
-    ICLKernel::configure(win_config.second);
+    ICLKernel::configure_internal(win_config.second);
 }
 
 Status CLPixelWiseMultiplicationKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float scale,
diff --git a/src/core/CL/kernels/CLPoolingLayerKernel.cpp b/src/core/CL/kernels/CLPoolingLayerKernel.cpp
index d5ea092c78..df13068239 100644
--- a/src/core/CL/kernels/CLPoolingLayerKernel.cpp
+++ b/src/core/CL/kernels/CLPoolingLayerKernel.cpp
@@ -269,7 +269,7 @@ void CLPoolingLayerKernel::configure(const ICLTensor *input, ICLTensor *output,
     auto win_config = validate_and_configure_window(input->info(), output->info(), pool_info);
 
     ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config));
-    ICLKernel::configure(std::get<1>(win_config));
+    ICLKernel::configure_internal(std::get<1>(win_config));
 
     if(data_layout == DataLayout::NCHW)
     {
@@ -336,7 +336,7 @@ void CLPoolingLayerKernel::run(const Window &window, cl::CommandQueue &queue)
                 unsigned int idx = 0;
                 add_3D_tensor_argument(idx, _input, in_slice);
                 add_3D_tensor_argument(idx, _output, slice);
-                enqueue(queue, *this, slice, _lws_hint);
+                enqueue(queue, *this, slice, lws_hint());
             }
             while(window_collapsed.slide_window_slice_3D(slice));
             break;
@@ -355,7 +355,7 @@ void CLPoolingLayerKernel::run(const Window &window, cl::CommandQueue &queue)
                 unsigned int idx = 0;
                 add_3D_tensor_argument(idx, _input, in_slice);
                 add_3D_tensor_argument(idx, _output, slice);
-                enqueue(queue, *this, slice, _lws_hint);
+                enqueue(queue, *this, slice, lws_hint());
             }
             while(window.slide_window_slice_3D(slice) && window.slide_window_slice_3D(in_slice));
             break;
diff --git a/src/core/CL/kernels/CLQuantizationLayerKernel.cpp b/src/core/CL/kernels/CLQuantizationLayerKernel.cpp
index af751f4832..9028b0f604 100644
--- a/src/core/CL/kernels/CLQuantizationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLQuantizationLayerKernel.cpp
@@ -96,7 +96,7 @@ void CLQuantizationLayerKernel::configure(const ICLTensor *input, ICLTensor *out
 
     ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config));
 
-    ICLKernel::configure(std::get<1>(win_config));
+    ICLKernel::configure_internal(std::get<1>(win_config));
 }
 
 Status CLQuantizationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *min_max)
diff --git a/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp b/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp
index 4048e927f5..23676942a6 100644
--- a/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp
+++ b/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp
@@ -101,7 +101,7 @@ void CLROIPoolingLayerKernel::configure(const ICLTensor *input, const ICLROIArra
 
     update_window_and_padding(window, input_access, output_access);
     output_access.set_valid_region(window, ValidRegion(Coordinates(), output->info()->tensor_shape()));
-    ICLKernel::configure(window);
+    ICLKernel::configure_internal(window);
 }
 
 void CLROIPoolingLayerKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLReductionOperationKernel.cpp b/src/core/CL/kernels/CLReductionOperationKernel.cpp
index 09861630ac..bf36ae2c0f 100644
--- a/src/core/CL/kernels/CLReductionOperationKernel.cpp
+++ b/src/core/CL/kernels/CLReductionOperationKernel.cpp
@@ -114,8 +114,8 @@ void CLReductionOperationKernel::configure(const ICLTensor *input, ICLTensor *ou
 
     // Set the number of WG based on the input size. If input width is < 128
     // we can use fewer threads than 8.
-    _lws_hint    = cl::NDRange(std::min(8U, num_of_threads));
-    _border_size = BorderSize(0, border_width, 0, 0);
+    cl::NDRange lws_hint = cl::NDRange(std::min(8U, num_of_threads));
+    _border_size         = BorderSize(0, border_width, 0, 0);
 
     // Set build options
     std::set<std::string> build_opts;
@@ -142,7 +142,7 @@ void CLReductionOperationKernel::configure(const ICLTensor *input, ICLTensor *ou
 
     ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config));
 
-    ICLKernel::configure(std::get<1>(win_config));
+    ICLKernel::configure_internal(std::get<1>(win_config), lws_hint);
 }
 
 Status CLReductionOperationKernel::validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op)
@@ -171,7 +171,7 @@ void CLReductionOperationKernel::run(const Window &window, cl::CommandQueue &que
     in_slice.set(Window::DimX, Window::Dimension(in_slice.x().start(), in_slice.x().end() + border_width, in_slice.x().step()));
 
     // Set local sums buffer
-    unsigned int local_sum_size = _lws_hint[0] * _input->info()->element_size();
+    unsigned int local_sum_size = lws_hint()[0] * _input->info()->element_size();
     _kernel.setArg(num_arguments_per_2D_tensor() * 2, local_sum_size, nullptr);
 
     do
@@ -179,7 +179,7 @@ void CLReductionOperationKernel::run(const Window &window, cl::CommandQueue &que
         unsigned int idx = 0;
         add_2D_tensor_argument(idx, _input, in_slice);
         add_2D_tensor_argument(idx, _output, out_slice);
-        enqueue(queue, *this, in_slice, _lws_hint);
+        enqueue(queue, *this, in_slice, lws_hint());
     }
     while(window.slide_window_slice_2D(in_slice) && window.slide_window_slice_2D(out_slice));
 }
diff --git a/src/core/CL/kernels/CLRemapKernel.cpp b/src/core/CL/kernels/CLRemapKernel.cpp
index b46bb30c59..33c5f2d402 100644
--- a/src/core/CL/kernels/CLRemapKernel.cpp
+++ b/src/core/CL/kernels/CLRemapKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -83,7 +83,7 @@ void CLRemapKernel::configure(const ICLTensor *input, const ICLTensor *map_x, co
 
     output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
 
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 
     // Set static arguments
     unsigned int idx = 4 * num_arguments_per_2D_tensor(); //Skip the input and output parameters
diff --git a/src/core/CL/kernels/CLReshapeLayerKernel.cpp b/src/core/CL/kernels/CLReshapeLayerKernel.cpp
index ce9d7fff67..c7efa9a82d 100644
--- a/src/core/CL/kernels/CLReshapeLayerKernel.cpp
+++ b/src/core/CL/kernels/CLReshapeLayerKernel.cpp
@@ -92,7 +92,7 @@ void CLReshapeLayerKernel::configure(const ICLTensor *input, ICLTensor *output)
 
     output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
 
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
 
 void CLReshapeLayerKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLScaleKernel.cpp b/src/core/CL/kernels/CLScaleKernel.cpp
index b1655d5cc1..b2cd4b7adf 100644
--- a/src/core/CL/kernels/CLScaleKernel.cpp
+++ b/src/core/CL/kernels/CLScaleKernel.cpp
@@ -181,7 +181,7 @@ void CLScaleKernel::configure(const ICLTensor *input, ICLTensor *output, Interpo
     // Configure kernel window
     auto win_config = validate_and_configure_window(input->info(), output->info(), policy, border_mode, sampling_policy, border);
     ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
-    ICLKernel::configure(win_config.second);
+    ICLKernel::configure_internal(win_config.second);
 
     // Create kernel
     CLBuildOptions build_opts;
@@ -223,7 +223,7 @@ void CLScaleKernel::run(const Window &window, cl::CommandQueue &queue)
                 unsigned int idx = 0;
                 add_2D_tensor_argument(idx, _input, slice);
                 add_2D_tensor_argument(idx, _output, slice);
-                enqueue(queue, *this, slice, _lws_hint);
+                enqueue(queue, *this, slice, lws_hint());
             }
             while(window.slide_window_slice_2D(slice));
             break;
@@ -237,7 +237,7 @@ void CLScaleKernel::run(const Window &window, cl::CommandQueue &queue)
                 unsigned int idx = 0;
                 add_3D_tensor_argument(idx, _input, slice);
                 add_3D_tensor_argument(idx, _output, slice);
-                enqueue(queue, *this, slice, _lws_hint);
+                enqueue(queue, *this, slice, lws_hint());
             }
             while(window.slide_window_slice_3D(slice));
             break;
diff --git a/src/core/CL/kernels/CLScharr3x3Kernel.cpp b/src/core/CL/kernels/CLScharr3x3Kernel.cpp
index 913ef592d4..5182390822 100644
--- a/src/core/CL/kernels/CLScharr3x3Kernel.cpp
+++ b/src/core/CL/kernels/CLScharr3x3Kernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -102,7 +102,7 @@ void CLScharr3x3Kernel::configure(const ICLTensor *input, ICLTensor *output_x, I
     output_x_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
     output_y_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
 
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
 
 void CLScharr3x3Kernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLSobel3x3Kernel.cpp b/src/core/CL/kernels/CLSobel3x3Kernel.cpp
index 436aaa498a..b4bfe28216 100644
--- a/src/core/CL/kernels/CLSobel3x3Kernel.cpp
+++ b/src/core/CL/kernels/CLSobel3x3Kernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -102,7 +102,7 @@ void CLSobel3x3Kernel::configure(const ICLTensor *input, ICLTensor *output_x, IC
     output_x_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
     output_y_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
 
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
 
 void CLSobel3x3Kernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLSobel5x5Kernel.cpp b/src/core/CL/kernels/CLSobel5x5Kernel.cpp
index 4c0316f19e..46aa074d61 100644
--- a/src/core/CL/kernels/CLSobel5x5Kernel.cpp
+++ b/src/core/CL/kernels/CLSobel5x5Kernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -102,7 +102,7 @@ void CLSobel5x5HorKernel::configure(const ICLTensor *input, ICLTensor *output_x,
     output_x_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
     output_y_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
 
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
 
 void CLSobel5x5HorKernel::run(const Window &window, cl::CommandQueue &queue)
@@ -201,7 +201,7 @@ void CLSobel5x5VertKernel::configure(const ICLTensor *input_x, const ICLTensor *
     output_x_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
     output_y_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
 
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
 
 void CLSobel5x5VertKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLSobel7x7Kernel.cpp b/src/core/CL/kernels/CLSobel7x7Kernel.cpp
index a477953cfb..0c94e88acf 100644
--- a/src/core/CL/kernels/CLSobel7x7Kernel.cpp
+++ b/src/core/CL/kernels/CLSobel7x7Kernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -105,7 +105,7 @@ void CLSobel7x7HorKernel::configure(const ICLTensor *input, ICLTensor *output_x,
     output_x_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
     output_y_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
 
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
 
 void CLSobel7x7HorKernel::run(const Window &window, cl::CommandQueue &queue)
@@ -204,7 +204,7 @@ void CLSobel7x7VertKernel::configure(const ICLTensor *input_x, const ICLTensor *
     output_x_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
     output_y_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
 
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
 
 void CLSobel7x7VertKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp b/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp
index b9ebdc9583..403256baae 100644
--- a/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp
+++ b/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp
@@ -242,7 +242,7 @@ void CLLogits1DMaxShiftExpSumKernel::configure(const ICLTensor *input, ICLTensor
     build_opts.add_option_if(is_data_type_float(dt) && (beta != 1.0f), "-DBETA=" + float_to_string_with_full_precision(beta));
     build_opts.add_options_if(is_data_type_quantized_asymmetric(dt), prepare_quantized_softmax_build_options(input->info()->quantization_info().scale, beta).options());
 
-    _lws_hint               = cl::NullRange;
+    cl::NDRange lws_hint(cl::NullRange);
     std::string kernel_name = is_data_type_quantized_asymmetric(dt) ? std::string("softmax_layer_max_shift_exp_sum_quantized_serial") :
                               std::string("softmax_layer_max_shift_exp_sum_serial");
     ParallelReductionInfo parallel_reduction_info = is_parallel_reduction(reduction_dim_size);
@@ -264,7 +264,7 @@ void CLLogits1DMaxShiftExpSumKernel::configure(const ICLTensor *input, ICLTensor
         build_opts.add_option_if((multiple_grid_size != 0) || ((reduction_dim_size % vector_size) != 0), "-DNON_MULTIPLE_OF_GRID_SIZE");
         // Setting _lws_hint in this way can also communicate grid_size to CLLogits1DMaxShiftExpSumKernel::run().
         // A single workgroup performs reduction in dimension 0 in the parallel case, hence lws[0]==gws[0].
-        _lws_hint = cl::NDRange(_grid_size);
+        lws_hint = cl::NDRange(_grid_size);
     }
 
     // Create kernel.
@@ -277,7 +277,7 @@ void CLLogits1DMaxShiftExpSumKernel::configure(const ICLTensor *input, ICLTensor
     // Configure window
     auto win_config = validate_and_configure_window_1DMaxShiftExpSum(input->info(), max->info(), output->info(), sum->info());
     ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
-    ICLKernel::configure(win_config.second);
+    ICLKernel::configure_internal(win_config.second, lws_hint);
 }
 
 Status CLLogits1DMaxShiftExpSumKernel::validate(const ITensorInfo *input, const ITensorInfo *max, const ITensorInfo *output, const ITensorInfo *sum)
@@ -322,7 +322,7 @@ void CLLogits1DMaxShiftExpSumKernel::run(const Window &window, cl::CommandQueue
         add_3D_tensor_argument(idx, _max, slice);
         add_3D_tensor_argument(idx, _output, slice);
         add_3D_tensor_argument(idx, _sum, slice);
-        enqueue(queue, *this, slice, _lws_hint);
+        enqueue(queue, *this, slice, lws_hint());
     }
     while(window_collapsed.slide_window_slice_3D(slice));
 }
@@ -365,7 +365,7 @@ void CLLogits1DNormKernel::configure(const ICLTensor *input, const ICLTensor *su
     // Configure window
     auto win_config = validate_and_configure_window_1DNorm(input->info(), output->info(), sum->info());
     ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
-    ICLKernel::configure(win_config.second);
+    ICLKernel::configure_internal(win_config.second);
 }
 
 Status CLLogits1DNormKernel::validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output)
@@ -394,7 +394,7 @@ void CLLogits1DNormKernel::run(const Window &window, cl::CommandQueue &queue)
         add_3D_tensor_argument(idx, _input, slice);
         add_3D_tensor_argument(idx, _sum, sum_slice);
         add_3D_tensor_argument(idx, _output, slice);
-        enqueue(queue, *this, slice, _lws_hint);
+        enqueue(queue, *this, slice, lws_hint());
     }
     while(window_collapsed.slide_window_slice_3D(slice));
 }
diff --git a/src/core/CL/kernels/CLTransposeKernel.cpp b/src/core/CL/kernels/CLTransposeKernel.cpp
index 3d584345d7..695bdf7f40 100644
--- a/src/core/CL/kernels/CLTransposeKernel.cpp
+++ b/src/core/CL/kernels/CLTransposeKernel.cpp
@@ -117,9 +117,8 @@ void CLTransposeKernel::configure(const ICLTensor *input, ICLTensor *output)
 
     ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info()));
 
-    _input    = input;
-    _output   = output;
-    _lws_hint = cl::NDRange(2, 8);
+    _input  = input;
+    _output = output;
 
     std::set<std::string> build_opts;
     std::ostringstream    data_type_in_bytes;
@@ -131,5 +130,5 @@ void CLTransposeKernel::configure(const ICLTensor *input, ICLTensor *output)
     // Configure kernel window
     auto win_config = validate_and_configure_window(input->info(), output->info());
     ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
-    ICLKernel::configure(win_config.second);
+    ICLKernel::configure_internal(win_config.second, cl::NDRange(2, 8));
 }
diff --git a/src/core/CL/kernels/CLWarpAffineKernel.cpp b/src/core/CL/kernels/CLWarpAffineKernel.cpp
index e0e09400af..1fae2b1974 100644
--- a/src/core/CL/kernels/CLWarpAffineKernel.cpp
+++ b/src/core/CL/kernels/CLWarpAffineKernel.cpp
@@ -98,5 +98,5 @@ void CLWarpAffineKernel::configure(const ICLTensor *input, ICLTensor *output, co
 
     output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
 
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
diff --git a/src/core/CL/kernels/CLWarpPerspectiveKernel.cpp b/src/core/CL/kernels/CLWarpPerspectiveKernel.cpp
index d6fcb09658..e537aec058 100644
--- a/src/core/CL/kernels/CLWarpPerspectiveKernel.cpp
+++ b/src/core/CL/kernels/CLWarpPerspectiveKernel.cpp
@@ -95,5 +95,5 @@ void CLWarpPerspectiveKernel::configure(const ICLTensor *input, ICLTensor *outpu
 
     output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
 
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
diff --git a/src/core/CL/kernels/CLWeightsReshapeKernel.cpp b/src/core/CL/kernels/CLWeightsReshapeKernel.cpp
index 58ecd9ccb3..5ef0f5b152 100644
--- a/src/core/CL/kernels/CLWeightsReshapeKernel.cpp
+++ b/src/core/CL/kernels/CLWeightsReshapeKernel.cpp
@@ -115,7 +115,7 @@ void CLWeightsReshapeKernel::configure(const ICLTensor *input, const ICLTensor *
     Window win = calculate_max_window(*input->info(), Steps());
     // The CLWeightsReshapeKernel doesn't need padding so update_window_and_padding() can be skipped
     output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape()));
-    ICLKernel::configure(win);
+    ICLKernel::configure_internal(win);
 }
 
 Status CLWeightsReshapeKernel::validate(const ITensorInfo *input, const ITensorInfo *biases, const ITensorInfo *output, const unsigned int num_groups)
diff --git a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp
index e2ca05a72a..e5ab8d2304 100644
--- a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp
+++ b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp
@@ -111,7 +111,7 @@ void CLWidthConcatenateLayerKernel::configure(const ICLTensor *input, unsigned i
     auto win_config = validate_and_configure_window(input->info(), width_offset, output->info());
     ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config));
 
-    ICLKernel::configure(std::get<1>(win_config));
+    ICLKernel::configure_internal(std::get<1>(win_config));
 }
 
 void CLWidthConcatenateLayerKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp b/src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp
index e6c713e5e7..818638c89e 100644
--- a/src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp
+++ b/src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp
@@ -125,7 +125,7 @@ void CLWinogradFilterTransformKernel::configure(const ICLTensor *input, ICLTenso
     // Configure kernel window
     auto win_config = validate_and_configure_window(input->info(), output->info());
     ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
-    ICLKernel::configure(win_config.second);
+    ICLKernel::configure_internal(win_config.second);
 }
 
 Status CLWinogradFilterTransformKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info)
diff --git a/src/core/CL/kernels/CLWinogradInputTransformKernel.cpp b/src/core/CL/kernels/CLWinogradInputTransformKernel.cpp
index fcfd9e30a1..2309fbfb26 100644
--- a/src/core/CL/kernels/CLWinogradInputTransformKernel.cpp
+++ b/src/core/CL/kernels/CLWinogradInputTransformKernel.cpp
@@ -180,8 +180,6 @@ void CLWinogradInputTransformKernel::configure(const ICLTensor *input, ICLTensor
         _step_z = (_input->info()->dimension(2) % 2) != 0 ? 1 : 2;
     }
 
-    _lws_hint = cl::NDRange(1, 1, 8);
-
     // Append stepz and data layout
     kernel_name += "_stepz";
     kernel_name += support::cpp11::to_string(_step_z);
@@ -192,7 +190,7 @@ void CLWinogradInputTransformKernel::configure(const ICLTensor *input, ICLTensor
     // Create window and update padding
     auto win_config = validate_and_configure_window(input->info(), output->info(), winograd_info);
     ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
-    ICLKernel::configure(win_config.second);
+    ICLKernel::configure_internal(win_config.second, cl::NDRange(1, 1, 8));
 
     _config_id = kernel_name;
     _config_id += support::cpp11::to_string(input->info()->dimension(0));
@@ -239,7 +237,7 @@ void CLWinogradInputTransformKernel::run(const Window &window, cl::CommandQueue
         add_3D_tensor_argument(idx, _input, slice);
         add_3D_tensor_argument(idx, _output, slice);
 
-        enqueue(queue, *this, slice, _lws_hint);
+        enqueue(queue, *this, slice, lws_hint());
     }
     while(window.slide_window_slice_3D(slice));
 }
diff --git a/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp b/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp
index 40d5f6588f..fa42596604 100644
--- a/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp
+++ b/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp
@@ -172,7 +172,7 @@ void CLWinogradOutputTransformKernel::configure(const ICLTensor *input, const IC
     // Configure kernel window
     auto win_config = validate_and_configure_window(input->info(), (bias != nullptr ? bias->info() : nullptr), output->info(), winograd_info.output_tile_size);
     ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
-    ICLKernel::configure(win_config.second);
+    ICLKernel::configure_internal(win_config.second);
 
     // Set config_id for enabling LWS tuning
     _config_id = kernel_name;
@@ -231,7 +231,7 @@ void CLWinogradOutputTransformKernel::run(const Window &window, cl::CommandQueue
         unsigned int idx = 0;
         add_3D_tensor_argument(idx, _input, slice);
         add_3D_tensor_argument(idx, _output, slice_out);
-        enqueue(queue, *this, slice, _lws_hint);
+        enqueue(queue, *this, slice, lws_hint());
     }
     while(window.slide_window_slice_3D(slice) && window.slide_window_slice_3D(slice_out));
 }
-- 
cgit v1.2.1