From 4c6bd514a8d424a29b776754f1b3426fa3a8c339 Mon Sep 17 00:00:00 2001
From: Manuel Bottini <manuel.bottini@arm.com>
Date: Wed, 8 Apr 2020 10:15:51 +0100
Subject: COMPMID-3280: Make all ML primitives for CL use the new interface -
 Part 1

- Only CLKernels have been updated

Change-Id: Ife55b847c2e39e712a186eb6ca452503d5b66937
Signed-off-by: Manuel Bottini <manuel.bottini@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3001
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
---
 arm_compute/core/CL/CLHelpers.h                    |  2 +-
 .../core/CL/kernels/CLAbsoluteDifferenceKernel.h   | 10 +++-
 arm_compute/core/CL/kernels/CLAccumulateKernel.h   | 25 +++++++++-
 .../core/CL/kernels/CLActivationLayerKernel.h      | 15 ++++--
 .../core/CL/kernels/CLArgMinMaxLayerKernel.h       | 14 +++++-
 .../CL/kernels/CLBatchConcatenateLayerKernel.h     | 12 +++++
 .../CL/kernels/CLBatchNormalizationLayerKernel.h   | 21 +++++++-
 .../core/CL/kernels/CLBatchToSpaceLayerKernel.h    | 17 +++++++
 arm_compute/core/CL/kernels/CLBitwiseAndKernel.h   | 10 +++-
 arm_compute/core/CL/kernels/CLBitwiseNotKernel.h   |  9 +++-
 arm_compute/core/CL/kernels/CLBitwiseOrKernel.h    | 10 +++-
 arm_compute/core/CL/kernels/CLBitwiseXorKernel.h   | 10 +++-
 .../core/CL/kernels/CLBoundingBoxTransformKernel.h | 15 +++++-
 arm_compute/core/CL/kernels/CLBox3x3Kernel.h       | 10 +++-
 arm_compute/core/CL/kernels/CLCannyEdgeKernel.h    | 42 +++++++++++++++-
 .../core/CL/kernels/CLChannelCombineKernel.h       | 21 +++++++-
 .../core/CL/kernels/CLChannelExtractKernel.h       | 18 ++++++-
 .../core/CL/kernels/CLChannelShuffleLayerKernel.h  |  8 ++++
 arm_compute/core/CL/kernels/CLCol2ImKernel.h       | 10 ++++
 arm_compute/core/CL/kernels/CLColorConvertKernel.h | 32 ++++++++++++-
 arm_compute/core/CL/kernels/CLComparisonKernel.h   |  9 ++++
 .../kernels/CLConvertFullyConnectedWeightsKernel.h |  9 ++++
 arm_compute/core/CL/kernels/CLConvolutionKernel.h  | 44 ++++++++++++++++-
 arm_compute/core/CL/kernels/CLCopyKernel.h         | 11 ++++-
 arm_compute/core/CL/kernels/CLCropKernel.h         | 17 ++++++-
 .../kernels/CLDeconvolutionLayerUpsampleKernel.h   |  8 ++++
 .../kernels/CLDeconvolutionReshapeOutputKernel.h   | 13 +++++
 .../CL/kernels/CLDepthConcatenateLayerKernel.h     | 12 +++++
 .../core/CL/kernels/CLDepthConvertLayerKernel.h    | 22 ++++++++-
 .../core/CL/kernels/CLDepthToSpaceLayerKernel.h    |  8 ++++
 .../CLDepthwiseConvolutionLayer3x3NCHWKernel.h     | 23 ++++++++-
 .../CLDepthwiseConvolutionLayer3x3NHWCKernel.h     | 23 ++++++++-
 .../CLDepthwiseConvolutionLayerNativeKernel.h      | 22 +++++++++
 ...DepthwiseConvolutionLayerReshapeWeightsKernel.h |  8 ++++
 .../core/CL/kernels/CLDequantizationLayerKernel.h  |  7 +++
 arm_compute/core/CL/kernels/CLDerivativeKernel.h   | 13 ++++-
 arm_compute/core/CL/kernels/CLDilateKernel.h       | 10 +++-
 .../CL/kernels/CLDirectConvolutionLayerKernel.h    | 21 ++++++++
 .../CL/kernels/CLElementWiseUnaryLayerKernel.h     | 10 +++-
 .../core/CL/kernels/CLElementwiseOperationKernel.h | 27 +++++++++++
 arm_compute/core/CL/kernels/CLErodeKernel.h        | 10 +++-
 .../core/CL/kernels/CLFFTDigitReverseKernel.h      | 11 ++++-
 .../core/CL/kernels/CLFFTRadixStageKernel.h        | 12 ++++-
 arm_compute/core/CL/kernels/CLFFTScaleKernel.h     | 10 +++-
 arm_compute/core/CL/kernels/CLFastCornersKernel.h  | 21 +++++++-
 arm_compute/core/CL/kernels/CLFillBorderKernel.h   |  9 ++++
 arm_compute/core/CL/kernels/CLFlattenLayerKernel.h |  9 ++++
 .../CL/kernels/CLFuseBatchNormalizationKernel.h    | 21 +++++++-
 .../CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h    | 13 ++++-
 .../kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h | 16 +++++++
 .../CLGEMMLowpMatrixMultiplyReshapedKernel.h       | 20 ++++++++
 ...CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h | 27 +++++++++++
 .../kernels/CLGEMMLowpOffsetContributionKernel.h   | 18 ++++++-
 ...CLGEMMLowpOffsetContributionOutputStageKernel.h | 23 +++++++++
 ...CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h | 10 ++++
 .../CLGEMMLowpQuantizeDownInt32ScaleKernel.h       | 10 ++++
 ...antizeDownInt32ToInt16ScaleByFixedPointKernel.h | 16 ++++++-
 ...uantizeDownInt32ToInt8ScaleByFixedPointKernel.h | 18 ++++++-
 ...antizeDownInt32ToUint8ScaleByFixedPointKernel.h | 18 ++++++-
 .../core/CL/kernels/CLGEMMLowpReductionKernel.h    | 36 ++++++++++++++
 .../kernels/CLGEMMMatrixAccumulateBiasesKernel.h   |  9 +++-
 .../core/CL/kernels/CLGEMMMatrixMultiplyKernel.h   | 19 +++++++-
 .../CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h  | 23 ++++++++-
 .../kernels/CLGEMMMatrixMultiplyReshapedKernel.h   | 31 +++++++++++-
 .../CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h    | 23 ++++++++-
 .../CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h  |  8 ++++
 .../core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h | 15 ++++++
 .../core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h | 16 ++++++-
 arm_compute/core/CL/kernels/CLGatherKernel.h       |  9 ++++
 arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h  | 10 +++-
 arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h  | 18 ++++++-
 .../core/CL/kernels/CLGaussianPyramidKernel.h      | 16 ++++++-
 .../CL/kernels/CLGenerateProposalsLayerKernel.h    | 11 ++++-
 .../core/CL/kernels/CLHOGDescriptorKernel.h        | 19 +++++++-
 arm_compute/core/CL/kernels/CLHOGDetectorKernel.h  | 17 ++++++-
 .../core/CL/kernels/CLHarrisCornersKernel.h        | 17 ++++++-
 .../CL/kernels/CLHeightConcatenateLayerKernel.h    |  9 ++++
 arm_compute/core/CL/kernels/CLHistogramKernel.h    | 16 ++++++-
 arm_compute/core/CL/kernels/CLIm2ColKernel.h       | 16 +++++++
 .../kernels/CLInstanceNormalizationLayerKernel.h   |  9 ++++
 .../core/CL/kernels/CLIntegralImageKernel.h        | 15 +++++-
 .../core/CL/kernels/CLL2NormalizeLayerKernel.h     | 14 +++++-
 arm_compute/core/CL/kernels/CLLKTrackerKernel.h    | 56 +++++++++++++++++++++-
 .../CLLocallyConnectedMatrixMultiplyKernel.h       | 10 +++-
 .../core/CL/kernels/CLMagnitudePhaseKernel.h       | 16 ++++++-
 arm_compute/core/CL/kernels/CLMeanStdDevKernel.h   | 12 ++++-
 .../CL/kernels/CLMeanStdDevNormalizationKernel.h   | 13 ++++-
 arm_compute/core/CL/kernels/CLMedian3x3Kernel.h    | 10 +++-
 arm_compute/core/CL/kernels/CLMemsetKernel.h       |  8 ++++
 arm_compute/core/CL/kernels/CLMinMaxLayerKernel.h  | 10 +++-
 .../core/CL/kernels/CLMinMaxLocationKernel.h       | 22 ++++++++-
 .../core/CL/kernels/CLNonLinearFilterKernel.h      | 16 ++++++-
 .../CL/kernels/CLNonMaximaSuppression3x3Kernel.h   | 10 +++-
 .../core/CL/kernels/CLNormalizationLayerKernel.h   | 12 ++++-
 .../CL/kernels/CLNormalizePlanarYUVLayerKernel.h   | 11 +++++
 arm_compute/core/CL/kernels/CLPadLayerKernel.h     | 13 +++++
 arm_compute/core/CL/kernels/CLPermuteKernel.h      | 10 ++++
 .../CL/kernels/CLPixelWiseMultiplicationKernel.h   | 23 +++++++++
 arm_compute/core/CL/kernels/CLPoolingLayerKernel.h | 10 ++++
 .../core/CL/kernels/CLPriorBoxLayerKernel.h        | 15 +++++-
 .../core/CL/kernels/CLQuantizationLayerKernel.h    |  9 ++++
 .../core/CL/kernels/CLROIAlignLayerKernel.h        | 16 +++++++
 .../core/CL/kernels/CLROIPoolingLayerKernel.h      | 17 ++++++-
 arm_compute/core/CL/kernels/CLRangeKernel.h        | 11 ++++-
 .../core/CL/kernels/CLReductionOperationKernel.h   | 11 +++++
 arm_compute/core/CL/kernels/CLRemapKernel.h        | 13 ++++-
 arm_compute/core/CL/kernels/CLReorgLayerKernel.h   | 11 +++++
 arm_compute/core/CL/kernels/CLReshapeLayerKernel.h |  7 +++
 arm_compute/core/CL/kernels/CLReverseKernel.h      |  8 ++++
 arm_compute/core/CL/kernels/CLScaleKernel.h        | 13 +++++
 arm_compute/core/CL/kernels/CLScharr3x3Kernel.h    | 13 ++++-
 arm_compute/core/CL/kernels/CLSelectKernel.h       |  9 ++++
 arm_compute/core/CL/kernels/CLSobel3x3Kernel.h     | 13 ++++-
 arm_compute/core/CL/kernels/CLSobel5x5Kernel.h     | 25 +++++++++-
 arm_compute/core/CL/kernels/CLSobel7x7Kernel.h     | 25 +++++++++-
 arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h | 38 ++++++++++++++-
 .../core/CL/kernels/CLSpaceToBatchLayerKernel.h    | 20 ++++++++
 .../core/CL/kernels/CLSpaceToDepthLayerKernel.h    |  8 ++++
 arm_compute/core/CL/kernels/CLStackLayerKernel.h   | 14 ++++++
 arm_compute/core/CL/kernels/CLStridedSliceKernel.h | 18 +++++++
 arm_compute/core/CL/kernels/CLTableLookupKernel.h  | 10 +++-
 arm_compute/core/CL/kernels/CLThresholdKernel.h    | 15 +++++-
 arm_compute/core/CL/kernels/CLTileKernel.h         | 10 ++++
 arm_compute/core/CL/kernels/CLTransposeKernel.h    |  7 +++
 .../core/CL/kernels/CLUpsampleLayerKernel.h        |  9 ++++
 arm_compute/core/CL/kernels/CLWarpAffineKernel.h   | 12 ++++-
 .../core/CL/kernels/CLWarpPerspectiveKernel.h      | 11 ++++-
 .../core/CL/kernels/CLWeightsReshapeKernel.h       | 16 ++++++-
 .../CL/kernels/CLWidthConcatenate2TensorsKernel.h  |  8 ++++
 .../CL/kernels/CLWidthConcatenate4TensorsKernel.h  | 10 ++++
 .../CL/kernels/CLWidthConcatenateLayerKernel.h     |  9 ++++
 .../CL/kernels/CLWinogradFilterTransformKernel.h   | 21 +++++++-
 .../CL/kernels/CLWinogradInputTransformKernel.h    | 21 +++++++-
 .../CL/kernels/CLWinogradOutputTransformKernel.h   | 24 +++++++++-
 arm_compute/core/CL/kernels/CLYOLOLayerKernel.h    | 14 +++++-
 .../ICLDepthwiseConvolutionLayer3x3Kernel.h        | 23 ++++++++-
 136 files changed, 2008 insertions(+), 84 deletions(-)

(limited to 'arm_compute')

diff --git a/arm_compute/core/CL/CLHelpers.h b/arm_compute/core/CL/CLHelpers.h
index ee6397af7a..77c17c7d9c 100644
--- a/arm_compute/core/CL/CLHelpers.h
+++ b/arm_compute/core/CL/CLHelpers.h
@@ -206,7 +206,7 @@ cl::Kernel create_opencl_kernel(CLCoreRuntimeContext *ctx, const std::string &ke
  *
  * @return An opencl kernel
  */
-cl::Kernel create_kernel(CLCompileContext &ctx, const std::string &kernel_name, const std::set<std::string> &build_opts);
+cl::Kernel create_kernel(CLCompileContext &ctx, const std::string &kernel_name, const std::set<std::string> &build_opts = std::set<std::string>());
 
 /** Creates a suitable LWS hint object for parallel implementations. Sets the number of WG based on the input size.
  *  If input width is smaller than 128 we can use fewer threads than 8.
diff --git a/arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h b/arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h
index 24993e2fda..18896725e2 100644
--- a/arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h
+++ b/arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -58,6 +58,14 @@ public:
      * @param[out] output Destination tensor. Data types supported: U8/S16.
      */
     void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
+    /** Set the inputs and output images.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input1          Source tensor. Data types supported: U8/S16.
+     * @param[in]  input2          Source tensor. Data types supported: U8/S16.
+     * @param[out] output          Destination tensor. Data types supported: U8/S16.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/arm_compute/core/CL/kernels/CLAccumulateKernel.h b/arm_compute/core/CL/kernels/CLAccumulateKernel.h
index 84f3f2c41a..d7cb09fdd3 100644
--- a/arm_compute/core/CL/kernels/CLAccumulateKernel.h
+++ b/arm_compute/core/CL/kernels/CLAccumulateKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -46,6 +46,13 @@ public:
      * @param[out] accum Destination tensor. Data types supported: S16.
      */
     void configure(const ICLTensor *input, ICLTensor *accum);
+    /** Set the input and accumulation tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data types supported: U8.
+     * @param[out] accum           Destination tensor. Data types supported: S16.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *accum);
 };
 
 /** Interface for the accumulate weighted kernel.
@@ -67,6 +74,14 @@ public:
      * @param[in,out] accum Accumulated tensor. Data types supported: U8.
      */
     void configure(const ICLTensor *input, float alpha, ICLTensor *accum);
+    /** Set the input and accumulation images, and the scale value.
+     *
+     * @param[in]     compile_context The compile context to be used.
+     * @param[in]     input           Source tensor. Data types supported: U8.
+     * @param[in]     alpha           Scalar value in the range [0, 1.0]. Data types supported: F32.
+     * @param[in,out] accum           Accumulated tensor. Data types supported: U8.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, float alpha, ICLTensor *accum);
 };
 
 /** Interface for the accumulate squared kernel.
@@ -86,6 +101,14 @@ public:
      * @param[in,out] accum Accumulated tensor. Data types supported: S16.
      */
     void configure(const ICLTensor *input, uint32_t shift, ICLTensor *accum);
+    /** Set the input and accumulation tensors and the shift value.
+     *
+     * @param[in]     compile_context The compile context to be used.
+     * @param[in]     input           Source tensor. Data types supported: U8.
+     * @param[in]     shift           Shift value in the range of [0, 15]. Data types supported: U32.
+     * @param[in,out] accum           Accumulated tensor. Data types supported: S16.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, uint32_t shift, ICLTensor *accum);
 };
 } // namespace arm_compute
 #endif /*ARM_COMPUTE_CLACCUMULATEKERNEL_H */
diff --git a/arm_compute/core/CL/kernels/CLActivationLayerKernel.h b/arm_compute/core/CL/kernels/CLActivationLayerKernel.h
index 5b65a54824..d25480cd60 100644
--- a/arm_compute/core/CL/kernels/CLActivationLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLActivationLayerKernel.h
@@ -29,13 +29,12 @@
 namespace arm_compute
 {
 class ICLTensor;
-class CLCoreRuntimeContext;
 /** Interface for the activation layer kernel. */
 class CLActivationLayerKernel : public ICLKernel
 {
 public:
     /** Default constructor */
-    CLActivationLayerKernel(CLCoreRuntimeContext *ctx = nullptr);
+    CLActivationLayerKernel();
     /** Prevent instances of this class from being copied (As this class contains pointers) */
     CLActivationLayerKernel(const CLActivationLayerKernel &) = delete;
     /** Prevent instances of this class from being copied (As this class contains pointers) */
@@ -56,6 +55,17 @@ public:
      * @param[in]      act_info Activation layer information.
      */
     void configure(ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info);
+    /** Set the input and output tensor.
+     *
+     * @note If the output tensor is a nullptr, the activation function will be performed in-place
+     *
+     * @param[in]      compile_context The compile context to be used.
+     * @param[in, out] input           Source tensor. In case of @p output tensor = nullptr, this tensor will store the result
+     *                                 of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32.
+     * @param[out]     output          Destination tensor. Data type supported: same as @p input
+     * @param[in]      act_info        Activation layer information.
+     */
+    void configure(CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info);
     /** Static function to check if given info will lead to a valid configuration of @ref CLActivationLayerKernel
      *
      * @param[in] input    Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result
@@ -74,7 +84,6 @@ private:
     ICLTensor            *_input;
     ICLTensor            *_output;
     bool                  _run_in_place;
-    CLCoreRuntimeContext *_ctx;
 };
 } // namespace arm_compute
 #endif /*ARM_COMPUTE_CLACTIVATIONLAYERKERNEL_H */
diff --git a/arm_compute/core/CL/kernels/CLArgMinMaxLayerKernel.h b/arm_compute/core/CL/kernels/CLArgMinMaxLayerKernel.h
index 7f4cfe3edc..831cee5e58 100644
--- a/arm_compute/core/CL/kernels/CLArgMinMaxLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLArgMinMaxLayerKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -65,6 +65,18 @@ public:
      * @param[in]  op          Reduction operation to perform. Only ArgMin and ArgMax are supported.
      */
     void configure(const ICLTensor *input, const ICLTensor *prev_output, ICLTensor *output, unsigned int axis, ReductionOperation op);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data types supported: S32/F16/F32.
+     * @param[in]  prev_output     Destination tensor of the previous iterations of @ref CLArgMinMaxLayerKernel. Data types supported: U32/S32
+     *                             Has to be nullptr for the first iteration
+     * @param[out] output          Destination tensor. Data types supported: U32/S32
+     *                             Output will have the same number of dimensions as input.
+     * @param[in]  axis            Axis along which to reduce. Supported reduction axis : 0,1,2,3
+     * @param[in]  op              Reduction operation to perform. Only ArgMin and ArgMax are supported.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *prev_output, ICLTensor *output, unsigned int axis, ReductionOperation op);
 
     /** Static function to check if given info will lead to a valid configuration of @ref CLArgMinMaxLayerKernel.
      *
diff --git a/arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h b/arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h
index 3711617959..06764302f4 100644
--- a/arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h
@@ -61,6 +61,18 @@ public:
      *
      */
     void configure(const ICLTensor *input, unsigned int batch_offset, ICLTensor *output);
+    /** Initialise the kernel's inputs and output
+     *
+     * @param[in]     compile_context The compile context to be used.
+     * @param[in]     input           Input tensor. Data types supported: All.
+     * @param[in]     batch_offset    The offset on axis # 3.
+     * @param[in,out] output          Output tensor. Data types supported: Same as @p input.
+     *
+     * @note: The output tensor's low two dimensions can't be smaller than the input one's.
+     * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2.
+     *
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, unsigned int batch_offset, ICLTensor *output);
     /**  Static function to check if given info will lead to a valid configuration of @ref CLBatchConcatenateLayerKernel
      *
      * @param[in] input        Input tensor info. Data types supported: All.
diff --git a/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h b/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h
index 7afa0e2784..564b21680b 100644
--- a/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -65,6 +65,25 @@ public:
      */
     void configure(ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta = nullptr, const ICLTensor *gamma = nullptr, float epsilon = 0.001f,
                    ActivationLayerInfo act_info = ActivationLayerInfo());
+    /** Set the input and output tensors.
+     *
+     * @note If the output tensor is a nullptr, the batch normalization function will be performed in-place
+     *
+     * @param[in]      compile_context The compile context to be used.
+     * @param[in, out] input           Source tensor. In case of @p output tensor = nullptr, this tensor will store the result.
+     *                                 3 lower dimensions represent a single input with dimensions [width, height, FM].
+     *                                 The rest are optional and used for representing batches. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC.
+     * @param[out]     output          Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input
+     * @param[in]      mean            Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+     * @param[in]      var             Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+     * @param[in]      beta            (Optional) Beta values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for beta is 0. Data types supported: Same as @p input
+     * @param[in]      gamma           (Optional) Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for gamma is 1. Data types supported: Same as @p input
+     * @param[in]      epsilon         (Optional) Small value to avoid division with zero. Default value is 0.001f.
+     * @param[in]      act_info        (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
+     */
+    void configure(CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta = nullptr, const ICLTensor *gamma = nullptr,
+                   float               epsilon  = 0.001f,
+                   ActivationLayerInfo act_info = ActivationLayerInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref CLBatchNormalizationLayerKernel
      *
      * @param[in] input    Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result.
diff --git a/arm_compute/core/CL/kernels/CLBatchToSpaceLayerKernel.h b/arm_compute/core/CL/kernels/CLBatchToSpaceLayerKernel.h
index 21197c26d0..f9289eab73 100644
--- a/arm_compute/core/CL/kernels/CLBatchToSpaceLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLBatchToSpaceLayerKernel.h
@@ -54,6 +54,14 @@ public:
      * @param[out] output      Tensor output. Data types supported: same as @p input
      */
     void configure(const ICLTensor *input, const ICLTensor *block_shape, ICLTensor *output);
+    /** Initialise the kernel's inputs and output.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Tensor input. Supported tensor rank: 4. Data types supported: All.
+     * @param[in]  block_shape     1-D tensor with shape [M]. Data types supported: S32
+     * @param[out] output          Tensor output. Data types supported: same as @p input
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *block_shape, ICLTensor *output);
     /** Initialise the kernel's inputs and output (Static block shape).
      *
      * @param[in]  input         Tensor input. Supported tensor rank: 4. Data types supported: All.
@@ -62,6 +70,15 @@ public:
      * @param[out] output        Tensor output. Data types supported: same as @p input
      */
     void configure(const ICLTensor *input, const int32_t block_shape_x, const int32_t block_shape_y, ICLTensor *output);
+    /** Initialise the kernel's inputs and output (Static block shape).
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Tensor input. Supported tensor rank: 4. Data types supported: All.
+     * @param[in]  block_shape_x   Block shape x value.
+     * @param[in]  block_shape_y   Block shape y value.
+     * @param[out] output          Tensor output. Data types supported: same as @p input
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, const int32_t block_shape_x, const int32_t block_shape_y, ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLBatchToSpaceLayerKernel
      *
      * @param[in] input       Tensor input. Supported tensor rank: 4. Data types supported: All.
diff --git a/arm_compute/core/CL/kernels/CLBitwiseAndKernel.h b/arm_compute/core/CL/kernels/CLBitwiseAndKernel.h
index 0aa2228a48..6c60bc0f33 100644
--- a/arm_compute/core/CL/kernels/CLBitwiseAndKernel.h
+++ b/arm_compute/core/CL/kernels/CLBitwiseAndKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -55,6 +55,14 @@ public:
      * @param[out] output Destination tensor. Data types supported: U8.
      */
     void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
+    /** Set the inputs and output images
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input1          Source tensor. Data types supported: U8.
+     * @param[in]  input2          Source tensor. Data types supported: U8.
+     * @param[out] output          Destination tensor. Data types supported: U8.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/arm_compute/core/CL/kernels/CLBitwiseNotKernel.h b/arm_compute/core/CL/kernels/CLBitwiseNotKernel.h
index a7b00dd8df..0522841e73 100644
--- a/arm_compute/core/CL/kernels/CLBitwiseNotKernel.h
+++ b/arm_compute/core/CL/kernels/CLBitwiseNotKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -44,6 +44,13 @@ public:
      * @param[out] output Destination tensor. Data types supported: U8.
      */
     void configure(const ICLTensor *input, ICLTensor *output);
+    /** Set the inputs and output images.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data types supported: U8.
+     * @param[out] output          Destination tensor. Data types supported: U8.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
 };
 } // namespace arm_compute
 #endif /* ARM_COMPUTE_CLBITWISENOTKERNEL_H */
diff --git a/arm_compute/core/CL/kernels/CLBitwiseOrKernel.h b/arm_compute/core/CL/kernels/CLBitwiseOrKernel.h
index 5764cf5f90..151f19d374 100644
--- a/arm_compute/core/CL/kernels/CLBitwiseOrKernel.h
+++ b/arm_compute/core/CL/kernels/CLBitwiseOrKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -55,6 +55,14 @@ public:
      * @param[out] output Destination tensor. Data types supported: U8.
      */
     void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
+    /** Set the inputs and output images
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input1          Source tensor. Data types supported: U8.
+     * @param[in]  input2          Source tensor. Data types supported: U8.
+     * @param[out] output          Destination tensor. Data types supported: U8.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/arm_compute/core/CL/kernels/CLBitwiseXorKernel.h b/arm_compute/core/CL/kernels/CLBitwiseXorKernel.h
index c1e2e4b744..03c1e05da4 100644
--- a/arm_compute/core/CL/kernels/CLBitwiseXorKernel.h
+++ b/arm_compute/core/CL/kernels/CLBitwiseXorKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -55,6 +55,14 @@ public:
      * @param[out] output Destination tensor. Data types supported: U8.
      */
     void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
+    /** Set the inputs and output images
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input1          Source tensor. Data types supported: U8.
+     * @param[in]  input2          Source tensor. Data types supported: U8.
+     * @param[out] output          Destination tensor. Data types supported: U8.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/arm_compute/core/CL/kernels/CLBoundingBoxTransformKernel.h b/arm_compute/core/CL/kernels/CLBoundingBoxTransformKernel.h
index bd1645573a..ffa63bd5a4 100644
--- a/arm_compute/core/CL/kernels/CLBoundingBoxTransformKernel.h
+++ b/arm_compute/core/CL/kernels/CLBoundingBoxTransformKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -59,6 +59,19 @@ public:
      *
      */
     void configure(const ICLTensor *boxes, ICLTensor *pred_boxes, const ICLTensor *deltas, const BoundingBoxTransformInfo &info);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  boxes           Source tensor. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32.
+     * @param[out] pred_boxes      Destination tensor. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p input
+     * @param[in]  deltas          Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K  is the number of classes.
+     *                             Data types supported: QASYMM8 if @p input is QASYMM16, otherwise same as @p input
+     * @param[in]  info            Contains BoundingBox operation information described in @ref BoundingBoxTransformInfo.
+     *
+     * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct.
+     *
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *boxes, ICLTensor *pred_boxes, const ICLTensor *deltas, const BoundingBoxTransformInfo &info);
 
     /** Static function to check if given info will lead to a valid configuration of @ref CLBoundingBoxTransform
      *
diff --git a/arm_compute/core/CL/kernels/CLBox3x3Kernel.h b/arm_compute/core/CL/kernels/CLBox3x3Kernel.h
index 359b227850..572ae87d9a 100644
--- a/arm_compute/core/CL/kernels/CLBox3x3Kernel.h
+++ b/arm_compute/core/CL/kernels/CLBox3x3Kernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -43,6 +43,14 @@ public:
      * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
      */
     void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
+    /**Initialise the kernel's input and output.
+     *
+     * @param[in]  compile_context  The compile context to be used.
+     * @param[in]  input            An input tensor. Data types supported: U8
+     * @param[out] output           The output tensor. Data types supported: U8.
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined);
 
     //Inherited methods overriden:
     BorderSize border_size() const override;
diff --git a/arm_compute/core/CL/kernels/CLCannyEdgeKernel.h b/arm_compute/core/CL/kernels/CLCannyEdgeKernel.h
index 2d348dd5a6..67c23dd811 100644
--- a/arm_compute/core/CL/kernels/CLCannyEdgeKernel.h
+++ b/arm_compute/core/CL/kernels/CLCannyEdgeKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -54,6 +54,18 @@ public:
      * @param[in]  norm_type Normalization type. if 1, L1-Norm otherwise L2-Norm.
      */
     void configure(const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, int32_t norm_type);
+    /** Initialise the kernel's sources, destinations and border mode.
+     *
+     * @note gx, gy and mag must all be the same size (either 16 or 32).
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  gx              Source tensor - Gx component. Data types supported: S16/S32.
+     * @param[in]  gy              Source tensor - Gy component. Data types supported: Same as gx.
+     * @param[out] magnitude       Destination tensor - Magnitude. Data types supported: U16/U32. Must match the pixel size of gx, gy.
+     * @param[out] phase           Destination tensor - Quantized phase. Data types supported: U8.
+     * @param[in]  norm_type       Normalization type. if 1, L1-Norm otherwise L2-Norm.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, int32_t norm_type);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
@@ -90,6 +102,16 @@ public:
      * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
      */
     void configure(const ICLTensor *magnitude, const ICLTensor *phase, ICLTensor *output, int32_t lower_thr, bool border_undefined);
+    /** Initialise the kernel's sources, destination and border mode.
+     *
+     * @param[in]  compile_context  The compile context to be used.
+     * @param[in]  magnitude        Source tensor - Magnitude. Data types supported: U16/U32.
+     * @param[in]  phase            Source tensor - Quantized phase. Data types supported: U8.
+     * @param[out] output           Destination tensor. Data types supported: U16/U32.
+     * @param[in]  lower_thr        Lower threshold.
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *magnitude, const ICLTensor *phase, ICLTensor *output, int32_t lower_thr, bool border_undefined);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
@@ -129,6 +151,24 @@ public:
      */
     void configure(const ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr,
                    ICLTensor *visited, ICLTensor *recorded, ICLTensor *l1_stack, ICLTensor *l1_stack_counter);
+    /** Initialise the kernel's source, destination and border mode.
+     *
+     * @param[in]     compile_context  The compile context to be used.
+     * @param[in]     input            Source tensor. Data types supported: U8.
+     * @param[out]    output           Destination tensor. Data types supported: U8.
+     * @param[in]     upper_thr        Upper threshold used for the hysteresis
+     * @param[in]     lower_thr        Lower threshold used for the hysteresis
+     * @param[in,out] visited          Tensor for keeping the visited pixels. Data types supported: U32.
+     *                                 Expected to be initialized to 0 before each run.
+     * @param[in,out] recorded         Tensor for keeping the recorded pixels. Data types supported: U32
+     *                                 Expected to be initialized to 0 before each run.
+     * @param[in,out] l1_stack         Tensor with the L1 stack for each pixel. Data types supported: S32.
+     *                                 Expected to be initialized to 0 before each run.
+     * @param[in,out] l1_stack_counter Tensor for counting the elements in the L1 stack of each pixel. Data types supported: U8.
+     *                                              Expected to be initialized to 0 before each run.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr,
+                   ICLTensor *visited, ICLTensor *recorded, ICLTensor *l1_stack, ICLTensor *l1_stack_counter);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/arm_compute/core/CL/kernels/CLChannelCombineKernel.h b/arm_compute/core/CL/kernels/CLChannelCombineKernel.h
index ae5658fba4..60d0bd4a45 100644
--- a/arm_compute/core/CL/kernels/CLChannelCombineKernel.h
+++ b/arm_compute/core/CL/kernels/CLChannelCombineKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -60,6 +60,16 @@ public:
      * @param[out] output The single planar output tensor.
      */
     void configure(const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output);
+    /** Configure function's inputs and outputs.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  plane0          The 2D plane that forms channel 0. Must be of U8 format.
+     * @param[in]  plane1          The 2D plane that forms channel 1. Must be of U8 format.
+     * @param[in]  plane2          The 2D plane that forms channel 2. Must be of U8 format.
+     * @param[in]  plane3          The 2D plane that forms channel 3. Must be of U8 format.
+     * @param[out] output          The single planar output tensor.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output);
     /** Configure function's inputs and outputs.
      *
      * @param[in]  plane0 The 2D plane that forms channel 0. Must be of U8 format.
@@ -68,6 +78,15 @@ public:
      * @param[out] output The multi planar output tensor.
      */
     void configure(const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output);
+    /** Configure function's inputs and outputs.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  plane0          The 2D plane that forms channel 0. Must be of U8 format.
+     * @param[in]  plane1          The 2D plane that forms channel 1. Must be of U8 format.
+     * @param[in]  plane2          The 2D plane that forms channel 2. Must be of U8 format.
+     * @param[out] output          The multi planar output tensor.
+     */
+    void configure(CLCompileContext &compile_context, const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/arm_compute/core/CL/kernels/CLChannelExtractKernel.h b/arm_compute/core/CL/kernels/CLChannelExtractKernel.h
index 371f17ff2b..1f2cc8900a 100644
--- a/arm_compute/core/CL/kernels/CLChannelExtractKernel.h
+++ b/arm_compute/core/CL/kernels/CLChannelExtractKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -58,6 +58,14 @@ public:
      * @param[out] output  Destination tensor. Must be of U8 format.
      */
     void configure(const ICLTensor *input, Channel channel, ICLTensor *output);
+    /** Set the input and output of the kernel
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Formats supported: RGB888/RGBA8888/YUYV422/UYVY422
+     * @param[in]  channel         Channel to extract.
+     * @param[out] output          Destination tensor. Must be of U8 format.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, Channel channel, ICLTensor *output);
     /** Set the input and output of the kernel
      *
      * @param[in]  input   Multi-planar source image. Formats supported: NV12/NV21/IYUV/YUV444
@@ -65,6 +73,14 @@ public:
      * @param[out] output  Single-planar 2D destination image. Must be of U8 format.
      */
     void configure(const ICLMultiImage *input, Channel channel, ICLImage *output);
+    /** Set the input and output of the kernel
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Multi-planar source image. Formats supported: NV12/NV21/IYUV/YUV444
+     * @param[in]  channel         Channel to extract.
+     * @param[out] output          Single-planar 2D destination image. Must be of U8 format.
+     */
+    void configure(CLCompileContext &compile_context, const ICLMultiImage *input, Channel channel, ICLImage *output);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/arm_compute/core/CL/kernels/CLChannelShuffleLayerKernel.h b/arm_compute/core/CL/kernels/CLChannelShuffleLayerKernel.h
index 7e6589e8c0..921c20df10 100644
--- a/arm_compute/core/CL/kernels/CLChannelShuffleLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLChannelShuffleLayerKernel.h
@@ -53,6 +53,14 @@ public:
      * @param[in]  num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups.
      */
     void configure(const ICLTensor *input, ICLTensor *output, unsigned int num_groups);
+    /** Configure function's inputs and outputs.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Input tensor. Data types supported: All.
+     * @param[out] output          Output tensor. Data type supported: Same as @p input
+     * @param[in]  num_groups      Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, unsigned int num_groups);
     /** Static function to check if given info will lead to a valid configuration of @ref CLChannelShuffleLayerKernel
      *
      * @param[in] input      Input tensor info. Data types supported: All.
diff --git a/arm_compute/core/CL/kernels/CLCol2ImKernel.h b/arm_compute/core/CL/kernels/CLCol2ImKernel.h
index b22c666a6b..6ef424853e 100644
--- a/arm_compute/core/CL/kernels/CLCol2ImKernel.h
+++ b/arm_compute/core/CL/kernels/CLCol2ImKernel.h
@@ -72,6 +72,16 @@ public:
      * @param[in]  num_groups     (Optional) Number of groups when performing a grouped convolution
      */
     void configure(const ICLTensor *input, ICLTensor *output, const Size2D &convolved_dims, unsigned int num_groups = 1);
+    /** Set the input and output of the kernel.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           The input tensor to convert. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
+     * @param[out] output          The output tensor. 3 lower dimensions represent a single output [width, height, OFM],
+     *                             while the rest represent batch of outputs. Data types supported: Same as @p input. Data layout: NCHW
+     * @param[in]  convolved_dims  Output convolved dimensions.
+     * @param[in]  num_groups      (Optional) Number of groups when performing a grouped convolution
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Size2D &convolved_dims, unsigned int num_groups = 1);
     /** Static function to check if given info will lead to a valid configuration of @ref CLCol2ImKernel
      *
      * @param[in] input          The input tensor to convert. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
diff --git a/arm_compute/core/CL/kernels/CLColorConvertKernel.h b/arm_compute/core/CL/kernels/CLColorConvertKernel.h
index 2e23a6234b..25b95eb42c 100644
--- a/arm_compute/core/CL/kernels/CLColorConvertKernel.h
+++ b/arm_compute/core/CL/kernels/CLColorConvertKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -59,24 +59,54 @@ public:
      *                                                          U8 (if the formats of @p input is RGB888)
      */
     void configure(const ICLTensor *input, ICLTensor *output);
+    /** Set the input and output of the kernel
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Formats supported: RGBA8888/UYVY422/YUYV422/RGB888
+     * @param[out] output          Destination tensor. Formats supported: RGB888 (if the formats of @p input are RGBA8888/UYVY422/YUYV422),
+     *                                                          RGBA8888 (if the formats of @p input are UYVY422/YUYV422/RGB888/),
+     *                                                          U8 (if the formats of @p input is RGB888)
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
     /** Set the input and output of the kernel
      *
      * @param[in]  input  Multi-planar source image. Formats supported: NV12/NV21/IYUV
      * @param[out] output Single-planar destination image. Formats supported: RGB888/RGBA8888
      */
     void configure(const ICLMultiImage *input, ICLImage *output);
+    /** Set the input and output of the kernel
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Multi-planar source image. Formats supported: NV12/NV21/IYUV
+     * @param[out] output          Single-planar destination image. Formats supported: RGB888/RGBA8888
+     */
+    void configure(CLCompileContext &compile_context, const ICLMultiImage *input, ICLImage *output);
     /** Set the input and output of the kernel
      *
      * @param[in]  input  Single-planar source image. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422
      * @param[out] output Multi-planar destination image. Formats supported: NV12/IYUV/YUV444 (if the formats of @p input are RGB888/RGB8888)
      */
     void configure(const ICLImage *input, ICLMultiImage *output);
+    /** Set the input and output of the kernel
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Single-planar source image. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422
+     * @param[out] output          Multi-planar destination image. Formats supported: NV12/IYUV/YUV444 (if the formats of @p input are RGB888/RGB8888)
+     */
+    void configure(CLCompileContext &compile_context, const ICLImage *input, ICLMultiImage *output);
     /** Set the input and output of the kernel
      *
      * @param[in]  input  Multi-planar source image. Formats supported: NV12/NV21/IYUV
      * @param[out] output Multi-planar destination image. Formats supported: YUV444/IYUV (if the formats of @p input are NV12/NV21)/NV12 (if the format of  @p input is IYUV)
      */
     void configure(const ICLMultiImage *input, ICLMultiImage *output);
+    /** Set the input and output of the kernel
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Multi-planar source image. Formats supported: NV12/NV21/IYUV
+     * @param[out] output          Multi-planar destination image. Formats supported: YUV444/IYUV (if the formats of @p input are NV12/NV21)/NV12 (if the format of  @p input is IYUV)
+     */
+    void configure(CLCompileContext &compile_context, const ICLMultiImage *input, ICLMultiImage *output);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/arm_compute/core/CL/kernels/CLComparisonKernel.h b/arm_compute/core/CL/kernels/CLComparisonKernel.h
index a9c463901d..15779938b2 100644
--- a/arm_compute/core/CL/kernels/CLComparisonKernel.h
+++ b/arm_compute/core/CL/kernels/CLComparisonKernel.h
@@ -56,6 +56,15 @@ public:
      * @param[in]  operation Comparison operation to use.
      */
     void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ComparisonOperation operation);
+    /** Set the inputs and output tensors
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input1          Source tensor. Data types supported: All.
+     * @param[in]  input2          Source tensor. Data types supported: Same as @p input1.
+     * @param[out] output          Destination tensor. Data types supported: U8.
+     * @param[in]  operation       Comparison operation to use.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ComparisonOperation operation);
     /** Static function to check if given info will lead to a valid configuration of @ref CLComparisonKernel
      *
      * @param[in] input1    Source tensor. Data types supported: All.
diff --git a/arm_compute/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h b/arm_compute/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h
index b204eaa2ac..f7e212e1e4 100644
--- a/arm_compute/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h
+++ b/arm_compute/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h
@@ -61,6 +61,15 @@ public:
      * @param[in]  data_layout          The data layout the weights have been trained in.
      */
     void configure(const ICLTensor *input, ICLTensor *output, const TensorShape &original_input_shape, DataLayout data_layout);
+    /** Set the input and output tensor.
+     *
+     * @param[in]  compile_context      The compile context to be used.
+     * @param[in]  input                Source weights tensor to convert. Must be 2 dimensional. Data types supported: All.
+     * @param[out] output               The converted weights tensor. Shape and Data Type: Same as @p input.
+     * @param[in]  original_input_shape Shape of the original input tensor (the one entering fully connected layer).
+     * @param[in]  data_layout          The data layout the weights have been trained in.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const TensorShape &original_input_shape, DataLayout data_layout);
     /** Static function to check if given info will lead to a valid configuration of @ref CLConvertFullyConnectedWeightsKernel
      *
      * @param[in] input                Source weights tensor info to convert. Must be 2 dimensional. Data types supported: All.
diff --git a/arm_compute/core/CL/kernels/CLConvolutionKernel.h b/arm_compute/core/CL/kernels/CLConvolutionKernel.h
index 089a8cd10e..e1cdc88007 100644
--- a/arm_compute/core/CL/kernels/CLConvolutionKernel.h
+++ b/arm_compute/core/CL/kernels/CLConvolutionKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -61,6 +61,16 @@ public:
      * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
      */
     void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined);
+    /** Initialise the kernel's input, output and border mode.
+     *
+     * @param[in]  compile_context  The compile context to be used.
+     * @param[in]  input            Source tensor. Data types supported: U8.
+     * @param[out] output           Destination tensor, Data types supported: U8, S16.
+     * @param[in]  conv             Convolution matrix to apply to the input tensor.
+     * @param[in]  scale            Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined);
 
     // Inherited methods overridden:
     BorderSize border_size() const override;
@@ -94,6 +104,15 @@ public:
      * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
      */
     void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, bool border_undefined);
+    /** Initialise the kernel's input, output and border mode.
+     *
+     * @param[in]  compile_context  The compile context to be used.
+     * @param[in]  input            Source tensor. Data types supported: U8.
+     * @param[out] output           Destination tensor, Data types supported: S16.
+     * @param[in]  conv             Convolution matrix to apply to the input tensor.
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, bool border_undefined);
 
     // Inherited methods overridden:
     BorderSize border_size() const override;
@@ -124,6 +143,17 @@ public:
      * @param[in]  data_type        Data type to use for intermeidate result. @sa data_type_for_convolution
      */
     void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined, DataType data_type = DataType::S32);
+    /** Initialise the kernel's input, output and border mode.
+     *
+     * @param[in]  compile_context  The compile context to be used.
+     * @param[in]  input            Source tensor. Data types supported: S16.
+     * @param[out] output           Destination tensor, Data types supported: U8, S16.
+     * @param[in]  conv             Convolution matrix to apply to the input tensor.
+     * @param[in]  scale            Scale of the convolution matrix.
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
+     * @param[in]  data_type        Data type to use for intermeidate result. @sa data_type_for_convolution
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined, DataType data_type = DataType::S32);
 
     // Inherited methods overridden:
     BorderSize border_size() const override;
@@ -168,6 +198,18 @@ public:
      * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
      */
     void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale, bool border_undefined);
+    /** Initialise the kernel's input, output and border mode.
+     *
+     * @param[in]  compile_context  The compile context to be used.
+     * @param[in]  input            Source tensor. Data types supported: U8.
+     * @param[out] output           Destination tensor, Data types supported: U8, S16.
+     * @param[in]  conv             Convolution matrix to apply to the input tensor.
+     * @param[in]  width            Width of convolution matrix (Number of columns)
+     * @param[in]  height           Height of convolution matrix (Number of rows)
+     * @param[in]  scale            Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale, bool border_undefined);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/arm_compute/core/CL/kernels/CLCopyKernel.h b/arm_compute/core/CL/kernels/CLCopyKernel.h
index 50bf38966d..1774f8ccad 100644
--- a/arm_compute/core/CL/kernels/CLCopyKernel.h
+++ b/arm_compute/core/CL/kernels/CLCopyKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -53,6 +53,15 @@ public:
      * @param[in]  output_window (Optional) Window to be used in case only copying into part of a tensor. Default is nullptr.
      */
     void configure(const ICLTensor *input, ICLTensor *output, const PaddingList &padding = PaddingList(), Window *output_window = nullptr);
+    /** Initialize the kernel's input, output.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+     * @param[out] output          Destination tensor. Data types supported: same as @p input.
+     * @param[in]  padding         (Optional) Padding to be applied to the input tensor
+     * @param[in]  output_window   (Optional) Window to be used in case only copying into part of a tensor. Default is nullptr.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PaddingList &padding = PaddingList(), Window *output_window = nullptr);
     /** Static function to check if given info will lead to a valid configuration of @ref CLCopyKernel
      *
      * @param[in] input         Source tensor info. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
diff --git a/arm_compute/core/CL/kernels/CLCropKernel.h b/arm_compute/core/CL/kernels/CLCropKernel.h
index bcce9c15ea..103986a5f8 100644
--- a/arm_compute/core/CL/kernels/CLCropKernel.h
+++ b/arm_compute/core/CL/kernels/CLCropKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -58,6 +58,21 @@ public:
      * @param[in]  output_window       Output window to be used in case cropped image is being copied into a tensor. Default is nullptr.
      */
     void configure(const ICLTensor *input, ICLTensor *output, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value = 0, Window *output_window = nullptr);
+    /** Configure kernel
+     *
+     * @note Supported tensor rank: up to 4
+     *
+     * @param[in]  compile_context     The compile context to be used.
+     * @param[in]  input               Source tensor. Data type supported: U16/S16/U32/S32/F16/F32. Data layouts supported: NHWC.
+     * @param[out] output              Destination tensor. Data type supported: F32
+     * @param[in]  start               Coordinates of where to start cropping the image.
+     * @param[in]  end                 Coordinates of where to end cropping the image.
+     * @param[in]  batch_index         Fourth dimension index of the 3D image to crop in @p input.
+     * @param[in]  extrapolation_value Value to be used for values outside of the image. Default is 0.
+     * @param[in]  output_window       Output window to be used in case cropped image is being copied into a tensor. Default is nullptr.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value = 0,
+                   Window *output_window = nullptr);
 
     /** Static function to check if given info will lead to a valid configuration of @ref CLStridedSliceKernel
      *
diff --git a/arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h b/arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h
index a1c6bbdafe..7e8a45fd8f 100644
--- a/arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h
+++ b/arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h
@@ -55,6 +55,14 @@ public:
      * @param[in]  info   Contains padding and stride information described in @ref PadStrideInfo.
      */
     void configure(const ICLTensor *input, ICLTensor *output, const PadStrideInfo &info);
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data types supported: All.
+     * @param[out] output          Destination tensor. Data types supported: same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
+     * @param[in]  info            Contains padding and stride information described in @ref PadStrideInfo.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PadStrideInfo &info);
     /** Static function to check if given info will lead to a valid configuration of @ref CLDeconvolutionLayerUpsample
      *
      * @param[in] input  Source tensor info. Data types supported: All.
diff --git a/arm_compute/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h b/arm_compute/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h
index 6c90bd6c7f..daeb8c1f9c 100644
--- a/arm_compute/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h
+++ b/arm_compute/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h
@@ -68,6 +68,19 @@ public:
      * @param[in]  deconv_info  Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo. This kernel supports only stride_x = weights.width && stride_y = weights.height. Moreover, padding is not supported.
      */
     void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const ITensorInfo *input_info, const ITensorInfo *weights_info, const PadStrideInfo &deconv_info);
+    /** Initialise the kernel's source and destination.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Input tensor. Supported data types: QASYMM8/QASYMM8_SIGNED/S32/F16/F32.
+     * @param[in]  bias            Bias tensor to be added directly during the reshape operation. Supported data types: same as @p input.  Supported data layouts: same as @p input.
+     * @param[out] output          Output tensor with the following shape: [stride_x * (input_width - 1) + filter_width - 2 * padx, stride_y * (input_height - 1) + filter_height - 2 * pady, ofms, batch_size]
+     *                             Supported data types: same as @p input.  Supported data layouts: same as @p input.
+     * @param[in]  input_info      Deconvolution input tensor info. Supported data types: same as @p input.  Supported data layouts: same as @p input.
+     * @param[in]  weights_info    Deconvolution weights tensor info. Supported data types: same as @p input.  Supported data layouts: same as @p input.
+     * @param[in]  deconv_info     Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo. This kernel supports only stride_x = weights.width && stride_y = weights.height. Moreover, padding is not supported.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const ITensorInfo *input_info, const ITensorInfo *weights_info,
+                   const PadStrideInfo &deconv_info);
 
     /** Static function to check if given info will lead to a valid configuration of @ref  CLDeconvolutionReshapeOutputKernel.
      *
diff --git a/arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h b/arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h
index e55dd5dee8..7b594417d6 100644
--- a/arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h
@@ -61,6 +61,18 @@ public:
      *
      */
     void configure(const ICLTensor *input, unsigned int depth_offset, ICLTensor *output);
+    /** Initialise the kernel's inputs and output
+     *
+     * @param[in]     compile_context The compile context to be used.
+     * @param[in]     input           Input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+     * @param[in]     depth_offset    The offset on the Z axis.
+     * @param[in,out] output          Output tensor. Data types supported: Same as @p input.
+     *
+     * @note: The output tensor's low two dimensions can't be smaller than the input one's.
+     * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2.
+     *
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, unsigned int depth_offset, ICLTensor *output);
     /**  Static function to check if given info will lead to a valid configuration of @ref CLDepthConcatenateLayerKernel
      *
      * @param[in] input        Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
diff --git a/arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h b/arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h
index fb65aa5392..8bbf9b3dce 100644
--- a/arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -56,6 +56,26 @@ public:
      * @param[in]  shift  Value for down/up conversions. Must be 0 <= shift < 8.
      */
     void configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift);
+    /** Set the input and output of the kernel.
+     *
+     * Valid conversions Input -> Output :
+     *
+     *   - QSYMM8_PER_CHANNEL -> QASYMM8 (ATTENTION: it is the user's responsibility to keep track of the quantization info in the TensorInfo meta-data)
+     *   - U8  -> S8, U16, S16, U32, S32, F16, F32
+     *   - U16 -> U8, S8, S16, U32, S32, F16, F32
+     *   - S16 -> U8, S8, U16, U32, S32, F16, F32
+     *   - U32 -> U8, S8, U16, S16, S32, F16, F32
+     *   - S32 -> U8, S8, U16, S16, U32, F16, F32
+     *   - F16 -> U8, S8, U16, S16, U32, F32
+     *   - F32 -> U8, S8, U16, S16, U32, F16
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           The input tensor to convert. Data types supported: U8/S8/QSYMM8_PER_CHANNEL/U16/S16/U32/S32/F16/F32.
+     * @param[out] output          The output tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
+     * @param[in]  policy          Conversion policy
+     * @param[in]  shift           Value for down/up conversions. Must be 0 <= shift < 8.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift);
     /** Static function to check if given info will lead to a valid configuration of @ref CLDepthConvertLayerKernel
      *
      * @param[in] input  Source tensor info. Data types supported: U8/S8/QSYMM8_PER_CHANNEL/U16/S16/U32/S32/F16/F32.
diff --git a/arm_compute/core/CL/kernels/CLDepthToSpaceLayerKernel.h b/arm_compute/core/CL/kernels/CLDepthToSpaceLayerKernel.h
index 637e5fa960..541506b521 100644
--- a/arm_compute/core/CL/kernels/CLDepthToSpaceLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLDepthToSpaceLayerKernel.h
@@ -54,6 +54,14 @@ public:
      * @param[in]  block_shape Block shape value.
      */
     void configure(const ICLTensor *input, ICLTensor *output, int32_t block_shape);
+    /** Initialise the kernel's inputs and output.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Tensor input. Supported tensor rank: 4. Data types supported: All.
+     * @param[out] output          Tensor output. Data types supported: same as @p input
+     * @param[in]  block_shape     Block shape value.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t block_shape);
     /** Static function to check if given info will lead to a valid configuration of @ref CLDepthToSpaceLayerKernel.
      *
      * @param[in] input       Tensor input info. Supported tensor rank: 4. Data types supported: All.
diff --git a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h
index 3d1b91c81f..f68fde4737 100644
--- a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h
+++ b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -57,6 +57,27 @@ public:
     void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
                    unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U),
                    const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) override;
+    /** Initialize the function's source, destination, conv and border_size.
+     *
+     * @param[in]  compile_context    The compile context to be used.
+     * @param[in]  input              Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+     * @param[in]  weights            Weights tensor. A 3D tensor with dimensions [3, 3, IFM].
+     *                                Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
+     * @param[in]  biases             Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
+     *                                Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
+     * @param[out] output             Destination tensor. Data type supported: Same as @p input.
+     * @param[in]  conv_info          Padding and stride information to use for the convolution.
+     * @param[in]  depth_multiplier   (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+     * @param[in]  act_info           (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for QASYMM8 supported.
+     * @param[in]  dilation           (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+     * @param[in]  output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization,
+     *                                the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
+     * @param[in]  output_shifts      (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization,
+     *                                the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
+                   unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U),
+                   const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) override;
     /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3NCHWKernel
      *
      * @param[in] input              Source tensor info. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
diff --git a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h
index 9e74be76d8..f9fda0a42c 100644
--- a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h
+++ b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -58,6 +58,27 @@ public:
     void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
                    unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U),
                    const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) override;
+    /** Initialize the function's source, destination, conv and border_size.
+     *
+     * @param[in]  compile_context    The compile context to be used.
+     * @param[in]  input              Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED.
+     * @param[in]  weights            Weights tensor. A 3D tensor with dimensions [IFM, 3, 3].
+     *                                Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
+     * @param[in]  biases             Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
+     *                                Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
+     * @param[out] output             Destination tensor. Data type supported: Same as @p input.
+     * @param[in]  conv_info          Padding and stride information to use for the convolution.
+     * @param[in]  depth_multiplier   (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+     * @param[in]  act_info           (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported.
+     * @param[in]  dilation           (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+     * @param[in]  output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization,
+     *                                the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
+     * @param[in]  output_shifts      (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization,
+     *                                the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
+                   unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U),
+                   const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) override;
     /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3NHWCKernel
      *
      * @param[in] input              Source tensor info. DataType supported: QASYMM8/QASYMM8_SIGNED.
diff --git a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h
index 7e19ed6285..db26b4a06f 100644
--- a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h
+++ b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h
@@ -68,6 +68,28 @@ public:
     void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const DWCWeightsKernelInfo &dwc_weights_info,
                    const DWCKernelInfo &dwc_info, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const Size2D &dilation = Size2D(1U, 1U),
                    const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr);
+    /** Initialize the function's source, destination and parameters
+     *
+     * @param[in]  compile_context    The compile context to be used.
+     * @param[in]  input              Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/FP32/FP16. Data layout supported: NHWC
+     * @param[in]  weights            Weights tensor. A 3D tensor with dimensions [IFM, N, M].
+     *                                Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
+     * @param[in]  biases             Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
+     *                                Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
+     * @param[out] output             Destination tensor. Data type supported: Same as @p input.
+     * @param[in]  dwc_weights_info   Depthwise convolution layer weights info to retrieve the number of output elements processed by each thread
+     * @param[in]  dwc_info           Depthwise convolution layer info
+     * @param[in]  conv_info          Padding and stride information to use for the convolution.
+     * @param[in]  depth_multiplier   (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+     * @param[in]  dilation           (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+     * @param[in]  output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization,
+     *                                the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
+     * @param[in]  output_shifts      (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization,
+     *                                the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const DWCWeightsKernelInfo &dwc_weights_info,
+                   const DWCKernelInfo &dwc_info, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const Size2D &dilation = Size2D(1U, 1U),
+                   const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr);
     /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayerNativeKernel
      *
      * @param[in] input              Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/FP32/FP16. Data layout supported: NHWC
diff --git a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h
index 97225c7c33..e7fc6f8d81 100644
--- a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h
+++ b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h
@@ -52,6 +52,14 @@ public:
      * @param[in]  info   Depthwise convolution information to reshape the input tensor.
      */
     void configure(const ICLTensor *input, ICLTensor *output, const DepthwiseConvolutionReshapeInfo &info);
+    /** Initialize the function's source and destination.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           The input tensor of dimension [IFM, W, H]. Data types supported: All. Data layouts supported: NHWC
+     * @param[out] output          The output tensor of dimension [W*H*C0, ceil(IFM/C0)]. C0 is the number of channels read by each thread. Data types supported: same as @p weights.
+     * @param[in]  info            Depthwise convolution information to reshape the input tensor.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const DepthwiseConvolutionReshapeInfo &info);
 
     /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3NHWCKernel
      *
diff --git a/arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h b/arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h
index 78b5c14128..4cb1339300 100644
--- a/arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h
@@ -52,6 +52,13 @@ public:
      * @param[out] output Destination tensor. Data types supported: F16/F32.
      */
     void configure(const ICLTensor *input, ICLTensor *output);
+    /** Set the input, output, min and max.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
+     * @param[out] output          Destination tensor. Data types supported: F16/F32.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLDequantizationLayerKernel
      *
      * @param[in] input  Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
diff --git a/arm_compute/core/CL/kernels/CLDerivativeKernel.h b/arm_compute/core/CL/kernels/CLDerivativeKernel.h
index d6be5c2b86..5d5ad860f3 100644
--- a/arm_compute/core/CL/kernels/CLDerivativeKernel.h
+++ b/arm_compute/core/CL/kernels/CLDerivativeKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -56,6 +56,17 @@ public:
      * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
      */
     void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
+    /** Initialise the kernel's sources, destination and border
+     *
+     * @note At least one of output_x or output_y must be set
+     *
+     * @param[in]  compile_context  The compile context to be used.
+     * @param[in]  input            Source tensor. Data types supported: U8.
+     * @param[out] output_x         (Optional) Destination tensor for the X gradient, Data types supported: S16.
+     * @param[out] output_y         (Optional) Destination tensor for the Y gradient, Data types supported: S16.
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/arm_compute/core/CL/kernels/CLDilateKernel.h b/arm_compute/core/CL/kernels/CLDilateKernel.h
index d131b34e00..9c41a84b31 100644
--- a/arm_compute/core/CL/kernels/CLDilateKernel.h
+++ b/arm_compute/core/CL/kernels/CLDilateKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -43,6 +43,14 @@ public:
      * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
      */
     void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
+    /**Initialise the kernel's input and output.
+     *
+     * @param[in]  compile_context  The compile context to be used.
+     * @param[in]  input            An input tensor. Data types supported: U8
+     * @param[out] output           The output tensor. Data types supported: U8.
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined);
 
     // Inherited methods overridden:
     BorderSize border_size() const override;
diff --git a/arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h b/arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h
index 5bf9a5d57f..f1409b6339 100644
--- a/arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h
@@ -68,6 +68,27 @@ public:
      * @param[in]  conv_info Contains padding and stride information described in @ref PadStrideInfo.
      */
     void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info);
+    /** Set the input, weights, biases and output tensors.
+     *
+     * @note: DirectConvolution only works in the following configurations:
+     *        1x1 convolution with stride_x = 1/2/3, stride_y = 1/2/3
+     *        3x3 convolution with stride_x = 1/2, stride_y = 1/2
+     *        5x5 convolution with stride_x = 1/2, stride_y = 1/2
+     *        9x9 convolution with stride_x = 1/2, stride_y = 1/2, data_layout=NHWC
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           The input tensor to convolve. 3 lower dimensions represent a single input [width, height, IFM],
+     *                             while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
+     * @param[in]  weights         Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
+     *                             The 3rd dimension must be the same as the input's volume 3rd dimension.
+     *                             Data type supported:Same as @p input.
+     * @param[in]  biases          Biases tensor. Biases are 1D tensor with dimension [OFM].
+     *                             Data type supported: Should match @p input data type, except for input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type
+     * @param[out] output          Output tensor.
+     *                             The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: Same as @p input.
+     * @param[in]  conv_info       Contains padding and stride information described in @ref PadStrideInfo.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info);
     /** Static function to check if given info will lead to a valid configuration of @ref CLDirectConvolutionLayerKernel
      *
      * @param[in] input     The input tensor to convolve. 3 lower dimensions represent a single input [width, height, IFM],
diff --git a/arm_compute/core/CL/kernels/CLElementWiseUnaryLayerKernel.h b/arm_compute/core/CL/kernels/CLElementWiseUnaryLayerKernel.h
index 0e4d2ec0be..1f76992b96 100644
--- a/arm_compute/core/CL/kernels/CLElementWiseUnaryLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLElementWiseUnaryLayerKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -43,6 +43,14 @@ public:
      * @param[in]  op     Element wise unary operation to perform.
      */
     void configure(const ICLTensor *input, ICLTensor *output, const ElementWiseUnary &op);
+    /** Initialise the kernel's inputs, output.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           First tensor input. Data types supported: F16/F32.
+     * @param[out] output          Output tensor. Data types supported: Same as @p input.
+     * @param[in]  op              Element wise unary operation to perform.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ElementWiseUnary &op);
     /** Static function to check if given info will lead to a valid configuration of @ref CLElementWiseUnaryLayerKernel
      *
      * @param[in] input  First tensor input info. Data types supported: F16/F32.
diff --git a/arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h b/arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h
index 85961f28bc..2f1060126a 100644
--- a/arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h
+++ b/arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h
@@ -96,6 +96,10 @@ protected:
      *
      */
     void configure_common(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
+    /** Commmon configure function for element-wise operators with no additional options (e.g., Div, Min, Max, SquaredDiff)
+     *
+     */
+    void configure_common(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
 
     ActivationLayerInfo _act_info;
 
@@ -124,6 +128,18 @@ public:
      * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
      */
     void configure(ArithmeticOperation op, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const ConvertPolicy &policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+    /** Static function to check if given info will lead to a valid configuration of @ref CLSaturatedArithmeticOperationKernel
+     *
+     * @param[in] compile_context The compile context to be used.
+     * @param[in] op              Arithmetic operation to be executed.
+     * @param[in] input1          First tensor input. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/QSYMM16/F16/U32/S32/F32.
+     * @param[in] input2          Second tensor input. Data types supported: Same as @p input1.
+     * @param[in] output          Output tensor. Data types supported: Same as @p input1.
+     * @param[in] policy          Policy to use to handle overflow.
+     * @param[in] act_info        (Optional) Activation layer information in case of a fused activation.
+     */
+    void configure(CLCompileContext &compile_context, ArithmeticOperation op, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const ConvertPolicy &policy,
+                   const ActivationLayerInfo &act_info = ActivationLayerInfo());
 
     /** Static function to check if given info will lead to a valid configuration of @ref CLSaturatedArithmeticOperationKernel
      *
@@ -169,6 +185,17 @@ public:
      * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
      */
     void configure(ArithmeticOperation op, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+    /** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticOperationKernel
+     *
+     * @param[in] compile_context The compile context to be used.
+     * @param[in] op              Arithmetic operation to be executed.
+     * @param[in] input1          First tensor input. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/QSYMM16/F16/U32/S32/F32.
+     * @param[in] input2          Second tensor input. Data types supported: Same as @p input1.
+     * @param[in] output          Output tensor. Data types supported: Same as @p input1.
+     * @param[in] act_info        (Optional) Activation layer information in case of a fused activation.
+     */
+    void configure(CLCompileContext &compile_context, ArithmeticOperation op, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output,
+                   const ActivationLayerInfo &act_info = ActivationLayerInfo());
 
     /** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticOperationKernel
      *
diff --git a/arm_compute/core/CL/kernels/CLErodeKernel.h b/arm_compute/core/CL/kernels/CLErodeKernel.h
index 4fcf70b0e6..8ba6ff8408 100644
--- a/arm_compute/core/CL/kernels/CLErodeKernel.h
+++ b/arm_compute/core/CL/kernels/CLErodeKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -43,6 +43,14 @@ public:
      * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
      */
     void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
+    /**Initialise the kernel's input and output.
+     *
+     * @param[in]  compile_context  The compile context to be used.
+     * @param[in]  input            An input tensor. Data types supported: U8
+     * @param[out] output           The output tensor. Data types supported: U8.
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined);
 
     // Inherited methods overridden:
     BorderSize border_size() const override;
diff --git a/arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h b/arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h
index 42eff76d21..eac03ff868 100644
--- a/arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h
+++ b/arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -57,6 +57,15 @@ public:
      * @param[in]  config Kernel configuration.
      */
     void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *idx, const FFTDigitReverseKernelInfo &config);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data types supported: F32.
+     * @param[out] output          Destination tensor. Data type supported: same as @p input
+     * @param[in]  idx             Digit reverse index tensor. Data type supported: U32
+     * @param[in]  config          Kernel configuration.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *idx, const FFTDigitReverseKernelInfo &config);
     /** Static function to check if given info will lead to a valid configuration of @ref CLFFTDigitReverseKernel
      *
      * @param[in] input  Source tensor info. Data types supported: F32.
diff --git a/arm_compute/core/CL/kernels/CLFFTRadixStageKernel.h b/arm_compute/core/CL/kernels/CLFFTRadixStageKernel.h
index b88ab1af67..85bf4cce66 100644
--- a/arm_compute/core/CL/kernels/CLFFTRadixStageKernel.h
+++ b/arm_compute/core/CL/kernels/CLFFTRadixStageKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -60,6 +60,16 @@ public:
      * @param[in]     config FFT descriptor metadata.
      */
     void configure(ICLTensor *input, ICLTensor *output, const FFTRadixStageKernelInfo &config);
+    /** Set the input and output tensors.
+     *
+     * @note If the output tensor is nullptr, the FFT will be performed in-place
+     *
+     * @param[in]     compile_context The compile context to be used.
+     * @param[in,out] input           Source tensor. Data types supported: F32.
+     * @param[out]    output          Destination tensor. Can be nullptr. Data type supported: same as @p input
+     * @param[in]     config          FFT descriptor metadata.
+     */
+    void configure(CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const FFTRadixStageKernelInfo &config);
     /** Static function to check if given info will lead to a valid configuration of @ref CLFFTRadixStageKernel
      *
      * @param[in] input  Source tensor info. Data types supported: F32.
diff --git a/arm_compute/core/CL/kernels/CLFFTScaleKernel.h b/arm_compute/core/CL/kernels/CLFFTScaleKernel.h
index 3a069fe6c0..cd4fe58b9c 100644
--- a/arm_compute/core/CL/kernels/CLFFTScaleKernel.h
+++ b/arm_compute/core/CL/kernels/CLFFTScaleKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -56,6 +56,14 @@ public:
      * @param[in]     config Kernel configuration
      */
     void configure(ICLTensor *input, ICLTensor *output, const FFTScaleKernelInfo &config);
+    /** Set the input and output tensors.
+     *
+     * @param[in]     compile_context The compile context to be used.
+     * @param[in,out] input           Source tensor. Data types supported: F32.
+     * @param[out]    output          Destination tensor. Data type supported: same as @p input
+     * @param[in]     config          Kernel configuration
+     */
+    void configure(CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const FFTScaleKernelInfo &config);
     /** Static function to check if given info will lead to a valid configuration of @ref CLFFTScaleKernel
      *
      * @param[in] input  Source tensor info. Data types supported: F32.
diff --git a/arm_compute/core/CL/kernels/CLFastCornersKernel.h b/arm_compute/core/CL/kernels/CLFastCornersKernel.h
index 3cca39e007..2a6102036f 100644
--- a/arm_compute/core/CL/kernels/CLFastCornersKernel.h
+++ b/arm_compute/core/CL/kernels/CLFastCornersKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -66,6 +66,16 @@ public:
      * @param[in]  border_mode         Strategy to use for borders.
      */
     void configure(const ICLImage *input, ICLImage *output, float threshold, bool non_max_suppression, BorderMode border_mode);
+    /** Initialise the kernel.
+     *
+     * @param[in]  compile_context     The compile context to be used.
+     * @param[in]  input               Source image. Data types supported: U8.
+     * @param[out] output              Output image. Data types supported: U8.
+     * @param[in]  threshold           Threshold on difference between intensity of the central pixel and pixels on Bresenham's circle of radius 3.
+     * @param[in]  non_max_suppression True if non-maxima suppresion is applied, false otherwise.
+     * @param[in]  border_mode         Strategy to use for borders.
+     */
+    void configure(CLCompileContext &compile_context, const ICLImage *input, ICLImage *output, float threshold, bool non_max_suppression, BorderMode border_mode);
 
     // Inherited methods overridden
     void run(const Window &window, cl::CommandQueue &queue) override;
@@ -101,6 +111,15 @@ public:
      * @param[out] num_buffers   Number of keypoints to store the results.
      */
     void configure(const ICLImage *input, bool update_number, ICLKeyPointArray *corners, cl::Buffer *num_buffers);
+    /** Initialise the kernel.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source image. Data types supported: U8.
+     * @param[in]  update_number   Flag to indicate whether we need to update the number of corners
+     * @param[out] corners         Array of keypoints to store the results.
+     * @param[out] num_buffers     Number of keypoints to store the results.
+     */
+    void configure(CLCompileContext &compile_context, const ICLImage *input, bool update_number, ICLKeyPointArray *corners, cl::Buffer *num_buffers);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/arm_compute/core/CL/kernels/CLFillBorderKernel.h b/arm_compute/core/CL/kernels/CLFillBorderKernel.h
index 36f54c54d4..226b611bcb 100644
--- a/arm_compute/core/CL/kernels/CLFillBorderKernel.h
+++ b/arm_compute/core/CL/kernels/CLFillBorderKernel.h
@@ -57,6 +57,15 @@ public:
      * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
      */
     void configure(ICLTensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue());
+    /** Initialise the kernel's input, output and border mode.
+     *
+     * @param[in]     compile_context       The compile context to be used.
+     * @param[in,out] tensor                Tensor to process Data types supported: U8/QASYMM8/S8/QASYMM8_SIGNED/U16/S16/U32/S32/F16/F32.
+     * @param[in]     border_size           Size of the border to fill in elements.
+     * @param[in]     border_mode           Border mode to use for the convolution.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(CLCompileContext &compile_context, ICLTensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue());
 
     /** Function to set the constant value on fill border kernel depending on type.
      *
diff --git a/arm_compute/core/CL/kernels/CLFlattenLayerKernel.h b/arm_compute/core/CL/kernels/CLFlattenLayerKernel.h
index 1b7fdcc54f..b795e03a34 100644
--- a/arm_compute/core/CL/kernels/CLFlattenLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLFlattenLayerKernel.h
@@ -52,6 +52,15 @@ public:
      *                    w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input
      */
     void configure(const ICLTensor *input, ICLTensor *output);
+    /** Set the input and output of the kernel.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           First input tensor to flatten with at least 3 dimensions.
+     *                             The dimensions above the third will be interpreted as batches. Data types supported: All.
+     * @param[out] output          Output tensor with shape [w*h*d, input_batches] where:
+     *                    w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLFlattenLayerKernel
      *
      * @param[in]  input  First input tensor to flatten with at least 3 dimensions.
diff --git a/arm_compute/core/CL/kernels/CLFuseBatchNormalizationKernel.h b/arm_compute/core/CL/kernels/CLFuseBatchNormalizationKernel.h
index aa60376768..2d62a576bb 100644
--- a/arm_compute/core/CL/kernels/CLFuseBatchNormalizationKernel.h
+++ b/arm_compute/core/CL/kernels/CLFuseBatchNormalizationKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -65,6 +65,25 @@ public:
     void configure(const ICLTensor *input_weights, const ICLTensor *bn_mean, const ICLTensor *bn_var, ICLTensor *fused_weights, ICLTensor *fused_bias,
                    const ICLTensor *input_bias = nullptr, const ICLTensor *bn_beta = nullptr, const ICLTensor *bn_gamma = nullptr,
                    float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION);
+    /** Set the source, destination of the kernel
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input_weights   Input weights tensor for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC
+     * @param[in]  bn_mean         Batch normalization layer mean tensor. Same as @p input_weights
+     * @param[in]  bn_var          Batch normalization layer variance tensor. Same as @p input_weights
+     * @param[out] fused_weights   Output fused weights tensor. It can be a nullptr in case of in-place computation. Same as @p input_weights
+     * @param[out] fused_bias      Output fused bias tensor. It can be a nullptr in case of in-place computation and input_bias != nullptr. Same as @p input_weights
+     * @param[in]  input_bias      (Optional) Input bias tensor for convolution or depthwise convolution layer. It can be a nullptr in case the bias tensor is not required. Same as @p input_weights
+     * @param[in]  bn_beta         (Optional) Batch normalization layer beta tensor. It can be a nullptr in case the beta tensor is not required. Same as @p input_weights
+     *                             @note if nullptr, bn_beta is set to 0.0
+     * @param[in]  bn_gamma        (Optional) Batch normalization layer gamma tensor. It can be a nullptr in case the gamma tensor is not required. Same as @p input_weights
+     *                             @note if nullptr, bn_gamma is set to 1.0
+     * @param[in]  epsilon         (Optional) Batch normalization layer epsilon parameter. Defaults to 0.001f.
+     * @param[in]  fbn_type        (Optional) Fused batch normalization type. Defaults to CONVOLUTION.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input_weights, const ICLTensor *bn_mean, const ICLTensor *bn_var, ICLTensor *fused_weights, ICLTensor *fused_bias,
+                   const ICLTensor *input_bias = nullptr, const ICLTensor *bn_beta = nullptr, const ICLTensor *bn_gamma = nullptr,
+                   float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION);
     /** Static function to check if given info will lead to a valid configuration of @ref CLFuseBatchNormalizationKernel
      *
      * @param[in] input_weights Input weights tensor info for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC
diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h
index a8853d4c0c..e926f5ed36 100644
--- a/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h
+++ b/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -64,6 +64,17 @@ public:
      * @param[in]  gemm_info (Optional) GEMM information used to retrieve the original dimensions of the input matrices
      */
     void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMReshapeInfo &gemm_info = GEMMReshapeInfo());
+    /** Initialise the kernel's input and output.
+     *
+     * @note This kernel should be used ONLY for Midgard architectures
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input0          Input tensor containing the LHS matrix. Data type supported: QASYMM8
+     * @param[in]  input1          Input tensor containing the RHS matrix. Data type supported: same as @p input0
+     * @param[out] output          Output tensor to store the result of matrix multiplication. Data type supported: S32
+     * @param[in]  gemm_info       (Optional) GEMM information used to retrieve the original dimensions of the input matrices
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMReshapeInfo &gemm_info = GEMMReshapeInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixMultiplyKernel
      *
      * @param[in] input0    Input tensor containing the LHS matrix. Data type supported: QASYMM8
diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h
index e1191f265e..d100efdcb7 100644
--- a/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h
+++ b/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h
@@ -58,6 +58,22 @@ public:
      * @param[in]  gemm_info GEMM information used to retrieve the original dimensions of the input matrices
      */
     void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const GEMMReshapeInfo &gemm_info);
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input0          Input tensor containing the LHS matrix. Data type supported: QASYMM8/QASYMM8_SIGNED
+     * @param[in]  input1          Input tensor containing the RHS matrix. Data type supported: same as @p input0
+     * @param[out] output          Output tensor to store the result of matrix multiplication. Data type supported: S32
+     * @param[in]  lhs_info        LHS matrix information used to retrieve the number of rows to be processed by each thread
+     *                             lhs_info.m0: 2,3,4,5,6,7,8
+     *                             lhs_info.k0: 2,3,4,8,16
+     * @param[in]  rhs_info        RHS matrix information used to retrieve the number of columns to be processed by each thread
+     *                             rhs_info.n0: 2,3,4,8,16
+     *                             rhs_info.k0: same as lhs_info.k0
+     * @param[in]  gemm_info       GEMM information used to retrieve the original dimensions of the input matrices
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
+                   const GEMMReshapeInfo &gemm_info);
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixMultiplyNativeKernel
      *
      * @param[in] input0    Input tensor info for the LHS matrix. Data type supported: QASYMM8/QASYMM8_SIGNED
diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h
index 64a98128ce..9e3b198c8c 100644
--- a/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h
+++ b/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h
@@ -65,6 +65,26 @@ public:
      * @note lhs_info.k0 must be equal to rhs_info.k0
      */
     void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const GEMMReshapeInfo &gemm_info);
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input0          Input tensor containing the LHS reshaped matrix. Data type supported: QASYMM8/QASYMM8_SIGNED. The number of dimensions for the LHS matrix must be less or equal than 4.
+     * @param[in]  input1          Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3.
+     * @param[out] output          Output tensor to store the result of matrix multiplication. Data type supported: S32
+     * @param[in]  lhs_info        LHS matrix information used for reshaping the input0 tensor.  Only the following values are supported:
+     *                             lhs_info.m0: 2,3,4,5,6,7,8
+     *                             lhs_info.k0: 2,3,4,8,16
+     *                             lhs_info.transpose: false
+     * @param[in]  rhs_info        RHS matrix information used for reshaping the input1 tensor.  Only the following values are supported:
+     *                             rhs_info.n0: 2,3,4,8,16
+     *                             rhs_info.k0: same as lhs_info.k0
+     *                             rhs_info.transpose: true
+     * @param[in]  gemm_info       GEMM information used to retrieve the original dimensions of the input matrices
+     *
+     * @note lhs_info.k0 must be equal to rhs_info.k0
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
+                   const GEMMReshapeInfo &gemm_info);
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixMultiplyReshapedKernel
      *
      * @param[in] input0    Input tensor info containing the LHS reshaped matrix. Data type supported: QASYMM8/QASYMM8_SIGNED. The number of dimensions for the LHS matrix must be less or equal than 4.
diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h
index 7845d244eb..cc3c5f5186 100644
--- a/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h
+++ b/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h
@@ -75,6 +75,33 @@ public:
      */
     void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMKernelInfo &gemm_info, const ICLTensor *vector_sum_col = nullptr,
                    const ICLTensor *vector_sum_row = nullptr, const ICLTensor *bias = nullptr, const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr);
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  compile_context    The compile context to be used.
+     * @param[in]  input0             Input tensor containing the LHS matrix. Data type supported: QASYMM8/QASYMM8_SIGNED
+     * @param[in]  input1             Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0
+     * @param[out] output             Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/S32.
+     * @param[in]  gemm_info          GEMM information used to retrieve the original dimensions of the input matrices, output stage information and RHS/LHS info.
+     *                                Only the following values are supported for LHS info:
+     *                                lhs_info.m0: 2,3,4,5,6,7,8
+     *                                lhs_info.k0: 2,3,4,8,16
+     *                                Only the following values are supported for RHS info:
+     *                                rhs_info.n0: 2,3,4,8,16
+     *                                rhs_info.k0: same as lhs_info.k0
+     *                                rhs_info.transpose: true
+     * @param[in]  vector_sum_col     (Optional) Input row-vector of sums of all the entries in each column of matrix B.
+     *                                Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: S32
+     * @param[in]  vector_sum_row     (Optional) Input row-vector of sums of all the entries in each row of matrix A.
+     *                                Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: S32
+     * @param[in]  bias               (Optional) Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
+     *                                Biases are 1D tensor with dimensions [OFM]. Data type supported: S32.
+     * @param[in]  output_multipliers (Optional) Output multipliers tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM).
+     *                                Supported data types: S32.
+     * @param[in]  output_shifts      (Optional) Output shifts tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM).
+     *                                Supported data types: S32.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMKernelInfo &gemm_info, const ICLTensor *vector_sum_col = nullptr,
+                   const ICLTensor *vector_sum_row = nullptr, const ICLTensor *bias = nullptr, const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr);
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel
      *
      * @param[in] input0             Input tensor info for the LHS matrix. Data type supported: QASYMM8/QASYMM8_SIGNED
diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h
index f0291044a1..d7266b2805 100644
--- a/arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h
+++ b/arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -70,6 +70,22 @@ public:
      * @param[in]      b_offset       Offset to be added to each element of the matrix B.
      */
     void configure(ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias, int32_t k, int32_t a_offset, int32_t b_offset);
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]      compile_context The compile context to be used.
+     * @param[in, out] mm_result       Input tensor containing the result of @ref CLGEMMLowpMatrixMultiplyKernel. Data type supported: S32
+     * @param[in]      vector_sum_col  Input row-vector of sums of all the entries in each column of matrix B.
+     *                                 Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result
+     * @param[in]      vector_sum_row  Input row-vector of sums of all the entries in each row of matrix A.
+     *                                 Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result
+     * @param[in]      bias            Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
+     *                                 Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+     * @param[in]      k               Number of matrix A columns or Matrix B rows
+     * @param[in]      a_offset        Offset to be added to each element of the matrix A.
+     * @param[in]      b_offset        Offset to be added to each element of the matrix B.
+     */
+    void configure(CLCompileContext &compile_context, ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias, int32_t k, int32_t a_offset,
+                   int32_t b_offset);
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpOffsetContributionKernel
      *
      * @param[in] mm_result      Input tensor containing the result of @ref CLGEMMLowpOffsetContributionKernel. Data type supported: S32
diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h
index 4094bc681e..02ed20e5af 100644
--- a/arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h
+++ b/arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h
@@ -71,6 +71,29 @@ public:
      */
     void configure(const ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias, ICLTensor *output, int32_t k, int32_t a_offset, int32_t b_offset,
                    const GEMMLowpOutputStageInfo &output_stage, const ICLTensor *output_multipliers, const ICLTensor *output_shifts);
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  compile_context    The compile context to be used.
+     * @param[in]  mm_result          Input tensor containing the result of @ref CLGEMMLowpMatrixMultiplyKernel. Data type supported: S32
+     * @param[in]  vector_sum_col     Input row-vector of sums of all the entries in each column of matrix B.
+     *                                Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result
+     * @param[in]  vector_sum_row     Input row-vector of sums of all the entries in each row of matrix A.
+     *                                Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result
+     * @param[in]  bias               Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
+     *                                Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+     * @param[out] output             Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED.
+     * @param[in]  k                  Number of matrix A columns or Matrix B rows
+     * @param[in]  a_offset           Offset to be added to each element of the matrix A.
+     * @param[in]  b_offset           Offset to be added to each element of the matrix B.
+     * @param[in]  output_stage       GEMMLowp output stage info
+     * @param[in]  output_multipliers Output multipliers tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM).
+     *                                Supported data types: S32
+     * @param[in]  output_shifts      Output shifts tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM).
+     *                                Supported data types: S32
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias, ICLTensor *output, int32_t k,
+                   int32_t a_offset, int32_t b_offset,
+                   const GEMMLowpOutputStageInfo &output_stage, const ICLTensor *output_multipliers, const ICLTensor *output_shifts);
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpOffsetContributionKernel
      *
      * @param[in] mm_result          Input tensor containing the result of @ref CLGEMMLowpOffsetContributionKernel. Data type supported: S32
diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h
index 439f569d07..0b5b22cafc 100644
--- a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h
+++ b/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h
@@ -67,6 +67,16 @@ public:
      * @param[in]  info   Output stage info. Used to pass the quantized output data type
      */
     void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo *info);
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Input tensor. Data type supported: S32
+     * @param[in]  bias            Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
+     *                             Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+     * @param[out] output          Output tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED
+     * @param[in]  info            Output stage info. Used to pass the quantized output data type
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo *info);
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel
      *
      * @param[in] input  Input tensor. Data type supported: S32
diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h
index 3378359d29..0d7d1c3390 100644
--- a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h
+++ b/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h
@@ -67,6 +67,16 @@ public:
      * @param[in]  output_stage GEMMLowp output stage metadata.
      */
     void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo *output_stage);
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Input tensor. Data type supported: S32
+     * @param[in]  bias            Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
+     *                             Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+     * @param[out] output          Output tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED
+     * @param[in]  output_stage    GEMMLowp output stage metadata.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo *output_stage);
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ScaleKernel
      *
      * @param[in] input        Input tensor. Data type supported: S32
diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h
index 72ca4c5455..2845d9259e 100644
--- a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h
+++ b/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -68,6 +68,20 @@ public:
      *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to 0.
      */
     void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int min = 0, int max = 0);
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  compile_context              The compile context to be used.
+     * @param[in]  input                        Input tensor. Data type supported: S32
+     * @param[in]  bias                         Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
+     *                                          Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+     * @param[out] output                       Output tensor. Data type supported: Data type supported: QSYMM16
+     * @param[in]  result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix when once the result_offset has been add
+     * @param[in]  result_shift                 Integer value used to round to nearest division by a power-of-two the result after the fixed point multiplication
+     * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to 0.
+     * @param[in]  max                          (Optional) Max value used to saturate up the output result before converting back to QSYMM16.
+     *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to 0.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int min = 0, int max = 0);
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel
      *
      * @param[in] input  Input tensor info. Data type supported: S32
diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h
index 22ac8fae4a..a768b6fba0 100644
--- a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h
+++ b/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -70,6 +70,22 @@ public:
      */
     void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift,
                    int min = 0, int max = 0);
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  compile_context              The compile context to be used.
+     * @param[in]  input                        Input tensor. Data type supported: S32
+     * @param[in]  bias                         Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
+     *                                          Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+     * @param[out] output                       Output tensor. Data type supported: Data type supported: QASYMM8_SIGNED
+     * @param[in]  result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix when once the result_offset has been add
+     * @param[in]  result_shift                 Integer value used to round to nearest division by a power-of-two the result after the fixed point multiplication
+     * @param[in]  result_offset_after_shift    Offset to be applied to result before converting it back to QASYMM8_SIGNED
+     * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. Defaults to 0
+     * @param[in]  max                          (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED. Defaults to 0
+     *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift,
+                   int min = 0, int max = 0);
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel
      *
      * @param[in] input  Input tensor. Data type supported: S32
diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h
index e8066e0c3e..e319c32c78 100644
--- a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h
+++ b/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -70,6 +70,22 @@ public:
      */
     void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift,
                    int min = 0, int max = 0);
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  compile_context              The compile context to be used.
+     * @param[in]  input                        Input tensor. Data type supported: S32
+     * @param[in]  bias                         Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
+     *                                          Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+     * @param[out] output                       Output tensor. Data type supported: Data type supported: QASYMM8
+     * @param[in]  result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix when once the result_offset has been add
+     * @param[in]  result_shift                 Integer value used to round to nearest division by a power-of-two the result after the fixed point multiplication
+     * @param[in]  result_offset_after_shift    Offset to be applied to result before converting it back to QASYMM8
+     * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QASYMM8
+     * @param[in]  max                          (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
+     *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift,
+                   int min = 0, int max = 0);
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel
      *
      * @param[in] input  Input tensor. Data type supported: S32
diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h
index 71681cf628..4b610fa6d0 100644
--- a/arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h
+++ b/arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h
@@ -57,6 +57,18 @@ public:
      *                    - mul_byscalar True if each reduced column/row must be multiplied by a scalar value.
      */
     virtual void configure(const ICLTensor *input, ICLTensor *output, const GEMMLowpReductionKernelInfo &info) = 0;
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Input tensor. Data type supported: S8
+     * @param[out] output          Output row-vector of sums of all the entries in each row/col of input tensor. Data type supported: S32
+     * @param[in]  info            Kernel metadata:
+     *                             - k            Number of matrix columns/rows depending on the type of reduction.
+     *                             - is_reshaped  True if the matrix has been reshaped.
+     *                             - scalar       Scalar value to multiply each reduced column/row by.
+     *                             - mul_byscalar True if each reduced column/row must be multiplied by a scalar value.
+     */
+    virtual void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const GEMMLowpReductionKernelInfo &info) = 0;
 
 protected:
     const ICLTensor *_input;
@@ -82,6 +94,18 @@ public:
      *                            - mul_byscalar True if each reduced column/row must be multiplied by a scalar value.
      */
     void configure(const ICLTensor *mtx_a, ICLTensor *vector_sum_row, const GEMMLowpReductionKernelInfo &info) override;
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  mtx_a           Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED
+     * @param[out] vector_sum_row  Output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32
+     * @param[in]  info            Kernel metadata:
+     *                             - k            Number of matrix columns/rows depending on the type of reduction.
+     *                             - is_reshaped  True if the matrix has been reshaped.
+     *                             - scalar       Scalar value to multiply each reduced column/row by.
+     *                             - mul_byscalar True if each reduced column/row must be multiplied by a scalar value.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *mtx_a, ICLTensor *vector_sum_row, const GEMMLowpReductionKernelInfo &info) override;
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixAReductionKernel
      *
      * @param[in] mtx_a          Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED
@@ -119,6 +143,18 @@ public:
      *                            - mul_byscalar True if each reduced column/row must be multiplied by a scalar value.
      */
     void configure(const ICLTensor *mtx_b, ICLTensor *vector_sum_col, const GEMMLowpReductionKernelInfo &info) override;
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  mtx_b           Input tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED
+     * @param[out] vector_sum_col  Output row-vector of sums of all the entries in each column of mtx_b. Data type supported: S32
+     * @param[in]  info            Kernel metadata:
+     *                             - k            Number of matrix columns/rows depending on the type of reduction.
+     *                             - is_reshaped  True if the matrix has been reshaped.
+     *                             - scalar       Scalar value to multiply each reduced column/row by.
+     *                             - mul_byscalar True if each reduced column/row must be multiplied by a scalar value.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *mtx_b, ICLTensor *vector_sum_col, const GEMMLowpReductionKernelInfo &info) override;
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixBReductionKernel
      *
      * @param[in] mtx_b          Input tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED
diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h
index bff419cd35..037ec4d116 100644
--- a/arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h
+++ b/arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -50,6 +50,13 @@ public:
      * @param[in]      biases The shared biases tensor to append. It must be 1D tensor. Data types supported: Same as @p input
      */
     void configure(ICLTensor *accum, const ICLTensor *biases);
+    /** Set the accumulate buffer and the biases of the kernel.
+     *
+     * @param[in]      compile_context The compile context to be used.
+     * @param[in, out] accum           The accumulate tensor to convert. Data types supported: F16/F32
+     * @param[in]      biases          The shared biases tensor to append. It must be 1D tensor. Data types supported: Same as @p input
+     */
+    void configure(CLCompileContext &compile_context, ICLTensor *accum, const ICLTensor *biases);
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixAccumulateBiasesKernel
      *
      * @param[in] accum      The accumulate tensor to convert. Data types supported: F16/F32
diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h
index 82c4091c6e..fe34735fe4 100644
--- a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h
+++ b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -68,6 +68,23 @@ public:
      */
     void configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta = 0.f,
                    bool is_interleaved_transposed = true, const GEMMReshapeInfo &reshape_info = GEMMReshapeInfo(), bool fp_mixed_precision = false, const ActivationLayerInfo &activation_info = ActivationLayerInfo());
+    /** Initialise the kernel's input, output and alpha
+     *
+     * @param[in]  compile_context           The compile context to be used.
+     * @param[in]  input0                    Input tensor containing the Matrix A. Data types supported: F16/F32
+     * @param[in]  input1                    Input tensor containing the Matrix B. Data type supported: same as @p input0
+     * @param[in]  input2                    Input tensor containing the Matrix C (bias). Can be nullptr. Data type supported: same as @p input0
+     * @param[out] output                    Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0
+     * @param[in]  alpha                     Weight of the matrix product
+     * @param[in]  beta                      (Optional) Weight of vector C. Default value is 0. Only beta = 1 is currently supported.
+     * @param[in]  is_interleaved_transposed (Optional) True if input0 and input1 have been reshaped respectively using @ref CLGEMMReshapeLHSMatrixKernel and @ref CLGEMMReshapeRHSMatrixKernel
+     * @param[in]  reshape_info              (Optional) GEMM reshape info. If is_interleaved_transposed = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped
+     * @param[in]  fp_mixed_precision        (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy
+     * @param[in]  activation_info           (Optional) Activation to apply after the matrix multiplication
+     *
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta = 0.f,
+                   bool is_interleaved_transposed = true, const GEMMReshapeInfo &reshape_info = GEMMReshapeInfo(), bool fp_mixed_precision = false, const ActivationLayerInfo &activation_info = ActivationLayerInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixMultiplyKernel
      *
      * @param[in] input0                    Input tensor containing the Matrix A info. Data types supported: F16/F32
diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h
index 9bac8c9716..370ef8b3c8 100644
--- a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h
+++ b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -65,6 +65,27 @@ public:
     void configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info,
                    const GEMMRHSMatrixInfo &rhs_info,
                    const GEMMKernelInfo    &gemm_info);
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input0          Input tensor for the LHS matrix. Data type supported: F32. The number of dimensions for the LHS matrix must be less or equal than 4.
+     * @param[in]  input1          Input tensor for the RHS matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3.
+     * @param[in]  input2          Input tensor containing the bias matrix. Data type supported: same as @p input0.
+     * @param[out] output          Output tensor info. Data type supported: same as @p input0
+     * @param[in]  alpha           Weight of the matrix product
+     * @param[in]  beta            Weight of the matrix bias
+     * @param[in]  lhs_info        LHS matrix information used to retrieve the number of rows and accumulations to be processed by each thread. Only the following values are supported:
+     *                             lhs_info.m0: 1,2,3,4,5,6,7,8
+     *                             lhs_info.k0: 2,3,4,8,16
+     * @param[in]  rhs_info        RHS matrix information used to retrieve the number of columns and accumulations to be processed by each thread. Only the following values are supported:
+     *                             rhs_info.n0: 2,3,4,8,16
+     *                             rhs_info.k0: same of lhs_info.k0
+     * @param[in]  gemm_info       GEMM information used to retrieve the original dimensions of the input matrices
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta,
+                   const GEMMLHSMatrixInfo &lhs_info,
+                   const GEMMRHSMatrixInfo &rhs_info,
+                   const GEMMKernelInfo    &gemm_info);
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixMultiplyNativeKernel
      *
      * @param[in] input0    Input tensor info for the LHS matrix. Data type supported: F32. The number of dimensions for the LHS matrix must be less or equal than 4.
diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h
index 449c333143..45df67673c 100644
--- a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h
+++ b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -76,6 +76,35 @@ public:
     void configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info,
                    const GEMMRHSMatrixInfo &rhs_info,
                    const GEMMKernelInfo    &gemm_info);
+    /** Initialise the kernel's input and output.
+     *
+     * @note The F16 computation also supports mixed precision through the gemm_info.fp_mixed_precision flag.
+     *       Mixed precision combines different floating precisions during the computation, in particular, F32 for the accumulations and F16 for the
+     *       multiplications. i.e. float c = (half)a * (half)b
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input0          Input tensor containing the LHS reshaped matrix. Data type supported: F16/F32. The number of dimensions for the LHS matrix must be less or equal than 4
+     * @param[in]  input1          Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3
+     * @param[in]  input2          Input tensor containing the bias matrix. Data type supported: same as @p input0.
+     * @param[out] output          Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0
+     * @param[in]  alpha           Weight of the matrix product
+     * @param[in]  beta            Weight of the matrix bias
+     * @param[in]  lhs_info        LHS matrix information used for reshaping the input0 tensor.  Only the following values are supported:
+     *                             lhs_info.m0: 2,3,4,5,6,7,8
+     *                             lhs_info.k0: 2,3,4,8,16
+     *                             lhs_info.transpose: false
+     * @param[in]  rhs_info        RHS matrix information used for reshaping the input1 tensor.  Only the following values are supported:
+     *                             rhs_info.n0: 2,3,4,8,16
+     *                             rhs_info.k0: 2,3,4,8,16
+     *                             rhs_info.transpose: true
+     * @param[in]  gemm_info       GEMM information used to retrieve the original dimensions of the input matrices
+     *
+     * @note lhs_info.k0 must be equal to rhs_info.k0
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta,
+                   const GEMMLHSMatrixInfo &lhs_info,
+                   const GEMMRHSMatrixInfo &rhs_info,
+                   const GEMMKernelInfo    &gemm_info);
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixMultiplyReshapedKernel
      *
      * @param[in] input0    Input tensor containing the LHS reshaped matrix. Data type supported: F16/F32. The number of dimensions for the LHS matrix must be less or equal than 4
diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h
index b91b7ee4bd..b6285dd4db 100644
--- a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h
+++ b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -68,6 +68,27 @@ public:
     void configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info,
                    const GEMMRHSMatrixInfo &rhs_info,
                    const GEMMKernelInfo    &gemm_info);
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input0          Input tensor containing the LHS matrix. Data type supported: F16/F32. The number of dimensions for the LHS matrix must be less or equal than 4.
+     * @param[in]  input1          Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3.
+     * @param[in]  input2          Input tensor containing the bias matrix. Data type supported: same as @p input0.
+     * @param[out] output          Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0
+     * @param[in]  alpha           Weight of the matrix product
+     * @param[in]  beta            Weight of the matrix bias
+     * @param[in]  lhs_info        LHS matrix information used to retrieve the number of rows to be processed by each thread. Only the following values are supported:
+     *                             lhs_info.m0: 1,2,3,4,5,6,7,8
+     * @param[in]  rhs_info        RHS matrix information used for reshaping the input1 tensor.  Only the following values are supported:
+     *                             rhs_info.k0: 2,3,4,8,16
+     *                             rhs_info.n0: 2,3,4,8,16
+     *                             rhs_info.transpose: true,false
+     * @param[in]  gemm_info       GEMM information used to retrieve the original dimensions of the input matrices
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta,
+                   const GEMMLHSMatrixInfo &lhs_info,
+                   const GEMMRHSMatrixInfo &rhs_info,
+                   const GEMMKernelInfo    &gemm_info);
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixMultiplyReshapedOnlyRHSKernel
      *
      * @param[in] input0    Input tensor info for the LHS matrix. Data type supported: F16/F32. The number of dimensions for the LHS matrix must be less or equal than 4.
diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h
index 8ee911dc0e..f31c5c2280 100644
--- a/arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h
+++ b/arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h
@@ -51,6 +51,14 @@ public:
      * @param[out] output The output 2D tensor. Data types supported: Same as @p input, S32 for QASYMM8/QASYMM8_SIGNED.
      */
     void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output);
+    /** Set the input and output of the kernel.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input0          The reshaped input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
+     * @param[in]  input1          The 2D reshaped weights tensor. Data type supported: Same as @p input.
+     * @param[out] output          The output 2D tensor. Data types supported: Same as @p input, S32 for QASYMM8/QASYMM8_SIGNED.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixVectorMultiplyKernel
      *
      * @param[in] input0 The reshaped input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
diff --git a/arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h b/arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h
index 7955b95b9a..e8e02ac281 100644
--- a/arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h
+++ b/arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h
@@ -61,6 +61,21 @@ public:
      * @param[in]  reinterpret_input_as_3d (Optional) True if the input has to be reinterpreted as 3D tensor
      */
     void configure(const ICLTensor *input, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, bool reinterpret_input_as_3d = false);
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  compile_context         The compile context to be used.
+     * @param[in]  input                   Input tensor. Data types supported: All
+     * @param[out] output                  Output tensor. Data type supported: same as @p input
+     * @param[in]  lhs_info                LHS matrix information to be used for reshaping. This object contains all the necessary
+     *                                     information to reshape the input tensor. Only the following values are supported:
+     *                                     lhs_info.m0: 2,3,4,5,6,7,8
+     *                                     lhs_info.k0: 2,3,4,8,16
+     *                                     lhs_info.v0: greater than 0
+     *                                     lhs_info.transpose: true, false
+     *                                     lhs_info.interleave: true, false
+     * @param[in]  reinterpret_input_as_3d (Optional) True if the input has to be reinterpreted as 3D tensor
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, bool reinterpret_input_as_3d = false);
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMReshapeLHSMatrixKernel
      *
      * @param[in] input                   Input tensor info. Data types supported: All
diff --git a/arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h b/arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h
index 19acd1f0e0..ada8889ac0 100644
--- a/arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h
+++ b/arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -59,6 +59,20 @@ public:
      *                      rhs_info.interleave: true, false
      */
     void configure(const ICLTensor *input, ICLTensor *output, const GEMMRHSMatrixInfo &rhs_info);
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Input tensor. Data types supported: All
+     * @param[out] output          Output tensor. Data type supported: same as @p input
+     * @param[in]  rhs_info        RHS matrix information to be used for reshaping. This object contains all the necessary
+     *                             information to reshape the input tensor. Only the following values are supported:
+     *                             rhs_info.n0: 2,3,4,8,16
+     *                             rhs_info.k0: 1,2,3,4,8,16 (k0 = 1 only if rhs_info.transpose = false)
+     *                             rhs_info.h0: greater than 0
+     *                             rhs_info.transpose: true, false
+     *                             rhs_info.interleave: true, false
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const GEMMRHSMatrixInfo &rhs_info);
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMReshapeRHSMatrixKernel
      *
      * @param[in] input    Input tensor info. Data types supported: All
diff --git a/arm_compute/core/CL/kernels/CLGatherKernel.h b/arm_compute/core/CL/kernels/CLGatherKernel.h
index 937d744108..c91b95de89 100644
--- a/arm_compute/core/CL/kernels/CLGatherKernel.h
+++ b/arm_compute/core/CL/kernels/CLGatherKernel.h
@@ -55,6 +55,15 @@ public:
      * @param[in]  axis    (Optional) The axis in @p input to gather @p indices from. Negative values wrap around. Defaults to 0
      */
     void configure(const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis = 0);
+    /** Initialise the kernel's inputs and outputs
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Supported tensor rank: up to 4. Data type supported: All.
+     * @param[in]  indices         Indices tensor. Supported tensor rank: up to 1. Must be one of the following types: U32/S32. Each value must be in range [0, input.shape[@p axis])
+     * @param[out] output          Destination tensor. Data type supported: Same as @p input
+     * @param[in]  axis            (Optional) The axis in @p input to gather @p indices from. Negative values wrap around. Defaults to 0
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis = 0);
 
     /** Static function to check if given info will lead to a valid configuration of @ref CLGatherKernel
      *
diff --git a/arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h b/arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h
index f377c527d0..7eb7f7ae89 100644
--- a/arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h
+++ b/arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -43,6 +43,14 @@ public:
      * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
      */
     void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  compile_context  The compile context to be used.
+     * @param[in]  input            An input tensor. Data types supported: U8
+     * @param[out] output           The output tensor. Data types supported: U8.
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined);
 
     // Inherited methods overridden:
     BorderSize border_size() const override;
diff --git a/arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h b/arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h
index 4d0402de66..37a7727d7a 100644
--- a/arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h
+++ b/arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -41,6 +41,14 @@ public:
      * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
      */
     void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
+    /** Initialise the kernel's source, destination and border.
+     *
+     * @param[in]  compile_context  The compile context to be used.
+     * @param[in]  input            Source tensor. Data types supported: U8.
+     * @param[out] output           Destination tensor. Data types supported: S16.
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined);
 
 private:
     //Make the configure method of the parent class private
@@ -58,6 +66,14 @@ public:
      * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
      */
     void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
+    /** Initialise the kernel's source, destination and border.
+     *
+     * @param[in]  compile_context  The compile context to be used.
+     * @param[in]  input            Input tensor(output of horizontal pass). Data types supported: S16.
+     * @param[out] output           Destination tensor. Data types supported: U8.
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined);
 
 private:
     //Make the configure method of the parent class private
diff --git a/arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h b/arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h
index a3623f1a84..5acd7fd9b6 100644
--- a/arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h
+++ b/arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -53,6 +53,13 @@ public:
      * @param[out] output Destination tensor. Output should have half the input width. Data types supported: U16.
      */
     void configure(const ICLTensor *input, ICLTensor *output);
+    /** Initialise the kernel's source, destination and border mode.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data types supported: U8.
+     * @param[out] output          Destination tensor. Output should have half the input width. Data types supported: U16.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
@@ -85,6 +92,13 @@ public:
      * @param[out] output Destination tensor. Output should have half the input height. Data types supported: U8.
      */
     void configure(const ICLTensor *input, ICLTensor *output);
+    /** Initialise the kernel's source, destination and border mode.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data types supported: U16.
+     * @param[out] output          Destination tensor. Output should have half the input height. Data types supported: U8.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/arm_compute/core/CL/kernels/CLGenerateProposalsLayerKernel.h b/arm_compute/core/CL/kernels/CLGenerateProposalsLayerKernel.h
index abd39a4b5d..abac4b74fe 100644
--- a/arm_compute/core/CL/kernels/CLGenerateProposalsLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLGenerateProposalsLayerKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -54,6 +54,15 @@ public:
      *
      */
     void configure(const ICLTensor *anchors, ICLTensor *all_anchors, const ComputeAnchorsInfo &info);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  anchors         Source tensor. Original set of anchors of size (4, A), where A is the number of anchors. Data types supported: QSYMM16/F16/F32
+     * @param[out] all_anchors     Destination tensor. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p input
+     * @param[in]  info            Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo
+     *
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *anchors, ICLTensor *all_anchors, const ComputeAnchorsInfo &info);
 
     /** Static function to check if given info will lead to a valid configuration of @ref CLComputeAllAnchorsKernel
      *
diff --git a/arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h b/arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h
index fb2019a176..1b1610e328 100644
--- a/arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h
+++ b/arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -57,6 +57,15 @@ public:
      * @param[in]  hog_info        HOG's metadata
      */
     void configure(const ICLTensor *input_magnitude, const ICLTensor *input_phase, ICLTensor *output, const HOGInfo *hog_info);
+    /**  Initialise the kernel's inputs, output and HOG's metadata
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input_magnitude Input tensor which stores the magnitude of the gradient for each pixel. Data type supported: S16.
+     * @param[in]  input_phase     Input tensor which stores the phase of the gradient for each pixel. Data type supported: U8
+     * @param[out] output          Output tensor which stores the local HOG for each cell. DataType supported: F32. Number of channels supported: equal to the number of histogram bins per cell
+     * @param[in]  hog_info        HOG's metadata
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input_magnitude, const ICLTensor *input_phase, ICLTensor *output, const HOGInfo *hog_info);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
@@ -92,6 +101,14 @@ public:
      * @param[in]  hog_info HOG's metadata
      */
     void configure(const ICLTensor *input, ICLTensor *output, const HOGInfo *hog_info);
+    /** Initialise the kernel's input, output and HOG's metadata
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Input tensor which stores the local HOG for each cell. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per cell
+     * @param[out] output          Output tensor which stores the normalised blocks. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block
+     * @param[in]  hog_info        HOG's metadata
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const HOGInfo *hog_info);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/arm_compute/core/CL/kernels/CLHOGDetectorKernel.h b/arm_compute/core/CL/kernels/CLHOGDetectorKernel.h
index 3018c56b65..8a326429a2 100644
--- a/arm_compute/core/CL/kernels/CLHOGDetectorKernel.h
+++ b/arm_compute/core/CL/kernels/CLHOGDetectorKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -68,6 +68,21 @@ public:
      */
     void configure(const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, cl::Buffer *num_detection_windows, const Size2D &detection_window_stride, float threshold = 0.0f,
                    uint16_t idx_class = 0);
+    /** Initialise the kernel's input, HOG data-object, detection window, the stride of the detection window, the threshold and index of the object to detect
+     *
+     * @param[in]  compile_context         The compile context to be used.
+     * @param[in]  input                   Input tensor which stores the HOG descriptor obtained with @ref CLHOGOrientationBinningKernel. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block
+     * @param[in]  hog                     HOG data object used by @ref CLHOGOrientationBinningKernel and  @ref CLHOGBlockNormalizationKernel
+     * @param[out] detection_windows       Array of @ref DetectionWindow. This array stores all the detected objects
+     * @param[in]  num_detection_windows   Number of detected objects
+     * @param[in]  detection_window_stride Distance in pixels between 2 consecutive detection windows in x and y directions.
+     *                                     It must be multiple of the hog->info()->block_stride()
+     * @param[in]  threshold               (Optional) Threshold for the distance between features and SVM classifying plane
+     * @param[in]  idx_class               (Optional) Index of the class used for evaluating which class the detection window belongs to
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, cl::Buffer *num_detection_windows,
+                   const Size2D &detection_window_stride, float threshold = 0.0f,
+                   uint16_t idx_class = 0);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue);
diff --git a/arm_compute/core/CL/kernels/CLHarrisCornersKernel.h b/arm_compute/core/CL/kernels/CLHarrisCornersKernel.h
index 3591afdf96..ed91aafb5e 100644
--- a/arm_compute/core/CL/kernels/CLHarrisCornersKernel.h
+++ b/arm_compute/core/CL/kernels/CLHarrisCornersKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -67,6 +67,21 @@ public:
     void configure(const ICLImage *input1, const ICLImage *input2, ICLImage *output,
                    int32_t block_size, float norm_factor, float strength_thresh, float sensitivity,
                    bool border_undefined);
+    /** Setup the kernel parameters
+     *
+     * @param[in]  compile_context  The compile context to be used.
+     * @param[in]  input1           Source image (gradient X). Data types supported S16, S32. (Must be the same as input2)
+     * @param[in]  input2           Source image (gradient Y). Data types supported S16, S32. (Must be the same as input1)
+     * @param[out] output           Destination image (harris score). Data types supported F32
+     * @param[in]  block_size       The block window size used to compute the Harris Corner score.  Supports: 3, 5 and 7
+     * @param[in]  norm_factor      Normalization factor to use accordingly with the gradient size (Must be different from 0)
+     * @param[in]  strength_thresh  Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel).
+     * @param[in]  sensitivity      Sensitivity threshold k from the Harris-Stephens equation.
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
+     */
+    void configure(CLCompileContext &compile_context, const ICLImage *input1, const ICLImage *input2, ICLImage *output,
+                   int32_t block_size, float norm_factor, float strength_thresh, float sensitivity,
+                   bool border_undefined);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h b/arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h
index 5828280dc1..b9589593fa 100644
--- a/arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h
@@ -58,6 +58,15 @@ public:
      *
      */
     void configure(const ICLTensor *input, unsigned int height_offset, ICLTensor *output);
+    /** Initialise the kernel's inputs and output
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Input tensor. Data types supported: All.
+     * @param[in]  height_offset   The starting offset on the Y axis for the output tensor.
+     * @param[out] output          Output tensor. Data types supported: Same as @p input.
+     *
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, unsigned int height_offset, ICLTensor *output);
     /**  Static function to check if given info will lead to a valid configuration of @ref CLHeightConcatenateLayerKernel
      *
      * @param[in] input         Input tensor info. Data types supported: All.
diff --git a/arm_compute/core/CL/kernels/CLHistogramKernel.h b/arm_compute/core/CL/kernels/CLHistogramKernel.h
index bb1b773802..bb0d0b3c3c 100644
--- a/arm_compute/core/CL/kernels/CLHistogramKernel.h
+++ b/arm_compute/core/CL/kernels/CLHistogramKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -54,6 +54,13 @@ public:
      * @param[out] output Destination distribution.
      */
     void configure(const ICLImage *input, ICLDistribution1D *output);
+    /** Initialise the kernel's input, output and border mode.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source image. Data types supported: U8.
+     * @param[out] output          Destination distribution.
+     */
+    void configure(CLCompileContext &compile_context, const ICLImage *input, ICLDistribution1D *output);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
@@ -85,6 +92,13 @@ public:
      * @param[out] output Destination distribution.
      */
     void configure(const ICLImage *input, ICLDistribution1D *output);
+    /** Initialise the kernel's input, output and border mode.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source image. Data types supported: U8.
+     * @param[out] output          Destination distribution.
+     */
+    void configure(CLCompileContext &compile_context, const ICLImage *input, ICLDistribution1D *output);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/arm_compute/core/CL/kernels/CLIm2ColKernel.h b/arm_compute/core/CL/kernels/CLIm2ColKernel.h
index 95675c8352..dddbf8d9dd 100644
--- a/arm_compute/core/CL/kernels/CLIm2ColKernel.h
+++ b/arm_compute/core/CL/kernels/CLIm2ColKernel.h
@@ -80,6 +80,22 @@ public:
      */
     void configure(const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation = Size2D(1U, 1U),
                    unsigned int num_groups = 1);
+    /** Set the input and output of the kernel.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
+     *                             while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
+     * @param[out] output          The output tensor. First 2 lower dimensions represent a transform of each 3D input,
+     *                             while every dimension above represents a batch. Data types supported: Same as @p input
+     * @param[in]  kernel_dims     The kernel dimensions (width and height).
+     * @param[in]  conv_info       Contains padding and stride information described in @ref PadStrideInfo.
+     * @param[in]  has_bias        In case biases are provided expands the matrix with 1.
+     * @param[in]  dilation        (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+     * @param[in]  num_groups      (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias,
+                   const Size2D &dilation   = Size2D(1U, 1U),
+                   unsigned int  num_groups = 1);
     /** Static function to check if given info will lead to a valid configuration of @ref CLIm2ColKernel
      *
      * @param[in] input       The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
diff --git a/arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernel.h b/arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernel.h
index 9982cc2f1c..93490d8e12 100644
--- a/arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernel.h
@@ -58,6 +58,15 @@ public:
      * @param[in]      info   Kernel meta-data descriptor
      */
     void configure(ICLTensor *input, ICLTensor *output, const InstanceNormalizationLayerKernelInfo &info);
+    /** Set the input and output tensors.
+     *
+     * @param[in]      compile_context The compile context to be used.
+     * @param[in, out] input           Source tensor. Data types supported: F16/F32. Data layout supported: NCHW, NHWC
+     *                                 In case of @p output tensor = nullptr this tensor will store the result of the normalization.
+     * @param[out]     output          Destination tensor. Data types and data layouts supported: same as @p input.
+     * @param[in]      info            Kernel meta-data descriptor
+     */
+    void configure(CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const InstanceNormalizationLayerKernelInfo &info);
 
     /** Static function to check if given info will lead to a valid configuration of @ref CLInstanceNormalizationLayer.
      *
diff --git a/arm_compute/core/CL/kernels/CLIntegralImageKernel.h b/arm_compute/core/CL/kernels/CLIntegralImageKernel.h
index 42b0be33c5..8e06887dba 100644
--- a/arm_compute/core/CL/kernels/CLIntegralImageKernel.h
+++ b/arm_compute/core/CL/kernels/CLIntegralImageKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -41,6 +41,13 @@ public:
      * @param[out] output Destination tensor, Data types supported: U32.
      */
     void configure(const ICLTensor *input, ICLTensor *output);
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           An input tensor. Data types supported: U8
+     * @param[out] output          Destination tensor, Data types supported: U32.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
 };
 
 /** Interface to run the vertical pass of the integral image kernel. */
@@ -62,6 +69,12 @@ public:
      * @param[in,out] in_out The input/output tensor. Data types supported: U32
      */
     void configure(ICLTensor *in_out);
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]     compile_context The compile context to be used.
+     * @param[in,out] in_out          The input/output tensor. Data types supported: U32
+     */
+    void configure(CLCompileContext &compile_context, ICLTensor *in_out);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h b/arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h
index b6f1be1995..e4b7af7984 100644
--- a/arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -59,6 +59,18 @@ public:
      * @param[in]  epsilon Lower bound value for the normalization.
      */
     void configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, int axis, float epsilon);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC.
+     * @param[in]  sum             Sum values tensor. Data types supported: same as @p input.
+     *                             Sum will have the same number of dimensions as input.
+     * @param[out] output          Destination tensor. Data types and data layouts supported: Same as @p input.
+     *                             Output will have the same number of dimensions as input.
+     * @param[in]  axis            Axis along which to reduce. Negative values wrap around. Maximum supported actual reduction axis : 2
+     * @param[in]  epsilon         Lower bound value for the normalization.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, int axis, float epsilon);
 
     /** Static function to check if given info will lead to a valid configuration of @ref CLL2NormalizeLayerKernel.
      *
diff --git a/arm_compute/core/CL/kernels/CLLKTrackerKernel.h b/arm_compute/core/CL/kernels/CLLKTrackerKernel.h
index 1f24894aca..3e938c9658 100644
--- a/arm_compute/core/CL/kernels/CLLKTrackerKernel.h
+++ b/arm_compute/core/CL/kernels/CLLKTrackerKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -87,6 +87,21 @@ public:
     void configure(const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates,
                    ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal,
                    bool use_initial_estimate, size_t level, size_t num_levels, float pyramid_scale);
+    /** Initialise the kernel input and output
+     *
+     * @param[in]  compile_context      The compile context to be used.
+     * @param[in]  old_points           Pointer to the @ref ICLKeyPointArray storing old key points
+     * @param[in]  new_points_estimates Pointer to the @ref ICLKeyPointArray storing new estimates key points
+     * @param[out] old_points_internal  Pointer to the array of internal @ref CLLKInternalKeypoint old points
+     * @param[out] new_points_internal  Pointer to the array of internal @ref CLLKInternalKeypoint new points
+     * @param[in]  use_initial_estimate The flag to indicate whether the initial estimated position should be used
+     * @param[in]  level                The pyramid level
+     * @param[in]  num_levels           The number of pyramid levels
+     * @param[in]  pyramid_scale        Scale factor used for generating the pyramid
+     */
+    void configure(CLCompileContext &compile_context, const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates,
+                   ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal,
+                   bool use_initial_estimate, size_t level, size_t num_levels, float pyramid_scale);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
@@ -102,6 +117,13 @@ public:
      * @param[out] new_points          Pointer to the @ref ICLKeyPointArray storing new key points
      */
     void configure(ICLLKInternalKeypointArray *new_points_internal, ICLKeyPointArray *new_points);
+    /** Initialise the kernel input and output
+     *
+     * @param[in]  compile_context     The compile context to be used.
+     * @param[in]  new_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint new points
+     * @param[out] new_points          Pointer to the @ref ICLKeyPointArray storing new key points
+     */
+    void configure(CLCompileContext &compile_context, ICLLKInternalKeypointArray *new_points_internal, ICLKeyPointArray *new_points);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
@@ -137,6 +159,23 @@ public:
                    ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal,
                    ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival,
                    size_t window_dimension, size_t level);
+    /** Initialise the kernel input and output
+     *
+     * @param[in]      compile_context     The compile context to be used.
+     * @param[in]      old_input           Pointer to the input old tensor. Data types supported: U8
+     * @param[in]      old_scharr_gx       Pointer to the input scharr X tensor. Data types supported: S16
+     * @param[in]      old_scharr_gy       Pointer to the input scharr Y tensor. Data types supported: S16
+     * @param[in]      old_points_internal Pointer to the array of CLLKInternalKeypoint old points
+     * @param[in, out] new_points_internal Pointer to the array of CLLKInternalKeypoint new points
+     * @param[out]     coeff_table         Pointer to the array holding the Spatial Gradient coefficients
+     * @param[out]     old_ival            Pointer to the array holding internal values
+     * @param[in]      window_dimension    The size of the window on which to perform the algorithm
+     * @param[in]      level               The pyramid level
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *old_input, const ICLTensor *old_scharr_gx, const ICLTensor *old_scharr_gy,
+                   ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal,
+                   ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival,
+                   size_t window_dimension, size_t level);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
@@ -175,6 +214,21 @@ public:
      */
     void configure(const ICLTensor *new_input, ICLLKInternalKeypointArray *new_points_internal, ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival,
                    Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, size_t level);
+    /** Initialise the kernel input and output
+     *
+     * @param[in]      compile_context     The compile context to be used.
+     * @param[in]      new_input           Pointer to the input new tensor. Data types supported: U8
+     * @param[in, out] new_points_internal Pointer to the array of CLLKInternalKeypoint for new points
+     * @param[in]      coeff_table         Pointer to the array holding the Spatial Gradient coefficients
+     * @param[in]      old_ival            Pointer to the array holding internal values
+     * @param[in]      termination         The criteria to terminate the search of each keypoint.
+     * @param[in]      epsilon             The error for terminating the algorithm
+     * @param[in]      num_iterations      The maximum number of iterations before terminating the algorithm
+     * @param[in]      window_dimension    The size of the window on which to perform the algorithm
+     * @param[in]      level               The pyramid level
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *new_input, ICLLKInternalKeypointArray *new_points_internal, ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival,
+                   Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, size_t level);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h b/arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h
index 2f5624a0a4..757e3e4f86 100644
--- a/arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h
+++ b/arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -55,6 +55,14 @@ public:
      * @param[out] output Output tensor to store the result. Data type supported: same as @p input0
      */
     void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output);
+    /** Initialise the kernel's input, output and alpha
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input0          First input tensor. Data types supported: F32
+     * @param[in]  input1          Second input tensor. Data type supported: same as @p input0
+     * @param[out] output          Output tensor to store the result. Data type supported: same as @p input0
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLLocallyConnectedMatrixMultiplyKernel
      *
      * @param[in] input0 First input tensor info. Data types supported: F32
diff --git a/arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h b/arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h
index 75b63f94ae..390da4958d 100644
--- a/arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h
+++ b/arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -60,6 +60,20 @@ public:
      */
     void configure(const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase,
                    MagnitudeType mag_type = MagnitudeType::L2NORM, PhaseType phase_type = PhaseType::SIGNED);
+    /** Initialise the kernel's input, output.
+     *
+     * @note At least one of output1 or output2 must be set.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  gx              The input gradient X tensor. Data types supported: S16.
+     * @param[in]  gy              The input gradient Y tensor. Data types supported: S16.
+     * @param[out] magnitude       (Optional) The output tensor - Magnitude. Data types supported: S16.
+     * @param[out] phase           (Optional) The output tensor - Phase. Data types supported: U8.
+     * @param[in]  mag_type        (Optional) Magnitude calculation type. Default: L2NORM.
+     * @param[in]  phase_type      (Optional) Phase calculation type. Default: SIGNED.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase,
+                   MagnitudeType mag_type = MagnitudeType::L2NORM, PhaseType phase_type = PhaseType::SIGNED);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/arm_compute/core/CL/kernels/CLMeanStdDevKernel.h b/arm_compute/core/CL/kernels/CLMeanStdDevKernel.h
index 1f3129feff..ed0213abcc 100644
--- a/arm_compute/core/CL/kernels/CLMeanStdDevKernel.h
+++ b/arm_compute/core/CL/kernels/CLMeanStdDevKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -59,6 +59,16 @@ public:
      * @param[out] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values (Buffer size: 1 cl_ulong).
      */
     void configure(const ICLImage *input, float *mean, cl::Buffer *global_sum, float *stddev = nullptr, cl::Buffer *global_sum_squared = nullptr);
+    /** Initialise the kernel's input and outputs.
+     *
+     * @param[in]  compile_context    The compile context to be used.
+     * @param[in]  input              Input image. Data types supported: U8.
+     * @param[out] mean               Input average pixel value.
+     * @param[out] global_sum         Keeps global sum of pixel values (Buffer size: 1 cl_ulong).
+     * @param[out] stddev             (Optional) Output standard deviation of pixel values.
+     * @param[out] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values (Buffer size: 1 cl_ulong).
+     */
+    void configure(CLCompileContext &compile_context, const ICLImage *input, float *mean, cl::Buffer *global_sum, float *stddev = nullptr, cl::Buffer *global_sum_squared = nullptr);
     /** Static function to check if given info will lead to a valid configuration of @ref CLMeanStdDevKernel.
      *
      * @param[in] input              Input image info. Data types supported: U8.
diff --git a/arm_compute/core/CL/kernels/CLMeanStdDevNormalizationKernel.h b/arm_compute/core/CL/kernels/CLMeanStdDevNormalizationKernel.h
index ece0ec46e6..a21a6eed73 100644
--- a/arm_compute/core/CL/kernels/CLMeanStdDevNormalizationKernel.h
+++ b/arm_compute/core/CL/kernels/CLMeanStdDevNormalizationKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -56,6 +56,17 @@ public:
      * @param[in]      epsilon (Optional) Small float to avoid division by zero in case of zero standard deviation. Defaults to 1e-8.
      */
     void configure(ICLTensor *input, ICLTensor *output = nullptr, float epsilon = 1e-8f);
+    /** Initialise the kernel's input and outputs.
+     *
+     * @note If the output tensor is a nullptr, the normalization will be performed in-place.
+     *
+     * @param[in]      compile_context The compile context to be used.
+     * @param[in, out] input           Source tensor with 2 dimensions. In case of @p output tensor = nullptr,
+     *                                 this tensor will store the result of the normalization. Data types supported: F16/F32.
+     * @param[out]     output          (Optional) Destination tensor. It can be nullptr in case of in-place computation. Data type supported: same as @p input
+     * @param[in]      epsilon         (Optional) Small float to avoid division by zero in case of zero standard deviation. Defaults to 1e-8.
+     */
+    void configure(CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output = nullptr, float epsilon = 1e-8f);
     /** Static function to check if given info will lead to a valid configuration of @ref CLMeanStdDevNormalizationKernel
      *
      * @param[in] input   Source tensor info with 2 dimensions. In case of @p output tensor info = nullptr,
diff --git a/arm_compute/core/CL/kernels/CLMedian3x3Kernel.h b/arm_compute/core/CL/kernels/CLMedian3x3Kernel.h
index 7fe5116782..df40fcf7e9 100644
--- a/arm_compute/core/CL/kernels/CLMedian3x3Kernel.h
+++ b/arm_compute/core/CL/kernels/CLMedian3x3Kernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -43,6 +43,14 @@ public:
      * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
      */
     void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  compile_context  The compile context to be used.
+     * @param[in]  input            An input tensor. Data types supported: U8
+     * @param[out] output           The output tensor. Data types supported: U8.
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined);
 
     // Inherited methods overridden:
     BorderSize border_size() const override;
diff --git a/arm_compute/core/CL/kernels/CLMemsetKernel.h b/arm_compute/core/CL/kernels/CLMemsetKernel.h
index 3e1eadfbbd..a2e61a1782 100644
--- a/arm_compute/core/CL/kernels/CLMemsetKernel.h
+++ b/arm_compute/core/CL/kernels/CLMemsetKernel.h
@@ -56,6 +56,14 @@ public:
      * @param[in]     window         Window to be used in case setting only part of a tensor. Default is nullptr.
      */
     void configure(ICLTensor *tensor, const PixelValue &constant_value, Window *window = nullptr);
+    /** Initialise the kernel's tensor and filling value
+     *
+     * @param[in]     compile_context The compile context to be used.
+     * @param[in,out] tensor          Input tensor to fill. Supported data types: All.
+     * @param[in]     constant_value  The value used to fill the planes of the tensor
+     * @param[in]     window          Window to be used in case setting only part of a tensor. Default is nullptr.
+     */
+    void configure(CLCompileContext &compile_context, ICLTensor *tensor, const PixelValue &constant_value, Window *window = nullptr);
     /** Static function to check if given info will lead to a valid configuration of @ref CLMemsetKernel
      *
      * @param[in] tensor         Source tensor info. Data types supported: All.
diff --git a/arm_compute/core/CL/kernels/CLMinMaxLayerKernel.h b/arm_compute/core/CL/kernels/CLMinMaxLayerKernel.h
index f24cebb985..7a31d71553 100644
--- a/arm_compute/core/CL/kernels/CLMinMaxLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLMinMaxLayerKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -52,6 +52,14 @@ public:
      *                    The dimensions over the second must match the batched dimensions of the input tensor. Data types supported: F32.
      */
     void configure(const ICLTensor *input, ICLTensor *output);
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Input tensor with at least 3 dimensions. The dimensions over the third will be interpreted as batches.Data types supported: F32.
+     * @param[out] output          Output tensor with shape [2, batches, ...] which stores the minimum and maximum values for each 3D input tensor.
+     *                    The dimensions over the second must match the batched dimensions of the input tensor. Data types supported: F32.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLMinMaxLayerKernel
      *
      * @param[in] input  Input tensor info.  Data types supported: F32.
diff --git a/arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h b/arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h
index 67b5b38384..e57f7587fa 100644
--- a/arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h
+++ b/arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -55,6 +55,13 @@ public:
      * @param[out] min_max Buffer of 2 elements to store the min value at position 0 and the max value at position 1. Data type supported: S32 if input type is U8/S16, F32 if input type is F32.
      */
     void configure(const ICLImage *input, cl::Buffer *min_max);
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Input Image. Data types supported: U8/S16/F32.
+     * @param[out] min_max         Buffer of 2 elements to store the min value at position 0 and the max value at position 1. Data type supported: S32 if input type is U8/S16, F32 if input type is F32.
+     */
+    void configure(CLCompileContext &compile_context, const ICLImage *input, cl::Buffer *min_max);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
@@ -92,6 +99,19 @@ public:
      */
     void configure(const ICLImage *input, cl::Buffer *min_max, cl::Buffer *min_max_count,
                    ICLCoordinates2DArray *min_loc = nullptr, ICLCoordinates2DArray *max_loc = nullptr);
+    /** Initialise the kernel's input and outputs.
+     *
+     * @note When locations of min and max occurrences are requested, the reported number of locations is limited to the given array size.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Input image. Data types supported: U8/S16/F32.
+     * @param[out] min_max         Buffer of 2 elements to store the min value at position 0 and the max value at position 1. Data type supported: S32 if input type is U8/S16, F32 if input type is F32.
+     * @param[out] min_max_count   Buffer of 2 elements to store the min value occurrences at position 0 and the max value occurrences at position 1. Data type supported: S32
+     * @param[out] min_loc         (Optional) Array of Coordinates2D used to store minimum value locations.
+     * @param[out] max_loc         (Optional) Array of Coordinates2D used to store maximum value locations.
+     */
+    void configure(CLCompileContext &compile_context, const ICLImage *input, cl::Buffer *min_max, cl::Buffer *min_max_count,
+                   ICLCoordinates2DArray *min_loc = nullptr, ICLCoordinates2DArray *max_loc = nullptr);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h b/arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h
index a6846b2541..b255f0cb90 100644
--- a/arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h
+++ b/arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -52,6 +52,20 @@ public:
     void configure(const ICLTensor *input, ICLTensor *output, NonLinearFilterFunction function,
                    unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask,
                    bool border_undefined);
+    /** Set the source, destination and border mode of the kernel
+     *
+     * @param[in]  compile_context  The compile context to be used.
+     * @param[in]  input            Source tensor. Data types supported: U8
+     * @param[out] output           Destination tensor. Data types supported: U8
+     * @param[in]  function         Non linear function to perform
+     * @param[in]  mask_size        Mask size. Supported sizes: 3, 5
+     * @param[in]  pattern          Mask pattern
+     * @param[in]  mask             The given mask. Will be used only if pattern is specified to PATTERN_OTHER
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, NonLinearFilterFunction function,
+                   unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask,
+                   bool border_undefined);
 
     // Inherited methods overridden:
     BorderSize border_size() const override;
diff --git a/arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h b/arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h
index dd36a29c2a..084c77bf26 100644
--- a/arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h
+++ b/arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -44,6 +44,14 @@ public:
      * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
      */
     void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
+    /** Initialise the kernel's sources, destinations and border mode.
+     *
+     * @param[in]  compile_context  The compile context to be used.
+     * @param[in]  input            Source tensor. Data types supported: U8, F32. (Must be the same as the output tensor)
+     * @param[out] output           Destination tensor. Data types supported: U8, F32. (Must be the same as the input tensor)
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined);
 
     // Inherited methods overridden:
     BorderSize border_size() const override;
diff --git a/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h b/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h
index 43e219a4de..350b504d50 100644
--- a/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -54,6 +54,16 @@ public:
      * @param[in]  norm_info Normalization layer information like the normalization type, normalization size and other parameters.
      */
     void configure(const ICLTensor *input, ICLTensor *output, NormalizationLayerInfo norm_info);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM],
+     *                             and an optional 4th dimension for batch of inputs. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC.
+     * @param[out] output          Destination tensor. Output will have the same number of dimensions as input. Data types supported: same as @p input.
+     *                             Data layouts supported: same as @p input.
+     * @param[in]  norm_info       Normalization layer information like the normalization type, normalization size and other parameters.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, NormalizationLayerInfo norm_info);
     /** Static function to check if given info will lead to a valid configuration of @ref CLNormalizationLayerKernel
      *
      * @param[in] input     Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM],
diff --git a/arm_compute/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h b/arm_compute/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h
index 4334882fd8..addd3942eb 100644
--- a/arm_compute/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h
@@ -57,6 +57,17 @@ public:
      *                    Data types supported: same as @p input
      */
     void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *std);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, channels].
+     *                             Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+     * @param[out] output          Destination tensor. Data type supported: same as @p input
+     * @param[in]  mean            Mean values tensor. 1 dimension with size equal to the number of input channels. Data types supported: same as @p input
+     * @param[in]  std             Standard deviation values tensor. 1 dimension with size equal to the number of input channels.
+     *                             Data types supported: same as @p input
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *std);
     /** Static function to check if given info will lead to a valid configuration of @ref CLNormalizePlanarYUVLayerKernel
      *
      * @param[in]  input  Source tensor info. 3 lower dimensions represent a single input with dimensions [width, height, channels].
diff --git a/arm_compute/core/CL/kernels/CLPadLayerKernel.h b/arm_compute/core/CL/kernels/CLPadLayerKernel.h
index 6865ae6524..09f72088c4 100644
--- a/arm_compute/core/CL/kernels/CLPadLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLPadLayerKernel.h
@@ -58,6 +58,19 @@ public:
      *                            or reflect the input, either including the border values (SYMMETRIC) or not (REFLECT).
      */
     void configure(const ICLTensor *input, ICLTensor *output, const PaddingList &padding, PixelValue constant_value = PixelValue(), PaddingMode mode = PaddingMode::CONSTANT);
+    /** Set the input and output tensor.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data types supported: All.
+     * @param[out] output          Output tensor. Data type supported: same as @p input
+     * @param[in]  padding         The padding for each spatial dimension of the input tensor. The pair padding[i]
+     *                             specifies the front and the end padding in the i-th dimension.
+     * @param[in]  constant_value  (Optional) Constant value to be used for the padding.
+     * @param[in]  mode            (Optional) Controls whether the padding should be filled with @p constant_value using CONSTANT,
+     *                             or reflect the input, either including the border values (SYMMETRIC) or not (REFLECT).
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PaddingList &padding, PixelValue constant_value = PixelValue(),
+                   PaddingMode mode = PaddingMode::CONSTANT);
     /** Static function to check if given info will lead to a valid configuration of @ref CLPadLayerKernel
      *
      * @param[in] input          Source tensor info. Data types supported: All.
diff --git a/arm_compute/core/CL/kernels/CLPermuteKernel.h b/arm_compute/core/CL/kernels/CLPermuteKernel.h
index bc51f105f5..6414edb113 100644
--- a/arm_compute/core/CL/kernels/CLPermuteKernel.h
+++ b/arm_compute/core/CL/kernels/CLPermuteKernel.h
@@ -56,6 +56,16 @@ public:
      * @param[in] perm   Permutation vector
      */
     void configure(const ICLTensor *input, ICLTensor *output, const PermutationVector &perm);
+    /** Set the input and output of the kernel.
+     *
+     * @note Arbitrary permutation vectors are supported with rank not greater than 4
+     *
+     * @param[in] compile_context The compile context to be used.
+     * @param[in] input           The input tensor to permute. Data types supported: All.
+     * @param[in] output          The output tensor. Data types supported: Same as @p input
+     * @param[in] perm            Permutation vector
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PermutationVector &perm);
     /** Static function to check if given info will lead to a valid configuration of @ref CLPermuteKernel
      *
      * @note Arbitrary permutation vectors are supported with rank not greater than 4
diff --git a/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h b/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h
index 2a54a4bc48..a9cfcc57de 100644
--- a/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h
+++ b/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h
@@ -67,6 +67,20 @@ public:
      */
     void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float scale,
                    ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+    /** Initialise the kernel's input, output and border mode.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input1          An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
+     * @param[in]  input2          An input tensor. Data types supported: same as @p input1.
+     * @param[out] output          The output tensor, Data types supported: same as @p input1. Note: U8 requires both inputs to be U8.
+     * @param[in]  scale           Scale to apply after multiplication.
+     *                             Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
+     * @param[in]  overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate
+     * @param[in]  rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even.
+     * @param[in]  act_info        (Optional) Activation layer information in case of a fused activation.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float scale,
+                   ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref CLPixelWiseMultiplicationKernel
      *
      * @param[in] input1          An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
@@ -123,6 +137,15 @@ public:
      * @param[in]  act_info (Optional) Activation layer information in case of a fused activation.
      */
     void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+    /** Initialise the kernel's input, output and border mode.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input1          An input tensor. Data types supported: F32. Number of channels supported: 2.
+     * @param[in]  input2          An input tensor. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
+     * @param[out] output          The output tensor, Data types supported: same as @p input1. Number of channels supported: same as @p input1.
+     * @param[in]  act_info        (Optional) Activation layer information in case of a fused activation.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref CLComplexPixelWiseMultiplicationKernel
      *
      * @param[in] input1   An input tensor info. Data types supported: F32. Number of channels supported: 2.
diff --git a/arm_compute/core/CL/kernels/CLPoolingLayerKernel.h b/arm_compute/core/CL/kernels/CLPoolingLayerKernel.h
index fdd10f3f66..4ab6955110 100644
--- a/arm_compute/core/CL/kernels/CLPoolingLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLPoolingLayerKernel.h
@@ -58,6 +58,16 @@ public:
      * @param[out] indices   (optional) The indices of the maximal values. Data type supported: U32.
      */
     void configure(const ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info, ICLTensor *indices = nullptr);
+    /** Set the input and output tensors.
+     *
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+     * @param[out] output          Destination tensor. Data types supported: Same as @p input.
+     * @param[in]  pool_info       Contains pooling operation information described in @ref PoolingLayerInfo.
+     * @param[out] indices         (optional) The indices of the maximal values. Data type supported: U32.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info, ICLTensor *indices = nullptr);
     /** Static function to check if given info will lead to a valid configuration of @ref CLPoolingLayerKernel
      *
      * @param[in] input     Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
diff --git a/arm_compute/core/CL/kernels/CLPriorBoxLayerKernel.h b/arm_compute/core/CL/kernels/CLPriorBoxLayerKernel.h
index 8376934a1f..89fd656581 100644
--- a/arm_compute/core/CL/kernels/CLPriorBoxLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLPriorBoxLayerKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -58,6 +58,19 @@ public:
      * @param[in]  aspect_ratios Aspect ratio values
      */
     void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const PriorBoxLayerInfo &info, cl::Buffer *min, cl::Buffer *max, cl::Buffer *aspect_ratios);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input1          First source tensor. Data types supported: F32. Data layouts supported: NCHW/NHWC.
+     * @param[in]  input2          Second source tensor. Data types and layouts supported: same as @p input1
+     * @param[out] output          Destination tensor. Output dimensions are [W * H * num_priors * 4, 2]. Data types and layouts supported: same as @p input1
+     * @param[in]  info            Prior box layer info.
+     * @param[in]  min             Minimum prior box values
+     * @param[in]  max             Maximum prior box values
+     * @param[in]  aspect_ratios   Aspect ratio values
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const PriorBoxLayerInfo &info, cl::Buffer *min, cl::Buffer *max,
+                   cl::Buffer *aspect_ratios);
     /** Static function to check if given info will lead to a valid configuration of @ref CLPriorBoxLayerKernel
      *
      * @param[in] input1 First source tensor info. Data types supported: F32. Data layouts supported: NCHW/NHWC.
diff --git a/arm_compute/core/CL/kernels/CLQuantizationLayerKernel.h b/arm_compute/core/CL/kernels/CLQuantizationLayerKernel.h
index 07c93d3306..a651529f2b 100644
--- a/arm_compute/core/CL/kernels/CLQuantizationLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLQuantizationLayerKernel.h
@@ -57,6 +57,15 @@ public:
      * @note Output auto initialization is not supported by this kernel
      */
     void configure(const ICLTensor *input, ICLTensor *output);
+    /** Set the input, output.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16.
+     * @param[out] output          Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16.
+     *
+     * @note Output auto initialization is not supported by this kernel
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLQuantizationLayerKernel
      *
      * @param[in] input  Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16.
diff --git a/arm_compute/core/CL/kernels/CLROIAlignLayerKernel.h b/arm_compute/core/CL/kernels/CLROIAlignLayerKernel.h
index 8dc7e4d3fc..8f4485a03b 100644
--- a/arm_compute/core/CL/kernels/CLROIAlignLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLROIAlignLayerKernel.h
@@ -64,6 +64,22 @@ public:
      * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array.
      */
     void configure(const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+     * @param[in]  rois            ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner
+     *                             as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ].
+     *                             Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8/QASYMM8_SIGNED, otherwise same as @p input
+     * @param[out] output          Destination tensor. Data types supported: Same as @p input.
+     * @param[in]  pool_info       Contains pooling operation information described in @ref ROIPoolingLayerInfo.
+     *
+     * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled
+     * width and pooled height.
+     * @note The z dimensions of @p output tensor and @p input tensor must be the same.
+     * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info);
     /** Static function to check if given info will lead to a valid configuration of @ref CLROIAlignLayerKernel
      *
      * @param[in] input     Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
diff --git a/arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h b/arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h
index 2936877390..8ba1b35171 100644
--- a/arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -63,6 +63,21 @@ public:
      * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array.
      */
     void configure(const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data types supported: F16/F32.
+     * @param[in]  rois            ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner
+     *                             as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ]. Data types supported: U16
+     * @param[out] output          Destination tensor. Data types supported: Same as @p input.
+     * @param[in]  pool_info       Contains pooling operation information described in @ref ROIPoolingLayerInfo.
+     *
+     * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled
+     * width and pooled height.
+     * @note The z dimensions of @p output tensor and @p input tensor must be the same.
+     * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/arm_compute/core/CL/kernels/CLRangeKernel.h b/arm_compute/core/CL/kernels/CLRangeKernel.h
index 27dd813010..5cc4a220ca 100644
--- a/arm_compute/core/CL/kernels/CLRangeKernel.h
+++ b/arm_compute/core/CL/kernels/CLRangeKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -59,6 +59,15 @@ public:
      * @param[in]  step   The gap between each pair of values in the sequence.
      */
     void configure(ICLTensor *output, float start, float end, float step);
+    /** Initialize the kernel's output tensor, start, end and step of the sequence.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[out] output          Output tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
+     * @param[in]  start           The starting value of the sequence.
+     * @param[in]  end             The ending (not including) value of the sequence.
+     * @param[in]  step            The gap between each pair of values in the sequence.
+     */
+    void configure(CLCompileContext &compile_context, ICLTensor *output, float start, float end, float step);
     /** Static function to check if given info will lead to a valid configuration of @ref CLRangeKernel
      *
      * @param[in] output Output tensor info. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
diff --git a/arm_compute/core/CL/kernels/CLReductionOperationKernel.h b/arm_compute/core/CL/kernels/CLReductionOperationKernel.h
index 07ebd89819..bdab58bea1 100644
--- a/arm_compute/core/CL/kernels/CLReductionOperationKernel.h
+++ b/arm_compute/core/CL/kernels/CLReductionOperationKernel.h
@@ -59,6 +59,17 @@ public:
      * @param[in]  width  (Optional)  In case of x-axis we also need to provide the width of the input image.
      */
     void configure(const ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op, unsigned int width = 0);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/S32/F16/F32.
+     * @param[out] output          Destination tensor. Data types and data layouts supported: Same as @p input.
+     *                             Output will have the same number of dimensions as input.
+     * @param[in]  axis            Axis along which to reduce. Supported reduction axis : 0,1,2,3
+     * @param[in]  op              Reduction operation to perform. Operations supported: MEAN_SUM, PROD, SUM_SQUARE, SUM, MIN, MAX
+     * @param[in]  width           (Optional)  In case of x-axis we also need to provide the width of the input image.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op, unsigned int width = 0);
 
     /** Static function to check if given info will lead to a valid configuration of @ref CLReductionOperationKernel.
      *
diff --git a/arm_compute/core/CL/kernels/CLRemapKernel.h b/arm_compute/core/CL/kernels/CLRemapKernel.h
index ce094bc321..14f4b2ddb5 100644
--- a/arm_compute/core/CL/kernels/CLRemapKernel.h
+++ b/arm_compute/core/CL/kernels/CLRemapKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -55,6 +55,17 @@ public:
      * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
      */
     void configure(const ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, InterpolationPolicy policy, bool border_undefined);
+    /** Initialize the kernel's input, output and border mode.
+     *
+     * @param[in]  compile_context  The compile context to be used.
+     * @param[in]  input            Source tensor. Data types supported: U8.
+     * @param[in]  map_x            Map for X coordinates. Data types supported: F32.
+     * @param[in]  map_y            Map for Y coordinates. Data types supported: F32.
+     * @param[out] output           Destination tensor. Data types supported: U8. All but the lowest two dimensions must be the same size as in the input tensor, i.e. remapping is only performed within the XY-plane.
+     * @param[in]  policy           The interpolation type.
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, InterpolationPolicy policy, bool border_undefined);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/arm_compute/core/CL/kernels/CLReorgLayerKernel.h b/arm_compute/core/CL/kernels/CLReorgLayerKernel.h
index c1bbb0a2ce..65304c1cc6 100644
--- a/arm_compute/core/CL/kernels/CLReorgLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLReorgLayerKernel.h
@@ -55,6 +55,17 @@ public:
      *                    It defines the spatial distance between 2 consecutive pixels in the x and y direction
      */
     void configure(const ICLTensor *input, ICLTensor *output, int32_t stride);
+    /** Initialize the kernel's input, output.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/F16/U32/S32/F32.
+     * @param[out] output          Destination tensor with tensor shape:
+     *                             [width_input / stride, height_input / stride, channels_input * stride * stride, batch_size]. This means the output has
+     *                             the same number of input elements. Data types supported: same as @p input.
+     * @param[in]  stride          Stride value to use for reorganizing the values in the output tensor.
+     *                             It defines the spatial distance between 2 consecutive pixels in the x and y direction
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t stride);
     /** Static function to check if given info will lead to a valid configuration of @ref CLReorgLayerKernel
      *
      * @param[in] input  Source tensor. Data types supported: All.
diff --git a/arm_compute/core/CL/kernels/CLReshapeLayerKernel.h b/arm_compute/core/CL/kernels/CLReshapeLayerKernel.h
index 77aedf32cc..f9588e818f 100644
--- a/arm_compute/core/CL/kernels/CLReshapeLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLReshapeLayerKernel.h
@@ -53,6 +53,13 @@ public:
      * @param[out] output Destination tensor. Data type supported: Same as @p input
      */
     void configure(const ICLTensor *input, ICLTensor *output);
+    /** Set the input and output of the kernel
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data type supported: All.
+     * @param[out] output          Destination tensor. Data type supported: Same as @p input
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
 
     /** Static function to check if given info will lead to a valid configuration of @ref CLReshapeLayerKernel
      *
diff --git a/arm_compute/core/CL/kernels/CLReverseKernel.h b/arm_compute/core/CL/kernels/CLReverseKernel.h
index c8d10f7ace..b1547cfb9f 100644
--- a/arm_compute/core/CL/kernels/CLReverseKernel.h
+++ b/arm_compute/core/CL/kernels/CLReverseKernel.h
@@ -53,6 +53,14 @@ public:
      * @param[in]  axis   Axis tensor. Contains the indices of the dimensions to reverse. Data type supported: U32
      */
     void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *axis);
+    /** Initialise the kernel's inputis and output
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Input tensor. Data types supported: All.
+     * @param[out] output          Output tensor. Data type supported: Same as @p input
+     * @param[in]  axis            Axis tensor. Contains the indices of the dimensions to reverse. Data type supported: U32
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *axis);
 
     /** Static function to check if given info will lead to a valid configuration of @ref CLReverseKernel
      *
diff --git a/arm_compute/core/CL/kernels/CLScaleKernel.h b/arm_compute/core/CL/kernels/CLScaleKernel.h
index f93286b07d..02dfb3eccf 100644
--- a/arm_compute/core/CL/kernels/CLScaleKernel.h
+++ b/arm_compute/core/CL/kernels/CLScaleKernel.h
@@ -46,6 +46,19 @@ public:
      * @param[in]  align_corners   (Optional) Align corners of input and output, only affecting bilinear policy with TOP_LEFT sampling policy. Defaults to false.
      */
     void configure(const ICLTensor *input, ICLTensor *output, InterpolationPolicy policy, BorderMode border_mode, SamplingPolicy sampling_policy = SamplingPolicy::CENTER, bool align_corners = false);
+    /** Initialise the kernel's inputs, output and interpolation policy
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32
+     * @param[out] output          Destination tensor. Data types supported: Same as @p input
+     *                             All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
+     * @param[in]  policy          Interpolation type to use
+     * @param[in]  border_mode     Selected border mode.
+     * @param[in]  sampling_policy (Optional) Sampling policy used by the interpolation. Defaults to @ref SamplingPolicy::CENTER
+     * @param[in]  align_corners   (Optional) Align corners of input and output, only affecting bilinear policy with TOP_LEFT sampling policy. Defaults to false.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, InterpolationPolicy policy, BorderMode border_mode,
+                   SamplingPolicy sampling_policy = SamplingPolicy::CENTER, bool align_corners = false);
 
     /** Static function to check if given info will lead to a valid configuration of @ref CLScaleKernel
      *
diff --git a/arm_compute/core/CL/kernels/CLScharr3x3Kernel.h b/arm_compute/core/CL/kernels/CLScharr3x3Kernel.h
index 3d4e4ebb99..1cdb66715e 100644
--- a/arm_compute/core/CL/kernels/CLScharr3x3Kernel.h
+++ b/arm_compute/core/CL/kernels/CLScharr3x3Kernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -70,6 +70,17 @@ public:
      * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
      */
     void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
+    /** Initialise the kernel's source, destination and border.
+     *
+     * @note At least one of output_x or output_y must be set.
+     *
+     * @param[in]  compile_context  The compile context to be used.
+     * @param[in]  input            Source tensor. Data types supported: U8.
+     * @param[out] output_x         (Optional) Destination tensor for the X gradient, Data types supported: S16.
+     * @param[out] output_y         (Optional) Destination tensor for the Y gradient, Data types supported: S16.
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/arm_compute/core/CL/kernels/CLSelectKernel.h b/arm_compute/core/CL/kernels/CLSelectKernel.h
index be43add843..02f4cccfdb 100644
--- a/arm_compute/core/CL/kernels/CLSelectKernel.h
+++ b/arm_compute/core/CL/kernels/CLSelectKernel.h
@@ -60,6 +60,15 @@ public:
      * @param[in]  output Output tensor. Data types supported: Same as @p x.
      */
     void configure(const ICLTensor *c, const ICLTensor *x, const ICLTensor *y, ICLTensor *output);
+    /** Initialise the kernel's inputs and output.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  c               Condition input tensor. Data types supported: U8.
+     * @param[in]  x               First input tensor. Data types supported: All.
+     * @param[out] y               Second input tensor. Data types supported: Same as @p x
+     * @param[in]  output          Output tensor. Data types supported: Same as @p x.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *c, const ICLTensor *x, const ICLTensor *y, ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLSelectKernel
      *
      * @param[in] c      Condition input tensor. Data types supported: U8.
diff --git a/arm_compute/core/CL/kernels/CLSobel3x3Kernel.h b/arm_compute/core/CL/kernels/CLSobel3x3Kernel.h
index 74d83468f3..3970c07b5a 100644
--- a/arm_compute/core/CL/kernels/CLSobel3x3Kernel.h
+++ b/arm_compute/core/CL/kernels/CLSobel3x3Kernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -56,6 +56,17 @@ public:
      * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
      */
     void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
+    /** Initialise the kernel's source, destination and border.
+     *
+     * @note At least one of output_x or output_y must be set.
+     *
+     * @param[in]  compile_context  The compile context to be used.
+     * @param[in]  input            Source tensor. Data types supported: U8.
+     * @param[out] output_x         (Optional) Destination tensor for the X gradient, Data types supported: S16.
+     * @param[out] output_y         (Optional) Destination tensor for the Y gradient, Data types supported: S16.
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/arm_compute/core/CL/kernels/CLSobel5x5Kernel.h b/arm_compute/core/CL/kernels/CLSobel5x5Kernel.h
index 20a69ea780..0aff209931 100644
--- a/arm_compute/core/CL/kernels/CLSobel5x5Kernel.h
+++ b/arm_compute/core/CL/kernels/CLSobel5x5Kernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -57,6 +57,17 @@ public:
      * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
      */
     void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
+    /** Initialise the kernel's source, destination and border.
+     *
+     * @note At least one of output_x or output_y must be set.
+     *
+     * @param[in]  compile_context  The compile context to be used.
+     * @param[in]  input            Source tensor. Data types supported: U8.
+     * @param[out] output_x         (Optional) Destination tensor for the X gradient, Data types supported: S16.
+     * @param[out] output_y         (Optional) Destination tensor for the Y gradient, Data types supported: S16.
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
@@ -99,6 +110,18 @@ public:
      * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
      */
     void configure(const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
+    /** Initialise the kernel's source, destination and border.
+     *
+     * @note At least one of output_x or output_y must be set and the corresponding input.
+     *
+     * @param[in]  compile_context  The compile context to be used.
+     * @param[in]  input_x          (Optional) Input for X (X output of horizontal pass). Data types supported: S16.
+     * @param[in]  input_y          (Optional) Input for Y (Y output of horizontal pass). Data types supported: S16.
+     * @param[out] output_x         (Optional) Destination tensor for the X gradient, Data types supported: S16.
+     * @param[out] output_y         (Optional) Destination tensor for the Y gradient, Data types supported: S16.
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/arm_compute/core/CL/kernels/CLSobel7x7Kernel.h b/arm_compute/core/CL/kernels/CLSobel7x7Kernel.h
index 3c224f7e3b..31809b1cf4 100644
--- a/arm_compute/core/CL/kernels/CLSobel7x7Kernel.h
+++ b/arm_compute/core/CL/kernels/CLSobel7x7Kernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -57,6 +57,17 @@ public:
      * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
      */
     void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
+    /** Initialise the kernel's source, destination and border.
+     *
+     * @note At least one of output_x or output_y must be set.
+     *
+     * @param[in]  compile_context  The compile context to be used.
+     * @param[in]  input            Source tensor. Data types supported: U8.
+     * @param[out] output_x         (Optional) Destination tensor for the X gradient, Data types supported: S32.
+     * @param[out] output_y         (Optional) Destination tensor for the Y gradient, Data types supported: S32.
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
@@ -99,6 +110,18 @@ public:
      * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
      */
     void configure(const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
+    /** Initialise the kernel's source, destination and border.
+     *
+     * @note At least one of output_x or output_y must be set and the corresponding input.
+     *
+     * @param[in]  compile_context  The compile context to be used.
+     * @param[in]  input_x          (Optional) Input for X (X output of horizontal pass). Data types supported: S32.
+     * @param[in]  input_y          (Optional) Input for Y (Y output of horizontal pass). Data types supported: S32.
+     * @param[out] output_x         (Optional) Destination tensor for the X gradient, Data types supported: S32.
+     * @param[out] output_y         (Optional) Destination tensor for the Y gradient, Data types supported: S32.
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h b/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h
index f64739ae32..800d909a1c 100644
--- a/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -43,6 +43,13 @@ public:
      * @param[out] output Destination tensor. Data types supported: same as @p input
      */
     void configure(const ICLTensor *input, ICLTensor *output);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data types supported: QASYMM8/F16/F32
+     * @param[out] output          Destination tensor. Data types supported: same as @p input
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLLogits1DMaxKernel
      *
      * @param[in] input  Source tensor. Data types supported: QASYMM8/F16/F32
@@ -76,6 +83,16 @@ public:
      * @param[in]  beta   (Optional) A scaling factor for the exponent. Defaults to 1.0
      */
     void configure(const ICLTensor *input, const ICLTensor *max, ICLTensor *output, ICLTensor *sum, float beta = 1.0f);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data types supported: QASYMM8/F16/F32
+     * @param[in]  max             Max values tensor. Data types supported: same as @p input
+     * @param[out] output          Destination tensor. Data types supported: S32 for QASYMM8 @p input, or same as @p input
+     * @param[out] sum             Sum of 1D logits tensor. Data types supported: S32 for QASYMM8 @p input, or same as @p input
+     * @param[in]  beta            (Optional) A scaling factor for the exponent. Defaults to 1.0
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *max, ICLTensor *output, ICLTensor *sum, float beta = 1.0f);
     /** Static function to check if given info will lead to a valid configuration of @ref CLLogits1DShiftExpSumKernel
      *
      * @param[in] input  Source tensor. Data types supported: QASYMM8/F16/F32
@@ -124,6 +141,16 @@ public:
      * @param[in]     info   Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo.
      */
     void configure(const ICLTensor *input, ICLTensor *max, ICLTensor *output, ICLTensor *sum, const SoftmaxKernelInfo &info);
+    /** Set the input and output tensors.
+     *
+     * @param[in]     compile_context The compile context to be used.
+     * @param[in]     input           Source tensor. Data types supported: F16/F32
+     * @param[in,out] max             Max values tensor. Data types supported: same as @p input
+     * @param[out]    output          Destination tensor. Data types supported: same as @p input
+     * @param[out]    sum             Sum of 1D logits tensor. Data types supported: same as @p input
+     * @param[in]     info            Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *max, ICLTensor *output, ICLTensor *sum, const SoftmaxKernelInfo &info);
     /** Static function to check if given info will lead to a valid configuration of @ref CLLogits1DMaxShiftExpSumKernel
      *
      * @param[in] input  Source tensor. Data types supported: F16/F32
@@ -182,6 +209,15 @@ public:
      * @param[in]  info   Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo.
      */
     void configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, const SoftmaxKernelInfo &info);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data types supported: S32/F16/F32
+     * @param[in]  sum             Sum tensor. Dimensions should be dim(input)-1. Data types supported: same as @p input
+     * @param[out] output          Destination tensor. Data types supported: QASYMM8 for S32 @p input, or same as @p input
+     * @param[in]  info            Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, const SoftmaxKernelInfo &info);
     /** Static function to check if given info will lead to a valid configuration of @ref CLLogits1DNormKernel
      *
      * @param[in] input  Source tensor. Data types supported: S32/F16/F32
diff --git a/arm_compute/core/CL/kernels/CLSpaceToBatchLayerKernel.h b/arm_compute/core/CL/kernels/CLSpaceToBatchLayerKernel.h
index 6295430d40..34f0b669c4 100644
--- a/arm_compute/core/CL/kernels/CLSpaceToBatchLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLSpaceToBatchLayerKernel.h
@@ -55,6 +55,15 @@ public:
      * @param[out] output      Tensor output. Data types supported: same as @p input
      */
     void configure(const ICLTensor *input, const ICLTensor *block_shape, const ICLTensor *paddings, ICLTensor *output);
+    /** Initialise the kernel's inputs and output.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Tensor input. Supported tensor rank: 4. Data types supported: All.
+     * @param[in]  block_shape     1-D tensor with shape [M]. Data types supported: S32
+     * @param[in]  paddings        2-D tensor with shape [2, M]. Data types supported: S32
+     * @param[out] output          Tensor output. Data types supported: same as @p input
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *block_shape, const ICLTensor *paddings, ICLTensor *output);
     /** Initialise the kernel's input and output. (Static block shape and paddings)
      *
      * @param[in]  input         Tensor input. Supported tensor rank: 4. Data types supported: All.
@@ -65,6 +74,17 @@ public:
      * @param[out] output        Tensor output. Data types supported: same as @p input
      */
     void configure(const ICLTensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, ICLTensor *output);
+    /** Initialise the kernel's input and output. (Static block shape and paddings)
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Tensor input. Supported tensor rank: 4. Data types supported: All.
+     * @param[in]  block_shape_x   Block shape x value.
+     * @param[in]  block_shape_y   Block shape y value.
+     * @param[in]  padding_left    The left padding of the output tensor.
+     * @param[in]  padding_right   The right padding of the output tensor.
+     * @param[out] output          Tensor output. Data types supported: same as @p input
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLSpaceToBatchLayerKernel
      *
      * @param[in] input       Tensor input. Supported tensor rank: 4. Data types supported: All.
diff --git a/arm_compute/core/CL/kernels/CLSpaceToDepthLayerKernel.h b/arm_compute/core/CL/kernels/CLSpaceToDepthLayerKernel.h
index 085c337627..3f20f665dd 100644
--- a/arm_compute/core/CL/kernels/CLSpaceToDepthLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLSpaceToDepthLayerKernel.h
@@ -54,6 +54,14 @@ public:
      * @param[in]  block_shape Block shape value.
      */
     void configure(const ICLTensor *input, ICLTensor *output, int32_t block_shape);
+    /** Initialise the kernel's inputs and output.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Tensor input. Supported tensor rank: 4. Data types supported: All.
+     * @param[out] output          Tensor output. Data types supported: same as @p input
+     * @param[in]  block_shape     Block shape value.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t block_shape);
     /** Static function to check if given info will lead to a valid configuration of @ref CLSpaceToDepthLayerKernel.
      *
      * @param[in] input       Tensor input info. Supported tensor rank: 4. Data types supported: All.
diff --git a/arm_compute/core/CL/kernels/CLStackLayerKernel.h b/arm_compute/core/CL/kernels/CLStackLayerKernel.h
index 073c896035..19925c251d 100644
--- a/arm_compute/core/CL/kernels/CLStackLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLStackLayerKernel.h
@@ -61,6 +61,20 @@ public:
      *
      */
     void configure(const ICLTensor *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, ICLTensor *output);
+    /** Initialise the kernel's inputs and output
+     *
+     * @note Supported input tensor rank: up to 4
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Input tensor. Data types supported: All.
+     * @param[in]  axis            The dimension to stack the tensors along. It must be smaller than the number of input dimensions.
+     * @param[in]  idx_input       Index of the input tensor in the list of tensors to stack.
+     *                             All tensors in the list must have the same shape
+     * @param[in]  num_tensors     Number of tensors to stack
+     * @param[out] output          Output tensor. Data types supported: Same as @p input.
+     *
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLStackLayerKernel
      *
      * @note Supported input tensor rank: up to 4
diff --git a/arm_compute/core/CL/kernels/CLStridedSliceKernel.h b/arm_compute/core/CL/kernels/CLStridedSliceKernel.h
index 3bcabaf5e0..2e668821bd 100644
--- a/arm_compute/core/CL/kernels/CLStridedSliceKernel.h
+++ b/arm_compute/core/CL/kernels/CLStridedSliceKernel.h
@@ -67,6 +67,24 @@ public:
     void configure(const ICLTensor *input, ICLTensor *output,
                    const Coordinates &starts, const Coordinates &ends, const BiStrides &strides,
                    int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask);
+    /** Configure kernel
+     *
+     * @note Supported tensor rank: up to 4
+     *
+     * @param[in]  compile_context  The compile context to be used.
+     * @param[in]  input            Source tensor. Data type supported: All.
+     * @param[out] output           Destination tensor. Data type supported: Same as @p input
+     * @param[in]  starts           The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input).
+     * @param[in]  ends             The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input).
+     * @param[in]  strides          The strides of the dimensions of the input tensor to be sliced. The length must be of rank(input).
+     * @param[in]  begin_mask       If the ith bit of begin_mask is set, starts[i] is ignored and the fullest possible range in that dimension is used instead.
+     * @param[in]  end_mask         If the ith bit of end_mask is set, ends[i] is ignored and the fullest possible range in that dimension is used instead.
+     * @param[in]  shrink_axis_mask If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1.
+     *                              A slice of size 1 starting from starts[i] in the dimension must be preserved.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output,
+                   const Coordinates &starts, const Coordinates &ends, const BiStrides &strides,
+                   int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask);
 
     /** Static function to check if given info will lead to a valid configuration of @ref CLStridedSliceKernel
      *
diff --git a/arm_compute/core/CL/kernels/CLTableLookupKernel.h b/arm_compute/core/CL/kernels/CLTableLookupKernel.h
index 67d9cde6fe..9bbaf26d7a 100644
--- a/arm_compute/core/CL/kernels/CLTableLookupKernel.h
+++ b/arm_compute/core/CL/kernels/CLTableLookupKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -42,6 +42,14 @@ public:
      * @param[out] output The output tensor. Data types supported: U8, S16.
      */
     void configure(const ICLTensor *input, const ICLLut *lut, ICLTensor *output);
+    /** Initialise the kernel's input, lut and output.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           An input tensor. Data types supported: U8, S16.
+     * @param[in]  lut             The input LUT. Data types supported: U8, S16.
+     * @param[out] output          The output tensor. Data types supported: U8, S16.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLLut *lut, ICLTensor *output);
 };
 } // namespace arm_compute
 #endif /* ARM_COMPUTE_CLTABLELOOKUPKERNEL_H */
diff --git a/arm_compute/core/CL/kernels/CLThresholdKernel.h b/arm_compute/core/CL/kernels/CLThresholdKernel.h
index 3523e36fe5..79e9f01aa2 100644
--- a/arm_compute/core/CL/kernels/CLThresholdKernel.h
+++ b/arm_compute/core/CL/kernels/CLThresholdKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -51,6 +51,19 @@ public:
      */
     void configure(const ICLTensor *input, ICLTensor *output, uint8_t threshold,
                    uint8_t false_value, uint8_t true_value, ThresholdType type, uint8_t upper);
+    /**Initialise the kernel's input, output and threshold parameters.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           An input tensor. Data types supported: U8
+     * @param[out] output          The output tensor. Data types supported: U8.
+     * @param[in]  threshold       Threshold. When the threshold type is RANGE, this is used as the lower threshold.
+     * @param[in]  false_value     value to set when the condition is not respected.
+     * @param[in]  true_value      value to set when the condition is respected.
+     * @param[in]  type            Thresholding type. Either RANGE or BINARY.
+     * @param[in]  upper           Upper threshold. Only used when the thresholding type is RANGE.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, uint8_t threshold,
+                   uint8_t false_value, uint8_t true_value, ThresholdType type, uint8_t upper);
 };
 } // namespace arm_compute
 #endif /*ARM_COMPUTE_NETHRESHOLDKERNEL_H */
diff --git a/arm_compute/core/CL/kernels/CLTileKernel.h b/arm_compute/core/CL/kernels/CLTileKernel.h
index 2b0c4305cb..1c9186c4dd 100644
--- a/arm_compute/core/CL/kernels/CLTileKernel.h
+++ b/arm_compute/core/CL/kernels/CLTileKernel.h
@@ -55,6 +55,16 @@ public:
      *
      */
     void configure(const ICLTensor *input, ICLTensor *output, const Multiples &multiples);
+    /** Set the source, destination of the kernel
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data type supported: All.
+     * @param[in]  multiples       Contains the number of times the input tensor should be replicated on the given dimension.
+     *                             Cannot have more than 4 elements (tiling in dimensions greater than 4 is not supported).
+     * @param[out] output          Destination tensor. Same as @p input
+     *
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Multiples &multiples);
     /** Static function to check if given info will lead to a valid configuration of @ref CLTileKernel
      *
      * @param[in] input     Source tensor info. Data type supported: All.
diff --git a/arm_compute/core/CL/kernels/CLTransposeKernel.h b/arm_compute/core/CL/kernels/CLTransposeKernel.h
index 0adebde398..37bd716f3d 100644
--- a/arm_compute/core/CL/kernels/CLTransposeKernel.h
+++ b/arm_compute/core/CL/kernels/CLTransposeKernel.h
@@ -44,6 +44,13 @@ public:
      * @param[out] output Output tensor. Data type supported: Same as @p input
      */
     void configure(const ICLTensor *input, ICLTensor *output);
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Input tensor. Data types supported: All.
+     * @param[out] output          Output tensor. Data type supported: Same as @p input
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLTransposeKernel
      *
      * @param[in] input  Input tensor. Data types supported: All.
diff --git a/arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h b/arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h
index 6f632aab46..556e5484d7 100644
--- a/arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h
@@ -55,6 +55,15 @@ public:
      * @param[in]  upsampling_policy Defines the policy to fill the intermediate pixels.
      */
     void configure(const ICLTensor *input, ICLTensor *output, const Size2D &info, const InterpolationPolicy upsampling_policy);
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  compile_context   The compile context to be used.
+     * @param[in]  input             Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+     * @param[out] output            Destination tensor. Data types supported: same as @p input.
+     * @param[in]  info              Contains stride information described in @ref Size2D.
+     * @param[in]  upsampling_policy Defines the policy to fill the intermediate pixels.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Size2D &info, const InterpolationPolicy upsampling_policy);
     /** Static function to check if given info will lead to a valid configuration of @ref CLUpsampleLayerKernel
      *
      * @param[in] input             Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
diff --git a/arm_compute/core/CL/kernels/CLWarpAffineKernel.h b/arm_compute/core/CL/kernels/CLWarpAffineKernel.h
index e4d0be62c6..bd26705ea4 100644
--- a/arm_compute/core/CL/kernels/CLWarpAffineKernel.h
+++ b/arm_compute/core/CL/kernels/CLWarpAffineKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -44,6 +44,16 @@ public:
      * @param[in]  policy The interpolation type.
      */
     void configure(const ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy);
+    /** Initialize the function's source, destination, interpolation policy and border_mode.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data types supported: U8.
+     * @param[out] output          Destination tensor, Data types supported: U8.
+     * @param[in]  matrix          The perspective matrix. Must be 2x3 of type float
+     *                             The matrix argument requires 9 values, the last 3 values are ignored.
+     * @param[in]  policy          The interpolation type.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy);
 
     // Inherited methods overridden:
     BorderSize border_size() const override;
diff --git a/arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h b/arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h
index cf6cb4cf64..4f4ff34f1d 100644
--- a/arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h
+++ b/arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -42,6 +42,15 @@ public:
      * @param[in]  policy The interpolation type.
      */
     void configure(const ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy);
+    /** Initialize the function's source, destination, interpolation policy and border_mode.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data types supported: U8.
+     * @param[out] output          Destination tensor, Data types supported: U8.
+     * @param[in]  matrix          The perspective matrix. Must be 3x3 of type float.
+     * @param[in]  policy          The interpolation type.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy);
 
     // Inherited methods overridden:
     BorderSize border_size() const override;
diff --git a/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h b/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h
index 3e29feaa4d..f09eea958c 100644
--- a/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h
+++ b/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -79,6 +79,20 @@ public:
      *                        Number of groups greater than one are only supported for NCHW data layout, and the number of weights must be a multiple of it.
      */
     void configure(const ICLTensor *input, const ICLTensor *biases, ICLTensor *output, unsigned int num_groups = 1);
+    /** Set the input and output of the kernel.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared,
+     *                             and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM,  num_patches] if unshared. Data types supported: All
+     * @param[in]  biases          The shared biases tensor to append.  Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with
+     *                             dimensions [OFM, num_patches] if unshared. Data types supported: F16/F32, for quantized types this must be nullptr.
+     *                             @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types.
+     * @param[out] output          The output tensor. Should be a 2D Tensor if there are no groups and the weights are not shared; a 3D Tensor otherwise.
+     *                             Data types supported: Same as @p input
+     * @param[in]  num_groups      (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout
+     *                             Number of groups greater than one are only supported for NCHW data layout, and the number of weights must be a multiple of it.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *biases, ICLTensor *output, unsigned int num_groups = 1);
     /** Static function to check if given info will lead to a valid configuration of @ref CLWeightsReshapeKernel
      *
      * @param[in] input      The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared,
diff --git a/arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h b/arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h
index 1a781446e6..50abf65983 100644
--- a/arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h
+++ b/arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h
@@ -57,6 +57,14 @@ public:
      * @param[out] output Output tensor. Data types supported: Same as @p input1.
      */
     void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
+    /** Initialise the kernel's input1s and output
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input1          First input tensor. Data types supported: All.
+     * @param[in]  input2          Second input tensor. Data types supported: same as @p input1
+     * @param[out] output          Output tensor. Data types supported: Same as @p input1.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
     /**  Static function to check if given info will lead to a valid configuration of @ref CLWidthConcatenate2TensorsKernel
      *
      * @param[in] input1 First tensor info. Data types supported: All.
diff --git a/arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h b/arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h
index 34b8257f00..f203602a12 100644
--- a/arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h
+++ b/arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h
@@ -59,6 +59,16 @@ public:
      * @param[out] output Output tensor. Data types supported: Same as @p input1.
      */
     void configure(const ICLTensor *input1, const ICLTensor *input2, const ICLTensor *input3, const ICLTensor *input4, ICLTensor *output);
+    /** Initialise the kernel's input1s and output
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input1          First input tensor. Data types supported: All.
+     * @param[in]  input2          Second input tensor. Data types supported: same as @p input1
+     * @param[in]  input3          Third input tensor. Data types supported: same as @p input1
+     * @param[in]  input4          Fourth input tensor. Data types supported: same as @p input1
+     * @param[out] output          Output tensor. Data types supported: Same as @p input1.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, const ICLTensor *input3, const ICLTensor *input4, ICLTensor *output);
     /**  Static function to check if given info will lead to a valid configuration of @ref CLWidthConcatenate4TensorsKernel
      *
      * @param[in] input1 First tensor info. Data types supported: All.
diff --git a/arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h b/arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h
index 5dcae67c45..4564d774e3 100644
--- a/arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h
@@ -58,6 +58,15 @@ public:
      *
      */
     void configure(const ICLTensor *input, unsigned int width_offset, ICLTensor *output);
+    /** Initialise the kernel's inputs and output
+     *
+     * @param[in]     compile_context The compile context to be used.
+     * @param[in]     input           Input tensor. Data types supported: All.
+     * @param[in]     width_offset    The offset on the X axis.
+     * @param[in,out] output          Output tensor. Data types supported: Same as @p input.
+     *
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, unsigned int width_offset, ICLTensor *output);
     /**  Static function to check if given info will lead to a valid configuration of @ref CLWidthConcatenateLayerKernel
      *
      * @param[in] input        Input tensor info. Data types supported: All.
diff --git a/arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h b/arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h
index 4b1de0fb5c..bc7573dc9e 100644
--- a/arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h
+++ b/arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -64,6 +64,25 @@ public:
      * @param[in]  winograd_info Contains Winograd's information described in @ref WinogradInfo
      */
     void configure(const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info);
+    /** Set the input and output tensor.
+     *
+     * @note Winograd filter transform supports the following configurations for NCWH data layout
+     *       F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3),
+     *                                   F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
+     *                                   F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
+     *
+     * @note Winograd filter transform supports the following configurations for NHWC data layout
+     *       F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
+     *                                   F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
+     *
+     *       Strides: only unit strides
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. The input is a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] (NCHW data layout) or [IFM, kernel_x, kernel_y, OFM] (NHWC data layout). Data types supported: F16/F32.
+     * @param[out] output          The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_filter_transform_shape. Data types supported: Same as @p input
+     * @param[in]  winograd_info   Contains Winograd's information described in @ref WinogradInfo
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info);
     /** Static function to check if given info will lead to a valid configuration of @ref CLWinogradFilterTransformKernel
      *
      * @note Winograd filter transform supports the following configurations for NCWH data layout
diff --git a/arm_compute/core/CL/kernels/CLWinogradInputTransformKernel.h b/arm_compute/core/CL/kernels/CLWinogradInputTransformKernel.h
index 03c215bf1f..6bb8d6e616 100644
--- a/arm_compute/core/CL/kernels/CLWinogradInputTransformKernel.h
+++ b/arm_compute/core/CL/kernels/CLWinogradInputTransformKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -62,6 +62,25 @@ public:
      * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo.
      */
     void configure(const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info);
+    /** Set the input and output of the kernel.
+     *
+     * @note Winograd input transform supports the following configurations for NCWH data layout
+     *       F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3),
+     *                                   F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
+     *                                   F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
+     *
+     * @note Winograd input transform supports the following configurations for NHWC data layout
+     *       F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
+     *                                   F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
+     *
+     *       Strides: only unit strides
+     *
+     * @param[in] compile_context The compile context to be used.
+     * @param[in] input           The input tensor to transform. Data types supported: F16/F32
+     * @param[in] output          The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_input_transform_shape. Data types supported: Same as @p input
+     * @param[in] winograd_info   Contains Winograd's information described in @ref WinogradInfo.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info);
     /** Static function to check if given info will lead to a valid configuration of @ref CLWinogradInputTransformKernel
      *
      * @note Winograd input transform supports the following configurations for NCWH data layout
diff --git a/arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h b/arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h
index c61e78674f..aab244bb90 100644
--- a/arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h
+++ b/arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -66,6 +66,28 @@ public:
      * @param[in]  act_info      (Optional) Activation layer information in case of a fused activation.
      */
     void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const WinogradInfo &winograd_info, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+    /** Set the input and output tensor.
+     *
+     * @note Winograd output transform supports the following configurations for NCWH data layout
+     *       F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3),
+     *                                   F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
+     *                                   F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
+     *
+     * @note Winograd output transform supports the following configurations for NHWC data layout
+     *       F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
+     *                                   F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
+     *
+     *       Strides: only unit strides
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor with shape [C, N, K, batches]. Data types supported: F16/F32.
+     * @param[in]  bias            Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. It can be a nullptr. Data type supported: as @p input
+     * @param[out] output          The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_output_transform_shape. Data types supported: Same as @p input
+     * @param[in]  winograd_info   Contains Winograd's information described in @ref WinogradInfo
+     * @param[in]  act_info        (Optional) Activation layer information in case of a fused activation.
+     */
+    void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const WinogradInfo &winograd_info,
+                   const ActivationLayerInfo &act_info = ActivationLayerInfo());
 
     /** Static function to check if given info will lead to a valid configuration of @ref CLWinogradOutputTransformKernel
      *
diff --git a/arm_compute/core/CL/kernels/CLYOLOLayerKernel.h b/arm_compute/core/CL/kernels/CLYOLOLayerKernel.h
index 1fae0ed44b..c03fc94f91 100644
--- a/arm_compute/core/CL/kernels/CLYOLOLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLYOLOLayerKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -62,6 +62,18 @@ public:
      * @param[in]      num_classes Number of classes to activate (must be submultiple of @p input channels)
      */
     void configure(ICLTensor *input, ICLTensor *output, const ActivationLayerInfo &act_info, int32_t num_classes);
+    /** Set the input and output tensor.
+     *
+     * @note If the output tensor is a nullptr, the activation function will be performed in-place
+     *
+     * @param[in]      compile_context The compile context to be used.
+     * @param[in, out] input           Source tensor. In case of @p output tensor = nullptr, this tensor will store the result
+     *                                 of the activation function. Data types supported: F16/F32.
+     * @param[out]     output          Destination tensor. Data type supported: same as @p input
+     * @param[in]      act_info        Activation layer information.
+     * @param[in]      num_classes     Number of classes to activate (must be submultiple of @p input channels)
+     */
+    void configure(CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const ActivationLayerInfo &act_info, int32_t num_classes);
     /** Static function to check if given info will lead to a valid configuration of @ref CLYOLOLayerKernel
      *
      * @param[in] input       Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result
diff --git a/arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h b/arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h
index 9c62f96f6d..040ca157de 100644
--- a/arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h
+++ b/arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -68,6 +68,27 @@ public:
     virtual void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
                            unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U),
                            const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) = 0;
+    /** Initialize the function's source, destination, conv and border_size.
+     *
+     * @param[in]  compile_context    The compile context to be used.
+     * @param[in]  input              Source tensor. DataType supported: QASYMM8/F16/F32.
+     * @param[in]  weights            Weights tensor. A 3D tensor with dimensions [3, 3, IFM].
+     *                                Data type supported: Same as @p input, QASYMM8/QSYMM8_PER_CHANNEL when input is QASYMM8.
+     * @param[in]  biases             Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
+     *                                Data type supported: Same as @p input, S32 when input is QASYMM8.
+     * @param[out] output             Destination tensor. Data type supported: Same as @p input.
+     * @param[in]  conv_info          Padding and stride information to use for the convolution.
+     * @param[in]  depth_multiplier   (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+     * @param[in]  act_info           (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported for QASYMM8.
+     * @param[in]  dilation           (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+     * @param[in]  output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization,
+     *                                the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
+     * @param[in]  output_shifts      (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization,
+     *                                the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
+     */
+    virtual void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
+                           unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U),
+                           const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) = 0;
 
 protected:
     BorderSize       _border_size;
-- 
cgit v1.2.1