From ebcebf1dee7f8314976b1e0cabd62b4cf893d765 Mon Sep 17 00:00:00 2001 From: Michalis Spyrou Date: Wed, 21 Oct 2020 00:04:14 +0100 Subject: COMPMID-3638: Move NEON kernels Signed-off-by: Michalis Spyrou Change-Id: Ieed3e4bc8be7fef80c90c5094599b477a56fc473 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4285 Comments-Addressed: Arm Jenkins Reviewed-by: Georgios Pinitas Tested-by: Arm Jenkins --- arm_compute/runtime/NEON/INEOperator.h | 5 +- arm_compute/runtime/NEON/INESimpleFunction.h | 21 ++- .../runtime/NEON/INESimpleFunctionNoBorder.h | 7 +- .../runtime/NEON/functions/NEAbsoluteDifference.h | 18 +- arm_compute/runtime/NEON/functions/NEAccumulate.h | 38 ++++- .../runtime/NEON/functions/NEActivationLayer.h | 17 +- .../runtime/NEON/functions/NEArgMinMaxLayer.h | 10 ++ .../runtime/NEON/functions/NEArithmeticAddition.h | 12 ++ .../NEON/functions/NEBatchNormalizationLayer.h | 20 ++- .../runtime/NEON/functions/NEBatchToSpaceLayer.h | 16 +- arm_compute/runtime/NEON/functions/NEBitwiseAnd.h | 14 +- arm_compute/runtime/NEON/functions/NEBitwiseNot.h | 2 +- arm_compute/runtime/NEON/functions/NEBitwiseOr.h | 2 +- arm_compute/runtime/NEON/functions/NEBitwiseXor.h | 2 +- .../NEON/functions/NEBoundingBoxTransform.h | 7 +- arm_compute/runtime/NEON/functions/NEBox3x3.h | 2 +- arm_compute/runtime/NEON/functions/NECannyEdge.h | 36 ++-- arm_compute/runtime/NEON/functions/NECast.h | 5 +- .../runtime/NEON/functions/NEChannelCombine.h | 2 +- .../runtime/NEON/functions/NEChannelExtract.h | 2 +- .../runtime/NEON/functions/NEChannelShuffleLayer.h | 4 +- arm_compute/runtime/NEON/functions/NECol2Im.h | 5 +- .../runtime/NEON/functions/NEColorConvert.h | 2 +- .../runtime/NEON/functions/NEComputeAllAnchors.h | 9 +- .../runtime/NEON/functions/NEConcatenateLayer.h | 19 ++- .../functions/NEConvertFullyConnectedWeights.h | 17 +- arm_compute/runtime/NEON/functions/NEConvolution.h | 59 +++++-- .../runtime/NEON/functions/NEConvolutionLayer.h | 13 +- 
arm_compute/runtime/NEON/functions/NECopy.h | 15 +- arm_compute/runtime/NEON/functions/NECropResize.h | 4 +- .../runtime/NEON/functions/NEDepthConvertLayer.h | 3 + .../runtime/NEON/functions/NEDepthToSpaceLayer.h | 16 +- .../NEON/functions/NEDepthwiseConvolutionLayer.h | 71 ++++---- .../runtime/NEON/functions/NEDequantizationLayer.h | 1 + arm_compute/runtime/NEON/functions/NEDerivative.h | 22 ++- .../NEON/functions/NEDetectionPostProcessLayer.h | 2 + arm_compute/runtime/NEON/functions/NEDilate.h | 2 +- .../NEON/functions/NEDirectConvolutionLayer.h | 37 ++-- .../NEON/functions/NEElementwiseUnaryLayer.h | 2 + .../runtime/NEON/functions/NEEqualizeHistogram.h | 34 ++-- arm_compute/runtime/NEON/functions/NEErode.h | 2 +- arm_compute/runtime/NEON/functions/NEFFT1D.h | 38 +++-- arm_compute/runtime/NEON/functions/NEFFT2D.h | 12 +- .../runtime/NEON/functions/NEFFTConvolutionLayer.h | 4 +- arm_compute/runtime/NEON/functions/NEFastCorners.h | 36 ++-- arm_compute/runtime/NEON/functions/NEFill.h | 3 +- arm_compute/runtime/NEON/functions/NEFillBorder.h | 7 +- .../runtime/NEON/functions/NEFlattenLayer.h | 3 +- arm_compute/runtime/NEON/functions/NEFloor.h | 3 +- .../runtime/NEON/functions/NEFullyConnectedLayer.h | 23 ++- .../NEON/functions/NEFuseBatchNormalization.h | 8 +- arm_compute/runtime/NEON/functions/NEGEMM.h | 33 ++-- .../NEON/functions/NEGEMMConvolutionLayer.h | 29 +++- .../runtime/NEON/functions/NEGEMMInterleave4x4.h | 2 +- .../NEGEMMLowpAssemblyMatrixMultiplyCore.h | 23 ++- .../NEON/functions/NEGEMMLowpMatrixMultiplyCore.h | 46 ++--- .../runtime/NEON/functions/NEGEMMLowpOutputStage.h | 50 ++++++ .../runtime/NEON/functions/NEGEMMTranspose1xW.h | 16 +- arm_compute/runtime/NEON/functions/NEGather.h | 3 +- arm_compute/runtime/NEON/functions/NEGaussian3x3.h | 2 +- arm_compute/runtime/NEON/functions/NEGaussian5x5.h | 27 ++- .../runtime/NEON/functions/NEGaussianPyramid.h | 34 +++- .../NEON/functions/NEGenerateProposalsLayer.h | 34 ++-- 
.../runtime/NEON/functions/NEHOGDescriptor.h | 30 +++- arm_compute/runtime/NEON/functions/NEHOGDetector.h | 18 +- arm_compute/runtime/NEON/functions/NEHOGGradient.h | 25 ++- .../runtime/NEON/functions/NEHOGMultiDetection.h | 12 +- .../runtime/NEON/functions/NEHarrisCorners.h | 20 ++- arm_compute/runtime/NEON/functions/NEHistogram.h | 25 ++- arm_compute/runtime/NEON/functions/NEIm2Col.h | 18 +- .../NEON/functions/NEInstanceNormalizationLayer.h | 28 +++- .../runtime/NEON/functions/NEIntegralImage.h | 14 +- .../runtime/NEON/functions/NEL2NormalizeLayer.h | 20 ++- arm_compute/runtime/NEON/functions/NELSTMLayer.h | 21 ++- .../runtime/NEON/functions/NELSTMLayerQuantized.h | 4 +- .../runtime/NEON/functions/NELaplacianPyramid.h | 12 +- .../NEON/functions/NELaplacianReconstruct.h | 12 +- .../NEON/functions/NELocallyConnectedLayer.h | 30 ++-- arm_compute/runtime/NEON/functions/NEMagnitude.h | 15 +- .../runtime/NEON/functions/NEMaxUnpoolingLayer.h | 22 ++- arm_compute/runtime/NEON/functions/NEMeanStdDev.h | 27 ++- .../functions/NEMeanStdDevNormalizationLayer.h | 15 +- arm_compute/runtime/NEON/functions/NEMedian3x3.h | 2 +- .../runtime/NEON/functions/NEMinMaxLocation.h | 20 ++- .../runtime/NEON/functions/NENonLinearFilter.h | 2 +- .../NEON/functions/NENonMaximaSuppression3x3.h | 2 +- .../runtime/NEON/functions/NENormalizationLayer.h | 21 ++- arm_compute/runtime/NEON/functions/NEOpticalFlow.h | 29 ++-- arm_compute/runtime/NEON/functions/NEPReluLayer.h | 1 + arm_compute/runtime/NEON/functions/NEPadLayer.h | 38 +++-- arm_compute/runtime/NEON/functions/NEPermute.h | 3 +- arm_compute/runtime/NEON/functions/NEPhase.h | 4 +- .../NEON/functions/NEPixelWiseMultiplication.h | 1 + .../runtime/NEON/functions/NEPoolingLayer.h | 24 ++- .../runtime/NEON/functions/NEPriorBoxLayer.h | 4 +- arm_compute/runtime/NEON/functions/NEQLSTMLayer.h | 186 ++++++++++----------- .../runtime/NEON/functions/NEQuantizationLayer.h | 4 +- arm_compute/runtime/NEON/functions/NERNNLayer.h | 25 +-- 
.../runtime/NEON/functions/NEROIAlignLayer.h | 6 +- .../runtime/NEON/functions/NEROIPoolingLayer.h | 18 +- arm_compute/runtime/NEON/functions/NERange.h | 18 +- arm_compute/runtime/NEON/functions/NEReduceMean.h | 11 +- .../runtime/NEON/functions/NEReductionOperation.h | 27 ++- arm_compute/runtime/NEON/functions/NERemap.h | 2 +- arm_compute/runtime/NEON/functions/NEReorgLayer.h | 3 +- .../runtime/NEON/functions/NEReshapeLayer.h | 13 +- arm_compute/runtime/NEON/functions/NEReverse.h | 3 +- arm_compute/runtime/NEON/functions/NEScale.h | 2 +- arm_compute/runtime/NEON/functions/NESelect.h | 7 +- arm_compute/runtime/NEON/functions/NESobel3x3.h | 2 +- arm_compute/runtime/NEON/functions/NESobel5x5.h | 29 +++- arm_compute/runtime/NEON/functions/NESobel7x7.h | 29 +++- .../runtime/NEON/functions/NESoftmaxLayer.h | 31 ++-- .../runtime/NEON/functions/NESpaceToBatchLayer.h | 16 +- .../runtime/NEON/functions/NESpaceToDepthLayer.h | 13 +- arm_compute/runtime/NEON/functions/NEStackLayer.h | 22 ++- arm_compute/runtime/NEON/functions/NETableLookup.h | 2 +- arm_compute/runtime/NEON/functions/NEThreshold.h | 1 + arm_compute/runtime/NEON/functions/NETile.h | 3 +- arm_compute/runtime/NEON/functions/NETranspose.h | 3 +- arm_compute/runtime/NEON/functions/NEUnstack.h | 10 ++ .../runtime/NEON/functions/NEUpsampleLayer.h | 18 +- arm_compute/runtime/NEON/functions/NEWarpAffine.h | 2 +- .../runtime/NEON/functions/NEWarpPerspective.h | 2 +- .../NEON/functions/NEWinogradConvolutionLayer.h | 20 ++- arm_compute/runtime/NEON/functions/NEYOLOLayer.h | 4 +- 126 files changed, 1443 insertions(+), 621 deletions(-) (limited to 'arm_compute/runtime/NEON') diff --git a/arm_compute/runtime/NEON/INEOperator.h b/arm_compute/runtime/NEON/INEOperator.h index 415e767eec..a5ffc74940 100644 --- a/arm_compute/runtime/NEON/INEOperator.h +++ b/arm_compute/runtime/NEON/INEOperator.h @@ -25,7 +25,6 @@ #define ARM_COMPUTE_INEOPERATOR_H #include "../../core/ITensor.h" -#include "arm_compute/core/NEON/INEKernel.h" 
#include "arm_compute/runtime/IOperator.h" #include "arm_compute/runtime/IRuntimeContext.h" #include "arm_compute/runtime/Types.h" @@ -34,6 +33,8 @@ namespace arm_compute { +class ICPPKernel; +using INEKernel = ICPPKernel; namespace experimental { /** Basic interface for functions which have a single async NEON kernel */ @@ -53,6 +54,8 @@ public: INEOperator &operator=(const INEOperator &) = delete; /** Default move assignment operator */ INEOperator &operator=(INEOperator &&) = default; + /** Default destructor */ + ~INEOperator(); // Inherited methods overridden: void run(ITensorPack &tensors) override; diff --git a/arm_compute/runtime/NEON/INESimpleFunction.h b/arm_compute/runtime/NEON/INESimpleFunction.h index 7f2ed2e16f..979a0f7f07 100644 --- a/arm_compute/runtime/NEON/INESimpleFunction.h +++ b/arm_compute/runtime/NEON/INESimpleFunction.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,27 +24,38 @@ #ifndef ARM_COMPUTE_INESIMPLEFUNCTION_H #define ARM_COMPUTE_INESIMPLEFUNCTION_H -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" #include "arm_compute/runtime/IFunction.h" #include namespace arm_compute { +class ICPPKernel; +class NEFillBorderKernel; +using INEKernel = ICPPKernel; /** Basic interface for functions which have a single NEON kernel */ class INESimpleFunction : public IFunction { public: /** Constructor */ INESimpleFunction(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + INESimpleFunction(const INESimpleFunction &) = delete; + /** Default move constructor */ + INESimpleFunction(INESimpleFunction &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + INESimpleFunction &operator=(const INESimpleFunction &) = delete; + /** Default move assignment operator */ + INESimpleFunction 
&operator=(INESimpleFunction &&) = default; + /** Default destructor */ + ~INESimpleFunction(); // Inherited methods overridden: void run() override final; protected: - std::unique_ptr _kernel; /**< Kernel to run */ - NEFillBorderKernel _border_handler; /**< Kernel to handle image borders */ + std::unique_ptr _kernel; /**< Kernel to run */ + std::unique_ptr _border_handler; /**< Kernel to handle image borders */ }; } #endif /*ARM_COMPUTE_INESIMPLEFUNCTION_H */ diff --git a/arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h b/arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h index 7d352eb82b..9df0d78526 100644 --- a/arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h +++ b/arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,7 +24,6 @@ #ifndef ARM_COMPUTE_INESIMPLEFUNCTIONNOBORDER_H #define ARM_COMPUTE_INESIMPLEFUNCTIONNOBORDER_H -#include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IRuntimeContext.h" @@ -32,6 +31,8 @@ namespace arm_compute { +class ICPPKernel; +using INEKernel = ICPPKernel; /** Basic interface for functions which have a single NEON kernel and no border */ class INESimpleFunctionNoBorder : public IFunction { @@ -49,6 +50,8 @@ public: INESimpleFunctionNoBorder &operator=(const INESimpleFunctionNoBorder &) = delete; /** Default move assignment operator */ INESimpleFunctionNoBorder &operator=(INESimpleFunctionNoBorder &&) = default; + /** Default destructor */ + ~INESimpleFunctionNoBorder(); // Inherited methods overridden: void run() override final; diff --git a/arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h b/arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h index 7b35e6db9e..df7dc2d980 100644 --- a/arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h +++ b/arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h @@ 
-1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,7 +24,7 @@ #ifndef ARM_COMPUTE_NEABSOLUTEDIFFERENCE_H #define ARM_COMPUTE_NEABSOLUTEDIFFERENCE_H -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { @@ -35,9 +35,21 @@ class ITensor; * @note The image data type for the inputs must be U8 or S16 * @note The function calculates the absolute difference also when the 2 inputs have different image data types */ -class NEAbsoluteDifference : public INESimpleFunction +class NEAbsoluteDifference : public INESimpleFunctionNoBorder { public: + /** Default constructor */ + NEAbsoluteDifference() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEAbsoluteDifference(const NEAbsoluteDifference &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEAbsoluteDifference &operator=(const NEAbsoluteDifference &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEAbsoluteDifference(NEAbsoluteDifference &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEAbsoluteDifference &operator=(NEAbsoluteDifference &&) = delete; + /** Default destructor */ + ~NEAbsoluteDifference(); /** Set the inputs and output images * * @param[in] input1 Source tensor. Data types supported: U8/S16. diff --git a/arm_compute/runtime/NEON/functions/NEAccumulate.h b/arm_compute/runtime/NEON/functions/NEAccumulate.h index f403a7772b..6dcef09f10 100644 --- a/arm_compute/runtime/NEON/functions/NEAccumulate.h +++ b/arm_compute/runtime/NEON/functions/NEAccumulate.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -36,6 +36,18 @@ class ITensor; class NEAccumulate : public INESimpleFunctionNoBorder { public: + /** Default constructor */ + NEAccumulate() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEAccumulate(const NEAccumulate &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEAccumulate &operator=(const NEAccumulate &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEAccumulate(NEAccumulate &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEAccumulate &operator=(NEAccumulate &&) = delete; + /** Default destructor */ + ~NEAccumulate(); /** Set the input and accumulation tensors * * @param[in] input Source tensor. Data type supported: U8. @@ -48,6 +60,18 @@ public: class NEAccumulateWeighted : public INESimpleFunctionNoBorder { public: + /** Default constructor */ + NEAccumulateWeighted() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEAccumulateWeighted(const NEAccumulateWeighted &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEAccumulateWeighted &operator=(const NEAccumulateWeighted &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEAccumulateWeighted(NEAccumulateWeighted &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEAccumulateWeighted &operator=(NEAccumulateWeighted &&) = delete; + /** Default destructor */ + ~NEAccumulateWeighted(); /** Set the input and accumulation tensors, and the scale value * * @param[in] input Source tensor. Data type supported: U8. 
@@ -62,6 +86,18 @@ public: class NEAccumulateSquared : public INESimpleFunctionNoBorder { public: + /** Default constructor */ + NEAccumulateSquared() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEAccumulateSquared(const NEAccumulateSquared &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEAccumulateSquared &operator=(const NEAccumulateSquared &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEAccumulateSquared(NEAccumulateSquared &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEAccumulateSquared &operator=(NEAccumulateSquared &&) = delete; + /** Default destructor */ + ~NEAccumulateSquared(); /** Set the input and accumulation tensors and the shift value. * * @param[in] input Source tensor. Data type supported: U8. diff --git a/arm_compute/runtime/NEON/functions/NEActivationLayer.h b/arm_compute/runtime/NEON/functions/NEActivationLayer.h index cfece5c392..3f410fcd8c 100644 --- a/arm_compute/runtime/NEON/functions/NEActivationLayer.h +++ b/arm_compute/runtime/NEON/functions/NEActivationLayer.h @@ -46,8 +46,6 @@ public: * @param[in] ctx Runtime context to be used by the function */ NEActivationLayer(IRuntimeContext *ctx = nullptr); - /** Destructor */ - ~NEActivationLayer(); /** Prevent instances of this class from being copied (As this class contains pointers) */ NEActivationLayer(const NEActivationLayer &) = delete; /** Default move constructor */ @@ -56,6 +54,8 @@ public: NEActivationLayer &operator=(const NEActivationLayer &) = delete; /** Default move assignment operator */ NEActivationLayer &operator=(NEActivationLayer &&); + /** Destructor */ + ~NEActivationLayer(); /** [NEActivationLayer snippet] **/ /** Set the input and output tensor. 
* @@ -93,6 +93,19 @@ namespace experimental class NEActivationLayer : public INEOperator { public: + /** Constructor */ + NEActivationLayer() = default; + /** Prevent instances of this class from being copied */ + NEActivationLayer(const NEActivationLayer &) = delete; + /** Default move constructor */ + NEActivationLayer(NEActivationLayer &&) = default; + /** Prevent instances of this class from being copied */ + NEActivationLayer &operator=(const NEActivationLayer &) = delete; + /** Default move assignment operator */ + NEActivationLayer &operator=(NEActivationLayer &&) = default; + /** Destructor */ + ~NEActivationLayer(); + /** Set the input and output tensor. * * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32. diff --git a/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h b/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h index 61762f37e1..4b13d1f44e 100644 --- a/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h +++ b/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h @@ -52,6 +52,16 @@ class NEArgMinMaxLayer : public IFunction public: /** Constructor */ NEArgMinMaxLayer(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEArgMinMaxLayer(const NEArgMinMaxLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEArgMinMaxLayer &operator=(const NEArgMinMaxLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEArgMinMaxLayer(NEArgMinMaxLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEArgMinMaxLayer &operator=(NEArgMinMaxLayer &&) = delete; + /** Default destructor */ + ~NEArgMinMaxLayer(); /** Set the input and output tensors. * * @param[in] input Input source tensor. 
Data types supported: QASYMM8_SIGNED/QASYMM8/S32/F16/F32. diff --git a/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h b/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h index e10771ef4b..6aaa5ff4f7 100644 --- a/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h +++ b/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h @@ -38,6 +38,18 @@ namespace experimental class NEArithmeticAddition : public INEOperator { public: + /** Constructor */ + NEArithmeticAddition() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEArithmeticAddition(const NEArithmeticAddition &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEArithmeticAddition &operator=(const NEArithmeticAddition &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEArithmeticAddition(NEArithmeticAddition &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEArithmeticAddition &operator=(NEArithmeticAddition &&) = delete; + /** Default destructor */ + ~NEArithmeticAddition(); /** Initialise the kernel's inputs, output and conversion policy. * * Valid configurations (Input1,Input2) -> Output : diff --git a/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h b/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h index 1f77164a43..6d56a267a7 100644 --- a/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h +++ b/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,14 +24,16 @@ #ifndef ARM_COMPUTE_NEBATCHNORMALIZATIONLAYER_H #define ARM_COMPUTE_NEBATCHNORMALIZATIONLAYER_H -#include "arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" +#include + namespace arm_compute { class ITensor; +class NEBatchNormalizationLayerKernel; /** Basic function to run @ref NENormalizationLayerKernel and simulate a batch normalization layer. * @@ -42,8 +44,18 @@ class ITensor; class NEBatchNormalizationLayer : public IFunction { public: - /** Default constructor */ + /** Constructor */ NEBatchNormalizationLayer(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBatchNormalizationLayer(const NEBatchNormalizationLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBatchNormalizationLayer &operator=(const NEBatchNormalizationLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEBatchNormalizationLayer(NEBatchNormalizationLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEBatchNormalizationLayer &operator=(NEBatchNormalizationLayer &&) = delete; + /** Default destructor */ + ~NEBatchNormalizationLayer(); /** Set the input and output tensors. 
* * @note If the output tensor is a nullptr or is equal to the input, the batch normalization function will be performed in-place @@ -85,7 +97,7 @@ public: void run() override; private: - NEBatchNormalizationLayerKernel _norm_kernel; /**< Batch normalization layer kernel */ + std::unique_ptr _norm_kernel; /**< Batch normalization layer kernel */ }; } #endif /* ARM_COMPUTE_NEBATCHNORMALIZATIONLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h b/arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h index 1a6ffa9506..c2fd26d34c 100644 --- a/arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h +++ b/arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -26,18 +26,30 @@ #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/NEON/kernels/NEBatchToSpaceLayerKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to run @ref NEBatchToSpaceLayerKernel. 
*/ class NEBatchToSpaceLayer : public INESimpleFunctionNoBorder { public: + /** Constructor */ + NEBatchToSpaceLayer() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBatchToSpaceLayer(const NEBatchToSpaceLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBatchToSpaceLayer &operator=(const NEBatchToSpaceLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEBatchToSpaceLayer(NEBatchToSpaceLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEBatchToSpaceLayer &operator=(NEBatchToSpaceLayer &&) = delete; + /** Default destructor */ + ~NEBatchToSpaceLayer() = default; /** Set the input and output tensors. * * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h b/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h index c612a146ac..3203d2b9a7 100644 --- a/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h +++ b/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -34,6 +34,18 @@ class ITensor; class NEBitwiseAnd : public INESimpleFunctionNoBorder { public: + /** Constructor */ + NEBitwiseAnd() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBitwiseAnd(const NEBitwiseAnd &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBitwiseAnd &operator=(const NEBitwiseAnd &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEBitwiseAnd(NEBitwiseAnd &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEBitwiseAnd &operator=(NEBitwiseAnd &&) = delete; + /** Default destructor */ + ~NEBitwiseAnd() = default; /** Initialise the kernel's inputs and output * * @param[in] input1 First tensor input. Data type supported: U8. diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseNot.h b/arm_compute/runtime/NEON/functions/NEBitwiseNot.h index f6ef975dc7..9fa0d38caf 100644 --- a/arm_compute/runtime/NEON/functions/NEBitwiseNot.h +++ b/arm_compute/runtime/NEON/functions/NEBitwiseNot.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseOr.h b/arm_compute/runtime/NEON/functions/NEBitwiseOr.h index 8fc4b0d362..fba6b784de 100644 --- a/arm_compute/runtime/NEON/functions/NEBitwiseOr.h +++ b/arm_compute/runtime/NEON/functions/NEBitwiseOr.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseXor.h b/arm_compute/runtime/NEON/functions/NEBitwiseXor.h index 20e23af234..c6cb584284 100644 --- a/arm_compute/runtime/NEON/functions/NEBitwiseXor.h +++ b/arm_compute/runtime/NEON/functions/NEBitwiseXor.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h b/arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h index 14d5de4ca4..de8dfef4ed 100644 --- a/arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h +++ b/arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h @@ -24,19 +24,20 @@ #ifndef ARM_COMPUTE_NEBOUNDINGBOXTRANSOFORM_H #define ARM_COMPUTE_NEBOUNDINGBOXTRANSOFORM_H -#include "arm_compute/core/NEON/kernels/NEBoundingBoxTransformKernel.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to run @ref NEBoundingBoxTransformKernel. * * This function calls the following Neon kernels: * -# @ref NEBoundingBoxTransformKernel */ -class NEBoundingBoxTransform : public INESimpleFunction +class NEBoundingBoxTransform : public INESimpleFunctionNoBorder { public: /** Set the input and output tensors. diff --git a/arm_compute/runtime/NEON/functions/NEBox3x3.h b/arm_compute/runtime/NEON/functions/NEBox3x3.h index 80cd5084ab..4d8b12684b 100644 --- a/arm_compute/runtime/NEON/functions/NEBox3x3.h +++ b/arm_compute/runtime/NEON/functions/NEBox3x3.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/arm_compute/runtime/NEON/functions/NECannyEdge.h b/arm_compute/runtime/NEON/functions/NECannyEdge.h index f171c3bed0..b08646de0d 100644 --- a/arm_compute/runtime/NEON/functions/NECannyEdge.h +++ b/arm_compute/runtime/NEON/functions/NECannyEdge.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,8 +24,6 @@ #ifndef ARM_COMPUTE_NECANNYEDGE_H #define ARM_COMPUTE_NECANNYEDGE_H -#include "arm_compute/core/NEON/kernels/NECannyEdgeKernel.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" @@ -38,6 +36,10 @@ namespace arm_compute { class ITensor; +class NEGradientKernel; +class NEFillBorderKernel; +class NEEdgeNonMaxSuppressionKernel; +class NEEdgeTraceKernel; /** Basic function to execute canny edge on NEON. This function calls the following NEON kernels and functions: * @@ -64,6 +66,8 @@ public: NECannyEdge(const NECannyEdge &) = delete; /** Prevent instances of this class from being copied (As this class contains pointers) */ NECannyEdge &operator=(const NECannyEdge &) = delete; + /** Default destructor */ + ~NECannyEdge(); /** Initialise the function's source, destination, thresholds, gradient size, normalization type and border mode. * * @param[in, out] input Source tensor. Data type supported: U8. 
(Written to only for @p border_mode != UNDEFINED) @@ -81,19 +85,19 @@ public: void run() override; private: - MemoryGroup _memory_group; /**< Function's memory group */ - std::unique_ptr _sobel; /**< Pointer to Sobel kernel */ - std::unique_ptr _gradient; /**< Gradient kernel */ - NEEdgeNonMaxSuppressionKernel _non_max_suppr; /**< Non-Maxima suppression kernel */ - NEEdgeTraceKernel _edge_trace; /**< Edge tracing kernel */ - NEFillBorderKernel _border_mag_gradient; /**< Fill border on magnitude tensor kernel */ - NEFillBorderKernel _border_edge_trace; /**< Fill border before edge trace */ - Tensor _gx; /**< Source tensor - Gx component */ - Tensor _gy; /**< Source tensor - Gy component */ - Tensor _magnitude; /**< Source tensor - Magnitude */ - Tensor _phase; /**< Source tensor - Phase */ - Tensor _nonmax; /**< Source tensor - Non-Maxima suppressed */ - ITensor *_output; /**< Output tensor provided by the user. */ + MemoryGroup _memory_group; /**< Function's memory group */ + std::unique_ptr _sobel; /**< Pointer to Sobel kernel */ + std::unique_ptr _gradient; /**< Gradient kernel */ + std::unique_ptr _non_max_suppr; /**< Non-Maxima suppression kernel */ + std::unique_ptr _edge_trace; /**< Edge tracing kernel */ + std::unique_ptr _border_mag_gradient; /**< Fill border on magnitude tensor kernel */ + std::unique_ptr _border_edge_trace; /**< Fill border before edge trace */ + Tensor _gx; /**< Source tensor - Gx component */ + Tensor _gy; /**< Source tensor - Gy component */ + Tensor _magnitude; /**< Source tensor - Magnitude */ + Tensor _phase; /**< Source tensor - Phase */ + Tensor _nonmax; /**< Source tensor - Non-Maxima suppressed */ + ITensor *_output; /**< Output tensor provided by the user. 
*/ }; } #endif /* ARM_COMPUTE_NECANNYEDGE_H */ diff --git a/arm_compute/runtime/NEON/functions/NECast.h b/arm_compute/runtime/NEON/functions/NECast.h index ca818bea27..e536317660 100644 --- a/arm_compute/runtime/NEON/functions/NECast.h +++ b/arm_compute/runtime/NEON/functions/NECast.h @@ -25,16 +25,17 @@ #define ARM_COMPUTE_NECAST_H #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to run @ref NEDepthConvertLayerKernel. * This function ignores the scale and zeroPoint of quanized tensors,so QASYMM8 input is treated as uint8 values. */ -class NECast : public INESimpleFunction +class NECast : public INESimpleFunctionNoBorder { public: /** Initialize the function's source, destination diff --git a/arm_compute/runtime/NEON/functions/NEChannelCombine.h b/arm_compute/runtime/NEON/functions/NEChannelCombine.h index c4ced62e72..44a0504824 100644 --- a/arm_compute/runtime/NEON/functions/NEChannelCombine.h +++ b/arm_compute/runtime/NEON/functions/NEChannelCombine.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/arm_compute/runtime/NEON/functions/NEChannelExtract.h b/arm_compute/runtime/NEON/functions/NEChannelExtract.h index 54059e91e1..4b6383d6b1 100644 --- a/arm_compute/runtime/NEON/functions/NEChannelExtract.h +++ b/arm_compute/runtime/NEON/functions/NEChannelExtract.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h b/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h index f31518e85b..aa11396c20 100644 --- a/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h +++ b/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,12 +24,14 @@ #ifndef ARM_COMPUTE_NECHANNELSHUFFLELAYER_H #define ARM_COMPUTE_NECHANNELSHUFFLELAYER_H +#include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { // Forward declarations class ITensor; +class ITensorInfo; /** Basic function to run @ref NEChannelShuffleLayerKernel * diff --git a/arm_compute/runtime/NEON/functions/NECol2Im.h b/arm_compute/runtime/NEON/functions/NECol2Im.h index e03ec42c4f..69459a83c1 100644 --- a/arm_compute/runtime/NEON/functions/NECol2Im.h +++ b/arm_compute/runtime/NEON/functions/NECol2Im.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -26,12 +26,13 @@ #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" +#include "arm_compute/core/Error.h" #include "arm_compute/core/Size2D.h" -#include "arm_compute/core/Types.h" namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to run @ref NECol2Im */ class NECol2Im : public INESimpleFunctionNoBorder diff --git a/arm_compute/runtime/NEON/functions/NEColorConvert.h b/arm_compute/runtime/NEON/functions/NEColorConvert.h index b4c4158804..545550c04a 100644 --- a/arm_compute/runtime/NEON/functions/NEColorConvert.h +++ b/arm_compute/runtime/NEON/functions/NEColorConvert.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/arm_compute/runtime/NEON/functions/NEComputeAllAnchors.h b/arm_compute/runtime/NEON/functions/NEComputeAllAnchors.h index 44f3f860cf..b63243fec6 100644 --- a/arm_compute/runtime/NEON/functions/NEComputeAllAnchors.h +++ b/arm_compute/runtime/NEON/functions/NEComputeAllAnchors.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,19 +24,20 @@ #ifndef ARM_COMPUTE_NECOMPUTEALLANCHORS_H #define ARM_COMPUTE_NECOMPUTEALLANCHORS_H -#include "arm_compute/core/NEON/kernels/NEGenerateProposalsLayerKernel.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to run @ref NEComputeAllAnchorsKernel. * * This function calls the following NEON kernels: * -# @ref NEComputeAllAnchorsKernel */ -class NEComputeAllAnchors : public INESimpleFunction +class NEComputeAllAnchors : public INESimpleFunctionNoBorder { public: /** Set the input and output tensors. 
diff --git a/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h b/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h index 82b4517dd3..fd35d0bc46 100644 --- a/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h +++ b/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h @@ -26,7 +26,6 @@ #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/INEOperator.h" #include "support/Requires.h" @@ -106,8 +105,18 @@ namespace experimental class NEConcatenation : public INEOperator { public: - /** Default constructor */ + /** Constructor */ NEConcatenation(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConcatenation(const NEConcatenation &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConcatenation &operator=(const NEConcatenation &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEConcatenation(NEConcatenation &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEConcatenation &operator=(NEConcatenation &&) = delete; + /** Default destructor */ + ~NEConcatenation() = default; /** Initialise the kernel's inputs vector and output. * * @note Input and output tensor dimensions preconditions defer depending on the concatenation axis. 
@@ -135,9 +144,9 @@ public: void run(ITensorPack &tensors) override; private: - std::vector> _concat_kernels; - unsigned int _num_inputs; - unsigned int _axis; + std::vector> _concat_kernels; + unsigned int _num_inputs; + unsigned int _axis; }; } // namespace experimental } // namespace arm_compute diff --git a/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h b/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h index 42a62dc0ab..984e8d68c0 100644 --- a/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h +++ b/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,16 +24,17 @@ #ifndef ARM_COMPUTE_NECONVERTFULLYCONNECTEDWEIGHTS_H #define ARM_COMPUTE_NECONVERTFULLYCONNECTEDWEIGHTS_H -#include "arm_compute/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/ITransformWeights.h" #include "arm_compute/runtime/NEON/NEScheduler.h" #include "arm_compute/runtime/Tensor.h" +#include namespace arm_compute { // Forward declarations class ITensor; +class NEConvertFullyConnectedWeightsKernel; /** Basic function to run @ref NEConvertFullyConnectedWeightsKernel. 
*/ class NEConvertFullyConnectedWeights : public IFunction @@ -41,6 +42,16 @@ class NEConvertFullyConnectedWeights : public IFunction public: /** Default constructor */ NEConvertFullyConnectedWeights(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConvertFullyConnectedWeights(const NEConvertFullyConnectedWeights &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConvertFullyConnectedWeights &operator=(const NEConvertFullyConnectedWeights &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEConvertFullyConnectedWeights(NEConvertFullyConnectedWeights &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEConvertFullyConnectedWeights &operator=(NEConvertFullyConnectedWeights &&) = delete; + /** Default destructor */ + ~NEConvertFullyConnectedWeights(); /** Initialize the function. * * @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: All. @@ -64,7 +75,7 @@ public: void run() override; private: - NEConvertFullyConnectedWeightsKernel _kernel; + std::unique_ptr _kernel; }; namespace weights_transformations diff --git a/arm_compute/runtime/NEON/functions/NEConvolution.h b/arm_compute/runtime/NEON/functions/NEConvolution.h index eb16a4582e..9415cf0835 100644 --- a/arm_compute/runtime/NEON/functions/NEConvolution.h +++ b/arm_compute/runtime/NEON/functions/NEConvolution.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,8 +24,6 @@ #ifndef ARM_COMPUTE_NECONVOLUTION_H #define ARM_COMPUTE_NECONVOLUTION_H -#include "arm_compute/core/NEON/kernels/NEConvolutionKernel.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" @@ -39,6 +37,13 @@ namespace arm_compute { class ITensor; +class NEFillBorderKernel; +template +class NEConvolutionKernel; +template +class NESeparableConvolutionHorKernel; +template +class NESeparableConvolutionVertKernel; /** Basic function to execute convolution of size 3x3. This function calls the following NEON kernels: * @@ -49,6 +54,18 @@ class ITensor; class NEConvolution3x3 : public INESimpleFunction { public: + /** Constructor */ + NEConvolution3x3() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConvolution3x3(const NEConvolution3x3 &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConvolution3x3 &operator=(const NEConvolution3x3 &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEConvolution3x3(NEConvolution3x3 &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEConvolution3x3 &operator=(NEConvolution3x3 &&) = delete; + /** Default destructor */ + ~NEConvolution3x3(); /** Initialize the function's source, destination, conv and border_mode. * * @param[in,out] input Source tensor. Data type supported: U8. 
(Written to only for @p border_mode != UNDEFINED) @@ -74,6 +91,16 @@ class NEConvolutionSquare : public IFunction public: /** Default constructor */ NEConvolutionSquare(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConvolutionSquare(const NEConvolutionSquare &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConvolutionSquare &operator=(const NEConvolutionSquare &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEConvolutionSquare(NEConvolutionSquare &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEConvolutionSquare &operator=(NEConvolutionSquare &&) = delete; + /** Default destructor */ + ~NEConvolutionSquare(); /** Initialize the function's source, destination, conv and border_mode. * * @param[in,out] input Source tensor. Data type supported: U8. 
(Written to only for @p border_mode != UNDEFINED) @@ -89,13 +116,13 @@ public: void run() override; private: - MemoryGroup _memory_group; /**< Function memory group */ - Tensor _tmp; /**< temporary buffer for output of horizontal pass */ - bool _is_separable; /**< true if the convolution can be separated */ - NESeparableConvolutionHorKernel _kernel_hor; /**< kernel for horizontal pass of separated convolution */ - NESeparableConvolutionVertKernel _kernel_vert; /**< kernel for vertical pass of separated convolution */ - NEConvolutionKernel _kernel; /**< kernel for non-separated convolution **/ - NEFillBorderKernel _border_handler; /**< kernel for border handling */ + MemoryGroup _memory_group; /**< Function memory group */ + Tensor _tmp; /**< temporary buffer for output of horizontal pass */ + bool _is_separable; /**< true if the convolution can be separated */ + std::unique_ptr> _kernel_hor; /**< kernel for horizontal pass of separated convolution */ + std::unique_ptr> _kernel_vert; /**< kernel for vertical pass of separated convolution */ + std::unique_ptr> _kernel; /**< kernel for non-separated convolution **/ + std::unique_ptr _border_handler; /**< kernel for border handling */ }; /** Basic function to run 5x5 convolution. 
*/ @@ -115,6 +142,18 @@ using NEConvolution9x9 = NEConvolutionSquare<9>; class NEConvolutionRectangle : public INESimpleFunction { public: + /** Constructor */ + NEConvolutionRectangle() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConvolutionRectangle(const NEConvolutionRectangle &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConvolutionRectangle &operator=(const NEConvolutionRectangle &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEConvolutionRectangle(NEConvolutionRectangle &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEConvolutionRectangle &operator=(NEConvolutionRectangle &&) = delete; + /** Default destructor */ + ~NEConvolutionRectangle(); /** Initialize the function's source, destination, conv and border_mode. * * @param[in,out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) diff --git a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h index e8b425b459..54dae57752 100644 --- a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -75,7 +75,16 @@ class NEConvolutionLayer : public IFunction public: /** Constructor */ NEConvolutionLayer(std::shared_ptr memory_manager = nullptr); - + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConvolutionLayer(const NEConvolutionLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConvolutionLayer &operator=(const NEConvolutionLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEConvolutionLayer(NEConvolutionLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEConvolutionLayer &operator=(NEConvolutionLayer &&) = delete; + /** Default destructor */ + ~NEConvolutionLayer() = default; /** Set the input and output tensors. * * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], diff --git a/arm_compute/runtime/NEON/functions/NECopy.h b/arm_compute/runtime/NEON/functions/NECopy.h index df1a49863a..a58ac9e620 100644 --- a/arm_compute/runtime/NEON/functions/NECopy.h +++ b/arm_compute/runtime/NEON/functions/NECopy.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -30,11 +30,24 @@ namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to run @ref NECopyKernel */ class NECopy : public INESimpleFunctionNoBorder { public: + /** Constructor */ + NECopy() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NECopy(const NECopy &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NECopy &operator=(const NECopy &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NECopy(NECopy &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NECopy &operator=(NECopy &&) = delete; + /** Default destructor */ + ~NECopy(); /** Initialise the function's source and destination. * * @param[in] input Source tensor. Data types supported: All diff --git a/arm_compute/runtime/NEON/functions/NECropResize.h b/arm_compute/runtime/NEON/functions/NECropResize.h index 361c236293..5c3733f8ee 100644 --- a/arm_compute/runtime/NEON/functions/NECropResize.h +++ b/arm_compute/runtime/NEON/functions/NECropResize.h @@ -24,7 +24,6 @@ #ifndef ARM_COMPUTE_NEON_CROP_RESIZE_H #define ARM_COMPUTE_NEON_CROP_RESIZE_H -#include "arm_compute/core/NEON/kernels/NECropKernel.h" #include "arm_compute/runtime/NEON/functions/NEScale.h" #include @@ -33,6 +32,7 @@ namespace arm_compute { // Forward Declarations class ITensor; +class NECropKernel; /** Function to perform cropping and resizing */ class NECropResize : public IFunction @@ -49,7 +49,7 @@ public: /** Allow instances of this class to be moved */ NECropResize &operator=(NECropResize &&) = default; /** Default destructor */ - virtual ~NECropResize() = default; + ~NECropResize(); /** Configure kernel * diff --git a/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h b/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h index 
89f3958417..c9817a63c1 100644 --- a/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h @@ -32,6 +32,7 @@ namespace arm_compute { class ITensor; +class ITensorInfo; /**Basic function to run @ref NEDepthConvertLayerKernel */ class NEDepthConvertLayer : public INESimpleFunctionNoBorder @@ -43,6 +44,8 @@ public: NEDepthConvertLayer(const NEDepthConvertLayer &) = delete; /** Prevent instances of this class from being copied (As this class contains pointers)*/ const NEDepthConvertLayer &operator=(const NEDepthConvertLayer &) = delete; + /** Default destructor */ + ~NEDepthConvertLayer() = default; /** Initialize the function's source, destination * * Valid conversions Input -> Output : diff --git a/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h b/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h index 22bbd6e716..51f7ff7770 100644 --- a/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -26,7 +26,6 @@ #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" @@ -34,11 +33,24 @@ namespace arm_compute { // Forward declarations class ITensor; +class ITensorInfo; /** Basic function to run @ref NEDepthToSpaceLayerKernel. 
*/ class NEDepthToSpaceLayer : public INESimpleFunctionNoBorder { public: + /** Constructor */ + NEDepthToSpaceLayer() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDepthToSpaceLayer(const NEDepthToSpaceLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDepthToSpaceLayer &operator=(const NEDepthToSpaceLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEDepthToSpaceLayer(NEDepthToSpaceLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEDepthToSpaceLayer &operator=(NEDepthToSpaceLayer &&) = delete; + /** Default destructor */ + ~NEDepthToSpaceLayer() = default; /** Set the input and output tensors. * * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All diff --git a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h index c6b98ed435..dc70aec7ff 100644 --- a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h @@ -24,17 +24,16 @@ #ifndef ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H #define ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H -#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h" -#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" #include "arm_compute/runtime/NEON/functions/NEPermute.h" #include "arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h" +#include namespace arm_compute { // Forward declarations class ITensor; +class NEDepthwiseConvolutionLayerNativeKernel; /** Function to execute a 
depthwise convolution. */ @@ -51,6 +50,8 @@ public: NEDepthwiseConvolutionLayer &operator=(const NEDepthwiseConvolutionLayer &) = delete; /** Default move assignment operator */ NEDepthwiseConvolutionLayer &operator=(NEDepthwiseConvolutionLayer &&) = default; + /** Default destructor */ + ~NEDepthwiseConvolutionLayer(); /** Initialize the function's source, destination, weights and convolution information. * * @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32 @@ -133,6 +134,8 @@ private: NEDepthwiseConvolutionLayerOptimizedInternal &operator=(const NEDepthwiseConvolutionLayerOptimizedInternal &) = delete; /** Default move assignment operator */ NEDepthwiseConvolutionLayerOptimizedInternal &operator=(NEDepthwiseConvolutionLayerOptimizedInternal &&) = default; + /** Default destructor */ + ~NEDepthwiseConvolutionLayerOptimizedInternal() = default; /** Initialize the function's source, destination, kernels and border_size. * * @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling). 
@@ -170,25 +173,23 @@ private: void prepare() override; private: - MemoryGroup _memory_group; - NEDepthwiseConvolutionAssemblyDispatch _dwc_optimized_func; - NEDirectConvolutionLayerOutputStageKernel _output_stage_kernel; - NEFillBorderKernel _border_handler; - NEPermute _permute_input; - NEPermute _permute_weights; - NEPermute _permute_output; - NEActivationLayer _activationlayer_function; - Tensor _accumulator; - Tensor _permuted_input; - Tensor _permuted_weights; - Tensor _permuted_output; - const ITensor *_original_weights; - bool _has_bias; - bool _is_quantized; - bool _is_nchw; - bool _permute; - bool _is_activationlayer_enabled; - bool _is_prepared; + MemoryGroup _memory_group; + NEDepthwiseConvolutionAssemblyDispatch _dwc_optimized_func; + NEPermute _permute_input; + NEPermute _permute_weights; + NEPermute _permute_output; + NEActivationLayer _activationlayer_function; + Tensor _accumulator; + Tensor _permuted_input; + Tensor _permuted_weights; + Tensor _permuted_output; + const ITensor *_original_weights; + bool _has_bias; + bool _is_quantized; + bool _is_nchw; + bool _permute; + bool _is_activationlayer_enabled; + bool _is_prepared; }; /** Basic function to execute a generic depthwise convolution. This function calls the following NEON kernel: @@ -209,6 +210,8 @@ private: NEDepthwiseConvolutionLayerGeneric &operator=(const NEDepthwiseConvolutionLayerGeneric &) = delete; /** Default move assignment operator */ NEDepthwiseConvolutionLayerGeneric &operator=(NEDepthwiseConvolutionLayerGeneric &&) = default; + /** Default destructor */ + ~NEDepthwiseConvolutionLayerGeneric() = default; /** Initialize the function's source, destination, weights and convolution information. * * @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling). 
@@ -248,18 +251,18 @@ private: void prepare() override; private: - NEDepthwiseConvolutionLayerNativeKernel _depthwise_conv_kernel; - NEPermute _permute_input; - NEPermute _permute_weights; - NEPermute _permute_output; - NEActivationLayer _activationlayer_function; - Tensor _permuted_input; - Tensor _permuted_weights; - Tensor _permuted_output; - bool _is_prepared; - bool _is_nchw; - bool _is_activationlayer_enabled; - const ITensor *_original_weights; + std::unique_ptr _depthwise_conv_kernel; + NEPermute _permute_input; + NEPermute _permute_weights; + NEPermute _permute_output; + NEActivationLayer _activationlayer_function; + Tensor _permuted_input; + Tensor _permuted_weights; + Tensor _permuted_output; + bool _is_prepared; + bool _is_nchw; + bool _is_activationlayer_enabled; + const ITensor *_original_weights; }; DepthwiseConvolutionFunction _depth_conv_func; diff --git a/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h b/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h index 77295bc089..f52d709c74 100644 --- a/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h @@ -32,6 +32,7 @@ namespace arm_compute { // Forward declarations class ITensor; +class ITensorInfo; /** Basic function to run @ref NEDequantizationLayerKernel that dequantizes an input tensor */ class NEDequantizationLayer : public INESimpleFunctionNoBorder diff --git a/arm_compute/runtime/NEON/functions/NEDerivative.h b/arm_compute/runtime/NEON/functions/NEDerivative.h index 8eb21425ac..7d852d0ffe 100644 --- a/arm_compute/runtime/NEON/functions/NEDerivative.h +++ b/arm_compute/runtime/NEON/functions/NEDerivative.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,16 +24,16 @@ #ifndef ARM_COMPUTE_NEDERIVATIVE_H #define ARM_COMPUTE_NEDERIVATIVE_H -#include "arm_compute/core/NEON/kernels/NEDerivativeKernel.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" -#include +#include namespace arm_compute { class ITensor; +class NEDerivativeKernel; +class NEFillBorderKernel; /** Basic function to execute first order derivative operator. This function calls the following NEON kernels: * @@ -46,6 +46,16 @@ class NEDerivative : public IFunction public: /** Default constructor */ NEDerivative(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDerivative(const NEDerivative &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDerivative &operator=(const NEDerivative &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEDerivative(NEDerivative &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEDerivative &operator=(NEDerivative &&) = delete; + /** Default destructor */ + ~NEDerivative(); /** Initialise the function's source, destinations and border mode. * * @note At least one of output_x or output_y must be not NULL. 
@@ -63,8 +73,8 @@ public: void run() override; private: - NEDerivativeKernel _kernel; /**< Derivative kernel */ - NEFillBorderKernel _border_handler; /**< Kernel to handle tensor borders */ + std::unique_ptr _kernel; /**< Derivative kernel */ + std::unique_ptr _border_handler; /**< Kernel to handle tensor borders */ }; } #endif /* ARM_COMPUTE_NEDERIVATIVE_H */ diff --git a/arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h b/arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h index e0431b2b31..d5c1f0ab6f 100644 --- a/arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h @@ -53,6 +53,8 @@ public: NEDetectionPostProcessLayer(const NEDetectionPostProcessLayer &) = delete; /** Prevent instances of this class from being copied (As this class contains pointers) */ NEDetectionPostProcessLayer &operator=(const NEDetectionPostProcessLayer &) = delete; + /** Default destructor */ + ~NEDetectionPostProcessLayer() = default; /** Configure the detection output layer NE function * * @param[in] input_box_encoding The bounding box input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F32. diff --git a/arm_compute/runtime/NEON/functions/NEDilate.h b/arm_compute/runtime/NEON/functions/NEDilate.h index 6dae2c7029..33be5c8fba 100644 --- a/arm_compute/runtime/NEON/functions/NEDilate.h +++ b/arm_compute/runtime/NEON/functions/NEDilate.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h index d1c811c363..5b6ed55be2 100644 --- a/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h @@ -24,9 +24,6 @@ #ifndef ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYER_H #define ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYER_H -#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" @@ -38,6 +35,10 @@ namespace arm_compute { +class NEDirectConvolutionLayerOutputStageKernel; +class NEDirectConvolutionLayerKernel; +class NEFillBorderKernel; + /** Function to run the direct convolution. 
* * This function calls the following NEON kernels: @@ -51,6 +52,16 @@ class NEDirectConvolutionLayer : public IFunction public: /** Constructor */ NEDirectConvolutionLayer(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDirectConvolutionLayer(const NEDirectConvolutionLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDirectConvolutionLayer &operator=(const NEDirectConvolutionLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEDirectConvolutionLayer(NEDirectConvolutionLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEDirectConvolutionLayer &operator=(NEDirectConvolutionLayer &&) = delete; + /** Default destructor */ + ~NEDirectConvolutionLayer(); /** Set the input, weights, biases and output tensors. 
* * @note: DirectConvolution only works in the following configurations: @@ -97,16 +108,16 @@ public: void run() override; private: - MemoryGroup _memory_group; - NEDirectConvolutionLayerOutputStageKernel _output_stage_kernel; - NEDirectConvolutionLayerKernel _conv_kernel; - NEFillBorderKernel _input_border_handler; - NEActivationLayer _activationlayer_function; - Tensor _accumulator; - bool _has_bias; - bool _is_activationlayer_enabled; - unsigned int _dim_split; - bool _is_padding_required; + MemoryGroup _memory_group; + std::unique_ptr _output_stage_kernel; + std::unique_ptr _conv_kernel; + std::unique_ptr _input_border_handler; + NEActivationLayer _activationlayer_function; + Tensor _accumulator; + bool _has_bias; + bool _is_activationlayer_enabled; + unsigned int _dim_split; + bool _is_padding_required; }; } #endif /* ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h b/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h index 8b3301889a..46a7316705 100644 --- a/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h +++ b/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h @@ -24,11 +24,13 @@ #ifndef ARM_COMPUTE_NEELEMENTWISEUNARYLAYER_H #define ARM_COMPUTE_NEELEMENTWISEUNARYLAYER_H +#include "arm_compute/core/Error.h" #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to perform inverse square root on an input tensor. */ class NERsqrtLayer : public INESimpleFunctionNoBorder diff --git a/arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h b/arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h index 5c0c323591..36c4902c04 100644 --- a/arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h +++ b/arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,9 +24,6 @@ #ifndef ARM_COMPUTE_NEEQUALIZEHISTOGRAM_H #define ARM_COMPUTE_NEEQUALIZEHISTOGRAM_H -#include "arm_compute/core/NEON/kernels/NECumulativeDistributionKernel.h" -#include "arm_compute/core/NEON/kernels/NEHistogramKernel.h" -#include "arm_compute/core/NEON/kernels/NETableLookupKernel.h" #include "arm_compute/runtime/Distribution1D.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/Lut.h" @@ -36,6 +33,9 @@ namespace arm_compute { class ITensor; +class NEHistogramKernel; +class NECumulativeDistributionKernel; +class NETableLookupKernel; using IImage = ITensor; /** Basic function to execute histogram equalization. This function calls the following NEON kernels: @@ -50,6 +50,16 @@ class NEEqualizeHistogram : public IFunction public: /** Default Constructor. */ NEEqualizeHistogram(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEEqualizeHistogram(const NEEqualizeHistogram &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEEqualizeHistogram &operator=(const NEEqualizeHistogram &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEEqualizeHistogram(NEEqualizeHistogram &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEEqualizeHistogram &operator=(NEEqualizeHistogram &&) = delete; + /** Default destructor */ + ~NEEqualizeHistogram(); /** Initialise the kernel's inputs. * * @note Currently the width of the input image must be a multiple of 16. @@ -63,15 +73,15 @@ public: void run() override; private: - NEHistogramKernel _histogram_kernel; /**< Kernel that calculates the histogram of input. 
*/ - NECumulativeDistributionKernel _cd_histogram_kernel; /**< Kernel that calculates the cumulative distribution + std::unique_ptr<NEHistogramKernel> _histogram_kernel; /**< Kernel that calculates the histogram of input. */ + std::unique_ptr<NECumulativeDistributionKernel> _cd_histogram_kernel; /**< Kernel that calculates the cumulative distribution and creates the relevant LookupTable. */ - NETableLookupKernel _map_histogram_kernel; /**< Kernel that maps the input to output using the lut. */ - Distribution1D _hist; /**< Distribution that holds the histogram of the input image. */ - Distribution1D _cum_dist; /**< Distribution that holds the cummulative distribution of the input histogram. */ - Lut _cd_lut; /**< Holds the equalization lookuptable. */ - static constexpr uint32_t nr_bins{ 256 }; /**< Histogram bins of the internal histograms. */ - static constexpr uint32_t max_range{ nr_bins - 1 }; /**< Histogram range of the internal histograms. */ + std::unique_ptr<NETableLookupKernel> _map_histogram_kernel; /**< Kernel that maps the input to output using the lut. */ + Distribution1D _hist; /**< Distribution that holds the histogram of the input image. */ + Distribution1D _cum_dist; /**< Distribution that holds the cummulative distribution of the input histogram. */ + Lut _cd_lut; /**< Holds the equalization lookuptable. */ + static constexpr uint32_t nr_bins{ 256 }; /**< Histogram bins of the internal histograms. */ + static constexpr uint32_t max_range{ nr_bins - 1 }; /**< Histogram range of the internal histograms. */ }; } #endif /*ARM_COMPUTE_NEEQUALIZEHISTOGRAM_H */ diff --git a/arm_compute/runtime/NEON/functions/NEErode.h b/arm_compute/runtime/NEON/functions/NEErode.h index 3e84c2b758..e2d76c1e1d 100644 --- a/arm_compute/runtime/NEON/functions/NEErode.h +++ b/arm_compute/runtime/NEON/functions/NEErode.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited.
* * SPDX-License-Identifier: MIT * diff --git a/arm_compute/runtime/NEON/functions/NEFFT1D.h b/arm_compute/runtime/NEON/functions/NEFFT1D.h index 312b46b10f..4b6cc3fd18 100644 --- a/arm_compute/runtime/NEON/functions/NEFFT1D.h +++ b/arm_compute/runtime/NEON/functions/NEFFT1D.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,19 +24,21 @@ #ifndef ARM_COMPUTE_NEFFT1D_H #define ARM_COMPUTE_NEFFT1D_H -#include "arm_compute/core/NEON/kernels/NEFFTDigitReverseKernel.h" -#include "arm_compute/core/NEON/kernels/NEFFTRadixStageKernel.h" -#include "arm_compute/core/NEON/kernels/NEFFTScaleKernel.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/FunctionDescriptors.h" #include "arm_compute/runtime/MemoryGroup.h" #include "arm_compute/runtime/Tensor.h" +#include + namespace arm_compute { // Forward declaration class ITensor; +class NEFFTDigitReverseKernel; +class NEFFTRadixStageKernel; +class NEFFTScaleKernel; /** Basic function to execute one dimensional FFT. This function calls the following NEON kernels: * @@ -49,6 +51,16 @@ class NEFFT1D : public IFunction public: /** Default Constructor */ NEFFT1D(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFFT1D(const NEFFT1D &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFFT1D &operator=(const NEFFT1D &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEFFT1D(NEFFT1D &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEFFT1D &operator=(NEFFT1D &&) = delete; + /** Default destructor */ + ~NEFFT1D(); /** Initialise the function's source and destinations. * * @param[in] input Source tensor. Data types supported: F32. 
Number of channels supported: 1 (real tensor) or 2 (complex tensor). @@ -71,15 +83,15 @@ public: void run() override; protected: - MemoryGroup _memory_group; - NEFFTDigitReverseKernel _digit_reverse_kernel; - std::vector<NEFFTRadixStageKernel> _fft_kernels; - NEFFTScaleKernel _scale_kernel; - Tensor _digit_reversed_input; - Tensor _digit_reverse_indices; - unsigned int _num_ffts; - unsigned int _axis; - bool _run_scale; + MemoryGroup _memory_group; + std::unique_ptr<NEFFTDigitReverseKernel> _digit_reverse_kernel; + std::vector<std::unique_ptr<NEFFTRadixStageKernel>> _fft_kernels; + std::unique_ptr<NEFFTScaleKernel> _scale_kernel; + Tensor _digit_reversed_input; + Tensor _digit_reverse_indices; + unsigned int _num_ffts; + unsigned int _axis; + bool _run_scale; }; } // namespace arm_compute #endif /*ARM_COMPUTE_NEFFT1D_H */ diff --git a/arm_compute/runtime/NEON/functions/NEFFT2D.h b/arm_compute/runtime/NEON/functions/NEFFT2D.h index efcce2e9a4..18e72c1a2f 100644 --- a/arm_compute/runtime/NEON/functions/NEFFT2D.h +++ b/arm_compute/runtime/NEON/functions/NEFFT2D.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -46,6 +46,16 @@ class NEFFT2D : public IFunction public: /** Default Constructor */ NEFFT2D(std::shared_ptr<IMemoryManager> memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFFT2D(const NEFFT2D &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFFT2D &operator=(const NEFFT2D &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEFFT2D(NEFFT2D &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEFFT2D &operator=(NEFFT2D &&) = delete; + /** Default destructor */ + ~NEFFT2D(); /** Initialise the function's source and destinations * * @param[in] input Source tensor. Data types supported: F32.
diff --git a/arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h index dd57900f2a..b3e98fc2d6 100644 --- a/arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -69,6 +69,8 @@ public: NEFFTConvolutionLayer &operator=(const NEFFTConvolutionLayer &) = delete; /** Default move assignment operator */ NEFFTConvolutionLayer &operator=(NEFFTConvolutionLayer &&) = default; + /** Default destructor */ + ~NEFFTConvolutionLayer(); /** Set the input and output tensors. * * @note: This function only works with any square kernel size and unit strides for both NCHW and NHWC data layout diff --git a/arm_compute/runtime/NEON/functions/NEFastCorners.h b/arm_compute/runtime/NEON/functions/NEFastCorners.h index cc69e77ebb..025038bb28 100644 --- a/arm_compute/runtime/NEON/functions/NEFastCorners.h +++ b/arm_compute/runtime/NEON/functions/NEFastCorners.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,10 +24,6 @@ #ifndef ARM_COMPUTE_NEFASTCORNERS_H #define ARM_COMPUTE_NEFASTCORNERS_H -#include "arm_compute/core/NEON/kernels/NEFastCornersKernel.h" -#include "arm_compute/core/NEON/kernels/NEFillArrayKernel.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/Array.h" #include "arm_compute/runtime/IFunction.h" @@ -41,6 +37,10 @@ namespace arm_compute { class ITensor; +class NENonMaximaSuppression3x3Kernel; +class NEFastCornersKernel; +class NEFillBorderKernel; +class NEFillArrayKernel; using IImage = ITensor; /** Basic function to execute fast corners. 
This function call the following NEON kernels: @@ -55,6 +55,16 @@ class NEFastCorners : public IFunction public: /** Constructor */ NEFastCorners(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFastCorners(const NEFastCorners &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFastCorners &operator=(const NEFastCorners &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEFastCorners(NEFastCorners &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEFastCorners &operator=(NEFastCorners &&) = delete; + /** Default destructor */ + ~NEFastCorners(); /** Initialize the function's source, destination, conv and border_mode. * * @param[in, out] input Source image. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) @@ -71,14 +81,14 @@ public: void run() override; private: - MemoryGroup _memory_group; - NEFastCornersKernel _fast_corners_kernel; - NEFillBorderKernel _border_handler; - NENonMaximaSuppression3x3Kernel _nonmax_kernel; - NEFillArrayKernel _fill_kernel; - Image _output; - Image _suppressed; - bool _non_max; + MemoryGroup _memory_group; + std::unique_ptr _fast_corners_kernel; + std::unique_ptr _border_handler; + std::unique_ptr _nonmax_kernel; + std::unique_ptr _fill_kernel; + Image _output; + Image _suppressed; + bool _non_max; }; } #endif /*ARM_COMPUTE_NEFASTCORNERS_H */ diff --git a/arm_compute/runtime/NEON/functions/NEFill.h b/arm_compute/runtime/NEON/functions/NEFill.h index 1c3c546c68..14d690f419 100644 --- a/arm_compute/runtime/NEON/functions/NEFill.h +++ b/arm_compute/runtime/NEON/functions/NEFill.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,7 +24,6 @@ #ifndef ARM_COMPUTE_NEFILL_H #define ARM_COMPUTE_NEFILL_H -#include "arm_compute/core/NEON/kernels/NEMemsetKernel.h" #include "arm_compute/core/PixelValue.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" diff --git a/arm_compute/runtime/NEON/functions/NEFillBorder.h b/arm_compute/runtime/NEON/functions/NEFillBorder.h index 3ac23be731..e9a08ef7ec 100644 --- a/arm_compute/runtime/NEON/functions/NEFillBorder.h +++ b/arm_compute/runtime/NEON/functions/NEFillBorder.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,15 +24,16 @@ #ifndef ARM_COMPUTE_NEFILLBORDER_H #define ARM_COMPUTE_NEFILLBORDER_H -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" #include "arm_compute/core/PixelValue.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" +#include <memory> namespace arm_compute { // Forward declaration class ITensor; +class NEFillBorderKernel; /** Basic function to run @ref NEFillBorderKernel */ class NEFillBorder : public IFunction @@ -53,7 +54,7 @@ public: void run() override; private: - std::unique_ptr<NEFillBorderKernel> _border_handler; /**< Kernel to handle image borders */ + std::unique_ptr<NEFillBorderKernel> _border_handler; /**< Kernel to handle image borders */ }; } // namespace arm_compute #endif /*ARM_COMPUTE_NEFILLBORDER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEFlattenLayer.h b/arm_compute/runtime/NEON/functions/NEFlattenLayer.h index 73da254ef5..9f0d5226de 100644 --- a/arm_compute/runtime/NEON/functions/NEFlattenLayer.h +++ b/arm_compute/runtime/NEON/functions/NEFlattenLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -30,6 +30,7 @@ namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to execute flatten layer kernel.
*/ class NEFlattenLayer : public INESimpleFunctionNoBorder diff --git a/arm_compute/runtime/NEON/functions/NEFloor.h b/arm_compute/runtime/NEON/functions/NEFloor.h index 12f0ee20ba..7f4248eadb 100644 --- a/arm_compute/runtime/NEON/functions/NEFloor.h +++ b/arm_compute/runtime/NEON/functions/NEFloor.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -31,6 +31,7 @@ namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to run @ref NEFloorKernel */ class NEFloor : public INESimpleFunctionNoBorder diff --git a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h index 21df3c4aef..3ab3d81262 100644 --- a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h +++ b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h @@ -26,25 +26,36 @@ #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NETransposeKernel.h" #include "arm_compute/runtime/MemoryGroup.h" #include "arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h" +#include "arm_compute/runtime/NEON/functions/NEFlattenLayer.h" #include "arm_compute/runtime/NEON/functions/NEGEMM.h" #include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h" #include "arm_compute/runtime/Tensor.h" namespace arm_compute { +class NEFlattenLayerKernel; + /** Basic function to reshape the weights of Fully Connected layer with NEON. This function calls the following kernels: - * - * -# @ref NETransposeKernel * * @note The fully connected layer accepts "weights" tensors only with 2 dimensions. 
*/ class NEFullyConnectedLayerReshapeWeights : public INESimpleFunctionNoBorder { public: + /** Constructor */ + NEFullyConnectedLayerReshapeWeights() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFullyConnectedLayerReshapeWeights(const NEFullyConnectedLayerReshapeWeights &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFullyConnectedLayerReshapeWeights &operator=(const NEFullyConnectedLayerReshapeWeights &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEFullyConnectedLayerReshapeWeights(NEFullyConnectedLayerReshapeWeights &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEFullyConnectedLayerReshapeWeights &operator=(NEFullyConnectedLayerReshapeWeights &&) = delete; + /** Default destructor */ + ~NEFullyConnectedLayerReshapeWeights() = default; /** Set the input and output tensors. * * @param[in] input Weights tensor. The weights must be 2 dimensional. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. @@ -122,6 +133,8 @@ public: NEFullyConnectedLayer &operator=(const NEFullyConnectedLayer &) = delete; /** Default move assignment operator */ NEFullyConnectedLayer &operator=(NEFullyConnectedLayer &&) = default; + /** Default destructor */ + ~NEFullyConnectedLayer(); /** Set the input and output tensors. * * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. 
@@ -168,7 +181,7 @@ private: MemoryGroup _memory_group; IWeightsManager *_weights_manager; - NEFlattenLayerKernel _flatten_kernel; + std::unique_ptr _flatten_kernel; NEConvertFullyConnectedWeights _convert_weights; weights_transformations::NEConvertFullyConnectedWeightsManaged _convert_weights_managed; NEFullyConnectedLayerReshapeWeights _reshape_weights_function; diff --git a/arm_compute/runtime/NEON/functions/NEFuseBatchNormalization.h b/arm_compute/runtime/NEON/functions/NEFuseBatchNormalization.h index 6b561352a6..5dc804e240 100644 --- a/arm_compute/runtime/NEON/functions/NEFuseBatchNormalization.h +++ b/arm_compute/runtime/NEON/functions/NEFuseBatchNormalization.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,7 +25,6 @@ #define ARM_COMPUTE_NEFUSEBATCHNORMALIZATION_H #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/kernels/NEFuseBatchNormalizationKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" @@ -33,6 +32,7 @@ namespace arm_compute { // Forward declarations class ITensor; +class NEFuseBatchNormalizationKernel; /** Basic function to fuse the batch normalization node to a preceding convolution node */ class NEFuseBatchNormalization : public IFunction @@ -49,7 +49,7 @@ public: /** Allow instances of this class to be moved */ NEFuseBatchNormalization &operator=(NEFuseBatchNormalization &&) = default; /** Default destructor */ - ~NEFuseBatchNormalization() = default; + ~NEFuseBatchNormalization(); /** Set the input and output tensors. * * @param[in] input_weights Input weights tensor for convolution or depthwise convolution layer. Data type supported: F16/F32. 
Data layout supported: NCHW, NHWC @@ -94,7 +94,7 @@ public: void run() override; private: - NEFuseBatchNormalizationKernel _fuse_bn_kernel; + std::unique_ptr _fuse_bn_kernel; }; } // namespace arm_compute #endif /*ARM_COMPUTE_NEFUSEBATCHNORMALIZATION_H */ diff --git a/arm_compute/runtime/NEON/functions/NEGEMM.h b/arm_compute/runtime/NEON/functions/NEGEMM.h index 8d65fb5303..645ab56417 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMM.h +++ b/arm_compute/runtime/NEON/functions/NEGEMM.h @@ -24,11 +24,6 @@ #ifndef ARM_COMPUTE_NEGEMM_H #define ARM_COMPUTE_NEGEMM_H -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/IWeightsManager.h" @@ -38,8 +33,14 @@ #include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" #include "arm_compute/runtime/Tensor.h" +#include + namespace arm_compute { +class NEGEMMInterleave4x4Kernel; +class NEGEMMMatrixAdditionKernel; +class NEGEMMMatrixMultiplyKernel; +class NEGEMMTranspose1xWKernel; /** Basic function to execute GEMM on NEON. This function calls the following NEON kernels: * * If optimized assembly is available: @@ -69,6 +70,8 @@ public: NEGEMM &operator=(const NEGEMM &) = delete; /** Default move assignment operator */ NEGEMM &operator=(NEGEMM &&) = default; + /** Default destructor */ + ~NEGEMM(); /** Initialise the kernel's inputs, output * * @note GEMM: General Matrix Multiply - [alpha * A * B + beta * C]. 
@@ -104,16 +107,16 @@ public: void prepare() override; private: - MemoryGroup _memory_group; - IWeightsManager *_weights_manager; - NEGEMMInterleave4x4Kernel _interleave_kernel; - NEGEMMTranspose1xWKernel _transpose_kernel; - NEGEMMMatrixMultiplyKernel _mm_kernel; - NEGEMMAssemblyDispatch _asm_glue; - NEGEMMMatrixAdditionKernel _ma_kernel; - NEActivationLayer _alpha_scale_func; - NEArithmeticAddition _add_bias; - NEActivationLayer _activation_func; + MemoryGroup _memory_group; + IWeightsManager *_weights_manager; + std::unique_ptr _interleave_kernel; + std::unique_ptr _transpose_kernel; + std::unique_ptr _mm_kernel; + NEGEMMAssemblyDispatch _asm_glue; + std::unique_ptr _ma_kernel; + NEActivationLayer _alpha_scale_func; + NEArithmeticAddition _add_bias; + NEActivationLayer _activation_func; Tensor _tmp_a; Tensor _tmp_b; diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h index b3f5c51010..6bcf56fb0b 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h @@ -26,10 +26,6 @@ #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/NEON/kernels/NECol2ImKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" -#include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h" -#include "arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IWeightsManager.h" #include "arm_compute/runtime/MemoryGroup.h" @@ -44,6 +40,9 @@ namespace arm_compute { class ITensor; +class NECol2ImKernel; +class NEIm2ColKernel; +class NEWeightsReshapeKernel; /** Function to reshape the weights. 
This function calls the following kernel: * -# @ref NEWeightsReshapeKernel @@ -61,6 +60,8 @@ public: NEConvolutionLayerReshapeWeights &operator=(const NEConvolutionLayerReshapeWeights &) = delete; /** Default move assignment operator */ NEConvolutionLayerReshapeWeights &operator=(NEConvolutionLayerReshapeWeights &&) = default; + /** Default destructor */ + ~NEConvolutionLayerReshapeWeights(); /** Set the input and output tensors. * * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. @@ -88,7 +89,7 @@ public: void run() override; private: - NEWeightsReshapeKernel _weights_reshape_kernel; + std::unique_ptr _weights_reshape_kernel; }; namespace weights_transformations @@ -97,6 +98,18 @@ namespace weights_transformations class NEConvolutionLayerReshapeWeightsTransform : public ITransformWeights { public: + /** Constructor */ + NEConvolutionLayerReshapeWeightsTransform() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConvolutionLayerReshapeWeightsTransform(const NEConvolutionLayerReshapeWeightsTransform &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConvolutionLayerReshapeWeightsTransform &operator=(const NEConvolutionLayerReshapeWeightsTransform &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEConvolutionLayerReshapeWeightsTransform(NEConvolutionLayerReshapeWeightsTransform &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEConvolutionLayerReshapeWeightsTransform &operator=(NEConvolutionLayerReshapeWeightsTransform &&) = delete; + /** Default destructor */ + ~NEConvolutionLayerReshapeWeightsTransform() = default; void configure(const ITensor *input, const ITensor *biases) { _bias_bit = (biases != nullptr) ? 
1 : 0; @@ -160,6 +173,8 @@ public: NEGEMMConvolutionLayer &operator=(const NEGEMMConvolutionLayer &) = delete; /** Default move assignment operator */ NEGEMMConvolutionLayer &operator=(NEGEMMConvolutionLayer &&) = default; + /** Default destructor */ + ~NEGEMMConvolutionLayer(); /** Set the input and output tensors. * * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], @@ -253,10 +268,10 @@ private: IWeightsManager *_weights_manager; NEConvolutionLayerReshapeWeights _reshape_weights; weights_transformations::NEConvolutionLayerReshapeWeightsTransform _reshape_weights_managed; - NEIm2ColKernel _im2col_kernel; + std::unique_ptr _im2col_kernel; NEGEMM _mm_gemm; NEGEMMLowpMatrixMultiplyCore _mm_gemmlowp; - NECol2ImKernel _col2im_kernel; + std::unique_ptr _col2im_kernel; NEReshapeLayer _reshape_layer; const ITensor *_original_weights; diff --git a/arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h b/arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h index 58cb383c67..7195c71063 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.h index 9813b34661..961b1901e7 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,7 +24,6 @@ #ifndef ARM_COMPUTE_NEGEMMLOWPASSEMBLYMATRIXMULTIPLYCORE_H #define ARM_COMPUTE_NEGEMMLOWPASSEMBLYMATRIXMULTIPLYCORE_H -#include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/MemoryGroup.h" @@ -37,6 +36,9 @@ namespace arm_compute { // Forward declarations class ITensor; +class NEGEMMInterleave4x4Kernel; +class NEGEMMTranspose1xWKernel; +class NEGEMMLowpMatrixMultiplyKernel; /** Basic function to execute matrix multiply assembly kernels. */ class NEGEMMLowpAssemblyMatrixMultiplyCore : public IFunction @@ -44,6 +46,9 @@ class NEGEMMLowpAssemblyMatrixMultiplyCore : public IFunction public: /** Constructor */ NEGEMMLowpAssemblyMatrixMultiplyCore(std::shared_ptr memory_manager = nullptr); + /** Destructor */ + ~NEGEMMLowpAssemblyMatrixMultiplyCore(); + /** Initialise the kernel's inputs, output * * @param[in] a First input tensor (Matrix A). Data type supported: U8, S8. 
@@ -57,13 +62,13 @@ public: void run() override; private: - MemoryGroup _memory_group; - NEGEMMAssemblyDispatch _asm_glue; - std::unique_ptr _mm_kernel; - std::unique_ptr _mtx_a_reshape_kernel; - std::unique_ptr _mtx_b_reshape_kernel; - Tensor _tmp_a; - Tensor _tmp_b; + MemoryGroup _memory_group; + NEGEMMAssemblyDispatch _asm_glue; + std::unique_ptr _mm_kernel; + std::unique_ptr _mtx_a_reshape_kernel; + std::unique_ptr _mtx_b_reshape_kernel; + Tensor _tmp_a; + Tensor _tmp_b; }; } // namespace arm_compute #endif /*ARM_COMPUTE_NEGEMMLOWPASSEMBLYMATRIXMULTIPLYCORE_H */ diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h index 01720f05fa..cb1d6bd782 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h @@ -25,15 +25,6 @@ #define ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H #include "NEActivationLayer.h" -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h" -#include "arm_compute/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/MemoryGroup.h" @@ -45,6 +36,15 @@ namespace arm_compute { class ITensor; +class NEConvertQuantizedSignednessKernel; +class NEConvertQuantizedSignednessKernel; +class NEGEMMInterleave4x4Kernel; +class 
NEGEMMLowpMatrixMultiplyKernel; +class NEGEMMLowpOffsetContributionKernel; +class NEGEMMLowpOffsetContributionOutputStageKernel; +class NEGEMMLowpMatrixAReductionKernel; +class NEGEMMLowpMatrixBReductionKernel; +class NEGEMMTranspose1xWKernel; /** Basic function to execute GEMMLowpMatrixMultiplyCore on NEON. This function calls the following NEON kernels if the DOT product instruction is not available: * @@ -72,6 +72,8 @@ public: NEGEMMLowpMatrixMultiplyCore &operator=(const NEGEMMLowpMatrixMultiplyCore &) = delete; /** Default move assignment operator */ NEGEMMLowpMatrixMultiplyCore &operator=(NEGEMMLowpMatrixMultiplyCore &&) = default; + /** Default destructor */ + ~NEGEMMLowpMatrixMultiplyCore(); /** Initialise the kernel's inputs, output * * @note GEMM_LOWP: low precision GEMM kernel @@ -111,19 +113,19 @@ public: void prepare() override; private: - MemoryGroup _memory_group; - IWeightsManager *_weights_manager; - NEGEMMAssemblyDispatch _asm_glue; - NEGEMMLowpMatrixMultiplyKernel _mm_kernel; - NEGEMMInterleave4x4Kernel _mtx_a_reshape_kernel; - NEGEMMTranspose1xWKernel _mtx_b_reshape_kernel; - NEGEMMLowpMatrixAReductionKernel _mtx_a_reduction_kernel; - NEGEMMLowpMatrixBReductionKernel _mtx_b_reduction_kernel; - NEGEMMLowpOffsetContributionKernel _offset_contribution_kernel; - NEGEMMLowpOffsetContributionOutputStageKernel _offset_contribution_output_stage_kernel; - NEActivationLayer _activation_func; - NEConvertQuantizedSignednessKernel _convert_to_signed_asymm; - NEConvertQuantizedSignednessKernel _convert_from_signed_asymm; + MemoryGroup _memory_group; + IWeightsManager *_weights_manager; + NEGEMMAssemblyDispatch _asm_glue; + std::unique_ptr _mm_kernel; + std::unique_ptr _mtx_a_reshape_kernel; + std::unique_ptr _mtx_b_reshape_kernel; + std::unique_ptr _mtx_a_reduction_kernel; + std::unique_ptr _mtx_b_reduction_kernel; + std::unique_ptr _offset_contribution_kernel; + std::unique_ptr _offset_contribution_output_stage_kernel; + NEActivationLayer _activation_func; + 
std::unique_ptr _convert_to_signed_asymm; + std::unique_ptr _convert_from_signed_asymm; Tensor _vector_sum_col; Tensor _vector_sum_row; diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h index f29d5d464b..6977d27cb6 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h @@ -24,6 +24,7 @@ #ifndef ARM_COMPUTE_NEGEMMLOWPOUTPUTSTAGE_H #define ARM_COMPUTE_NEGEMMLOWPOUTPUTSTAGE_H +#include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" /** This file contains all available output stages for GEMMLowp on NEON. @@ -37,6 +38,7 @@ namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to execute NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint on NEON. * @@ -69,6 +71,18 @@ class ITensor; class NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint : public INESimpleFunctionNoBorder { public: + /** Constructor */ + NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint(const NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint &operator=(const NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint(NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint &operator=(NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint 
&&) = delete; + /** Default destructor */ + ~NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint(); /** Initialise the kernel's inputs, output * * @param[in] input Input tensor. Data type supported: S32 @@ -129,6 +143,18 @@ public: class NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint : public INESimpleFunctionNoBorder { public: + /** Constructor */ + NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint(const NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint &operator=(const NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint(NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint &operator=(NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint &&) = delete; + /** Default destructor */ + ~NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint(); /** Initialise the kernel's inputs, output * * @param[in] input Input tensor. 
Data type supported: S32 @@ -189,6 +215,18 @@ public: class NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint : public INESimpleFunctionNoBorder { public: + /** Constructor */ + NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint(const NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint &operator=(const NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint(NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint &operator=(NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint &&) = delete; + /** Default destructor */ + ~NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint(); /** Initialise the kernel's inputs, output * * @param[in] input Input tensor. 
Data type supported: S32 @@ -230,6 +268,18 @@ public: class NEGEMMLowpOutputStage : public INESimpleFunctionNoBorder { public: + /** Constructor */ + NEGEMMLowpOutputStage() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMLowpOutputStage(const NEGEMMLowpOutputStage &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMLowpOutputStage &operator=(const NEGEMMLowpOutputStage &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEGEMMLowpOutputStage(NEGEMMLowpOutputStage &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEGEMMLowpOutputStage &operator=(NEGEMMLowpOutputStage &&) = delete; + /** Default destructor */ + ~NEGEMMLowpOutputStage(); /** Initialise the kernel's inputs, output * * @param[in] input Input tensor. Data type supported: S32 diff --git a/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h b/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h index 983c95d732..723a638d76 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,12 +24,14 @@ #ifndef ARM_COMPUTE_NEGEMMTRANSPOSE1XW_H #define ARM_COMPUTE_NEGEMMTRANSPOSE1XW_H +#include "arm_compute/core/Error.h" #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { // Forward declarations class ITensor; +class ITensorInfo; /** Basic function to execute NEGEMMTranspose1xWKernel. 
This function calls the following NEON kernels: * @@ -39,6 +41,18 @@ class ITensor; class NEGEMMTranspose1xW : public INESimpleFunctionNoBorder { public: + /** Constructor */ + NEGEMMTranspose1xW() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMTranspose1xW(const NEGEMMTranspose1xW &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMTranspose1xW &operator=(const NEGEMMTranspose1xW &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEGEMMTranspose1xW(NEGEMMTranspose1xW &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEGEMMTranspose1xW &operator=(NEGEMMTranspose1xW &&) = delete; + /** Default destructor */ + ~NEGEMMTranspose1xW() = default; /** Initialise the kernel's inputs, output * * @param[in] input First input tensor. Data type supported: All diff --git a/arm_compute/runtime/NEON/functions/NEGather.h b/arm_compute/runtime/NEON/functions/NEGather.h index b872c44443..a5e0461227 100644 --- a/arm_compute/runtime/NEON/functions/NEGather.h +++ b/arm_compute/runtime/NEON/functions/NEGather.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -32,6 +32,7 @@ namespace arm_compute { // Forward declarations class ITensor; +class ITensorInfo; /** Basic function to run @ref NEGatherKernel */ class NEGather : public INESimpleFunctionNoBorder diff --git a/arm_compute/runtime/NEON/functions/NEGaussian3x3.h b/arm_compute/runtime/NEON/functions/NEGaussian3x3.h index 54fe91b975..db533858ee 100644 --- a/arm_compute/runtime/NEON/functions/NEGaussian3x3.h +++ b/arm_compute/runtime/NEON/functions/NEGaussian3x3.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/arm_compute/runtime/NEON/functions/NEGaussian5x5.h b/arm_compute/runtime/NEON/functions/NEGaussian5x5.h index 2e042e2307..3d933bbd3d 100644 --- a/arm_compute/runtime/NEON/functions/NEGaussian5x5.h +++ b/arm_compute/runtime/NEON/functions/NEGaussian5x5.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,8 +24,6 @@ #ifndef ARM_COMPUTE_NEGAUSSIAN5x5_H #define ARM_COMPUTE_NEGAUSSIAN5x5_H -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" @@ -38,6 +36,9 @@ namespace arm_compute { class ITensor; +class NEGaussian5x5HorKernel; +class NEGaussian5x5VertKernel; +class NEFillBorderKernel; /** Basic function to execute gaussian filter 5x5. This function calls the following NEON kernels: * @@ -52,6 +53,16 @@ public: /** Default constructor */ NEGaussian5x5(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGaussian5x5(const NEGaussian5x5 &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGaussian5x5 &operator=(const NEGaussian5x5 &) = delete; + /** Allow instances of this class to be moved */ + NEGaussian5x5(NEGaussian5x5 &&) = default; + /** Allow instances of this class to be moved */ + NEGaussian5x5 &operator=(NEGaussian5x5 &&) = default; + /** Default destructor */ + ~NEGaussian5x5(); /** Initialise the function's input, output and border mode. * * @param[in, out] input Source tensor. Data type supported: U8. 
(Written to only for @p border_mode != UNDEFINED) @@ -65,11 +76,11 @@ public: void run() override; protected: - MemoryGroup _memory_group; /**< Function memory group */ - NEGaussian5x5HorKernel _kernel_hor; /**< kernel for horizontal pass */ - NEGaussian5x5VertKernel _kernel_vert; /**< kernel for vertical pass */ - Tensor _tmp; /**< temporary buffer for output of horizontal pass */ - NEFillBorderKernel _border_handler; /**< kernel to handle tensor borders */ + MemoryGroup _memory_group; /**< Function memory group */ + std::unique_ptr _kernel_hor; /**< kernel for horizontal pass */ + std::unique_ptr _kernel_vert; /**< kernel for vertical pass */ + Tensor _tmp; /**< temporary buffer for output of horizontal pass */ + std::unique_ptr _border_handler; /**< kernel to handle tensor borders */ }; } #endif /*ARM_COMPUTE_NEGAUSSIAN5x5_H */ diff --git a/arm_compute/runtime/NEON/functions/NEGaussianPyramid.h b/arm_compute/runtime/NEON/functions/NEGaussianPyramid.h index d82f763f95..c82de0f4c2 100644 --- a/arm_compute/runtime/NEON/functions/NEGaussianPyramid.h +++ b/arm_compute/runtime/NEON/functions/NEGaussianPyramid.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -25,7 +25,6 @@ #define ARM_COMPUTE_NEGAUSSIANPYRAMID_H #include "arm_compute/core/IPyramid.h" -#include "arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/NEON/functions/NEGaussian5x5.h" @@ -39,6 +38,9 @@ namespace arm_compute { class ITensor; +class NEGaussianPyramidHorKernel; +class NEGaussianPyramidVertKernel; +class NEFillBorderKernel; /** Common interface for all Gaussian pyramid functions */ class NEGaussianPyramid : public IFunction @@ -85,16 +87,26 @@ class NEGaussianPyramidHalf : public NEGaussianPyramid public: /** Constructor */ NEGaussianPyramidHalf(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGaussianPyramidHalf(const NEGaussianPyramidHalf &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGaussianPyramidHalf &operator=(const NEGaussianPyramidHalf &) = delete; + /** Allow instances of this class to be moved */ + NEGaussianPyramidHalf(NEGaussianPyramidHalf &&) = default; + /** Allow instances of this class to be moved */ + NEGaussianPyramidHalf &operator=(NEGaussianPyramidHalf &&) = default; + /** Default destructor */ + ~NEGaussianPyramidHalf(); // Inherited methods overridden: void configure(const ITensor *input, IPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) override; void run() override; private: - std::vector _horizontal_border_handler; - std::vector _vertical_border_handler; - std::vector _horizontal_reduction; - std::vector _vertical_reduction; + std::vector> _horizontal_border_handler; + std::vector> _vertical_border_handler; + std::vector> _horizontal_reduction; + std::vector> _vertical_reduction; }; /** Basic function to execute gaussian pyramid with ORB scale factor. 
This function calls the following NEON kernels and functions: @@ -109,6 +121,16 @@ class NEGaussianPyramidOrb : public NEGaussianPyramid public: /** Constructor */ NEGaussianPyramidOrb(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGaussianPyramidOrb(const NEGaussianPyramidOrb &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGaussianPyramidOrb &operator=(const NEGaussianPyramidOrb &) = delete; + /** Allow instances of this class to be moved */ + NEGaussianPyramidOrb(NEGaussianPyramidOrb &&) = default; + /** Allow instances of this class to be moved */ + NEGaussianPyramidOrb &operator=(NEGaussianPyramidOrb &&) = default; + /** Default destructor */ + ~NEGaussianPyramidOrb(); // Inherited methods overridden: void configure(const ITensor *input, IPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) override; diff --git a/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h b/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h index f937832c0e..613f0d1c47 100644 --- a/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h +++ b/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h @@ -24,17 +24,17 @@ #ifndef ARM_COMPUTE_NEGENERATEPROPOSALSLAYER_H #define ARM_COMPUTE_NEGENERATEPROPOSALSLAYER_H -#include "arm_compute/core/NEON/kernels/NEBoundingBoxTransformKernel.h" -#include "arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEGenerateProposalsLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEPadLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEPermuteKernel.h" -#include "arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CPP/CPPScheduler.h" #include "arm_compute/runtime/CPP/functions/CPPBoxWithNonMaximaSuppressionLimit.h" #include "arm_compute/runtime/IFunction.h" 
#include "arm_compute/runtime/MemoryGroup.h" +#include "arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h" +#include "arm_compute/runtime/NEON/functions/NEComputeAllAnchors.h" +#include "arm_compute/runtime/NEON/functions/NEDequantizationLayer.h" +#include "arm_compute/runtime/NEON/functions/NEPadLayer.h" +#include "arm_compute/runtime/NEON/functions/NEPermute.h" +#include "arm_compute/runtime/NEON/functions/NEQuantizationLayer.h" #include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h" #include "arm_compute/runtime/Tensor.h" @@ -67,6 +67,8 @@ public: NEGenerateProposalsLayer(const NEGenerateProposalsLayer &) = delete; /** Prevent instances of this class from being copied (As this class contains pointers) */ NEGenerateProposalsLayer &operator=(const NEGenerateProposalsLayer &) = delete; + /** Default destructor */ + ~NEGenerateProposalsLayer(); /** Set the input and output tensors. * @@ -112,16 +114,16 @@ private: MemoryGroup _memory_group; // Neon kernels - NEPermuteKernel _permute_deltas_kernel; - NEReshapeLayer _flatten_deltas; - NEPermuteKernel _permute_scores_kernel; - NEReshapeLayer _flatten_scores; - NEComputeAllAnchorsKernel _compute_anchors_kernel; - NEBoundingBoxTransformKernel _bounding_box_kernel; - NEPadLayerKernel _pad_kernel; - NEDequantizationLayerKernel _dequantize_anchors; - NEDequantizationLayerKernel _dequantize_deltas; - NEQuantizationLayerKernel _quantize_all_proposals; + NEPermute _permute_deltas; + NEReshapeLayer _flatten_deltas; + NEPermute _permute_scores; + NEReshapeLayer _flatten_scores; + NEComputeAllAnchors _compute_anchors; + NEBoundingBoxTransform _bounding_box; + NEPadLayer _pad; + NEDequantizationLayer _dequantize_anchors; + NEDequantizationLayer _dequantize_deltas; + NEQuantizationLayer _quantize_all_proposals; // CPP functions CPPBoxWithNonMaximaSuppressionLimit _cpp_nms; diff --git a/arm_compute/runtime/NEON/functions/NEHOGDescriptor.h b/arm_compute/runtime/NEON/functions/NEHOGDescriptor.h index 
9b6fc4737b..c900040982 100644 --- a/arm_compute/runtime/NEON/functions/NEHOGDescriptor.h +++ b/arm_compute/runtime/NEON/functions/NEHOGDescriptor.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,7 +24,6 @@ #ifndef ARM_COMPUTE_NEHOGDESCRIPTOR_H #define ARM_COMPUTE_NEHOGDESCRIPTOR_H -#include "arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/MemoryGroup.h" @@ -36,6 +35,9 @@ namespace arm_compute { class IHOG; +class NEHOGOrientationBinningKernel; +class NEHOGBlockNormalizationKernel; + /** Basic function to calculate HOG descriptor. This function calls the following NEON kernels: * * -# @ref NEHOGGradient @@ -48,6 +50,16 @@ class NEHOGDescriptor : public IFunction public: /** Default constructor */ NEHOGDescriptor(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHOGDescriptor(const NEHOGDescriptor &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHOGDescriptor &operator=(const NEHOGDescriptor &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEHOGDescriptor(NEHOGDescriptor &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEHOGDescriptor &operator=(NEHOGDescriptor &&) = delete; + /** Default destructor */ + ~NEHOGDescriptor(); /** Initialise the function's source, destination, HOG data-object and border mode * * @param[in, out] input Input tensor. 
Data type supported: U8 @@ -63,13 +75,13 @@ public: void run() override; private: - MemoryGroup _memory_group; - NEHOGGradient _gradient; - NEHOGOrientationBinningKernel _orient_bin; - NEHOGBlockNormalizationKernel _block_norm; - Tensor _mag; - Tensor _phase; - Tensor _hog_space; + MemoryGroup _memory_group; + NEHOGGradient _gradient; + std::unique_ptr _orient_bin; + std::unique_ptr _block_norm; + Tensor _mag; + Tensor _phase; + Tensor _hog_space; }; } diff --git a/arm_compute/runtime/NEON/functions/NEHOGDetector.h b/arm_compute/runtime/NEON/functions/NEHOGDetector.h index 6400d3c367..89224b62a0 100644 --- a/arm_compute/runtime/NEON/functions/NEHOGDetector.h +++ b/arm_compute/runtime/NEON/functions/NEHOGDetector.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,12 +24,14 @@ #ifndef ARM_COMPUTE_NEHOGDETECTOR_H #define ARM_COMPUTE_NEHOGDETECTOR_H +#include "arm_compute/core/IArray.h" #include "arm_compute/core/IHOG.h" -#include "arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h" #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { +class ITensor; +class ITensorInfo; /** Basic function to execute HOG detector based on linear SVM. 
This function calls the following NEON kernel: * * -# @ref NEHOGDetectorKernel @@ -38,6 +40,18 @@ namespace arm_compute class NEHOGDetector : public INESimpleFunctionNoBorder { public: + /** Constructor */ + NEHOGDetector() = default; + /** Prevent instances of this class from being copied */ + NEHOGDetector(const NEHOGDetector &) = delete; + /** Default move constructor */ + NEHOGDetector(NEHOGDetector &&) = default; + /** Prevent instances of this class from being copied */ + NEHOGDetector &operator=(const NEHOGDetector &) = delete; + /** Default move assignment operator */ + NEHOGDetector &operator=(NEHOGDetector &&) = default; + /** Destructor */ + ~NEHOGDetector(); /** Initialise the kernel's input, output, HOG data object, detection window stride, threshold and index class * * @attention The function does not reset the number of values in @ref IDetectionWindowArray so it is caller's responsibility to clear it. diff --git a/arm_compute/runtime/NEON/functions/NEHOGGradient.h b/arm_compute/runtime/NEON/functions/NEHOGGradient.h index 2d3f934f54..05a16db995 100644 --- a/arm_compute/runtime/NEON/functions/NEHOGGradient.h +++ b/arm_compute/runtime/NEON/functions/NEHOGGradient.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,7 +24,6 @@ #ifndef ARM_COMPUTE_NEHOGGRADIENT_H #define ARM_COMPUTE_NEHOGGRADIENT_H -#include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" @@ -38,6 +37,8 @@ namespace arm_compute { class ITensor; +class ICPPKernel; + /** Basic function to calculate the gradient for HOG. 
This function calls the following NEON kernels: * * -# @ref NEDerivative @@ -49,6 +50,16 @@ class NEHOGGradient : public IFunction public: /** Default constructor */ NEHOGGradient(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHOGGradient(const NEHOGGradient &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHOGGradient &operator=(const NEHOGGradient &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEHOGGradient(NEHOGGradient &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEHOGGradient &operator=(NEHOGGradient &&) = delete; + /** Default destructor */ + ~NEHOGGradient(); /** Initialise the function's source, destinations, phase type and border mode * * @param[in, out] input Input tensor. Data type supported: U8. @@ -65,11 +76,11 @@ public: void run() override; private: - MemoryGroup _memory_group; - NEDerivative _derivative; - std::unique_ptr _mag_phase; - Tensor _gx; - Tensor _gy; + MemoryGroup _memory_group; + NEDerivative _derivative; + std::unique_ptr _mag_phase; + Tensor _gx; + Tensor _gy; }; } #endif /*ARM_COMPUTE_NEHOGGRADIENT_H */ diff --git a/arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h b/arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h index ff64afb119..0fb3edd490 100644 --- a/arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h +++ b/arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -27,7 +27,6 @@ #include "arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h" #include "arm_compute/core/IArray.h" #include "arm_compute/core/IMultiHOG.h" -#include "arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/MemoryGroup.h" @@ -39,6 +38,9 @@ namespace arm_compute { +class NEHOGOrientationBinningKernel; +class NEHOGBlockNormalizationKernel; + /** Basic function to detect multiple objects (or the same object at different scales) on the same input image using HOG. This function calls the following NEON kernels: * * -# @ref NEHOGGradient @@ -60,8 +62,14 @@ public: NEHOGMultiDetection(std::shared_ptr memory_manager = nullptr); /** Prevent instances of this class from being copied (As this class contains pointers) */ NEHOGMultiDetection(const NEHOGMultiDetection &) = delete; + /** Default move constructor */ + NEHOGMultiDetection(NEHOGMultiDetection &&) = default; /** Prevent instances of this class from being copied (As this class contains pointers) */ NEHOGMultiDetection &operator=(const NEHOGMultiDetection &) = delete; + /** Default move assignment operator */ + NEHOGMultiDetection &operator=(NEHOGMultiDetection &&) = default; + /** Default destructor */ + ~NEHOGMultiDetection(); /** Initialise the function's source, destination, detection window strides, border mode, threshold and non-maxima suppression * * @param[in, out] input Input tensor. Data type supported: U8 diff --git a/arm_compute/runtime/NEON/functions/NEHarrisCorners.h b/arm_compute/runtime/NEON/functions/NEHarrisCorners.h index c086e3a7ce..e2dc052afc 100644 --- a/arm_compute/runtime/NEON/functions/NEHarrisCorners.h +++ b/arm_compute/runtime/NEON/functions/NEHarrisCorners.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -26,8 +26,6 @@ #include "arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h" #include "arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/Array.h" #include "arm_compute/runtime/IFunction.h" @@ -42,6 +40,8 @@ namespace arm_compute { class ITensor; +class NEFillBorderKernel; +class INEHarrisScoreKernel; using IImage = ITensor; /** Basic function to execute harris corners detection. This function calls the following NEON kernels and functions: @@ -68,6 +68,16 @@ public: * @param[in] memory_manager (Optional) Memory manager. */ NEHarrisCorners(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHarrisCorners(const NEHarrisCorners &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHarrisCorners &operator=(const NEHarrisCorners &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEHarrisCorners(NEHarrisCorners &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEHarrisCorners &operator=(NEHarrisCorners &&) = delete; + /** Default destructor */ + ~NEHarrisCorners(); /** Initialize the function's source, destination, conv and border_mode. * * @param[in, out] input Source image. Data type supported: U8. 
(Written to only for @p border_mode != UNDEFINED) @@ -94,8 +104,8 @@ private: NENonMaximaSuppression3x3 _non_max_suppr; /**< Non-maxima suppression function */ CPPCornerCandidatesKernel _candidates; /**< Sort kernel */ CPPSortEuclideanDistanceKernel _sort_euclidean; /**< Euclidean distance kernel */ - NEFillBorderKernel _border_gx; /**< Border handler before running harris score */ - NEFillBorderKernel _border_gy; /**< Border handler before running harris score */ + std::unique_ptr _border_gx; /**< Border handler before running harris score */ + std::unique_ptr _border_gy; /**< Border handler before running harris score */ Image _gx; /**< Source image - Gx component */ Image _gy; /**< Source image - Gy component */ Image _score; /**< Source image - Harris score */ diff --git a/arm_compute/runtime/NEON/functions/NEHistogram.h b/arm_compute/runtime/NEON/functions/NEHistogram.h index 716f2e71f9..60766ebcdc 100644 --- a/arm_compute/runtime/NEON/functions/NEHistogram.h +++ b/arm_compute/runtime/NEON/functions/NEHistogram.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,16 +24,19 @@ #ifndef ARM_COMPUTE_NEHISTOGRAM_H #define ARM_COMPUTE_NEHISTOGRAM_H -#include "arm_compute/core/NEON/kernels/NEHistogramKernel.h" #include "arm_compute/runtime/IFunction.h" #include #include #include +#include namespace arm_compute { +class ITensor; class IDistribution1D; +class NEHistogramKernel; +using IImage = ITensor; /** Basic function to run @ref NEHistogramKernel. */ class NEHistogram : public IFunction @@ -41,6 +44,16 @@ class NEHistogram : public IFunction public: /** Default Constructor. 
*/ NEHistogram(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHistogram(const NEHistogram &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHistogram &operator=(const NEHistogram &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEHistogram(NEHistogram &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEHistogram &operator=(NEHistogram &&) = delete; + /** Default destructor */ + ~NEHistogram(); /** Initialise the kernel's inputs. * * @param[in] input Input image. Data type supported: U8. @@ -52,10 +65,10 @@ public: void run() override; private: - NEHistogramKernel _histogram_kernel; - std::vector _local_hist; - std::vector _window_lut; - size_t _local_hist_size; + std::unique_ptr _histogram_kernel; + std::vector _local_hist; + std::vector _window_lut; + size_t _local_hist_size; /** 256 possible pixel values as we handle only U8 images */ static constexpr unsigned int window_lut_default_size = 256; }; diff --git a/arm_compute/runtime/NEON/functions/NEIm2Col.h b/arm_compute/runtime/NEON/functions/NEIm2Col.h index 3ea9c1cfaf..2f023f44fe 100644 --- a/arm_compute/runtime/NEON/functions/NEIm2Col.h +++ b/arm_compute/runtime/NEON/functions/NEIm2Col.h @@ -26,14 +26,16 @@ #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" -#include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h" #include "arm_compute/core/Size2D.h" #include "arm_compute/core/Types.h" +#include namespace arm_compute { // Forward declarations class ITensor; +class ITensorInfo; +class NEIm2ColKernel; /** Basic function to run @ref NEIm2ColKernel */ class NEIm2Col : public IFunction @@ -41,6 +43,16 @@ class NEIm2Col : public IFunction public: /** Default constructor */ NEIm2Col(); + /** Prevent instances of this class from being copied (As this class 
contains pointers) */ + NEIm2Col(const NEIm2Col &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEIm2Col &operator=(const NEIm2Col &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEIm2Col(NEIm2Col &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEIm2Col &operator=(NEIm2Col &&) = delete; + /** Default destructor */ + ~NEIm2Col(); /** Configure the im2col NEON kernel * * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], @@ -78,8 +90,8 @@ public: void run() override; private: - NEIm2ColKernel _kernel; - unsigned int _y_dim; + std::unique_ptr _kernel; + unsigned int _y_dim; }; } // namespace arm_compute #endif /* ARM_COMPUTE_NEIM2COL_H */ diff --git a/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayer.h b/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayer.h index 85a307c2d4..57165c94b4 100644 --- a/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayer.h +++ b/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,7 +24,6 @@ #ifndef ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYER_H #define ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYER_H -#include "arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/MemoryGroup.h" @@ -37,6 +36,7 @@ namespace arm_compute { class ITensor; +class NEInstanceNormalizationLayerKernel; /** Basic function to perform a Instance normalization. 
* @@ -48,6 +48,16 @@ class NEInstanceNormalizationLayer : public IFunction public: /** Constructor */ NEInstanceNormalizationLayer(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEInstanceNormalizationLayer(const NEInstanceNormalizationLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEInstanceNormalizationLayer &operator=(const NEInstanceNormalizationLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEInstanceNormalizationLayer(NEInstanceNormalizationLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEInstanceNormalizationLayer &operator=(NEInstanceNormalizationLayer &&) = delete; + /** Default destructor */ + ~NEInstanceNormalizationLayer(); /** Set the input and output tensors. * * @param[in, out] input Source tensor. In case of @p output tensor = nullptr this tensor will store the result of the normalization. @@ -75,13 +85,13 @@ public: void run() override; private: - MemoryGroup _memory_group; - NEInstanceNormalizationLayerKernel _normalization_kernel; - bool _is_nchw; - NEPermute _permute_input; - NEPermute _permute_output; - Tensor _permuted_input; - Tensor _permuted_output; + MemoryGroup _memory_group; + std::unique_ptr _normalization_kernel; + bool _is_nchw; + NEPermute _permute_input; + NEPermute _permute_output; + Tensor _permuted_input; + Tensor _permuted_output; }; } #endif /* ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEIntegralImage.h b/arm_compute/runtime/NEON/functions/NEIntegralImage.h index 6302a7adac..a04105c0b9 100644 --- a/arm_compute/runtime/NEON/functions/NEIntegralImage.h +++ b/arm_compute/runtime/NEON/functions/NEIntegralImage.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. 
+ * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -34,6 +34,18 @@ class ITensor; class NEIntegralImage : public INESimpleFunction { public: + /** Constructor */ + NEIntegralImage() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEIntegralImage(const NEIntegralImage &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEIntegralImage &operator=(const NEIntegralImage &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEIntegralImage(NEIntegralImage &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEIntegralImage &operator=(NEIntegralImage &&) = delete; + /** Default destructor */ + ~NEIntegralImage(); /** Initialise the function's source, destinations and border mode. * * @param[in] input Source tensor. Data type supported: U8. diff --git a/arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h b/arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h index 66750a5411..173b9d2141 100644 --- a/arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h +++ b/arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h @@ -24,7 +24,6 @@ #ifndef ARM_COMPUTE_NEL2NORMALIZELAYER_H #define ARM_COMPUTE_NEL2NORMALIZELAYER_H -#include "arm_compute/core/NEON/kernels/NEL2NormalizeLayerKernel.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/MemoryGroup.h" @@ -36,6 +35,7 @@ namespace arm_compute { class ITensor; +class NEL2NormalizeLayerKernel; /** Basic function to perform a L2 normalization on a given axis. 
* @@ -48,6 +48,16 @@ class NEL2NormalizeLayer : public IFunction public: /** Constructor */ NEL2NormalizeLayer(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEL2NormalizeLayer(const NEL2NormalizeLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEL2NormalizeLayer &operator=(const NEL2NormalizeLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEL2NormalizeLayer(NEL2NormalizeLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEL2NormalizeLayer &operator=(NEL2NormalizeLayer &&) = delete; + /** Default destructor */ + ~NEL2NormalizeLayer(); /** Set the input and output tensors. * * @param[in, out] input Source tensor. Data types supported: F16/F32. (Written to only for border_size != 0) @@ -72,10 +82,10 @@ public: void run() override; private: - MemoryGroup _memory_group; - NEReductionOperation _reduce_func; - NEL2NormalizeLayerKernel _normalize_kernel; - Tensor _sumsq; + MemoryGroup _memory_group; + NEReductionOperation _reduce_func; + std::unique_ptr _normalize_kernel; + Tensor _sumsq; }; } #endif /* ARM_COMPUTE_NEL2NORMALIZELAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NELSTMLayer.h b/arm_compute/runtime/NEON/functions/NELSTMLayer.h index 4a47dfb2cf..ef8defb827 100644 --- a/arm_compute/runtime/NEON/functions/NELSTMLayer.h +++ b/arm_compute/runtime/NEON/functions/NELSTMLayer.h @@ -24,18 +24,17 @@ #ifndef ARM_COMPUTE_NELSTMLAYER_H #define ARM_COMPUTE_NELSTMLAYER_H -#include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NECopyKernel.h" - #include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" #include 
"arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" #include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h" #include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h" +#include "arm_compute/runtime/NEON/functions/NECopy.h" #include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h" #include "arm_compute/runtime/NEON/functions/NEGEMM.h" #include "arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h" #include "arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h" +#include "arm_compute/runtime/NEON/functions/NETranspose.h" #include "arm_compute/runtime/common/LSTMParams.h" namespace arm_compute @@ -49,6 +48,16 @@ class NELSTMLayer : public IFunction public: /** Default constructor */ NELSTMLayer(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELSTMLayer(const NELSTMLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELSTMLayer &operator=(const NELSTMLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NELSTMLayer(NELSTMLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NELSTMLayer &operator=(NELSTMLayer &&) = delete; + /** Default destructor */ + ~NELSTMLayer(); /** Initialize function's tensors. * * @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: F16/F32. 
@@ -158,7 +167,7 @@ private: NEActivationLayer _activation_forget_gate; NEFullyConnectedLayer _fully_connected_cell_state; NEGEMM _gemm_cell_state1; - NETransposeKernel _transpose_cell_state; + NETranspose _transpose_cell_state; NEArithmeticAddition _accum_cell_state1; NEArithmeticAddition _accum_cell_state2; NEPixelWiseMultiplication _pixelwise_mul_cell_state1; @@ -173,8 +182,8 @@ private: NEPixelWiseMultiplication _pixelwise_mul_output_state2; NEFullyConnectedLayer _fully_connected_output_state; NEActivationLayer _projection_clip; - NECopyKernel _copy_cell_state; - NECopyKernel _copy_output; + NECopy _copy_cell_state; + NECopy _copy_output; NEConcatenateLayer _concat_scratch_buffer; NEConcatenateLayer _concat_inputs_forget_gate; NEConcatenateLayer _concat_weights_forget_gate; diff --git a/arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h b/arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h index 377e173e7d..39fafef773 100644 --- a/arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h +++ b/arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -73,6 +73,8 @@ public: NELSTMLayerQuantized &operator=(const NELSTMLayerQuantized &) = delete; /** Default move assignment operator */ NELSTMLayerQuantized &operator=(NELSTMLayerQuantized &&) = default; + /** Default destructor */ + ~NELSTMLayerQuantized(); /** Initialize function's tensors. * * @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: QASYMM8. diff --git a/arm_compute/runtime/NEON/functions/NELaplacianPyramid.h b/arm_compute/runtime/NEON/functions/NELaplacianPyramid.h index 1f317f6dd8..eecd9d59cb 100644 --- a/arm_compute/runtime/NEON/functions/NELaplacianPyramid.h +++ b/arm_compute/runtime/NEON/functions/NELaplacianPyramid.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. 
+ * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -56,6 +56,16 @@ class NELaplacianPyramid : public IFunction public: /** Constructor */ NELaplacianPyramid(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELaplacianPyramid(const NELaplacianPyramid &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELaplacianPyramid &operator=(const NELaplacianPyramid &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NELaplacianPyramid(NELaplacianPyramid &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NELaplacianPyramid &operator=(NELaplacianPyramid &&) = delete; + /** Default destructor */ + ~NELaplacianPyramid(); /** Initialise the function's source, destinations and border mode. * * @param[in] input Source tensor. Data type supported: U8. diff --git a/arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h b/arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h index cc4aa0876b..20f76455da 100644 --- a/arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h +++ b/arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -62,6 +62,16 @@ class NELaplacianReconstruct : public IFunction public: /** Constructor */ NELaplacianReconstruct(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELaplacianReconstruct(const NELaplacianReconstruct &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELaplacianReconstruct &operator=(const NELaplacianReconstruct &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NELaplacianReconstruct(NELaplacianReconstruct &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NELaplacianReconstruct &operator=(NELaplacianReconstruct &&) = delete; + /** Default destructor */ + ~NELaplacianReconstruct(); /** Initialise the function's source, destinations and border mode. * * The Output image must have the same size as the first level of the pyramid. 
diff --git a/arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h index dbcaa297ab..e9f3e93474 100644 --- a/arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h +++ b/arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h @@ -26,13 +26,11 @@ #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/NEON/kernels/NECol2ImKernel.h" -#include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h" -#include "arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h" -#include "arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/MemoryGroup.h" +#include "arm_compute/runtime/NEON/functions/NECol2Im.h" +#include "arm_compute/runtime/NEON/functions/NEIm2Col.h" #include "arm_compute/runtime/Tensor.h" #include @@ -40,6 +38,8 @@ namespace arm_compute { class INETensor; +class NEWeightsReshapeKernel; +class NELocallyConnectedMatrixMultiplyKernel; /** Basic function to compute the locally connected layer. This function calls the following NEON kernels: * @@ -61,6 +61,8 @@ public: NELocallyConnectedLayer &operator=(const NELocallyConnectedLayer &) = delete; /** Default move assignment operator */ NELocallyConnectedLayer &operator=(NELocallyConnectedLayer &&) = default; + /** Default destructor */ + ~NELocallyConnectedLayer(); /** Set the input and output tensors. * * @param[in] input Source tensor. 
3 lower dimensions represent a single input [width, height, IFM], @@ -94,16 +96,16 @@ public: void prepare() override; private: - MemoryGroup _memory_group; - NEIm2ColKernel _input_im2col_kernel; - NEWeightsReshapeKernel _weights_reshape_kernel; - NELocallyConnectedMatrixMultiplyKernel _mm_kernel; - NECol2ImKernel _output_col2im_kernel; - Tensor _input_im2col_reshaped; - Tensor _weights_reshaped; - Tensor _gemm_output; - bool _is_prepared; - const ITensor *_original_weights; + MemoryGroup _memory_group; + NEIm2Col _input_im2col; + std::unique_ptr _weights_reshape_kernel; + std::unique_ptr _mm_kernel; + NECol2Im _output_col2im; + Tensor _input_im2col_reshaped; + Tensor _weights_reshaped; + Tensor _gemm_output; + bool _is_prepared; + const ITensor *_original_weights; }; } #endif /* ARM_COMPUTE_NELOCALLYCONNECTEDLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEMagnitude.h b/arm_compute/runtime/NEON/functions/NEMagnitude.h index 56c88c2125..50935b619d 100644 --- a/arm_compute/runtime/NEON/functions/NEMagnitude.h +++ b/arm_compute/runtime/NEON/functions/NEMagnitude.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,6 +24,7 @@ #ifndef ARM_COMPUTE_NEMAGNITUDE_H #define ARM_COMPUTE_NEMAGNITUDE_H +#include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute @@ -34,6 +35,18 @@ class ITensor; class NEMagnitude : public INESimpleFunctionNoBorder { public: + /** Constructor */ + NEMagnitude() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMagnitude(const NEMagnitude &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMagnitude &operator=(const NEMagnitude &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEMagnitude(NEMagnitude &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEMagnitude &operator=(NEMagnitude &&) = delete; + /** Default destructor */ + ~NEMagnitude(); /** Initialise the kernel's inputs. * * @param[in] input1 First tensor input. Data type supported: S16. diff --git a/arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h b/arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h index f13b4bd9e2..5b5bb5cb78 100644 --- a/arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h +++ b/arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h @@ -24,14 +24,16 @@ #ifndef ARM_COMPUTE_NEMAXUNPOOLINGLAYER_H #define ARM_COMPUTE_NEMAXUNPOOLINGLAYER_H +#include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" - -#include "arm_compute/core/NEON/kernels/NEMaxUnpoolingLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEMemsetKernel.h" +#include namespace arm_compute { class ITensor; +class ITensorInfo; +class NEMemsetKernel; +class NEMaxUnpoolingLayerKernel; /** Function to perform MaxUnpooling. 
This function calls the following NEON kernels: * @@ -43,6 +45,16 @@ class NEMaxUnpoolingLayer : public IFunction public: /** Constructor */ NEMaxUnpoolingLayer(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMaxUnpoolingLayer(const NEMaxUnpoolingLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMaxUnpoolingLayer &operator=(const NEMaxUnpoolingLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEMaxUnpoolingLayer(NEMaxUnpoolingLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEMaxUnpoolingLayer &operator=(NEMaxUnpoolingLayer &&) = delete; + /** Default destructor */ + ~NEMaxUnpoolingLayer(); /** Set the input and output tensors. * * @note Only supported pool size 2 @@ -70,8 +82,8 @@ public: void run() override; private: - NEMemsetKernel _memset_kernel; - NEMaxUnpoolingLayerKernel _unpooling_layer_kernel; + std::unique_ptr _memset_kernel; + std::unique_ptr _unpooling_layer_kernel; }; } #endif /* ARM_COMPUTE_NEMAXUNPOOLINGLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEMeanStdDev.h b/arm_compute/runtime/NEON/functions/NEMeanStdDev.h index 120f703140..875c3630c1 100644 --- a/arm_compute/runtime/NEON/functions/NEMeanStdDev.h +++ b/arm_compute/runtime/NEON/functions/NEMeanStdDev.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,15 +24,18 @@ #ifndef ARM_COMPUTE_NEMEANSTDDEV_H #define ARM_COMPUTE_NEMEANSTDDEV_H -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h" +#include "arm_compute/core/IMultiImage.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" +#include #include namespace arm_compute { +class NEMeanStdDevKernel; +class NEFillBorderKernel; + /** Basic function to execute mean and std deviation. This function calls the following NEON kernels: * * @ref NEMeanStdDevKernel @@ -43,6 +46,16 @@ class NEMeanStdDev : public IFunction public: /** Default Constructor. */ NEMeanStdDev(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMeanStdDev(const NEMeanStdDev &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMeanStdDev &operator=(const NEMeanStdDev &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEMeanStdDev(NEMeanStdDev &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEMeanStdDev &operator=(NEMeanStdDev &&) = delete; + /** Default destructor */ + ~NEMeanStdDev(); /** Initialise the kernel's inputs and outputs. * * @param[in, out] input Input image. Data types supported: U8. (Written to only for border filling) @@ -55,10 +68,10 @@ public: void run() override; private: - NEMeanStdDevKernel _mean_stddev_kernel; /**< Kernel that standard deviation calculation. */ - NEFillBorderKernel _fill_border_kernel; /**< Kernel that fills tensor's borders with zeroes. 
*/ - uint64_t _global_sum; /**< Variable that holds the global sum among calls in order to ease reduction */ - uint64_t _global_sum_squared; /**< Variable that holds the global sum of squared values among calls in order to ease reduction */ + std::unique_ptr _mean_stddev_kernel; /**< Kernel that performs the standard deviation calculation. */ + std::unique_ptr _fill_border_kernel; /**< Kernel that fills tensor's borders with zeroes. */ + uint64_t _global_sum; /**< Variable that holds the global sum among calls in order to ease reduction */ + uint64_t _global_sum_squared; /**< Variable that holds the global sum of squared values among calls in order to ease reduction */ }; } #endif /*ARM_COMPUTE_NEMEANSTDDEV_H */ diff --git a/arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h b/arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h index 132ab8a01b..31e376191c 100644 --- a/arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h +++ b/arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -30,11 +30,24 @@ namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to execute mean and standard deviation normalization by calling @ref NEMeanStdDevNormalizationKernel */ class NEMeanStdDevNormalizationLayer : public INESimpleFunctionNoBorder { public: + /** Constructor */ + NEMeanStdDevNormalizationLayer() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMeanStdDevNormalizationLayer(const NEMeanStdDevNormalizationLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMeanStdDevNormalizationLayer &operator=(const NEMeanStdDevNormalizationLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEMeanStdDevNormalizationLayer(NEMeanStdDevNormalizationLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEMeanStdDevNormalizationLayer &operator=(NEMeanStdDevNormalizationLayer &&) = delete; + /** Default destructor */ + ~NEMeanStdDevNormalizationLayer(); /** Initialise the function's input and outputs. * * @note If the output tensor is a nullptr, the normalization will be performed in-place. diff --git a/arm_compute/runtime/NEON/functions/NEMedian3x3.h b/arm_compute/runtime/NEON/functions/NEMedian3x3.h index 8d860e2103..4b5f60382f 100644 --- a/arm_compute/runtime/NEON/functions/NEMedian3x3.h +++ b/arm_compute/runtime/NEON/functions/NEMedian3x3.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/arm_compute/runtime/NEON/functions/NEMinMaxLocation.h b/arm_compute/runtime/NEON/functions/NEMinMaxLocation.h index caa66a0c16..5959bbb0ad 100644 --- a/arm_compute/runtime/NEON/functions/NEMinMaxLocation.h +++ b/arm_compute/runtime/NEON/functions/NEMinMaxLocation.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,15 +25,17 @@ #define ARM_COMPUTE_NEMINMAXLOCATION_H #include "arm_compute/core/IArray.h" -#include "arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h" #include "arm_compute/runtime/Array.h" #include "arm_compute/runtime/IFunction.h" #include +#include namespace arm_compute { class ITensor; +class NEMinMaxKernel; +class NEMinMaxLocationKernel; using IImage = ITensor; /** Basic function to execute min and max location. This function calls the following NEON kernels: @@ -46,6 +48,16 @@ class NEMinMaxLocation : public IFunction public: /** Constructor */ NEMinMaxLocation(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMinMaxLocation(const NEMinMaxLocation &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMinMaxLocation &operator=(const NEMinMaxLocation &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEMinMaxLocation(NEMinMaxLocation &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEMinMaxLocation &operator=(NEMinMaxLocation &&) = delete; + /** Default destructor */ + ~NEMinMaxLocation(); /** Initialise the kernel's inputs and outputs. * * @param[in] input Input image. Data types supported: U8/S16/F32. 
@@ -64,8 +76,8 @@ public: void run() override; private: - NEMinMaxKernel _min_max; /**< Kernel that performs min/max */ - NEMinMaxLocationKernel _min_max_loc; /**< Kernel that extracts min/max locations */ + std::unique_ptr<NEMinMaxKernel> _min_max; /**< Kernel that performs min/max */ + std::unique_ptr<NEMinMaxLocationKernel> _min_max_loc; /**< Kernel that extracts min/max locations */ }; } #endif /*ARM_COMPUTE_NEMINMAXLOCATION_H */ diff --git a/arm_compute/runtime/NEON/functions/NENonLinearFilter.h b/arm_compute/runtime/NEON/functions/NENonLinearFilter.h index d2a85837fd..fe1b190db2 100644 --- a/arm_compute/runtime/NEON/functions/NENonLinearFilter.h +++ b/arm_compute/runtime/NEON/functions/NENonLinearFilter.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h b/arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h index 07d4b16cf1..bad633afac 100644 --- a/arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h +++ b/arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited.
* * SPDX-License-Identifier: MIT * diff --git a/arm_compute/runtime/NEON/functions/NENormalizationLayer.h b/arm_compute/runtime/NEON/functions/NENormalizationLayer.h index fcdba12046..6519f9b4e6 100644 --- a/arm_compute/runtime/NEON/functions/NENormalizationLayer.h +++ b/arm_compute/runtime/NEON/functions/NENormalizationLayer.h @@ -26,8 +26,6 @@ #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/MemoryGroup.h" @@ -39,6 +37,7 @@ namespace arm_compute { class ITensor; +class NENormalizationLayerKernel; /** Basic function to compute a normalization layer. This function calls the following NEON kernels: * @@ -52,6 +51,16 @@ class NENormalizationLayer : public IFunction public: /** Default constructor */ NENormalizationLayer(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NENormalizationLayer(const NENormalizationLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NENormalizationLayer &operator=(const NENormalizationLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NENormalizationLayer(NENormalizationLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NENormalizationLayer &operator=(NENormalizationLayer &&) = delete; + /** Default destructor */ + ~NENormalizationLayer(); /** Set the input and output tensors. * * @param[in] input Source tensor. 
3 lower dims represent a single input with dimensions [width, height, IFM], @@ -75,10 +84,10 @@ public: void run() override; private: - MemoryGroup _memory_group; /**< Function memory group */ - NENormalizationLayerKernel _norm_kernel; /**< Normalization layer kernel */ - NEPixelWiseMultiplication _multiply_f; /**< Pixel multiplication function */ - Tensor _input_squared; /**< The intermediate buffer which stores results of squaring input */ + MemoryGroup _memory_group; /**< Function memory group */ + std::unique_ptr<NENormalizationLayerKernel> _norm_kernel; /**< Normalization layer kernel */ + NEPixelWiseMultiplication _multiply_f; /**< Pixel multiplication function */ + Tensor _input_squared; /**< The intermediate buffer which stores results of squaring input */ }; } #endif /* ARM_COMPUTE_NENORMALIZATIONLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEOpticalFlow.h b/arm_compute/runtime/NEON/functions/NEOpticalFlow.h index 141ee7ea41..a9f985a422 100644 --- a/arm_compute/runtime/NEON/functions/NEOpticalFlow.h +++ b/arm_compute/runtime/NEON/functions/NEOpticalFlow.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited.
 * * SPDX-License-Identifier: MIT * @@ -25,7 +25,7 @@ #define ARM_COMPUTE_NEOPTICALFLOW_H #include "arm_compute/core/IArray.h" -#include "arm_compute/core/NEON/kernels/NELKTrackerKernel.h" +#include "arm_compute/core/Types.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/Array.h" #include "arm_compute/runtime/IFunction.h" @@ -41,6 +41,7 @@ namespace arm_compute { class Pyramid; +class NELKTrackerKernel; /** Array of LK Internel Keypoints */ using LKInternalKeypointArray = Array<NELKInternalKeypoint>; @@ -62,6 +63,8 @@ public: NEOpticalFlow(const NEOpticalFlow &) = delete; /** Prevent instances of this class from being copied (As this class contains pointers) */ NEOpticalFlow &operator=(const NEOpticalFlow &) = delete; + /** Default destructor */ + ~NEOpticalFlow(); /** Initialise the function input and output * * @param[in] old_pyramid Pointer to the pyramid for the old tensor. Data type supported U8 @@ -86,17 +89,17 @@ public: void run() override; private: - MemoryGroup _memory_group; - std::vector<NEScharr3x3> _func_scharr; - std::vector<NELKTrackerKernel> _kernel_tracker; - std::vector<Tensor> _scharr_gx; - std::vector<Tensor> _scharr_gy; - IKeyPointArray *_new_points; - const IKeyPointArray *_new_points_estimates; - const IKeyPointArray *_old_points; - LKInternalKeypointArray _new_points_internal; - LKInternalKeypointArray _old_points_internal; - unsigned int _num_levels; + MemoryGroup _memory_group; + std::vector<NEScharr3x3> _func_scharr; + std::vector<std::unique_ptr<NELKTrackerKernel>> _kernel_tracker; + std::vector<Tensor> _scharr_gx; + std::vector<Tensor> _scharr_gy; + IKeyPointArray *_new_points; + const IKeyPointArray *_new_points_estimates; + const IKeyPointArray *_old_points; + LKInternalKeypointArray _new_points_internal; + LKInternalKeypointArray _old_points_internal; + unsigned int _num_levels; }; } // namespace arm_compute #endif /*ARM_COMPUTE_NEOPTICALFLOW_H */ diff --git a/arm_compute/runtime/NEON/functions/NEPReluLayer.h b/arm_compute/runtime/NEON/functions/NEPReluLayer.h index 756058b5ec..358e633000 ---
a/arm_compute/runtime/NEON/functions/NEPReluLayer.h +++ b/arm_compute/runtime/NEON/functions/NEPReluLayer.h @@ -31,6 +31,7 @@ namespace arm_compute { class ITensor; +class ITensorInfo; namespace experimental { diff --git a/arm_compute/runtime/NEON/functions/NEPadLayer.h b/arm_compute/runtime/NEON/functions/NEPadLayer.h index fcb7c36312..3fdbb0d73c 100644 --- a/arm_compute/runtime/NEON/functions/NEPadLayer.h +++ b/arm_compute/runtime/NEON/functions/NEPadLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -29,13 +29,15 @@ #include "arm_compute/runtime/NEON/functions/NEStridedSlice.h" #include "arm_compute/runtime/SubTensor.h" -#include "arm_compute/core/NEON/kernels/NECopyKernel.h" -#include "arm_compute/core/NEON/kernels/NEPadLayerKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/Tensor.h" +#include namespace arm_compute { +class NECopyKernel; +class NEPadLayerKernel; + /** Basic function to pad a tensor. This function calls the following NEON functions/kernels: * * - For padding mode = PaddingMode::CONSTANT: @@ -49,8 +51,18 @@ namespace arm_compute class NEPadLayer : public IFunction { public: - /** Default constructor*/ + /** Default Constructor */ NEPadLayer(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEPadLayer(const NEPadLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEPadLayer &operator=(const NEPadLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEPadLayer(NEPadLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEPadLayer &operator=(NEPadLayer &&) = delete; + /** Default destructor */ + ~NEPadLayer(); /** Initialize the function * * @param[in] input Source tensor. 
Data types supported: All. @@ -97,15 +109,15 @@ private: void configure_reflect_symmetric_mode(ITensor *input, ITensor *output); private: - NECopyKernel _copy_kernel; - NEPadLayerKernel _pad_kernel; - PaddingMode _mode; - PaddingList _padding; - uint32_t _num_dimensions; - std::vector _slice_functions; - std::vector _concat_functions; - std::vector _slice_results; - std::vector _concat_results; + std::unique_ptr _copy_kernel; + std::unique_ptr _pad_kernel; + PaddingMode _mode; + PaddingList _padding; + uint32_t _num_dimensions; + std::vector _slice_functions; + std::vector _concat_functions; + std::vector _slice_results; + std::vector _concat_results; }; } // namespace arm_compute #endif /*ARM_COMPUTE_NEPADLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEPermute.h b/arm_compute/runtime/NEON/functions/NEPermute.h index 3be42c8346..ef8854b360 100644 --- a/arm_compute/runtime/NEON/functions/NEPermute.h +++ b/arm_compute/runtime/NEON/functions/NEPermute.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -32,6 +32,7 @@ namespace arm_compute { // Forward declarations class ITensor; +class ITensorInfo; /** Basic function to run @ref NEPermuteKernel */ class NEPermute : public INESimpleFunctionNoBorder diff --git a/arm_compute/runtime/NEON/functions/NEPhase.h b/arm_compute/runtime/NEON/functions/NEPhase.h index c492073e22..626023c099 100644 --- a/arm_compute/runtime/NEON/functions/NEPhase.h +++ b/arm_compute/runtime/NEON/functions/NEPhase.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,11 +24,13 @@ #ifndef ARM_COMPUTE_NEPHASE_H #define ARM_COMPUTE_NEPHASE_H +#include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to run @ref NEMagnitudePhaseKernel */ class NEPhase : public INESimpleFunctionNoBorder diff --git a/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h b/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h index e1072980cf..91cf44ff2e 100644 --- a/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h +++ b/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h @@ -31,6 +31,7 @@ namespace arm_compute { class ITensor; +class ITensorInfo; namespace experimental { diff --git a/arm_compute/runtime/NEON/functions/NEPoolingLayer.h b/arm_compute/runtime/NEON/functions/NEPoolingLayer.h index 000c754ec8..b45290fb46 100644 --- a/arm_compute/runtime/NEON/functions/NEPoolingLayer.h +++ b/arm_compute/runtime/NEON/functions/NEPoolingLayer.h @@ -26,13 +26,15 @@ #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h" #include "arm_compute/core/Types.h" +#include namespace arm_compute { class ITensor; +class ITensorInfo; +class NEPoolingLayerKernel; +class NEFillBorderKernel; /** Basic function to simulate a pooling layer with the specified pooling operation. 
This function calls the following NEON kernels: * @@ -44,6 +46,16 @@ class NEPoolingLayer : public IFunction public: /** Constructor */ NEPoolingLayer(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEPoolingLayer(const NEPoolingLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEPoolingLayer &operator=(const NEPoolingLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEPoolingLayer(NEPoolingLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEPoolingLayer &operator=(NEPoolingLayer &&) = delete; + /** Default destructor */ + ~NEPoolingLayer(); /** Set the input and output tensors. * * @note F16 is supported for pool sizes 2 and 3 only @@ -71,10 +83,10 @@ public: void run() override; private: - NEPoolingLayerKernel _pooling_layer_kernel; - NEFillBorderKernel _border_handler; - bool _is_global_pooling_layer; - DataLayout _data_layout; + std::unique_ptr<NEPoolingLayerKernel> _pooling_layer_kernel; + std::unique_ptr<NEFillBorderKernel> _border_handler; + bool _is_global_pooling_layer; + DataLayout _data_layout; }; } #endif /* ARM_COMPUTE_NEPOOLINGLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h b/arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h index d4bb42fd07..3cc79fa28e 100644 --- a/arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h +++ b/arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited.
* * SPDX-License-Identifier: MIT * @@ -24,13 +24,13 @@ #ifndef ARM_COMPUTE_NEPRIORBOXLAYER_H #define ARM_COMPUTE_NEPRIORBOXLAYER_H -#include "arm_compute/core/NEON/kernels/NEPriorBoxLayerKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to run @ref NEPriorBoxLayerKernel. */ class NEPriorBoxLayer : public INESimpleFunctionNoBorder diff --git a/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h b/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h index 7c572de874..17ad5a354b 100644 --- a/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h +++ b/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h @@ -24,24 +24,27 @@ #ifndef ARM_COMPUTE_NEQLSTMLAYER_H #define ARM_COMPUTE_NEQLSTMLAYER_H -#include "arm_compute/core/NEON/kernels/NECopyKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h" -#include "arm_compute/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" #include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" #include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h" +#include "arm_compute/runtime/NEON/functions/NECopy.h" #include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h" #include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h" #include "arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h" #include "arm_compute/runtime/NEON/functions/NETranspose.h" +#include "support/MemorySupport.h" #include "arm_compute/runtime/common/LSTMParams.h" +#include namespace arm_compute { // Forward declarations class ITensor; +class ITensorInfo; +class NEQLSTMLayerNormalizationKernel; +class NEGEMMLowpMatrixAReductionKernel; /** Basic function to run @ref NEQLSTMLayer * @@ -70,6 +73,8 @@ public: NEQLSTMLayer &operator=(const NEQLSTMLayer &) = delete; /** 
Default move assignment operator */ NEQLSTMLayer &operator=(NEQLSTMLayer &&) = default; + /** Default destructor */ + ~NEQLSTMLayer(); /** Initialize function's tensors. * * @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: QASYMM8_SIGNED. @@ -204,7 +209,7 @@ private: Tensor *outstage_res, float gemmlowp_scale, const TensorInfo &mm_res_info, const TensorInfo &outstage_tensor_info); - MemoryGroup _memory_group{}; + MemoryGroup _memory_group; /** A small internel kernel do the copy between two tensors */ class TensorCopyKernel @@ -217,6 +222,8 @@ private: Window _window{}; public: + /** Destructor */ + ~TensorCopyKernel(); /** Static function to check if given info will lead to a valid configuration of @ref NEQLSTMLayer::TensorCopyKernel * * @param[in] src Source tensor info. @@ -236,79 +243,79 @@ private: }; // Functions used - NETranspose _transpose_input_to_forget_weights{}; - NETranspose _transpose_input_to_cell_weights{}; - NETranspose _transpose_input_to_output_weights{}; - NETranspose _transpose_input_to_input_weights{}; - NETranspose _transpose_recurrent_to_forget_weights{}; - NETranspose _transpose_recurrent_to_cell_weights{}; - NETranspose _transpose_recurrent_to_output_weights{}; - NETranspose _transpose_recurrent_to_input_weights{}; - NETranspose _transpose_projection_weights{}; - NEGEMMLowpMatrixAReductionKernel _input_to_input_reduction{}; - NEGEMMLowpMatrixAReductionKernel _recurrent_to_input_reduction{}; - NEGEMMLowpMatrixAReductionKernel _input_to_forget_reduction{}; - NEGEMMLowpMatrixAReductionKernel _recurrent_to_forget_reduction{}; - NEGEMMLowpMatrixAReductionKernel _input_to_cell_reduction{}; - NEGEMMLowpMatrixAReductionKernel _recurrent_to_cell_reduction{}; - NEGEMMLowpMatrixAReductionKernel _input_to_output_reduction{}; - NEGEMMLowpMatrixAReductionKernel _recurrent_to_output_reduction{}; - NEGEMMLowpMatrixAReductionKernel _projection_reduction{}; - NEArithmeticAddition 
_projection_bias_add{}; - NEGEMMLowpMatrixMultiplyCore _mm_input_to_forget{}; - NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_forget{}; - NEPixelWiseMultiplication _pixelwise_mul_cell_to_forget{}; - NEGEMMLowpOutputStage _input_to_forget_outstage{}; - NEGEMMLowpOutputStage _recurrent_to_forget_outstage{}; - NEGEMMLowpOutputStage _cell_to_forget_outstage{}; - NEArithmeticAddition _accumulate_input_recurrent_forget{}; - NEArithmeticAddition _accumulate_cell_forget{}; - NEActivationLayer _forget_gate_sigmoid{}; - NEGEMMLowpMatrixMultiplyCore _mm_input_to_cell{}; - NEGEMMLowpOutputStage _input_to_cell_outstage{}; - NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_cell{}; - NEGEMMLowpOutputStage _recurrent_to_cell_outstage{}; - NEArithmeticAddition _accumulate_input_recurrent_modulation{}; - NEActivationLayer _cell_gate_tanh{}; - NEArithmeticSubtraction _input_gate_sub{}; - NEGEMMLowpMatrixMultiplyCore _mm_input_to_input{}; - NEGEMMLowpOutputStage _input_to_input_outstage{}; - NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_input{}; - NEGEMMLowpOutputStage _recurrent_to_input_outstage{}; - NEArithmeticAddition _accumulate_input_recurrent_input{}; - NEPixelWiseMultiplication _pixelwise_mul_cell_to_input{}; - NEGEMMLowpOutputStage _cell_to_input_outstage{}; - NEArithmeticAddition _accumulate_cell_input{}; - NEActivationLayer _input_gate_sigmoid{}; - NEPixelWiseMultiplication _pixelwise_mul_forget_cell{}; - NEPixelWiseMultiplication _pixelwise_mul_input_cell{}; - NEArithmeticAddition _add_forget_cell{}; - NEActivationLayer _cell_clip{}; - NEGEMMLowpMatrixMultiplyCore _mm_input_to_output{}; - NEGEMMLowpOutputStage _input_to_output_outstage{}; - NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_output{}; - NEGEMMLowpOutputStage _recurrent_to_output_outstage{}; - NEArithmeticAddition _accumulate_input_recurrent_output{}; - NEPixelWiseMultiplication _pixelwise_mul_cell_to_output{}; - NEGEMMLowpOutputStage _cell_to_output_outstage{}; - NEArithmeticAddition _accumulate_cell_to_output{}; 
- NEActivationLayer _output_gate_sigmoid{}; - NEActivationLayer _hidden_tanh{}; - NEPixelWiseMultiplication _pixelwise_mul_hidden{}; - NEGEMMLowpOutputStage _hidden_outstage{}; - NEGEMMLowpMatrixMultiplyCore _mm_projection{}; - NEGEMMLowpOutputStage _projection_outstage{}; - NEArithmeticAddition _accumulate_projection{}; - NEActivationLayer _projection_clip{}; + NETranspose _transpose_input_to_forget_weights; + NETranspose _transpose_input_to_cell_weights; + NETranspose _transpose_input_to_output_weights; + NETranspose _transpose_input_to_input_weights; + NETranspose _transpose_recurrent_to_forget_weights; + NETranspose _transpose_recurrent_to_cell_weights; + NETranspose _transpose_recurrent_to_output_weights; + NETranspose _transpose_recurrent_to_input_weights; + NETranspose _transpose_projection_weights; + std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _input_to_input_reduction; + std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _recurrent_to_input_reduction; + std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _input_to_forget_reduction; + std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _recurrent_to_forget_reduction; + std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _input_to_cell_reduction; + std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _recurrent_to_cell_reduction; + std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _input_to_output_reduction; + std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _recurrent_to_output_reduction; + std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _projection_reduction; + NEArithmeticAddition _projection_bias_add; + NEGEMMLowpMatrixMultiplyCore _mm_input_to_forget; + NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_forget; + NEPixelWiseMultiplication _pixelwise_mul_cell_to_forget; + NEGEMMLowpOutputStage _input_to_forget_outstage; + NEGEMMLowpOutputStage _recurrent_to_forget_outstage; + NEGEMMLowpOutputStage _cell_to_forget_outstage; + NEArithmeticAddition _accumulate_input_recurrent_forget; + NEArithmeticAddition _accumulate_cell_forget; + NEActivationLayer _forget_gate_sigmoid; + NEGEMMLowpMatrixMultiplyCore _mm_input_to_cell; + NEGEMMLowpOutputStage _input_to_cell_outstage; + NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_cell; + NEGEMMLowpOutputStage _recurrent_to_cell_outstage; + NEArithmeticAddition
_accumulate_input_recurrent_modulation; + NEActivationLayer _cell_gate_tanh; + NEArithmeticSubtraction _input_gate_sub; + NEGEMMLowpMatrixMultiplyCore _mm_input_to_input; + NEGEMMLowpOutputStage _input_to_input_outstage; + NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_input; + NEGEMMLowpOutputStage _recurrent_to_input_outstage; + NEArithmeticAddition _accumulate_input_recurrent_input; + NEPixelWiseMultiplication _pixelwise_mul_cell_to_input; + NEGEMMLowpOutputStage _cell_to_input_outstage; + NEArithmeticAddition _accumulate_cell_input; + NEActivationLayer _input_gate_sigmoid; + NEPixelWiseMultiplication _pixelwise_mul_forget_cell; + NEPixelWiseMultiplication _pixelwise_mul_input_cell; + NEArithmeticAddition _add_forget_cell; + NEActivationLayer _cell_clip; + NEGEMMLowpMatrixMultiplyCore _mm_input_to_output; + NEGEMMLowpOutputStage _input_to_output_outstage; + NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_output; + NEGEMMLowpOutputStage _recurrent_to_output_outstage; + NEArithmeticAddition _accumulate_input_recurrent_output; + NEPixelWiseMultiplication _pixelwise_mul_cell_to_output; + NEGEMMLowpOutputStage _cell_to_output_outstage; + NEArithmeticAddition _accumulate_cell_to_output; + NEActivationLayer _output_gate_sigmoid; + NEActivationLayer _hidden_tanh; + NEPixelWiseMultiplication _pixelwise_mul_hidden; + NEGEMMLowpOutputStage _hidden_outstage; + NEGEMMLowpMatrixMultiplyCore _mm_projection; + NEGEMMLowpOutputStage _projection_outstage; + NEArithmeticAddition _accumulate_projection; + NEActivationLayer _projection_clip; - TensorCopyKernel _projection_bias_copy{}; - TensorCopyKernel _projection_output_to_accumulate_copy{}; - TensorCopyKernel _projection_accumulate_to_output_copy{}; - TensorCopyKernel _hidden_to_output_copy{}; + TensorCopyKernel _projection_bias_copy; + TensorCopyKernel _projection_output_to_accumulate_copy; + TensorCopyKernel _projection_accumulate_to_output_copy; + TensorCopyKernel _hidden_to_output_copy; - std::array _layer_norms{ {} }; + 
std::array<std::unique_ptr<NEQLSTMLayerNormalizationKernel>, _layer_norm_count> _layer_norms; - NECopyKernel _copy_output{}; + NECopy _copy_output; // Tensor pointers const ITensor *_input_to_input_weights @@ -324,8 +331,8 @@ private: const ITensor *_recurrent_to_cell_weights{ nullptr }; const ITensor *_recurrent_to_output_weights{ nullptr }; const ITensor *_projection_weights{ nullptr }; - std::array<const ITensor *, _layer_norm_count> _layer_norm_weights{ {} }; - std::array<const ITensor *, _layer_norm_count> _layer_norm_bias{ {} }; + std::array<const ITensor *, _layer_norm_count> _layer_norm_weights{}; + std::array<const ITensor *, _layer_norm_count> _layer_norm_bias{}; using LayerNormIndexType = typename std::underlying_type<LayerNormGate>::type; inline LayerNormIndexType getGateIndex(LayerNormGate g) @@ -353,32 +360,13 @@ return _layer_norm_bias[getGateIndex(g)]; } - inline NEQLSTMLayerNormalizationKernel &get_layer_norm(LayerNormGate g) + inline std::unique_ptr<NEQLSTMLayerNormalizationKernel> &get_layer_norm(LayerNormGate g) { return _layer_norms[getGateIndex(g)]; } - inline void configure_layer_norm(LayerNormGate g, const ITensor *in) - { - ARM_COMPUTE_ERROR_ON(!_has_layer_norm); - - Tensor &out = get_layer_norm_output(g); - _memory_group.manage(&out); - out.allocator()->init(*(in->info())); - - get_layer_norm(g).configure(in, &out, get_layer_norm_weight(g), get_layer_norm_bias(g)); - } - - inline static Status validate_layer_norm(const ITensorInfo &in, const ITensorInfo &weight, const ITensorInfo &bias) - { - // Output quantization scale will be different, but ignored here - // since it will be configured at configure() stage.
- const TensorInfo out - { - in - }; - return NEQLSTMLayerNormalizationKernel::validate(&in, &out, &weight, &bias); - } + void configure_layer_norm(LayerNormGate g, const ITensor *in); + static Status validate_layer_norm(const ITensorInfo &in, const ITensorInfo &weight, const ITensorInfo &bias); // Temporary tensors Tensor _input_to_forget_weights_transposed{ nullptr }; @@ -434,7 +422,7 @@ private: Tensor _projection_out_res{ nullptr }; Tensor _projection_accumulate_res{ nullptr }; Tensor _ones{ nullptr }; - std::array _layer_norm_output{ {} }; + std::array _layer_norm_output{}; inline Tensor &get_layer_norm_output(LayerNormGate g) { diff --git a/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h b/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h index 266b3df87a..36302f4741 100644 --- a/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h +++ b/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h @@ -26,7 +26,6 @@ #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h" #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" #include "arm_compute/core/Types.h" @@ -34,6 +33,7 @@ namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to simulate a quantization layer. This function calls the following NEON kernels: * @@ -44,8 +44,6 @@ class ITensor; class NEQuantizationLayer : public INESimpleFunctionNoBorder { public: - /** Default constructor */ - NEQuantizationLayer() = default; /** Set the input and output tensors. * * @param[in] input Source tensor. The dimensions over the third will be interpreted as batches. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16. 
diff --git a/arm_compute/runtime/NEON/functions/NERNNLayer.h b/arm_compute/runtime/NEON/functions/NERNNLayer.h index 12e3ef9c57..74fdc59af6 100644 --- a/arm_compute/runtime/NEON/functions/NERNNLayer.h +++ b/arm_compute/runtime/NEON/functions/NERNNLayer.h @@ -24,8 +24,6 @@ #ifndef ARM_COMPUTE_NERNNLAYER_H #define ARM_COMPUTE_NERNNLAYER_H -#include "arm_compute/core/NEON/kernels/NECopyKernel.h" - #include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" #include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" @@ -36,6 +34,7 @@ namespace arm_compute { // Forward declarations class ITensor; +class NECopyKernel; /** Basic function to run @ref NERNNLayer */ class NERNNLayer : public IFunction @@ -51,6 +50,8 @@ public: NERNNLayer &operator=(const NERNNLayer &) = delete; /** Default move assignment operator */ NERNNLayer &operator=(NERNNLayer &&) = default; + /** Default destructor */ + ~NERNNLayer(); /** Initialize the function * * @param[in] input Input is a 2-D tensor of shape [input_size, batch_size]. 
Data types supported: F16/F32 @@ -82,16 +83,16 @@ public: void prepare() override; private: - MemoryGroup _memory_group; - NEGEMM _gemm_state_f; - NEArithmeticAddition _add_f; - NEActivationLayer _activation; - NEFullyConnectedLayer _fully_connected; - NECopyKernel _copy_kernel; - Tensor _fully_connected_out; - Tensor _gemm_output; - Tensor _add_output; - bool _is_prepared; + MemoryGroup _memory_group; + NEGEMM _gemm_state_f; + NEArithmeticAddition _add_f; + NEActivationLayer _activation; + NEFullyConnectedLayer _fully_connected; + std::unique_ptr<NECopyKernel> _copy_kernel; + Tensor _fully_connected_out; + Tensor _gemm_output; + Tensor _add_output; + bool _is_prepared; }; } // namespace arm_compute #endif /* ARM_COMPUTE_NERNNLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEROIAlignLayer.h b/arm_compute/runtime/NEON/functions/NEROIAlignLayer.h index 3e8db55f99..1d992f53df 100644 --- a/arm_compute/runtime/NEON/functions/NEROIAlignLayer.h +++ b/arm_compute/runtime/NEON/functions/NEROIAlignLayer.h @@ -24,11 +24,13 @@ #ifndef ARM_COMPUTE_NEROIALIGNLAYER_H #define ARM_COMPUTE_NEROIALIGNLAYER_H -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to run @ref NEROIAlignLayerKernel. * @@ -36,7 +38,7 @@ class ITensor; * -# @ref NEROIAlignLayerKernel * */ -class NEROIAlignLayer : public INESimpleFunction +class NEROIAlignLayer : public INESimpleFunctionNoBorder { public: /** Set the input and output tensors. diff --git a/arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h b/arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h index 08885d0e58..0b9b4f75fc 100644 --- a/arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h +++ b/arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited.
 * * SPDX-License-Identifier: MIT * @@ -27,11 +27,13 @@ #include "arm_compute/runtime/IFunction.h" #include "arm_compute/core/IArray.h" -#include "arm_compute/core/NEON/kernels/NEROIPoolingLayerKernel.h" +#include <memory> namespace arm_compute { class ITensor; +class NEROIPoolingLayerKernel; +class ROIPoolingLayerInfo; /** Basic function to run @ref NEROIPoolingLayerKernel. * @@ -44,6 +46,16 @@ class NEROIPoolingLayer : public IFunction public: /** Constructor */ NEROIPoolingLayer(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEROIPoolingLayer(const NEROIPoolingLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEROIPoolingLayer &operator=(const NEROIPoolingLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEROIPoolingLayer(NEROIPoolingLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEROIPoolingLayer &operator=(NEROIPoolingLayer &&) = delete; + /** Default destructor */ + ~NEROIPoolingLayer(); /** Set the input and output tensors. * * @param[in] input Source tensor. Data types supported: F32. @@ -63,7 +75,7 @@ public: void run() override; private: - NEROIPoolingLayerKernel _roi_kernel; + std::unique_ptr<NEROIPoolingLayerKernel> _roi_kernel; }; } // namespace arm_compute #endif /* ARM_COMPUTE_NEROIPOOLINGLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NERange.h b/arm_compute/runtime/NEON/functions/NERange.h index 04889d4d6f..844a47d2ae 100644 --- a/arm_compute/runtime/NEON/functions/NERange.h +++ b/arm_compute/runtime/NEON/functions/NERange.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited.
* * SPDX-License-Identifier: MIT * @@ -24,13 +24,15 @@ #ifndef ARM_COMPUTE_NERANGE_H #define ARM_COMPUTE_NERANGE_H -#include "arm_compute/core/NEON/kernels/NERangeKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" +#include namespace arm_compute { class ITensor; +class ITensorInfo; +class NERangeKernel; /** Basic function to run @ref NERangeKernel * @@ -42,6 +44,16 @@ class NERange : public IFunction public: /** Default constructor */ NERange(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NERange(const NERange &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NERange &operator=(const NERange &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NERange(NERange &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NERange &operator=(NERange &&) = delete; + /** Default destructor */ + ~NERange(); /** Initialize the kernel's start, end, step and output tensor. * * @param[out] output Output tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32. 
@@ -65,7 +77,7 @@ public: void run() override; private: - NERangeKernel _kernel; + std::unique_ptr _kernel; }; } // namespace arm_compute #endif /* ARM_COMPUTE_NERANGE_H */ diff --git a/arm_compute/runtime/NEON/functions/NEReduceMean.h b/arm_compute/runtime/NEON/functions/NEReduceMean.h index eee3f7f799..89cd09812b 100644 --- a/arm_compute/runtime/NEON/functions/NEReduceMean.h +++ b/arm_compute/runtime/NEON/functions/NEReduceMean.h @@ -26,7 +26,6 @@ #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/MemoryGroup.h" #include "arm_compute/runtime/NEON/functions/NEDequantizationLayer.h" @@ -43,6 +42,16 @@ class NEReduceMean : public IFunction public: /** Constructor */ NEReduceMean(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEReduceMean(const NEReduceMean &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEReduceMean &operator=(const NEReduceMean &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEReduceMean(NEReduceMean &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEReduceMean &operator=(NEReduceMean &&) = delete; + /** Default destructor */ + ~NEReduceMean(); /** Configure kernel * * @note Supported tensor rank: up to 4 diff --git a/arm_compute/runtime/NEON/functions/NEReductionOperation.h b/arm_compute/runtime/NEON/functions/NEReductionOperation.h index aafccb00e3..8186e2e355 100644 --- a/arm_compute/runtime/NEON/functions/NEReductionOperation.h +++ b/arm_compute/runtime/NEON/functions/NEReductionOperation.h @@ -26,13 +26,14 @@ #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/NEON/kernels/NEReductionOperationKernel.h" #include 
"arm_compute/runtime/NEON/functions/NEReshapeLayer.h" #include "arm_compute/runtime/Tensor.h" +#include namespace arm_compute { class ITensor; +class NEReductionOperationKernel; /** Basic function to simulate a reduction operation. This function calls the following NEON kernels: * @@ -45,6 +46,16 @@ class NEReductionOperation : public IFunction public: /** Default constructor */ NEReductionOperation(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEReductionOperation(const NEReductionOperation &) = delete; + /** Default move constructor */ + NEReductionOperation(NEReductionOperation &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEReductionOperation &operator=(const NEReductionOperation &) = delete; + /** Default move assignment operator */ + NEReductionOperation &operator=(NEReductionOperation &&) = default; + /** Default destructor */ + ~NEReductionOperation(); /** Set the input and output tensors. * * @param[in, out] input Source tensor. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32/S32. Data layouts supported: NCHW. 
(Written to only for border_size != 0) @@ -71,13 +82,13 @@ public: void run() override; private: - MemoryGroup _memory_group; - NEReductionOperationKernel _reduction_kernel; - NEReshapeLayer _reshape; - Tensor _output_internal; - size_t _window_split; - int _reduction_axis; - bool _is_reshape_required; + MemoryGroup _memory_group; + std::unique_ptr _reduction_kernel; + NEReshapeLayer _reshape; + Tensor _output_internal; + size_t _window_split; + int _reduction_axis; + bool _is_reshape_required; }; } // namespace arm_compute #endif /* ARM_COMPUTE_NEREDUCTIONOPERATION_H */ diff --git a/arm_compute/runtime/NEON/functions/NERemap.h b/arm_compute/runtime/NEON/functions/NERemap.h index f087bd2e3c..d870ce6e51 100644 --- a/arm_compute/runtime/NEON/functions/NERemap.h +++ b/arm_compute/runtime/NEON/functions/NERemap.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/arm_compute/runtime/NEON/functions/NEReorgLayer.h b/arm_compute/runtime/NEON/functions/NEReorgLayer.h index 19385e1b74..f76d1d252c 100644 --- a/arm_compute/runtime/NEON/functions/NEReorgLayer.h +++ b/arm_compute/runtime/NEON/functions/NEReorgLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -31,6 +31,7 @@ namespace arm_compute { // Forward declarations class ITensor; +class ITensorInfo; /** Basic function to run @ref NEReorgLayerKernel */ class NEReorgLayer : public INESimpleFunctionNoBorder diff --git a/arm_compute/runtime/NEON/functions/NEReshapeLayer.h b/arm_compute/runtime/NEON/functions/NEReshapeLayer.h index 2ca6660139..641a96e0f9 100644 --- a/arm_compute/runtime/NEON/functions/NEReshapeLayer.h +++ b/arm_compute/runtime/NEON/functions/NEReshapeLayer.h @@ -24,7 +24,6 @@ #ifndef ARM_COMPUTE_NERESHAPELAYER_H #define ARM_COMPUTE_NERESHAPELAYER_H -#include "arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/NEON/INEOperator.h" @@ -81,6 +80,18 @@ namespace experimental class NEReshape : public INEOperator { public: + /** Default Constructor */ + NEReshape() = default; + /** Default Destructor */ + ~NEReshape(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEReshape(const NEReshape &) = delete; + /** Default move constructor */ + NEReshape(NEReshape &&); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEReshape &operator=(const NEReshape &) = delete; + /** Default move assignment operator */ + NEReshape &operator=(NEReshape &&); /** Initialise the kernel's inputs and outputs * * @param[in] input Input tensor info. Data type supported: All diff --git a/arm_compute/runtime/NEON/functions/NEReverse.h b/arm_compute/runtime/NEON/functions/NEReverse.h index 7a4566db28..2048dafcb5 100644 --- a/arm_compute/runtime/NEON/functions/NEReverse.h +++ b/arm_compute/runtime/NEON/functions/NEReverse.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -31,6 +31,7 @@ namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to run @ref NEReverseKernel */ class NEReverse : public INESimpleFunctionNoBorder diff --git a/arm_compute/runtime/NEON/functions/NEScale.h b/arm_compute/runtime/NEON/functions/NEScale.h index 4063e558a6..fceda83510 100644 --- a/arm_compute/runtime/NEON/functions/NEScale.h +++ b/arm_compute/runtime/NEON/functions/NEScale.h @@ -24,7 +24,7 @@ #ifndef ARM_COMPUTE_NESCALEIMAGE_H #define ARM_COMPUTE_NESCALEIMAGE_H -#include "arm_compute/core/NEON/kernels/NEScaleKernel.h" +#include "arm_compute/core/KernelDescriptors.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" #include "arm_compute/runtime/Tensor.h" diff --git a/arm_compute/runtime/NEON/functions/NESelect.h b/arm_compute/runtime/NEON/functions/NESelect.h index 258ac5d64d..c66fbfa7d4 100644 --- a/arm_compute/runtime/NEON/functions/NESelect.h +++ b/arm_compute/runtime/NEON/functions/NESelect.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,15 +25,16 @@ #define ARM_COMPUTE_NESELECT_H #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { // Forward declarations class ITensor; +class ITensorInfo; /** Basic function to run @ref NESelect */ -class NESelect : public INESimpleFunction +class NESelect : public INESimpleFunctionNoBorder { public: /** Initialise the kernel's inputs and output. diff --git a/arm_compute/runtime/NEON/functions/NESobel3x3.h b/arm_compute/runtime/NEON/functions/NESobel3x3.h index 4dbdfd223b..a0b8f6296f 100644 --- a/arm_compute/runtime/NEON/functions/NESobel3x3.h +++ b/arm_compute/runtime/NEON/functions/NESobel3x3.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. 
+ * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/arm_compute/runtime/NEON/functions/NESobel5x5.h b/arm_compute/runtime/NEON/functions/NESobel5x5.h index b5365bc1b7..b17f9e7972 100644 --- a/arm_compute/runtime/NEON/functions/NESobel5x5.h +++ b/arm_compute/runtime/NEON/functions/NESobel5x5.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,8 +24,6 @@ #ifndef ARM_COMPUTE_NESOBEL5x5_H #define ARM_COMPUTE_NESOBEL5x5_H -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/NEON/kernels/NESobel5x5Kernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" @@ -38,6 +36,9 @@ namespace arm_compute { class ITensor; +class NESobel5x5HorKernel; +class NESobel5x5VertKernel; +class NEFillBorderKernel; /** Basic function to execute sobel 5x5 filter. This function calls the following NEON kernels: * @@ -51,6 +52,16 @@ class NESobel5x5 : public IFunction public: /** Default constructor */ NESobel5x5(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESobel5x5(const NESobel5x5 &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESobel5x5 &operator=(const NESobel5x5 &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NESobel5x5(NESobel5x5 &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NESobel5x5 &operator=(NESobel5x5 &&) = delete; + /** Default destructor */ + ~NESobel5x5(); /** Initialise the function's source, destinations and border mode. * * @note At least one of output_x or output_y must be not NULL. 
@@ -68,12 +79,12 @@ public: void run() override; protected: - MemoryGroup _memory_group; /**< Function memory group */ - NESobel5x5HorKernel _sobel_hor; /**< Sobel Horizontal 5x5 kernel */ - NESobel5x5VertKernel _sobel_vert; /**< Sobel Vertical 5x5 kernel */ - Tensor _tmp_x; /**< Temporary buffer for Sobel X */ - Tensor _tmp_y; /**< Temporary buffer for Sobel Y */ - NEFillBorderKernel _border_handler; /**< Kernel to handle tensor borders */ + MemoryGroup _memory_group; /**< Function memory group */ + std::unique_ptr _sobel_hor; /**< Sobel Horizontal 5x5 kernel */ + std::unique_ptr _sobel_vert; /**< Sobel Vertical 5x5 kernel */ + Tensor _tmp_x; /**< Temporary buffer for Sobel X */ + Tensor _tmp_y; /**< Temporary buffer for Sobel Y */ + std::unique_ptr _border_handler; /**< Kernel to handle tensor borders */ }; } #endif /*ARM_COMPUTE_NESOBEL5x5_H */ diff --git a/arm_compute/runtime/NEON/functions/NESobel7x7.h b/arm_compute/runtime/NEON/functions/NESobel7x7.h index 925444d85b..cd0510a997 100644 --- a/arm_compute/runtime/NEON/functions/NESobel7x7.h +++ b/arm_compute/runtime/NEON/functions/NESobel7x7.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,8 +24,6 @@ #ifndef ARM_COMPUTE_NESOBEL7x7_H #define ARM_COMPUTE_NESOBEL7x7_H -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/NEON/kernels/NESobel7x7Kernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" @@ -38,6 +36,9 @@ namespace arm_compute { class ITensor; +class NESobel7x7HorKernel; +class NESobel7x7VertKernel; +class NEFillBorderKernel; /** Basic function to execute sobel 7x7 filter. 
This function calls the following NEON kernels: * @@ -51,6 +52,16 @@ class NESobel7x7 : public IFunction public: /** Default constructor */ NESobel7x7(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESobel7x7(const NESobel7x7 &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESobel7x7 &operator=(const NESobel7x7 &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NESobel7x7(NESobel7x7 &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NESobel7x7 &operator=(NESobel7x7 &&) = delete; + /** Default destructor */ + ~NESobel7x7(); /** Initialise the function's source, destinations and border mode. * * @note At least one of output_x or output_y must be not NULL. @@ -68,12 +79,12 @@ public: void run() override; protected: - MemoryGroup _memory_group; /**< Function memory group */ - NESobel7x7HorKernel _sobel_hor; /**< Sobel Horizontal 7x7 kernel */ - NESobel7x7VertKernel _sobel_vert; /**< Sobel Vertical 7x7 kernel */ - Tensor _tmp_x; /**< Temporary buffer for Sobel X */ - Tensor _tmp_y; /**< Temporary buffer for Sobel Y */ - NEFillBorderKernel _border_handler; /**< Kernel to handle tensor borders */ + MemoryGroup _memory_group; /**< Function memory group */ + std::unique_ptr _sobel_hor; /**< Sobel Horizontal 7x7 kernel */ + std::unique_ptr _sobel_vert; /**< Sobel Vertical 7x7 kernel */ + Tensor _tmp_x; /**< Temporary buffer for Sobel X */ + Tensor _tmp_y; /**< Temporary buffer for Sobel Y */ + std::unique_ptr _border_handler; /**< Kernel to handle tensor borders */ }; } #endif /*ARM_COMPUTE_NESOBEL7x7_H */ diff --git a/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h b/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h index 20b20201d5..40fa38afde 100644 --- 
a/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h +++ b/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h @@ -24,16 +24,19 @@ #ifndef ARM_COMPUTE_NESOFTMAXLAYER_H #define ARM_COMPUTE_NESOFTMAXLAYER_H -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/MemoryGroup.h" #include "arm_compute/runtime/NEON/functions/NEPermute.h" #include "arm_compute/runtime/Tensor.h" +#include namespace arm_compute { class ITensor; +class NELogits1DMaxKernel; +template +class NELogits1DSoftmaxKernel; +class NEFillBorderKernel; /** Basic function to compute a SoftmaxLayer and a Log SoftmaxLayer. * @@ -64,6 +67,8 @@ public: NESoftmaxLayerGeneric &operator=(const NESoftmaxLayerGeneric &) = delete; /** Default move assignment operator */ NESoftmaxLayerGeneric &operator=(NESoftmaxLayerGeneric &&) = default; + /** Default destructor */ + ~NESoftmaxLayerGeneric(); /** Set the input and output tensors. * * @param[in,out] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. 
If the width is not a @@ -91,17 +96,17 @@ public: void run() override; private: - MemoryGroup _memory_group; - NEPermute _permute_input; - NEPermute _permute_output; - NELogits1DMaxKernel _max_kernel; - NELogits1DSoftmaxKernel _softmax_kernel; - NEFillBorderKernel _fill_border_kernel; - Tensor _max; - Tensor _tmp; - Tensor _input_permuted; - Tensor _output_permuted; - bool _needs_permute; + MemoryGroup _memory_group; + NEPermute _permute_input; + NEPermute _permute_output; + std::unique_ptr _max_kernel; + std::unique_ptr> _softmax_kernel; + std::unique_ptr _fill_border_kernel; + Tensor _max; + Tensor _tmp; + Tensor _input_permuted; + Tensor _output_permuted; + bool _needs_permute; }; using NESoftmaxLayer = NESoftmaxLayerGeneric; diff --git a/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h b/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h index 6f339e8d52..6df06e87ec 100644 --- a/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h +++ b/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -26,13 +26,15 @@ #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/NEON/kernels/NEMemsetKernel.h" -#include "arm_compute/core/NEON/kernels/NESpaceToBatchLayerKernel.h" #include "arm_compute/core/Types.h" +#include namespace arm_compute { class ITensor; +class ITensorInfo; +class NESpaceToBatchLayerKernel; +class NEMemsetKernel; /** Basic function to spatial divide a tensor. This function calls the following NEON kernels/functions: * @@ -53,7 +55,7 @@ public: /** Allow instances of this class to be moved */ NESpaceToBatchLayer &operator=(NESpaceToBatchLayer &&) = default; /** Default destructor */ - virtual ~NESpaceToBatchLayer() = default; + ~NESpaceToBatchLayer(); /** Set the input and output tensors. * * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. 
@@ -99,9 +101,9 @@ public: void run() override; private: - NESpaceToBatchLayerKernel _space_to_batch_kernel; /**< SpaceToBatch kernel to run */ - NEMemsetKernel _memset_kernel; /**< Memset kernel to run */ - bool _has_padding; /**< Flag to check if the output has padding */ + std::unique_ptr _space_to_batch_kernel; /**< SpaceToBatch kernel to run */ + std::unique_ptr _memset_kernel; /**< Memset kernel to run */ + bool _has_padding; /**< Flag to check if the output has padding */ }; } // namespace arm_compute #endif /* ARM_COMPUTE_NESPACETOBATCHLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h b/arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h index 16a9c80d44..1e7aae215d 100644 --- a/arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h +++ b/arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,15 +24,16 @@ #ifndef ARM_COMPUTE_NESPACETODEPTHLAYER_H #define ARM_COMPUTE_NESPACETODEPTHLAYER_H +#include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/NEON/kernels/NEMemsetKernel.h" -#include "arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernel.h" -#include "arm_compute/core/Types.h" +#include namespace arm_compute { class ITensor; +class ITensorInfo; +class NESpaceToDepthLayerKernel; /** This function calls the following NEON kernels/functions: * @@ -52,7 +53,7 @@ public: /** Allow instances of this class to be moved */ NESpaceToDepthLayer &operator=(NESpaceToDepthLayer &&) = default; /** Default destructor */ - virtual ~NESpaceToDepthLayer() = default; + ~NESpaceToDepthLayer(); /** Set the input and output tensors. * * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. 
@@ -74,7 +75,7 @@ public: void run() override; private: - NESpaceToDepthLayerKernel _space_to_depth_kernel; /**< SpaceToDepth kernel to run */ + std::unique_ptr _space_to_depth_kernel; /**< SpaceToDepth kernel to run */ }; } // namespace arm_compute #endif /* ARM_COMPUTE_NESPACETODEPTHLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEStackLayer.h b/arm_compute/runtime/NEON/functions/NEStackLayer.h index 4180b6da08..f6fa4f2eb3 100644 --- a/arm_compute/runtime/NEON/functions/NEStackLayer.h +++ b/arm_compute/runtime/NEON/functions/NEStackLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -27,14 +27,14 @@ #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/NEON/kernels/NEStackLayerKernel.h" - #include #include namespace arm_compute { class ITensor; +class ITensorInfo; +class NEStackLayerKernel; /** Basic function to stack tensors along an axis. This function calls the following kernel: * @@ -46,6 +46,16 @@ class NEStackLayer : public IFunction public: /** Default constructor */ NEStackLayer(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEStackLayer(const NEStackLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEStackLayer &operator=(const NEStackLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEStackLayer(NEStackLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEStackLayer &operator=(NEStackLayer &&) = delete; + /** Default destructor */ + ~NEStackLayer(); /** Initialise the kernel's inputs vector and output. 
* * @note Supported input tensor rank: up to 4 @@ -73,9 +83,9 @@ public: void run() override; private: - std::vector _input; - std::vector _stack_kernels; - unsigned int _num_inputs; + std::vector _input; + std::vector> _stack_kernels; + unsigned int _num_inputs; }; } // namespace arm_compute #endif /* ARM_COMPUTE_NESTACKLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NETableLookup.h b/arm_compute/runtime/NEON/functions/NETableLookup.h index fb08274761..03674cd297 100644 --- a/arm_compute/runtime/NEON/functions/NETableLookup.h +++ b/arm_compute/runtime/NEON/functions/NETableLookup.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/arm_compute/runtime/NEON/functions/NEThreshold.h b/arm_compute/runtime/NEON/functions/NEThreshold.h index cb9b696769..0a9edfc7ca 100644 --- a/arm_compute/runtime/NEON/functions/NEThreshold.h +++ b/arm_compute/runtime/NEON/functions/NEThreshold.h @@ -34,6 +34,7 @@ namespace arm_compute { // Forward declarations class ITensor; +class ITensorInfo; /** Basic function to run @ref NEThresholdKernel */ class NEThreshold : public INESimpleFunctionNoBorder diff --git a/arm_compute/runtime/NEON/functions/NETile.h b/arm_compute/runtime/NEON/functions/NETile.h index 53a94db583..d5ce76c9cf 100644 --- a/arm_compute/runtime/NEON/functions/NETile.h +++ b/arm_compute/runtime/NEON/functions/NETile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -31,6 +31,7 @@ namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to run @ref NETileKernel */ class NETile : public INESimpleFunctionNoBorder diff --git a/arm_compute/runtime/NEON/functions/NETranspose.h b/arm_compute/runtime/NEON/functions/NETranspose.h index 1169459f0f..2651bdd727 100644 --- a/arm_compute/runtime/NEON/functions/NETranspose.h +++ b/arm_compute/runtime/NEON/functions/NETranspose.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -30,6 +30,7 @@ namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to transpose a matrix on NEON. This function calls the following NEON kernel: * diff --git a/arm_compute/runtime/NEON/functions/NEUnstack.h b/arm_compute/runtime/NEON/functions/NEUnstack.h index 2e3a679664..c8e85115f7 100644 --- a/arm_compute/runtime/NEON/functions/NEUnstack.h +++ b/arm_compute/runtime/NEON/functions/NEUnstack.h @@ -45,6 +45,16 @@ class NEUnstack : public IFunction public: /** Default constructor */ NEUnstack(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEUnstack(const NEUnstack &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEUnstack &operator=(const NEUnstack &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEUnstack(NEUnstack &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEUnstack &operator=(NEUnstack &&) = delete; + /** Default destructor */ + ~NEUnstack() = default; /** Set the input, output and unstacking axis. * * @param[in] input A tensor to be unstacked. Data type supported: All. 
diff --git a/arm_compute/runtime/NEON/functions/NEUpsampleLayer.h b/arm_compute/runtime/NEON/functions/NEUpsampleLayer.h index f9145f1612..168845d203 100644 --- a/arm_compute/runtime/NEON/functions/NEUpsampleLayer.h +++ b/arm_compute/runtime/NEON/functions/NEUpsampleLayer.h @@ -24,15 +24,17 @@ #ifndef ARM_COMPUTE_NEUPSAMPLELAYER_H #define ARM_COMPUTE_NEUPSAMPLELAYER_H -#include "arm_compute/core/NEON/kernels/NEUpsampleLayerKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/NEON/NEScheduler.h" #include "arm_compute/runtime/Tensor.h" +#include + namespace arm_compute { class ITensor; +class NEUpsampleLayerKernel; /** Function to run upsample layer */ class NEUpsampleLayer : public IFunction @@ -40,6 +42,16 @@ class NEUpsampleLayer : public IFunction public: /** Constructor */ NEUpsampleLayer(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEUpsampleLayer(const NEUpsampleLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEUpsampleLayer &operator=(const NEUpsampleLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEUpsampleLayer(NEUpsampleLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEUpsampleLayer &operator=(NEUpsampleLayer &&) = delete; + /** Default destructor */ + ~NEUpsampleLayer(); /** Set the input output tensors. * * @param[in] input Source tensor. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. 
@@ -66,8 +78,8 @@ public: void run() override; private: - NEUpsampleLayerKernel _kernel; - DataLayout _data_layout; + std::unique_ptr _kernel; + DataLayout _data_layout; }; } // arm_compute #endif /* ARM_COMPUTE_NEUPSAMPLELAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEWarpAffine.h b/arm_compute/runtime/NEON/functions/NEWarpAffine.h index eb7492b71f..6b9a2f4354 100644 --- a/arm_compute/runtime/NEON/functions/NEWarpAffine.h +++ b/arm_compute/runtime/NEON/functions/NEWarpAffine.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/arm_compute/runtime/NEON/functions/NEWarpPerspective.h b/arm_compute/runtime/NEON/functions/NEWarpPerspective.h index c439e82db5..caa91db64e 100644 --- a/arm_compute/runtime/NEON/functions/NEWarpPerspective.h +++ b/arm_compute/runtime/NEON/functions/NEWarpPerspective.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h index 4090c8c409..6b61e7031b 100644 --- a/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h @@ -26,7 +26,6 @@ #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CPP/functions/CPPPermute.h" #include "arm_compute/runtime/MemoryGroup.h" @@ -41,6 +40,7 @@ namespace arm_compute { // Forward declarations class ITensor; +class ICPPKernel; /** Basic function to simulate a convolution layer. 
This function calls the following NEON kernels: * -# @ref NEWinogradLayerTransformWeightsKernel (executed only once in the first call to the run() method ) @@ -56,6 +56,12 @@ class NEWinogradConvolutionLayer : public IFunction public: /** Constructor */ NEWinogradConvolutionLayer(const std::shared_ptr &memory_manager = nullptr); + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEWinogradConvolutionLayer(NEWinogradConvolutionLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEWinogradConvolutionLayer &operator=(NEWinogradConvolutionLayer &&) = delete; + /** Default destructor */ + ~NEWinogradConvolutionLayer() = default; /** Set the input and output tensors. * @@ -105,12 +111,12 @@ public: NEWinogradConvolutionLayer &operator=(const NEWinogradConvolutionLayer &) = delete; private: - MemoryGroup _memory_group; - NEGEMM _gemm_function; - std::unique_ptr _transform_input_kernel; - std::unique_ptr _transform_output_kernel; - std::unique_ptr _transform_weights_kernel; - NEActivationLayer _activationlayer_function; + MemoryGroup _memory_group; + NEGEMM _gemm_function; + std::unique_ptr _transform_input_kernel; + std::unique_ptr _transform_output_kernel; + std::unique_ptr _transform_weights_kernel; + NEActivationLayer _activationlayer_function; CPPPermute _permute_input; CPPPermute _permute_weights; diff --git a/arm_compute/runtime/NEON/functions/NEYOLOLayer.h b/arm_compute/runtime/NEON/functions/NEYOLOLayer.h index 88219602c1..4c9a5bf6e4 100644 --- a/arm_compute/runtime/NEON/functions/NEYOLOLayer.h +++ b/arm_compute/runtime/NEON/functions/NEYOLOLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -26,12 +26,12 @@ #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" -#include "arm_compute/core/NEON/kernels/NEYOLOLayerKernel.h" #include "arm_compute/core/Types.h" namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to run @ref NEYOLOLayerKernel */ class NEYOLOLayer : public INESimpleFunctionNoBorder -- cgit v1.2.1